From: Tim Starling Date: Sun, 27 Mar 2005 16:05:33 +0000 (+0000) Subject: Basic static HTML dump generator, experimental X-Git-Tag: 1.5.0alpha1~488 X-Git-Url: http://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/exercices/modifier.php?a=commitdiff_plain;h=37169d17d74a9bae164204ebf4b9edd27c07f13d;p=lhc%2Fweb%2Fwiklou.git Basic static HTML dump generator, experimental --- diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 4e4aeae39d..61c2c92da4 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -102,6 +102,11 @@ $wgTmpDirectory = "{$wgUploadDirectory}/tmp"; $wgUploadBaseUrl = ""; /**#@-*/ +/** + * Produce hashed HTML article paths. Used internally, do not set. + */ +$wgMakeDumpLinks = false; + /** * To set 'pretty' URL paths for actions other than * plain page views, add to this array. For instance: diff --git a/includes/Title.php b/includes/Title.php index e6454a386c..63390a1ce2 100644 --- a/includes/Title.php +++ b/includes/Title.php @@ -616,10 +616,9 @@ class Title { * @access public */ function getFullURL( $query = '' ) { - global $wgContLang, $wgArticlePath, $wgServer, $wgScript; + global $wgContLang, $wgServer, $wgScript; if ( '' == $this->mInterwiki ) { - $p = $wgArticlePath; return $wgServer . 
$this->getLocalUrl( $query ); } else { $baseUrl = $this->getInterwikiLink( $this->mInterwiki ); @@ -644,6 +643,35 @@ class Title { } } + /** + * Get a relative directory for putting an HTML version of this article into: one level per leading DB-key byte; bytes that are not printable ASCII, or that are path-reserved, are written as two hex digits + */ + function getHashedDirectory() { + $dbkey = $this->getPrefixedDBkey(); + if ( strlen( $dbkey ) < 2 ) { + $dbkey = sprintf( "%2s", $dbkey ); + } + $dir = ''; + for ( $i=0; $i<=1; $i++ ) { + if ( $i ) { + $dir .= '/'; + } + if ( ord( $dbkey[$i] ) < 128 && ord( $dbkey[$i] ) > 32 && strpos( '/\\:*?"<>|', $dbkey[$i] ) === false ) { + $dir .= strtolower( $dbkey[$i] ); + } else { + $dir .= sprintf( "%02X", ord( $dbkey[$i] ) ); + } + } + return $dir; + } + + function getHashedFilename() { + $dbkey = $this->getPrefixedDBkey(); + $dir = $this->getHashedDirectory(); + $friendlyName = strtr( $dbkey, '/\\:*?"<>|', '_________' ); + return "$dir/$friendlyName.html"; + } + /** * Get a URL with no fragment or server name * @param string $query an optional query string; if not specified, @@ -652,14 +680,16 @@ class Title { * @access public */ function getLocalURL( $query = '' ) { - global $wgLang, $wgArticlePath, $wgScript; + global $wgLang, $wgArticlePath, $wgScript, $wgMakeDumpLinks; if ( $this->isExternal() ) { return $this->getFullURL(); } - + $dbkey = wfUrlencode( $this->getPrefixedDBkey() ); - if ( $query == '' ) { + if ( $wgMakeDumpLinks ) { + $url = str_replace( '$1', wfUrlencode( $this->getHashedFilename() ), $wgArticlePath ); + } elseif ( $query == '' ) { $url = str_replace( '$1', $dbkey, $wgArticlePath ); } else { if( preg_match( '/^(.*&|)action=([^&]*)(&(.*)|)$/', $query, $matches ) ) { diff --git a/maintenance/dumpHTML.inc b/maintenance/dumpHTML.inc new file mode 100644 index 0000000000..7c145a9cda --- /dev/null +++ b/maintenance/dumpHTML.inc @@ -0,0 +1,107 @@ +selectField( 'cur', 'max(cur_id)', false ); + + /*global $wgValidSkinNames; + var_dump( $wgValidSkinNames ); + exit;*/ + + print("Creating static HTML dump. 
Starting from cur_id $start of $end.\n"); + + $wgUser = new User; + $wgUser->setOption( 'skin', 'htmldump' ); + $sk =& $wgUser->getSkin(); + + if ( !is_dir( $dest ) ) { + if ( !mkdir( $dest, 0755 ) ) { + print("Can't make directory $dest, exiting\n"); + return; + } + } + + for ($id = $start; $id <= $end; $id++) { + if ( !($id % REPORTING_INTERVAL) ) { + print("$id\n"); + } + + $wgOut = new OutputPage; + $wgOut->setArticleFlag( true ); + $wgOut->setRobotpolicy( 'index,follow' ); + + $wgTitle = Title::newFromID( $id ); + if ( is_null( $wgTitle ) ) { + continue; + } + + $wgArticle = new Article( $wgTitle ); + $text = $wgArticle->getContent( true ); + $wgLinkCache = new LinkCache; + $wgLinkCache->forUpdate( true ); + + global $wgLinkHolders; + $wgLinkHolders = array( + 'namespaces' => array(), + 'dbkeys' => array(), + 'queries' => array(), + 'texts' => array(), + 'titles' => array() + ); + + + # Parse the text and replace links with placeholders + $wgOut->setPageTitle( $wgTitle->getPrefixedText() ); + $wgOut->addWikiText( $text ); + $wgOut->transformBuffer(); + + # Execute skin to get complete HTML + ob_start(); + $sk->outputPage( $wgOut ); + $text = ob_get_contents(); + ob_end_clean(); + + # Write to file (directories are created 0755: the search bit is needed to create entries inside them) + $fname = $wgTitle->getHashedFilename(); + $bits = explode( '/', $fname ); + $parentDir = "$dest/{$bits[0]}"; + $fullDir = "$dest/{$bits[0]}/{$bits[1]}"; + $fullName = "$dest/$fname"; + + + if ( !is_dir( $parentDir ) ) { + if ( !mkdir( $parentDir, 0755 ) ) { + print("Can't write to directory $parentDir\n"); + return; + } + } + if ( !is_dir( $fullDir ) ) { + if ( !mkdir( $fullDir, 0755 ) ) { + print("Can't write to directory $fullDir\n"); + return; + } + } + + $file = fopen( $fullName, 'w' ); + if ( !$file ) { + print("Can't open file $fullName for writing\n"); + return; + } + + fwrite( $file, $text ); + fclose( $file ); + } +} + +# vim: syn=php +?> diff --git a/maintenance/dumpHTML.php b/maintenance/dumpHTML.php new file mode 100644 index 0000000000..6ba3fc58c9 --- 
/dev/null +++ b/maintenance/dumpHTML.php @@ -0,0 +1,34 @@ +