From 9ab23190d05e73253b8ab5033ea6f51142c85a12 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Sat, 8 Oct 2005 11:13:03 +0000 Subject: [PATCH] assorted improvements --- maintenance/dumpHTML.inc | 38 ++++++++++++++++++++++++++++++++++---- maintenance/dumpHTML.php | 32 ++++++++++++++++++++++++++++---- 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/maintenance/dumpHTML.inc b/maintenance/dumpHTML.inc index d508e387a0..be9f4b414a 100644 --- a/maintenance/dumpHTML.inc +++ b/maintenance/dumpHTML.inc @@ -54,9 +54,13 @@ class DumpHTML { for ($id = $start; $id <= $end; $id++) { + wfWaitForSlaves( 20 ); if ( !($id % REPORTING_INTERVAL) ) { print "Processing ID: $id\r"; } + if ( !($id % (REPORTING_INTERVAL*10) ) ) { + print "\n"; + } $title = Title::newFromID( $id ); if ( $title ) { $ns = $title->getNamespace() ; @@ -120,6 +124,7 @@ class DumpHTML { print "Writing image description pages for local images\n"; $num = $dbr->numRows( $res ); while ( $row = $dbr->fetchObject( $res ) ) { + wfWaitForSlaves( 10 ); if ( !( ++$i % REPORTING_INTERVAL ) ) { print "Done $i of $num\r"; } @@ -168,6 +173,7 @@ class DumpHTML { print "\nWriting " . $dbr->numRows( $res ). " category pages\n"; $i = 0; while ( $row = $dbr->fetchObject( $res ) ) { + wfWaitForSlaves( 10 ); if ( !(++$i % REPORTING_INTERVAL ) ) { print "$i\r"; } @@ -265,6 +271,7 @@ class DumpHTML { $wgHideInterlangageLinks = !$this->interwiki; $wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false; $wgEnableParserCache = false; + $wgMathPath = "$wgScriptPath/math"; $wgUser = new User; $wgUser->setOption( 'skin', 'htmldump' ); @@ -276,10 +283,11 @@ class DumpHTML { /** Reads the content of a title object, executes the skin and captures the result */ function getArticleHTML( &$title ) { - global $wgOut, $wgTitle, $wgArticle, $wgUser, $wgUseCategoryMagic; + global $wgOut, $wgTitle, $wgArticle, $wgUser, $wgUseCategoryMagic, $wgLinkCache; $wgOut = new OutputPage; $wgOut->setParserOptions( new ParserOptions ); + $wgLinkCache = new LinkCache; $wgTitle = $title; if ( is_null( $wgTitle ) ) { @@ -297,7 +305,14 @@ class DumpHTML { } else { $wgArticle = new Article( $wgTitle ); } - $wgArticle->view(); + $rt = Title::newFromRedirect( $wgArticle->fetchContent() ); + if ( $rt != NULL ) { + $wgOut->addMeta( 'http:Refresh', '3;url=' . $rt->escapeLocalURL() ); + $wgOut->setPageTitle( $wgTitle->getPrefixedText() ); + $wgOut->addWikiText( wfMsg( 'redirectingto', $rt->getPrefixedText() ) ); + } else { + $wgArticle->view(); + } } $sk =& $wgUser->getSkin(); @@ -327,12 +342,18 @@ class DumpHTML { * This is necessary even if you intend to distribute all of commons, because * the directory contents is used to work out which image description pages * are needed. + * + * Also copies math images + * */ function copyImages( $images ) { - global $wgSharedUploadPath, $wgSharedUploadDirectory; + global $wgSharedUploadPath, $wgSharedUploadDirectory, $wgMathPath, $wgMathDirectory; # Find shared uploads and copy them into the static directory $sharedPathLength = strlen( $wgSharedUploadPath ); - foreach ( $images as $image => $dummy ) { + $mathPathLength = strlen( $wgMathPath ); + foreach ( $images as $escapedImage => $dummy ) { + $image = urldecode( $escapedImage ); + # Is it shared? if ( substr( $image, 0, $sharedPathLength ) == $wgSharedUploadPath ) { # Reconstruct full filename @@ -367,6 +388,15 @@ class DumpHTML { } } } + } else + # Is it math? + if ( substr( $image, 0, $mathPathLength ) == $wgMathPath ) { + $rel = substr( $image, $mathPathLength + 1 ); // +1 for slash + $source = "$wgMathDirectory/$rel"; + $dest = "{$this->dest}/math/$rel"; + if ( !file_exists( $dest ) ) { + copy( $source, $dest ); + } } } } diff --git a/maintenance/dumpHTML.php b/maintenance/dumpHTML.php index bfde002d96..8f52f1cd63 100644 --- a/maintenance/dumpHTML.php +++ b/maintenance/dumpHTML.php @@ -22,6 +22,18 @@ $optionsWithArgs = array( 's', 'd', 'e' ); +$profiling = false; + +if ( $profiling ) { + define( 'MW_CMDLINE_CALLBACK', 'wfSetupDump' ); + function wfSetupDump() { + global $wgProfiling, $wgProfileToDatabase, $wgProfileSampleRate; + $wgProfiling = true; + $wgProfileToDatabase = false; + $wgProfileSampleRate = 1; + } +} + require_once( "commandLine.inc" ); require_once( "dumpHTML.inc" ); @@ -64,10 +76,16 @@ if ( $options['special'] ) { } else { print("Creating static HTML dump in directory $dest. \n". "Starting from page_id $start of $end.\n"); + + $dbr =& wfGetDB( DB_SLAVE ); + print "Using database {$dbr->mServer}\n"; + $d->doArticles( $start, $end ); - $d->doImageDescriptions(); - $d->doCategories(); - $d->doSpecials(); + if ( !isset( $options['e'] ) ) { + $d->doImageDescriptions(); + $d->doCategories(); + $d->doSpecials(); + } /* if ( $end - $start > CHUNK_SIZE * 2 ) { @@ -94,6 +112,12 @@ if ( $options['special'] ) { */ } -exit(); +if ( isset( $options['debug'] ) ) { + print_r($GLOBALS); +} + +if ( $profiling ) { + echo $wgProfiler->getOutput(); +} ?> -- 2.20.1