From fcbed4f8061d968bae3ec386d97e95826846bdc9 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Sun, 24 Jul 2005 06:51:14 +0000 Subject: [PATCH] Assorted improvements --- maintenance/dumpHTML.inc | 96 +++++++++++++++++++++++++++++++--------- maintenance/dumpHTML.php | 38 ++++++++++++++-- 2 files changed, 109 insertions(+), 25 deletions(-) diff --git a/maintenance/dumpHTML.inc b/maintenance/dumpHTML.inc index baba866e88..d508e387a0 100644 --- a/maintenance/dumpHTML.inc +++ b/maintenance/dumpHTML.inc @@ -10,12 +10,32 @@ require_once( 'includes/ImagePage.php' ); require_once( 'includes/CategoryPage.php' ); class DumpHTML { - var $dest, $interwiki, $depth, $sharedStaticPath; - - function DumpHTML( $dest, $interwiki = true, $depth = 3 ) { - $this->dest = $dest; - $this->interwiki = $interwiki; - $this->depth = $depth; + # Destination directory + var $dest; + + # Show interlanguage links? + var $interwiki = true; + + # Depth of HTML directory tree + var $depth = 3; + + # Directory that commons images are copied into + var $sharedStaticPath; + + # Relative path to image directory + var $imageRel = 'upload'; + + # Copy commons images instead of symlinking + var $forceCopy = false; + + # Make links assuming the script path is in the same directory as + # the destination + var $alternateScriptPath = false; + + function DumpHTML( $settings ) { + foreach ( $settings as $var => $value ) { + $this->$var = $value; + } } /** @@ -35,16 +55,17 @@ class DumpHTML { for ($id = $start; $id <= $end; $id++) { if ( !($id % REPORTING_INTERVAL) ) { - print "Processing ID: $id".chr(13); + print "Processing ID: $id\r"; } $title = Title::newFromID( $id ); if ( $title ) { $ns = $title->getNamespace() ; - if ( $ns != NS_CATEGORY && $ns != NS_IMAGE ) { + if ( $ns != NS_CATEGORY ) { $this->doArticle( $title ); } } } + print "\n"; } function doSpecials() { @@ -96,10 +117,11 @@ class DumpHTML { $res = $dbr->select( 'image', array( 'img_name' ), false, $fname ); $i = 0; - print "Writing " . 
$dbr->numRows( $res ) . " image description pages for local images\n"; + print "Writing image description pages for local images\n"; + $num = $dbr->numRows( $res ); while ( $row = $dbr->fetchObject( $res ) ) { if ( !( ++$i % REPORTING_INTERVAL ) ) { - print "$i\t{$row->img_name}\n"; + print "Done $i of $num\r"; } $title = Title::makeTitle( NS_IMAGE, $row->img_name ); if ( $title->getArticleID() ) { @@ -108,6 +130,8 @@ class DumpHTML { } $this->doArticle( $title ); } + print "\n"; + /** * Dump images which only have a real description page on commons */ @@ -121,13 +145,14 @@ class DumpHTML { foreach ( $paths as $path ) { $file = basename( $path ); if ( !(++$i % REPORTING_INTERVAL ) ) { - print "$i\t$file\n"; + print "$i\r"; } $title = Title::makeTitle( NS_IMAGE, $file ); $this->doArticle( $title ); } } + print "\n"; } function doCategories() { @@ -144,11 +169,12 @@ class DumpHTML { $i = 0; while ( $row = $dbr->fetchObject( $res ) ) { if ( !(++$i % REPORTING_INTERVAL ) ) { - print "$i\t{$row->cl_to}\n"; + print "$i\r"; } $title = Title::makeTitle( NS_CATEGORY, $row->cl_to ); $this->doArticle( $title ); } + print "\n"; } @@ -177,7 +203,7 @@ class DumpHTML { $fullDir = dirname( $fullName ); wfMkdirParents( $fullDir, 0755 ); - + $file = fopen( $fullName, 'w' ); if ( !$file ) { print("Can't open file $fullName for writing\n"); @@ -194,6 +220,8 @@ class DumpHTML { global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath; global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath; global $wgSharedThumbnailScriptPath, $wgEnableParserCache; + + static $oldLogo = NULL; if ( is_null( $depth ) ) { $wgMakeDumpLinks = $this->depth; @@ -201,12 +229,38 @@ class DumpHTML { $wgMakeDumpLinks = $depth; } - $wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks ); + if ( $this->alternateScriptPath ) { + if ( $wgMakeDumpLinks == 0 ) { + $wgScriptPath = '.'; + } else { + $wgScriptPath = '..' . 
str_repeat( '/..', $wgMakeDumpLinks - 1 ); + } + } else { + $wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks ); + } + $wgArticlePath = str_repeat( '../', $wgMakeDumpLinks ) . '$1'; + + # Logo image + # Allow for repeated setup + if ( !is_null( $oldLogo ) ) { + $wgLogo = $oldLogo; + } else { + $oldLogo = $wgLogo; + } + + if ( strpos( $wgLogo, $wgUploadPath ) === 0 ) { + # If it's in the upload directory, rewrite it to the new upload directory + $wgLogo = "$wgScriptPath/{$this->imageRel}/" . substr( $wgLogo, strlen( $wgUploadPath ) + 1 ); + } elseif ( $wgLogo{0} == '/' ) { + # This is basically heuristic + # Rewrite an absolute logo path to one relative to the script path + $wgLogo = $wgScriptPath . $wgLogo; + } + $wgStylePath = "$wgScriptPath/skins"; - $wgUploadPath = "$wgScriptPath/images"; + $wgUploadPath = "$wgScriptPath/{$this->imageRel}"; $wgSharedUploadPath = "$wgUploadPath/shared"; - $wgLogo = "$wgStylePath/common/images/wiki.png"; $wgMaxCredits = -1; $wgHideInterlangageLinks = !$this->interwiki; $wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false; @@ -227,7 +281,7 @@ class DumpHTML { $wgOut = new OutputPage; $wgOut->setParserOptions( new ParserOptions ); - $wgTitle =& $title; + $wgTitle = $title; if ( is_null( $wgTitle ) ) { return false; } @@ -289,8 +343,8 @@ class DumpHTML { # Copy to static directory if ( !file_exists( $staticLoc ) ) { wfMkdirParents( dirname( $staticLoc ), 0755 ); - if ( function_exists( 'symlink' ) ) { - symlink( $staticLoc, $sourceLoc ); + if ( function_exists( 'symlink' ) && !$this->forceCopy ) { + symlink( $sourceLoc, $staticLoc ); } 
else { copy( $sourceLoc, $staticLoc ); } diff --git a/maintenance/dumpHTML.php b/maintenance/dumpHTML.php index 4bdb424fd1..bfde002d96 100644 --- a/maintenance/dumpHTML.php +++ b/maintenance/dumpHTML.php @@ -5,7 +5,20 @@ * @subpackage Maintenance */ -/** */ +/** + * Usage: + * php dumpHTML.php [options...] + * + * -d destination directory + * -s start ID + * -e end ID + * --images only do image description pages + * --categories only do category pages + * --special only do miscellaneous stuff + * --force-copy copy commons instead of symlink, needed for Wikimedia + * --interlang allow interlanguage links + */ + $optionsWithArgs = array( 's', 'd', 'e' ); @@ -34,7 +47,13 @@ if ( !empty( $options['d'] ) ) { $dest = 'static'; } -$d = new DumpHTML( $dest, true, 3 ); +$d = new DumpHTML( array( + 'dest' => $dest, + 'forceCopy' => $options['force-copy'], + 'alternateScriptPath' => $options['interlang'], + 'interwiki' => $options['interlang'], +)); + if ( $options['special'] ) { $d->doSpecials(); @@ -43,17 +62,27 @@ if ( $options['special'] ) { } elseif ( $options['categories'] ) { $d->doCategories(); } else { + print("Creating static HTML dump in directory $dest. \n". + "Starting from page_id $start of $end.\n"); + $d->doArticles( $start, $end ); + $d->doImageDescriptions(); + $d->doCategories(); + $d->doSpecials(); + + /* if ( $end - $start > CHUNK_SIZE * 2 ) { // Split the problem into smaller chunks, run them in different PHP instances // This is a memory/resource leak workaround - print("Creating static HTML dump. Starting from page_id $start of $end.\n"); + print("Creating static HTML dump in directory $dest. \n". + "Starting from page_id $start of $end.\n"); + chdir( "maintenance" ); for ( $chunkStart = $start; $chunkStart < $end; $chunkStart += CHUNK_SIZE ) { $chunkEnd = $chunkStart + CHUNK_SIZE - 1; if ( $chunkEnd > $end ) { $chunkEnd = $end; } - passthru( "php dumpHTML.php -s $chunkStart -e $chunkEnd" ); + passthru( "php dumpHTML.php -d " . 
wfEscapeShellArg( $dest ) . " -s $chunkStart -e $chunkEnd" ); } chdir( ".." ); $d->doImageDescriptions(); @@ -62,6 +91,7 @@ if ( $options['special'] ) { } else { $d->doArticles( $start, $end ); } + */ } exit(); -- 2.20.1