From da81194225bbca09c146f916055d6ce6e06927b0 Mon Sep 17 00:00:00 2001 From: Ariel Glenn Date: Mon, 22 Aug 2011 22:01:32 +0000 Subject: [PATCH] add functions that support close and rename of output files as they are being written, used to write out checkpoint files at regular intervals during XML dump production --- includes/Export.php | 170 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/includes/Export.php b/includes/Export.php index 5242df3915..cc14e41db7 100644 --- a/includes/Export.php +++ b/includes/Export.php @@ -354,6 +354,9 @@ class WikiExporter { * @ingroup Dump */ class XmlDumpWriter { + var $firstPageWritten = 0; + var $lastPageWritten = 0; + var $pageInProgress = 0; /** * Returns the export schema version. @@ -458,6 +461,7 @@ class XmlDumpWriter { $title = Title::makeTitle( $row->page_namespace, $row->page_title ); $out .= ' ' . Xml::elementClean( 'title', array(), $title->getPrefixedText() ) . "\n"; $out .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n"; + $this->pageInProgress = $row->page_id; if ( $row->page_is_redirect ) { $out .= ' ' . Xml::element( 'redirect', array() ) . "\n"; } @@ -478,6 +482,10 @@ class XmlDumpWriter { */ function closePage() { return " \n"; + if (! $this->firstPageWritten) { + $this->firstPageWritten = $this->pageInProgress; + } + $this->lastPageWritten = $this->pageInProgress; } /** @@ -691,6 +699,18 @@ class DumpOutput { function write( $string ) { print $string; } + + function closeRenameAndReopen( $newname ) { + return; + } + + function rename( $newname ) { + return; + } + + function getFilename() { + return NULL; + } } /** @@ -699,14 +719,56 @@ class DumpOutput { */ class DumpFileOutput extends DumpOutput { var $handle; + var $filename; function __construct( $file ) { $this->handle = fopen( $file, "wt" ); + $this->filename = $file; } function write( $string ) { fputs( $this->handle, $string ); } + + /** + * Close the old file, move it to a specified name, + * and reopen new file with the old name. Use this + * for writing out a file in multiple pieces + * at specified checkpoints (e.g. every n hours). + */ + function closeRenameAndReopen( $newname ) { + if ( is_array($newname) ) { + if (count($newname) > 1) { + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); + } + else { + $newname = $newname[0]; + } + } + if ( $newname ) { + fclose( $this->handle ); + rename( $this->filename, $newname ); + $this->handle = fopen( $this->filename, "wt" ); + } + } + + function rename( $newname ) { + if ( is_array($newname) ) { + if (count($newname) > 1) { + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); + } + else { + $newname = $newname[0]; + } + } + if ( $newname ) { + rename( $this->filename, $newname ); + } + } + + function getFilename() { + return $this->filename; + } } /** @@ -716,11 +778,51 @@ class DumpFileOutput extends DumpOutput { * @ingroup Dump */ class DumpPipeOutput extends DumpFileOutput { + var $command; + function __construct( $command, $file = null ) { if ( !is_null( $file ) ) { $command .= " > " . wfEscapeShellArg( $file ); } $this->handle = popen( $command, "w" ); + $this->command = $command; + $this->filename = $file; + } + + /** + * Close the old file, move it to a specified name, + * and reopen new file with the old name. + */ + function closeRenameAndReopen( $newname ) { + if ( is_array($newname) ) { + if (count($newname) > 1) { + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); + } + else { + $newname = $newname[0]; + } + } + if ( $newname ) { + pclose( $this->handle ); + rename( $this->filename, $newname ); + $command = $this->command; + $command .= " > " . wfEscapeShellArg( $this->filename ); + $this->handle = popen( $command, "w" ); + } + } + + function rename( $newname ) { + if ( is_array($newname) ) { + if (count($newname) > 1) { + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); + } + else { + $newname = $newname[0]; + } + } + if ( $newname ) { + rename( $this->filename, $newname ); + } } } @@ -749,12 +851,47 @@ class DumpBZip2Output extends DumpPipeOutput { * @ingroup Dump */ class Dump7ZipOutput extends DumpPipeOutput { + var $filename; + function __construct( $file ) { $command = "7za a -bd -si " . wfEscapeShellArg( $file ); // Suppress annoying useless crap from p7zip // Unfortunately this could suppress real error messages too $command .= ' >' . wfGetNull() . ' 2>&1'; parent::__construct( $command ); + $this->filename = $file; + } + + function closeRenameAndReopen( $newname ) { + if ( is_array($newname) ) { + if (count($newname) > 1) { + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); + } + else { + $newname = $newname[0]; + } + } + if ( $newname ) { + pclose( $this->handle ); + rename( $this->filename, $newname ); + $command = "7za a -bd -si " . wfEscapeShellArg( $file ); + $command .= ' >' . wfGetNull() . ' 2>&1'; + $this->handle = popen( $command, "w" ); + } + } + + function rename( $newname ) { + if ( is_array($newname) ) { + if (count($newname) > 1) { + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); + } + else { + $newname = $newname[0]; + } + } + if ( $newname ) { + rename( $this->filename, $newname ); + } } } @@ -803,6 +940,18 @@ class DumpFilter { $this->sink->writeRevision( $rev, $string ); } + function closeRenameAndReopen( $newname ) { + $this->sink->closeRenameAndReopen( $newname ); + } + + function rename( $newname ) { + $this->sink->rename( $newname ); + } + + function getFilename() { + return $this->sink->getFilename(); + } + /** * Override for page-based filter types. * @return bool @@ -950,6 +1099,27 @@ class DumpMultiWriter { $this->sinks[$i]->writeRevision( $rev, $string ); } } + + function closeRenameAndReopen( $newnames ) { + for( $i = 0; $i < $this->count; $i++ ) { + $this->sinks[$i]->closeRenameAndReopen( $newnames[$i] ); + } + } + + function rename( $newnames ) { + for( $i = 0; $i < $this->count; $i++ ) { + $this->sinks[$i]->rename( $newnames[$i] ); + } + } + + function getFilename() { + $filenames = array(); + for( $i = 0; $i < $this->count; $i++ ) { + $filenames[] = $this->sinks[$i]->getFilename(); + } + return $filenames; + } + } function xmlsafe( $string ) { -- 2.20.1