From: Chad Horohoe Date: Mon, 8 Jan 2018 21:10:35 +0000 (-0800) Subject: Move BaseDump into includes/export/ X-Git-Tag: 1.31.0-rc.0~960^2 X-Git-Url: http://git.cyclocoop.org/%22%20.%20generer_url_ecrire%28%22auteur_infos%22%2C%20%22id_auteur=%24id%22%29%20.%20%22?a=commitdiff_plain;h=93d44c9a4209457ca55a39b9649661f022456dd2;p=lhc%2Fweb%2Fwiklou.git Move BaseDump into includes/export/ There's no reason for this to have to live in Maintenance land. It's generally useful and lets us avoid some random require/include calls Change-Id: I60419c7f9fc52313905053bbeb3aa81666c9160c --- diff --git a/autoload.php b/autoload.php index 351136dc3e..058d4f6442 100644 --- a/autoload.php +++ b/autoload.php @@ -175,7 +175,7 @@ $wgAutoloadLocalClasses = [ 'BadRequestError' => __DIR__ . '/includes/exception/BadRequestError.php', 'BadTitleError' => __DIR__ . '/includes/exception/BadTitleError.php', 'BagOStuff' => __DIR__ . '/includes/libs/objectcache/BagOStuff.php', - 'BaseDump' => __DIR__ . '/maintenance/backupPrefetch.inc', + 'BaseDump' => __DIR__ . '/includes/export/BaseDump.php', 'BaseTemplate' => __DIR__ . '/includes/skins/BaseTemplate.php', 'BashkirUppercaseCollation' => __DIR__ . '/includes/collation/BashkirUppercaseCollation.php', 'BatchRowIterator' => __DIR__ . '/includes/utils/BatchRowIterator.php', diff --git a/includes/export/BaseDump.php b/includes/export/BaseDump.php new file mode 100644 index 0000000000..6a2d3bf626 --- /dev/null +++ b/includes/export/BaseDump.php @@ -0,0 +1,219 @@ + + * https://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Maintenance + */ + +/** + * Readahead helper for making large MediaWiki data dumps; + * reads in a previous XML dump to sequentially prefetch text + * records already normalized and decompressed. + * + * This can save load on the external database servers, hopefully. + * + * Assumes that dumps will be recorded in the canonical order: + * - ascending by page_id + * - ascending by rev_id within each page + * - text contents are immutable and should not change once + * recorded, so the previous dump is a reliable source + * + * @ingroup Maintenance + */ +class BaseDump { + /** @var XMLReader */ + protected $reader = null; + protected $atEnd = false; + protected $atPageEnd = false; + protected $lastPage = 0; + protected $lastRev = 0; + protected $infiles = null; + + public function __construct( $infile ) { + $this->infiles = explode( ';', $infile ); + $this->reader = new XMLReader(); + $infile = array_shift( $this->infiles ); + if ( defined( 'LIBXML_PARSEHUGE' ) ) { + $this->reader->open( $infile, null, LIBXML_PARSEHUGE ); + } else { + $this->reader->open( $infile ); + } + } + + /** + * Attempts to fetch the text of a particular page revision + * from the dump stream. May return null if the page is + * unavailable. + * + * @param int $page ID number of page to read + * @param int $rev ID number of revision to read + * @return string|null + */ + function prefetch( $page, $rev ) { + $page = intval( $page ); + $rev = intval( $rev ); + while ( $this->lastPage < $page && !$this->atEnd ) { + $this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" ); + $this->nextPage(); + } + if ( $this->lastPage > $page || $this->atEnd ) { + $this->debug( "BaseDump::prefetch already past page $page " + . "looking for rev $rev [$this->lastPage, $this->lastRev]" ); + + return null; + } + while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) { + $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, " + . "looking for $page, $rev" ); + $this->nextRev(); + } + if ( $this->lastRev == $rev && !$this->atEnd ) { + $this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" ); + + return $this->nextText(); + } else { + $this->debug( "BaseDump::prefetch already past rev $rev on page $page " + . "[$this->lastPage, $this->lastRev]" ); + + return null; + } + } + + function debug( $str ) { + wfDebug( $str . "\n" ); + // global $dumper; + // $dumper->progress( $str ); + } + + /** + * @access private + */ + function nextPage() { + if ( $this->skipTo( 'page', 'mediawiki' ) ) { + if ( $this->skipTo( 'id' ) ) { + $this->lastPage = intval( $this->nodeContents() ); + $this->lastRev = 0; + $this->atPageEnd = false; + } + } else { + $this->close(); + if ( count( $this->infiles ) ) { + $infile = array_shift( $this->infiles ); + $this->reader->open( $infile ); + $this->atEnd = false; + } + } + } + + /** + * @access private + */ + function nextRev() { + if ( $this->skipTo( 'revision' ) ) { + if ( $this->skipTo( 'id' ) ) { + $this->lastRev = intval( $this->nodeContents() ); + } + } else { + $this->atPageEnd = true; + } + } + + /** + * @access private + * @return string + */ + function nextText() { + $this->skipTo( 'text' ); + + return strval( $this->nodeContents() ); + } + + /** + * @access private + * @param string $name + * @param string $parent + * @return bool|null + */ + function skipTo( $name, $parent = 'page' ) { + if ( $this->atEnd ) { + return false; + } + while ( $this->reader->read() ) { + if ( $this->reader->nodeType == XMLReader::ELEMENT + && $this->reader->name == $name + ) { + return true; + } + if ( $this->reader->nodeType == XMLReader::END_ELEMENT + && $this->reader->name == $parent + ) { + $this->debug( "BaseDump::skipTo found searching for <$name>" ); + + return false; + } + } + + return $this->close(); + } + + /** + * Shouldn't something like this be built-in to XMLReader? + * Fetches text contents of the current element, assuming + * no sub-elements or such scary things. + * + * @return string + * @access private + */ + function nodeContents() { + if ( $this->atEnd ) { + return null; + } + if ( $this->reader->isEmptyElement ) { + return ""; + } + $buffer = ""; + while ( $this->reader->read() ) { + switch ( $this->reader->nodeType ) { + case XMLReader::TEXT: + // case XMLReader::WHITESPACE: + case XMLReader::SIGNIFICANT_WHITESPACE: + $buffer .= $this->reader->value; + break; + case XMLReader::END_ELEMENT: + return $buffer; + } + } + + return $this->close(); + } + + /** + * @access private + * @return null + */ + function close() { + $this->reader->close(); + $this->atEnd = true; + + return null; + } +} diff --git a/maintenance/backupPrefetch.inc b/maintenance/backupPrefetch.inc deleted file mode 100644 index 6a2d3bf626..0000000000 --- a/maintenance/backupPrefetch.inc +++ /dev/null @@ -1,219 +0,0 @@ - - * https://www.mediawiki.org/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup Maintenance - */ - -/** - * Readahead helper for making large MediaWiki data dumps; - * reads in a previous XML dump to sequentially prefetch text - * records already normalized and decompressed. - * - * This can save load on the external database servers, hopefully. - * - * Assumes that dumps will be recorded in the canonical order: - * - ascending by page_id - * - ascending by rev_id within each page - * - text contents are immutable and should not change once - * recorded, so the previous dump is a reliable source - * - * @ingroup Maintenance - */ -class BaseDump { - /** @var XMLReader */ - protected $reader = null; - protected $atEnd = false; - protected $atPageEnd = false; - protected $lastPage = 0; - protected $lastRev = 0; - protected $infiles = null; - - public function __construct( $infile ) { - $this->infiles = explode( ';', $infile ); - $this->reader = new XMLReader(); - $infile = array_shift( $this->infiles ); - if ( defined( 'LIBXML_PARSEHUGE' ) ) { - $this->reader->open( $infile, null, LIBXML_PARSEHUGE ); - } else { - $this->reader->open( $infile ); - } - } - - /** - * Attempts to fetch the text of a particular page revision - * from the dump stream. May return null if the page is - * unavailable. - * - * @param int $page ID number of page to read - * @param int $rev ID number of revision to read - * @return string|null - */ - function prefetch( $page, $rev ) { - $page = intval( $page ); - $rev = intval( $rev ); - while ( $this->lastPage < $page && !$this->atEnd ) { - $this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" ); - $this->nextPage(); - } - if ( $this->lastPage > $page || $this->atEnd ) { - $this->debug( "BaseDump::prefetch already past page $page " - . "looking for rev $rev [$this->lastPage, $this->lastRev]" ); - - return null; - } - while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) { - $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, " - . "looking for $page, $rev" ); - $this->nextRev(); - } - if ( $this->lastRev == $rev && !$this->atEnd ) { - $this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" ); - - return $this->nextText(); - } else { - $this->debug( "BaseDump::prefetch already past rev $rev on page $page " - . "[$this->lastPage, $this->lastRev]" ); - - return null; - } - } - - function debug( $str ) { - wfDebug( $str . "\n" ); - // global $dumper; - // $dumper->progress( $str ); - } - - /** - * @access private - */ - function nextPage() { - if ( $this->skipTo( 'page', 'mediawiki' ) ) { - if ( $this->skipTo( 'id' ) ) { - $this->lastPage = intval( $this->nodeContents() ); - $this->lastRev = 0; - $this->atPageEnd = false; - } - } else { - $this->close(); - if ( count( $this->infiles ) ) { - $infile = array_shift( $this->infiles ); - $this->reader->open( $infile ); - $this->atEnd = false; - } - } - } - - /** - * @access private - */ - function nextRev() { - if ( $this->skipTo( 'revision' ) ) { - if ( $this->skipTo( 'id' ) ) { - $this->lastRev = intval( $this->nodeContents() ); - } - } else { - $this->atPageEnd = true; - } - } - - /** - * @access private - * @return string - */ - function nextText() { - $this->skipTo( 'text' ); - - return strval( $this->nodeContents() ); - } - - /** - * @access private - * @param string $name - * @param string $parent - * @return bool|null - */ - function skipTo( $name, $parent = 'page' ) { - if ( $this->atEnd ) { - return false; - } - while ( $this->reader->read() ) { - if ( $this->reader->nodeType == XMLReader::ELEMENT - && $this->reader->name == $name - ) { - return true; - } - if ( $this->reader->nodeType == XMLReader::END_ELEMENT - && $this->reader->name == $parent - ) { - $this->debug( "BaseDump::skipTo found searching for <$name>" ); - - return false; - } - } - - return $this->close(); - } - - /** - * Shouldn't something like this be built-in to XMLReader? - * Fetches text contents of the current element, assuming - * no sub-elements or such scary things. - * - * @return string - * @access private - */ - function nodeContents() { - if ( $this->atEnd ) { - return null; - } - if ( $this->reader->isEmptyElement ) { - return ""; - } - $buffer = ""; - while ( $this->reader->read() ) { - switch ( $this->reader->nodeType ) { - case XMLReader::TEXT: - // case XMLReader::WHITESPACE: - case XMLReader::SIGNIFICANT_WHITESPACE: - $buffer .= $this->reader->value; - break; - case XMLReader::END_ELEMENT: - return $buffer; - } - } - - return $this->close(); - } - - /** - * @access private - * @return null - */ - function close() { - $this->reader->close(); - $this->atEnd = true; - - return null; - } -} diff --git a/maintenance/dumpTextPass.php b/maintenance/dumpTextPass.php index 0604f4886b..e18d0b8723 100644 --- a/maintenance/dumpTextPass.php +++ b/maintenance/dumpTextPass.php @@ -143,8 +143,6 @@ TEXT } function processOptions() { - global $IP; - parent::processOptions(); if ( $this->hasOption( 'buffersize' ) ) { @@ -152,7 +150,6 @@ TEXT } if ( $this->hasOption( 'prefetch' ) ) { - require_once "$IP/maintenance/backupPrefetch.inc"; $url = $this->processFileOpt( $this->getOption( 'prefetch' ) ); $this->prefetch = new BaseDump( $url ); } diff --git a/tests/phan/config.php b/tests/phan/config.php index 0df835749b..52a565c5fa 100644 --- a/tests/phan/config.php +++ b/tests/phan/config.php @@ -41,7 +41,6 @@ return [ [ 'maintenance/7zip.inc', 'maintenance/backup.inc', - 'maintenance/backupPrefetch.inc', 'maintenance/cleanupTable.inc', 'maintenance/CodeCleanerGlobalsPass.inc', 'maintenance/commandLine.inc', diff --git a/tests/phpunit/maintenance/backupPrefetchTest.php b/tests/phpunit/maintenance/backupPrefetchTest.php index 010bb171d1..0e2e1453ea 100644 --- a/tests/phpunit/maintenance/backupPrefetchTest.php +++ b/tests/phpunit/maintenance/backupPrefetchTest.php @@ -1,7 +1,5 @@