X-Git-Url: https://git.cyclocoop.org/?a=blobdiff_plain;f=maintenance%2FdumpTextPass.php;h=72d7d97cbb89be9b10a38a0b4607be9a0d2039be;hb=95686fdb6ae34c7db75b0a519701a0c09c672ff6;hp=6b3b2ab0b8fc7255e850cd5029f0fdf09f5e1ff2;hpb=e8cdf678de9dfa862508dc5a0ec7d19a15521a7f;p=lhc%2Fweb%2Fwiklou.git diff --git a/maintenance/dumpTextPass.php b/maintenance/dumpTextPass.php index 6b3b2ab0b8..72d7d97cbb 100644 --- a/maintenance/dumpTextPass.php +++ b/maintenance/dumpTextPass.php @@ -1,205 +1,41 @@ * http://www.mediawiki.org/ - * + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * - * @package MediaWiki - * @subpackage SpecialPage + * @file + * @ingroup Maintenance */ $originalDir = getcwd(); -$optionsWithArgs = array( 'server', 'pagelist', 'start', 'end' ); - -require_once( 'commandLine.inc' ); -require_once( 'SpecialExport.php' ); -require_once( 'maintenance/backup.inc' ); - -class TextPassDumper extends BackupDumper { - var $prefetch = null; - - function dump() { - # This shouldn't happen if on console... ;) - header( 'Content-type: text/html; charset=UTF-8' ); - - # Notice messages will foul up your XML output even if they're - # relatively harmless. -// ini_set( 'display_errors', false ); - - $this->startTime = wfTime(); - - $this->db =& wfGetDB( DB_SLAVE ); - $this->maxCount = $this->db->selectField( 'page', 'MAX(page_id)', '', 'BackupDumper::dump' ); - $this->startTime = wfTime(); - - $this->egress = new ExportProgressFilter( $this->sink, $this ); - - $input = fopen( "php://stdin", "rt" ); - $result = $this->readDump( $input ); - - if( WikiError::isError( $result ) ) { - $this->progress( $result->getMessage() ); - } - - $this->report( true ); - } - - function processOption( $opt, $val, $param ) { - if( $opt == 'prefetch' ) { - require_once 'maintenance/backupPrefetch.inc'; - switch( $val ) { - case "file": - $filename = $param; - break; - case "gzip": - $filename = "compress.gzip://$param"; - break; - case "bzip2": - $filename = "compress.bzip2://$param"; - break; - default: - $filename = $val; - } - $this->prefetch = new BaseDump( $filename ); - } - } - - function readDump( $input ) { - $this->buffer = ""; - $this->openElement = false; - $this->atStart = true; - $this->state = ""; - $this->lastName = ""; - $this->thisPage = 0; - $this->thisRev = 0; - - $parser = xml_parser_create( "UTF-8" ); - xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false ); - - xml_set_element_handler( $parser, array( &$this, 'startElement' ), array( &$this, 'endElement' ) ); - xml_set_character_data_handler( $parser, array( &$this, 'characterData' ) ); - - $offset = 0; // for context extraction on error reporting - $bufferSize = 512 * 1024; - do { - $chunk = fread( $input, $bufferSize ); - if( !xml_parse( $parser, $chunk, feof( $input ) ) ) { - wfDebug( "TextDumpPass::readDump encountered XML parsing error\n" ); - return new WikiXmlError( $parser, 'XML import parse failure', $chunk, $offset ); - } - $offset += strlen( $chunk ); - } while( $chunk !== false && !feof( $input ) ); - xml_parser_free( $parser ); - } - - function getText( $id ) { - if( isset( $this->prefetch ) ) { - $text = $this->prefetch->prefetch( $this->thisPage, $this->thisRev ); - if( !is_null( $text ) ) - return $text; - } - $id = intval( $id ); - $row = $this->db->selectRow( 'text', - array( 'old_text', 'old_flags' ), - array( 'old_id' => $id ), - 'TextPassDumper::getText' ); - $text = Revision::getRevisionText( $row ); - $stripped = str_replace( "\r", "", $text ); - $normalized = UtfNormal::cleanUp( $stripped ); - return $normalized; - } - - function startElement( $parser, $name, $attribs ) { - $this->clearOpenElement( null ); - $this->lastName = $name; - - if( $name == 'revision' ) { - $this->state = $name; - $this->egress->writeOpenPage( null, $this->buffer ); - $this->buffer = ""; - } elseif( $name == 'page' ) { - $this->state = $name; - if( $this->atStart ) { - $this->egress->writeOpenStream( $this->buffer ); - $this->buffer = ""; - $this->atStart = false; - } - } - - if( $name == "text" && isset( $attribs['id'] ) ) { - $text = $this->getText( $attribs['id'] ); - $this->openElement = array( $name, array( 'xml:space' => 'preserve' ) ); - if( strlen( $text ) > 0 ) { - $this->characterData( $parser, $text ); - } - } else { - $this->openElement = array( $name, $attribs ); - } - } - - function endElement( $parser, $name ) { - if( $this->openElement ) { - $this->clearOpenElement( "" ); - } else { - $this->buffer .= ""; - } - - if( $name == 'revision' ) { - $this->egress->writeRevision( null, $this->buffer ); - $this->buffer = ""; - $this->thisRev = ""; - } elseif( $name == 'page' ) { - $this->egress->writeClosePage( $this->buffer ); - $this->buffer = ""; - $this->thisPage = ""; - } elseif( $name == 'mediawiki' ) { - $this->egress->writeCloseStream( $this->buffer ); - $this->buffer = ""; - } - } - - function characterData( $parser, $data ) { - $this->clearOpenElement( null ); - if( $this->lastName == "id" ) { - if( $this->state == "revision" ) { - $this->thisRev .= $data; - } elseif( $this->state == "page" ) { - $this->thisPage .= $data; - } - } - $this->buffer .= htmlspecialchars( $data ); - } - - function clearOpenElement( $style ) { - if( $this->openElement ) { - $this->buffer .= wfElement( $this->openElement[0], $this->openElement[1], $style ); - $this->openElement = false; - } - } -} +require_once( __DIR__ . '/commandLine.inc' ); +require_once( __DIR__ . '/backupTextPass.inc' ); $dumper = new TextPassDumper( $argv ); -if( true ) { - $dumper->dump(); +if ( !isset( $options['help'] ) ) { + $dumper->dump( true ); } else { - $dumper->progress( <<progress( <<] Options: - --prefetch Use a prior dump file as a text source where possible. - (Requires PHP 5.0+ and the XMLReader PECL extension) - --quiet Don't dump status reports to stderr. + --stub=: To load a compressed stub dump instead of stdin + --prefetch=: Use a prior dump file as a text source, to save + pressure on the database. + (Requires the XMLReader extension) + --maxtime= Write out checkpoint file after this many minutes (writing + out complete page, closing xml file properly, and opening new one + with header). This option requires the checkpointfile option. + --checkpointfile= Use this string for checkpoint filenames, + substituting first pageid written for the first %s (required) and the + last pageid written for the second %s if it exists. + --quiet Don't dump status reports to stderr. --report=n Report position and speed after every n pages processed. - (Default: 100) -END + (Default: 100) + --server=h Force reading from MySQL server h + --current Base ETA on number of pages in database instead of all revisions + --spawn Spawn a subprocess for loading text records + --help Display this help message +ENDS ); } -?> +