X-Git-Url: https://git.cyclocoop.org/?a=blobdiff_plain;f=maintenance%2FdumpTextPass.php;h=72d7d97cbb89be9b10a38a0b4607be9a0d2039be;hb=95686fdb6ae34c7db75b0a519701a0c09c672ff6;hp=fc8c15eede18febaf039a2f9073e5a10eb3bbdce;hpb=ae741e3ec7f65b61c938444354674e08402f9879;p=lhc%2Fweb%2Fwiklou.git diff --git a/maintenance/dumpTextPass.php b/maintenance/dumpTextPass.php index fc8c15eede..72d7d97cbb 100644 --- a/maintenance/dumpTextPass.php +++ b/maintenance/dumpTextPass.php @@ -1,5 +1,7 @@ * http://www.mediawiki.org/ * @@ -15,204 +17,25 @@ * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * - * @package MediaWiki - * @subpackage SpecialPage + * @file + * @ingroup Maintenance */ $originalDir = getcwd(); -require_once( 'commandLine.inc' ); -require_once( 'SpecialExport.php' ); -require_once( 'maintenance/backup.inc' ); - -class TextPassDumper extends BackupDumper { - var $prefetch = null; - var $input = "php://stdin"; - var $history = MW_EXPORT_FULL; - - function dump() { - # This shouldn't happen if on console... ;) - header( 'Content-type: text/html; charset=UTF-8' ); - - # Notice messages will foul up your XML output even if they're - # relatively harmless. -// ini_set( 'display_errors', false ); - - $this->initProgress( $this->history ); - - $this->db =& $this->backupDb(); - - $this->egress = new ExportProgressFilter( $this->sink, $this ); - - $input = fopen( $this->input, "rt" ); - $result = $this->readDump( $input ); - - if( WikiError::isError( $result ) ) { - wfDie( $result->getMessage() ); - } - - $this->report( true ); - } - - function processOption( $opt, $val, $param ) { - $url = $this->processFileOpt( $val, $param ); - - switch( $opt ) { - case 'prefetch': - require_once 'maintenance/backupPrefetch.inc'; - $this->prefetch = new BaseDump( $url ); - break; - case 'stub': - $this->input = $url; - break; - case 'current': - $this->history = MW_EXPORT_CURRENT; - break; - case 'full': - $this->history = MW_EXPORT_FULL; - break; - } - } - - function processFileOpt( $val, $param ) { - switch( $val ) { - case "file": - return $param; - case "gzip": - return "compress.zlib://$param"; - case "bzip2": - return "compress.bzip2://$param"; - default: - return $val; - } - } - - function readDump( $input ) { - $this->buffer = ""; - $this->openElement = false; - $this->atStart = true; - $this->state = ""; - $this->lastName = ""; - $this->thisPage = 0; - $this->thisRev = 0; - - $parser = xml_parser_create( "UTF-8" ); - xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false ); - - xml_set_element_handler( $parser, array( &$this, 'startElement' ), array( &$this, 'endElement' ) ); - xml_set_character_data_handler( $parser, array( &$this, 'characterData' ) ); - - $offset = 0; // for context extraction on error reporting - $bufferSize = 512 * 1024; - do { - $chunk = fread( $input, $bufferSize ); - if( !xml_parse( $parser, $chunk, feof( $input ) ) ) { - wfDebug( "TextDumpPass::readDump encountered XML parsing error\n" ); - return new WikiXmlError( $parser, 'XML import parse failure', $chunk, $offset ); - } - $offset += strlen( $chunk ); - } while( $chunk !== false && !feof( $input ) ); - xml_parser_free( $parser ); - - return true; - } - - function getText( $id ) { - if( isset( $this->prefetch ) ) { - $text = $this->prefetch->prefetch( $this->thisPage, $this->thisRev ); - if( !is_null( $text ) ) - return $text; - } - $id = intval( $id ); - $row = $this->db->selectRow( 'text', - array( 'old_text', 'old_flags' ), - array( 'old_id' => $id ), - 'TextPassDumper::getText' ); - $text = Revision::getRevisionText( $row ); - $stripped = str_replace( "\r", "", $text ); - $normalized = UtfNormal::cleanUp( $stripped ); - return $normalized; - } - - function startElement( $parser, $name, $attribs ) { - $this->clearOpenElement( null ); - $this->lastName = $name; - - if( $name == 'revision' ) { - $this->state = $name; - $this->egress->writeOpenPage( null, $this->buffer ); - $this->buffer = ""; - } elseif( $name == 'page' ) { - $this->state = $name; - if( $this->atStart ) { - $this->egress->writeOpenStream( $this->buffer ); - $this->buffer = ""; - $this->atStart = false; - } - } - - if( $name == "text" && isset( $attribs['id'] ) ) { - $text = $this->getText( $attribs['id'] ); - $this->openElement = array( $name, array( 'xml:space' => 'preserve' ) ); - if( strlen( $text ) > 0 ) { - $this->characterData( $parser, $text ); - } - } else { - $this->openElement = array( $name, $attribs ); - } - } - - function endElement( $parser, $name ) { - if( $this->openElement ) { - $this->clearOpenElement( "" ); - } else { - $this->buffer .= ""; - } - - if( $name == 'revision' ) { - $this->egress->writeRevision( null, $this->buffer ); - $this->buffer = ""; - $this->thisRev = ""; - } elseif( $name == 'page' ) { - $this->egress->writeClosePage( $this->buffer ); - $this->buffer = ""; - $this->thisPage = ""; - } elseif( $name == 'mediawiki' ) { - $this->egress->writeCloseStream( $this->buffer ); - $this->buffer = ""; - } - } - - function characterData( $parser, $data ) { - $this->clearOpenElement( null ); - if( $this->lastName == "id" ) { - if( $this->state == "revision" ) { - $this->thisRev .= $data; - } elseif( $this->state == "page" ) { - $this->thisPage .= $data; - } - } - $this->buffer .= htmlspecialchars( $data ); - } - - function clearOpenElement( $style ) { - if( $this->openElement ) { - $this->buffer .= wfElement( $this->openElement[0], $this->openElement[1], $style ); - $this->openElement = false; - } - } -} +require_once( __DIR__ . '/commandLine.inc' ); +require_once( __DIR__ . '/backupTextPass.inc' ); $dumper = new TextPassDumper( $argv ); -if( true ) { - $dumper->dump(); +if ( !isset( $options['help'] ) ) { + $dumper->dump( true ); } else { - $dumper->progress( <<progress( <<] Options: --stub=: To load a compressed stub dump instead of stdin --prefetch=: Use a prior dump file as a text source, to save - pressure on the database. - (Requires PHP 5.0+ and the XMLReader PECL extension) - --quiet Don't dump status reports to stderr. + pressure on the database. + (Requires the XMLReader extension) + --maxtime= Write out checkpoint file after this many minutes (writing + out complete page, closing xml file properly, and opening new one + with header). This option requires the checkpointfile option. + --checkpointfile= Use this string for checkpoint filenames, + substituting first pageid written for the first %s (required) and the + last pageid written for the second %s if it exists. + --quiet Don't dump status reports to stderr. --report=n Report position and speed after every n pages processed. - (Default: 100) + (Default: 100) --server=h Force reading from MySQL server h - --current Base ETA on number of pages in database instead of all revisions -END + --current Base ETA on number of pages in database instead of all revisions + --spawn Spawn a subprocess for loading text records + --help Display this help message +ENDS ); } -?> +