reader = new XMLReader(); $this->reader->open( $infile ); }

	/**
	 * Attempts to fetch the text of a particular page revision
	 * from the dump stream. May return null if the page is
	 * unavailable.
	 *
	 * The stream is only ever read forwards: pages are skipped until
	 * the last page ID read is >= $page, then revisions are skipped
	 * until the last revision ID read is >= $rev. Null is returned when
	 * the stream has already moved past the request, when the stream has
	 * ended, or when the revision text could not be read completely.
	 *
	 * NOTE(review): nextRev() does not stop at the end of the current
	 * page, so a revision missing from this page makes the reader run on
	 * into the following page's revisions — confirm whether callers rely
	 * on strictly ordered input.
	 *
	 * @param int $page ID number of page to read
	 * @param int $rev ID number of revision to read
	 * @return string or null
	 */
	function prefetch( $page, $rev ) {
		while( $this->lastPage < $page && !$this->atEnd ) {
			$this->nextPage();
		}
		if( $this->lastPage > $page || $this->atEnd ) {
			$this->debug( "BaseDump::prefetch already past page $page looking for rev $rev\n" );
			return null;
		}

		while( $this->lastRev < $rev && !$this->atEnd ) {
			$this->nextRev();
		}
		if( $this->lastRev == $rev ) {
			$this->debug( "BaseDump::prefetch hit on $page, $rev\n" );
			return $this->nextText();
		} else {
			$this->debug( "BaseDump::prefetch already past rev $rev on page $page\n" );
			return null;
		}
	}

	/**
	 * Forwards a diagnostic message to wfDebug().
	 *
	 * @param string $str Message to log
	 */
	function debug( $str ) {
		wfDebug( $str );
		//global $dumper;
		//$dumper->progress( $str );
	}

	/**
	 * Advances to the next <page> element and records its <id> in
	 * $this->lastPage, resetting $this->lastRev to 0.
	 *
	 * If the stream ends first, the bookkeeping is left untouched;
	 * skipTo()/nodeContents() will have set $this->atEnd via close().
	 *
	 * @access private
	 */
	function nextPage() {
		// Check each step: after close() the reader is gone and must not be used.
		if( $this->skipTo( 'page' ) && $this->skipTo( 'id' ) ) {
			$id = $this->nodeContents();
			if( $id !== false ) {
				$this->lastPage = intval( $id );
				$this->lastRev = 0;
			}
		}
	}

	/**
	 * Advances to the next <revision> element and records its <id> in
	 * $this->lastRev. Leaves $this->lastRev untouched if the stream
	 * ends first.
	 *
	 * @access private
	 */
	function nextRev() {
		if( $this->skipTo( 'revision' ) && $this->skipTo( 'id' ) ) {
			$id = $this->nodeContents();
			if( $id !== false ) {
				$this->lastRev = intval( $id );
			}
		}
	}

	/**
	 * Advances to the next <text> element and returns its contents.
	 *
	 * @return string or null if the stream ended before the element
	 *   could be found or read completely (a truncated element must not
	 *   be mistaken for legitimately empty text)
	 * @access private
	 */
	function nextText() {
		if( !$this->skipTo( 'text' ) ) {
			return null;
		}
		$text = $this->nodeContents();
		if( $text === false ) {
			return null;
		}
		return strval( $text );
	}

	/**
	 * Reads forward until an opening element with the given name is the
	 * current node.
	 *
	 * @param string $name Element name to look for
	 * @return bool true if the element was found; false if the stream
	 *   has ended (in which case the reader has been closed)
	 * @access private
	 */
	function skipTo( $name ) {
		if( $this->atEnd ) {
			// close() has already discarded the reader.
			return false;
		}
		while( $this->reader->read() ) {
			if( $this->reader->nodeType == XMLReader::ELEMENT &&
				$this->reader->name == $name ) {
				return true;
			}
		}
		return $this->close();
	}

	/**
	 * Shouldn't something like this be built-in to XMLReader?
	 * Fetches text contents of the current element, assuming
	 * no sub-elements or such scary things.
	 *
	 * Text and significant-whitespace nodes are concatenated until the
	 * first end-element node is reached.
	 *
	 * @return string Contents of the element, or false if the stream
	 *   has ended before the closing tag (the reader is then closed)
	 * @access private
	 */
	function nodeContents() {
		if( $this->atEnd ) {
			// close() has already discarded the reader.
			return false;
		}
		if( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while( $this->reader->read() ) {
			switch( $this->reader->nodeType ) {
			case XMLReader::TEXT:
//			case XMLReader::WHITESPACE:
			case XMLReader::SIGNIFICANT_WHITESPACE:
				$buffer .= $this->reader->value;
				break;
			case XMLReader::END_ELEMENT:
				return $buffer;
			}
		}
		return $this->close();
	}

	/**
	 * Closes and discards the underlying reader and marks the stream as
	 * finished. Safe to call more than once.
	 *
	 * @return bool Always false, so callers can "return $this->close();"
	 * @access private
	 */
	function close() {
		if( $this->reader ) {
			$this->reader->close();
			$this->reader = null;
		}
		$this->atEnd = true;
		return false;
	}
}

?>