class BaseDump {
var $reader = null;
var $atEnd = false;
+ var $atPageEnd = false;
var $lastPage = 0;
var $lastRev = 0;
* @return string or null
*/
function prefetch( $page, $rev ) {
// Look for revision $rev of page $page while reading forward through the dump.
// Yields whatever nextText() returns on an exact match, and null otherwise.
// Both IDs are normalised to integers before the numeric comparisons below.
+ $page = intval( $page );
+ $rev = intval( $rev );
// Step forward one page at a time until lastPage catches up with $page or atEnd is set.
while( $this->lastPage < $page && !$this->atEnd ) {
+ $this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
$this->nextPage();
}
// The reader is already beyond the requested page, or input has run out: report a miss.
if( $this->lastPage > $page || $this->atEnd ) {
- $this->debug( "BaseDump::prefetch already past page $page looking for rev $rev\n" );
+ $this->debug( "BaseDump::prefetch already past page $page looking for rev $rev [$this->lastPage, $this->lastRev]" );
return null;
}
// Step forward through revisions until lastRev catches up with $rev. The updated
// condition also stops once atPageEnd is set, which nextRev() does when its
// search for another revision fails.
- while( $this->lastRev < $rev && !$this->atEnd ) {
+ while( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
+ $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
$this->nextRev();
}
// A hit requires an exact revision ID match; the updated check additionally
// requires that atEnd has not been set.
- if( $this->lastRev == $rev ) {
- $this->debug( "BaseDump::prefetch hit on $page, $rev\n" );
+ if( $this->lastRev == $rev && !$this->atEnd ) {
+ $this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
// NOTE(review): nextText() is defined outside this view; presumably it returns the revision text.
return $this->nextText();
} else {
- $this->debug( "BaseDump::prefetch already past rev $rev on page $page\n" );
+ $this->debug( "BaseDump::prefetch already past rev $rev on page $page [$this->lastPage, $this->lastRev]" );
return null;
}
}
function debug( $str ) {
// Forward a diagnostic message to wfDebug(). The updated version appends the
// trailing newline here, so callers pass messages without one.
- wfDebug( $str );
+ wfDebug( $str . "\n" );
// Disabled alternative: route the message through the global $dumper's progress() instead.
//global $dumper;
//$dumper->progress( $str );
}
* @access private
*/
function nextPage() {
// Move the reader to the next <page> and record its <id> in lastPage.
// The removed lines ignored skipTo()'s result; the added lines act on it.
- $this->skipTo( 'page' );
- $this->skipTo( 'id' );
- $this->lastPage = intval( $this->nodeContents() );
- $this->lastRev = 0;
// Pages are searched for with 'mediawiki' as the parent element passed to skipTo().
+ if( $this->skipTo( 'page', 'mediawiki' ) ) {
+ if( $this->skipTo( 'id' ) ) {
+ $this->lastPage = intval( $this->nodeContents() );
// Starting a new page: reset the per-page revision state.
+ $this->lastRev = 0;
+ $this->atPageEnd = false;
+ }
+ } else {
// No further page could be located, so flag the end of input.
+ $this->atEnd = true;
+ }
}
/**
* @access private
*/
function nextRev() {
// Move the reader to the next <revision> and record its <id> in lastRev.
// The removed lines ignored skipTo()'s result; the added lines act on it.
- $this->skipTo( 'revision' );
- $this->skipTo( 'id' );
- $this->lastRev = intval( $this->nodeContents() );
// skipTo() is called without a second argument, so its default parent 'page' applies.
+ if( $this->skipTo( 'revision' ) ) {
+ if( $this->skipTo( 'id' ) ) {
+ $this->lastRev = intval( $this->nodeContents() );
+ }
+ } else {
// No further revision was found; flag it so prefetch() stops scanning this page.
+ $this->atPageEnd = true;
+ }
}
/**
/**
* @access private
*/
- function skipTo( $name ) {
+ function skipTo( $name, $parent='page' ) {
if( $this->atEnd ) {
return false;
}
$this->reader->name == $name ) {
return true;
}
+ if( $this->reader->nodeType == XMLREADER_END_ELEMENT &&
+ $this->reader->name == $parent ) {
+ $this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
+ return false;
+ }
}
return $this->close();
}
*/
function nodeContents() {
if( $this->atEnd ) {
- return false;
+ return null;
}
if( $this->reader->isEmptyElement ) {
return "";
function close() {
// Close the underlying reader and mark the dump as finished via atEnd.
$this->reader->close();
$this->atEnd = true;
// The return value changes from false to null; skipTo() returns this value directly.
- return false;
+ return null;
}
}
if( $name == 'revision' ) {
$this->egress->writeRevision( null, $this->buffer );
$this->buffer = "";
+ $this->thisRev = "";
} elseif( $name == 'page' ) {
$this->egress->writeClosePage( $this->buffer );
$this->buffer = "";
+ $this->thisPage = "";
} elseif( $name == 'mediawiki' ) {
$this->egress->writeCloseStream( $this->buffer );
$this->buffer = "";
$this->clearOpenElement( null );
if( $this->lastName == "id" ) {
if( $this->state == "revision" ) {
- $this->thisRev = intval( $data );
+ $this->thisRev .= $data;
} elseif( $this->state == "page" ) {
- $this->thisPage = intval( $data );
+ $this->thisPage .= $data;
}
}
$this->buffer .= htmlspecialchars( $data );
--- /dev/null
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
+<siteinfo>
+ <sitename>DemoWiki</sitename>
+ <base>http://example.com/wiki/Main_Page</base>
+ <generator>MediaWiki 1.5.0</generator>
+ <case>first-letter</case>
+ <namespaces>
+ <namespace key="-2">Media</namespace>
+ <namespace key="-1">Special</namespace>
+ <namespace key="0"></namespace>
+ <namespace key="1">Talk</namespace>
+ <namespace key="2">User</namespace>
+ <namespace key="3">User talk</namespace>
+ <namespace key="4">DemoWiki</namespace>
+ <namespace key="5">DemoWiki talk</namespace>
+ <namespace key="6">Image</namespace>
+ <namespace key="7">Image talk</namespace>
+ <namespace key="8">MediaWiki</namespace>
+ <namespace key="9">MediaWiki talk</namespace>
+ <namespace key="10">Template</namespace>
+ <namespace key="11">Template talk</namespace>
+ <namespace key="12">Help</namespace>
+ <namespace key="13">Help talk</namespace>
+ <namespace key="14">Category</namespace>
+ <namespace key="15">Category talk</namespace>
+ </namespaces>
+</siteinfo>
+<page>
+ <title>First page</title>
+ <id>1</id>
+ <revision>
+ <id>1</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 1, rev 1</comment>
+ <text>page 1, rev 1</text>
+ </revision>
+ <revision>
+ <id>2</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 1, rev 2</comment>
+ <text>page 1, rev 2</text>
+ </revision>
+ <revision>
+ <id>4</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 1, rev 4</comment>
+ <text>page 1, rev 4</text>
+ </revision>
+</page>
+<page>
+ <title>Second page</title>
+ <id>2</id>
+ <revision>
+ <id>3</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 2, rev 3</comment>
+ <text>page 2, rev 3</text>
+ </revision>
+</page>
+<page>
+ <title>Third page</title>
+ <id>3</id>
+ <revision>
+ <id>5</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 3, rev 5</comment>
+ <text>page 3, rev 5</text>
+ </revision>
+</page>
+</mediawiki>
--- /dev/null
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
+<siteinfo>
+ <sitename>DemoWiki</sitename>
+ <base>http://example.com/wiki/Main_Page</base>
+ <generator>MediaWiki 1.5.0</generator>
+ <case>first-letter</case>
+ <namespaces>
+ <namespace key="-2">Media</namespace>
+ <namespace key="-1">Special</namespace>
+ <namespace key="0"></namespace>
+ <namespace key="1">Talk</namespace>
+ <namespace key="2">User</namespace>
+ <namespace key="3">User talk</namespace>
+ <namespace key="4">DemoWiki</namespace>
+ <namespace key="5">DemoWiki talk</namespace>
+ <namespace key="6">Image</namespace>
+ <namespace key="7">Image talk</namespace>
+ <namespace key="8">MediaWiki</namespace>
+ <namespace key="9">MediaWiki talk</namespace>
+ <namespace key="10">Template</namespace>
+ <namespace key="11">Template talk</namespace>
+ <namespace key="12">Help</namespace>
+ <namespace key="13">Help talk</namespace>
+ <namespace key="14">Category</namespace>
+ <namespace key="15">Category talk</namespace>
+ </namespaces>
+</siteinfo>
+<page>
+ <title>First page</title>
+ <id>1</id>
+ <revision>
+ <id>1</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 1, rev 1</comment>
+ <text>page 1, rev 1</text>
+ </revision>
+ <revision>
+ <id>2</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 1, rev 2</comment>
+ <text>page 1, rev 2</text>
+ </revision>
+</page>
+<page>
+ <title>Second page</title>
+ <id>2</id>
+ <revision>
+ <id>3</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 2, rev 3</comment>
+ <text>page 2, rev 3</text>
+ </revision>
+</page>
+</mediawiki>
--- /dev/null
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
+<siteinfo>
+ <sitename>DemoWiki</sitename>
+ <base>http://example.com/wiki/Main_Page</base>
+ <generator>MediaWiki 1.5.0</generator>
+ <case>first-letter</case>
+ <namespaces>
+ <namespace key="-2">Media</namespace>
+ <namespace key="-1">Special</namespace>
+ <namespace key="0"></namespace>
+ <namespace key="1">Talk</namespace>
+ <namespace key="2">User</namespace>
+ <namespace key="3">User talk</namespace>
+ <namespace key="4">DemoWiki</namespace>
+ <namespace key="5">DemoWiki talk</namespace>
+ <namespace key="6">Image</namespace>
+ <namespace key="7">Image talk</namespace>
+ <namespace key="8">MediaWiki</namespace>
+ <namespace key="9">MediaWiki talk</namespace>
+ <namespace key="10">Template</namespace>
+ <namespace key="11">Template talk</namespace>
+ <namespace key="12">Help</namespace>
+ <namespace key="13">Help talk</namespace>
+ <namespace key="14">Category</namespace>
+ <namespace key="15">Category talk</namespace>
+ </namespaces>
+</siteinfo>
+<page>
+ <title>First page</title>
+ <id>1</id>
+ <revision>
+ <id>1</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 1, rev 1</comment>
+ <text id="1" />
+ </revision>
+ <revision>
+ <id>2</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 1, rev 2</comment>
+ <text id="2" />
+ </revision>
+ <revision>
+ <id>4</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 1, rev 4</comment>
+ <text id="4" />
+ </revision>
+</page>
+<page>
+ <title>Second page</title>
+ <id>2</id>
+ <revision>
+ <id>3</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 2, rev 3</comment>
+ <text id="3" />
+ </revision>
+</page>
+<page>
+ <title>Third page</title>
+ <id>3</id>
+ <revision>
+ <id>5</id>
+ <timestamp>2001-01-15T12:00:00Z</timestamp>
+ <contributor><ip>10.0.0.1</ip></contributor>
+ <comment>page 3, rev 5</comment>
+ <text id="5" />
+ </revision>
+</page>
+</mediawiki>