Test case files and fixes for text-prefetch two-and-a-half-pass dump.
author Brion Vibber <brion@users.mediawiki.org>
Wed, 19 Oct 2005 00:05:22 +0000 (00:05 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Wed, 19 Oct 2005 00:05:22 +0000 (00:05 +0000)
Will want to test a little more before making another dump. :P

maintenance/backupPrefetch.inc
maintenance/dumpTextPass.php
tests/test-prefetch-current.xml [new file with mode: 0644]
tests/test-prefetch-previous.xml [new file with mode: 0644]
tests/test-prefetch-stub.xml [new file with mode: 0644]

diff --git a/maintenance/backupPrefetch.inc b/maintenance/backupPrefetch.inc
index 759220d..f40bc89 100644 (file)
@@ -18,6 +18,7 @@
 class BaseDump {
        var $reader = null;
        var $atEnd = false;
+       var $atPageEnd = false;
        var $lastPage = 0;
        var $lastRev = 0;
        
@@ -36,27 +37,31 @@ class BaseDump {
         * @return string or null
         */
        function prefetch( $page, $rev ) {
+               $page = intval( $page );
+               $rev = intval( $rev );
                while( $this->lastPage < $page && !$this->atEnd ) {
+                       $this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
                        $this->nextPage();
                }
                if( $this->lastPage > $page || $this->atEnd ) {
-                       $this->debug( "BaseDump::prefetch already past page $page looking for rev $rev\n" );
+                       $this->debug( "BaseDump::prefetch already past page $page looking for rev $rev  [$this->lastPage, $this->lastRev]" );
                        return null;
                }
-               while( $this->lastRev < $rev && !$this->atEnd ) {
+               while( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
+                       $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
                        $this->nextRev();
                }
-               if( $this->lastRev == $rev ) {
-                       $this->debug( "BaseDump::prefetch hit on $page, $rev\n" );
+               if( $this->lastRev == $rev && !$this->atEnd ) {
+                       $this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
                        return $this->nextText();
                } else {
-                       $this->debug( "BaseDump::prefetch already past rev $rev on page $page\n" );
+                       $this->debug( "BaseDump::prefetch already past rev $rev on page $page  [$this->lastPage, $this->lastRev]" );
                        return null;
                }
        }
        
        function debug( $str ) {
-               wfDebug( $str );
+               wfDebug( $str . "\n" );
                //global $dumper;
                //$dumper->progress( $str );
        }
@@ -65,19 +70,28 @@ class BaseDump {
         * @access private
         */
        function nextPage() {
-               $this->skipTo( 'page' );
-               $this->skipTo( 'id' );
-               $this->lastPage = intval( $this->nodeContents() );
-               $this->lastRev = 0;
+               if( $this->skipTo( 'page', 'mediawiki' ) ) {
+                       if( $this->skipTo( 'id' ) ) {
+                               $this->lastPage = intval( $this->nodeContents() );
+                               $this->lastRev = 0;
+                               $this->atPageEnd = false;
+                       }
+               } else {
+                       $this->atEnd = true;
+               }
        }
        
        /**
         * @access private
         */
        function nextRev() {
-               $this->skipTo( 'revision' );
-               $this->skipTo( 'id' );
-               $this->lastRev = intval( $this->nodeContents() );
+               if( $this->skipTo( 'revision' ) ) {
+                       if( $this->skipTo( 'id' ) ) {
+                               $this->lastRev = intval( $this->nodeContents() );
+                       }
+               } else {
+                       $this->atPageEnd = true;
+               }
        }
        
        /**
@@ -91,7 +105,7 @@ class BaseDump {
        /**
         * @access private
         */
-       function skipTo( $name ) {
+       function skipTo( $name, $parent='page' ) {
                if( $this->atEnd ) {
                        return false;
                }
@@ -100,6 +114,11 @@ class BaseDump {
                                $this->reader->name == $name ) {
                                return true;
                        }
+                       if( $this->reader->nodeType == XMLREADER_END_ELEMENT &&
+                               $this->reader->name == $parent ) {
+                               $this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
+                               return false;
+                       }
                }
                return $this->close();
        }
@@ -113,7 +132,7 @@ class BaseDump {
         */
        function nodeContents() {
                if( $this->atEnd ) {
-                       return false;
+                       return null;
                }
                if( $this->reader->isEmptyElement ) {
                        return "";
@@ -139,7 +158,7 @@ class BaseDump {
        function close() {
                $this->reader->close();
                $this->atEnd = true;
-               return false;
+               return null;
        }
 }
 
diff --git a/maintenance/dumpTextPass.php b/maintenance/dumpTextPass.php
index ae55ec8..235196b 100644 (file)
@@ -159,9 +159,11 @@ class TextPassDumper extends BackupDumper {
                if( $name == 'revision' ) {
                        $this->egress->writeRevision( null, $this->buffer );
                        $this->buffer = "";
+                       $this->thisRev = "";
                } elseif( $name == 'page' ) {
                        $this->egress->writeClosePage( $this->buffer );
                        $this->buffer = "";
+                       $this->thisPage = "";
                } elseif( $name == 'mediawiki' ) {
                        $this->egress->writeCloseStream( $this->buffer );
                        $this->buffer = "";
@@ -172,9 +174,9 @@ class TextPassDumper extends BackupDumper {
                $this->clearOpenElement( null );
                if( $this->lastName == "id" ) {
                        if( $this->state == "revision" ) {
-                               $this->thisRev = intval( $data );
+                               $this->thisRev .= $data;
                        } elseif( $this->state == "page" ) {
-                               $this->thisPage = intval( $data );
+                               $this->thisPage .= $data;
                        }
                }
                $this->buffer .= htmlspecialchars( $data );
diff --git a/tests/test-prefetch-current.xml b/tests/test-prefetch-current.xml
new file mode 100644 (file)
index 0000000..a4c8bda
--- /dev/null
@@ -0,0 +1,75 @@
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
+<siteinfo>
+  <sitename>DemoWiki</sitename>
+  <base>http://example.com/wiki/Main_Page</base>
+  <generator>MediaWiki 1.5.0</generator>
+  <case>first-letter</case>
+  <namespaces>
+    <namespace key="-2">Media</namespace>
+    <namespace key="-1">Special</namespace>
+    <namespace key="0"></namespace>
+    <namespace key="1">Talk</namespace>
+    <namespace key="2">User</namespace>
+    <namespace key="3">User talk</namespace>
+    <namespace key="4">DemoWiki</namespace>
+    <namespace key="5">DemoWIki talk</namespace>
+    <namespace key="6">Image</namespace>
+    <namespace key="7">Image talk</namespace>
+    <namespace key="8">MediaWiki</namespace>
+    <namespace key="9">MediaWiki talk</namespace>
+    <namespace key="10">Template</namespace>
+    <namespace key="11">Template talk</namespace>
+    <namespace key="12">Help</namespace>
+    <namespace key="13">Help talk</namespace>
+    <namespace key="14">Category</namespace>
+    <namespace key="15">Category talk</namespace>
+  </namespaces>
+</siteinfo>
+<page>
+  <title>First page</title>
+  <id>1</id>
+  <revision>
+    <id>1</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 1, rev 1</comment>
+    <text>page 1, rev 1</text>
+  </revision>
+  <revision>
+    <id>2</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 1, rev 2</comment>
+    <text>page 1, rev 2</text>
+  </revision>
+  <revision>
+    <id>4</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 1, rev 4</comment>
+    <text>page 1, rev 4</text>
+  </revision>
+</page>
+<page>
+  <title>Second page</title>
+  <id>2</id>
+  <revision>
+    <id>3</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 2, rev 3</comment>
+    <text>page 2, rev 3</text>
+  </revision>
+</page>
+<page>
+  <title>Third page</title>
+  <id>3</id>
+  <revision>
+    <id>5</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 3, rev 5</comment>
+    <text>page 3, rev 5</text>
+  </revision>
+</page>
+</mediawiki>
diff --git a/tests/test-prefetch-previous.xml b/tests/test-prefetch-previous.xml
new file mode 100644 (file)
index 0000000..95eb82d
--- /dev/null
@@ -0,0 +1,57 @@
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
+<siteinfo>
+  <sitename>DemoWiki</sitename>
+  <base>http://example.com/wiki/Main_Page</base>
+  <generator>MediaWiki 1.5.0</generator>
+  <case>first-letter</case>
+  <namespaces>
+    <namespace key="-2">Media</namespace>
+    <namespace key="-1">Special</namespace>
+    <namespace key="0"></namespace>
+    <namespace key="1">Talk</namespace>
+    <namespace key="2">User</namespace>
+    <namespace key="3">User talk</namespace>
+    <namespace key="4">DemoWiki</namespace>
+    <namespace key="5">DemoWIki talk</namespace>
+    <namespace key="6">Image</namespace>
+    <namespace key="7">Image talk</namespace>
+    <namespace key="8">MediaWiki</namespace>
+    <namespace key="9">MediaWiki talk</namespace>
+    <namespace key="10">Template</namespace>
+    <namespace key="11">Template talk</namespace>
+    <namespace key="12">Help</namespace>
+    <namespace key="13">Help talk</namespace>
+    <namespace key="14">Category</namespace>
+    <namespace key="15">Category talk</namespace>
+  </namespaces>
+</siteinfo>
+<page>
+  <title>First page</title>
+  <id>1</id>
+  <revision>
+    <id>1</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 1, rev 1</comment>
+    <text>page 1, rev 1</text>
+  </revision>
+  <revision>
+    <id>2</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 1, rev 2</comment>
+    <text>page 1, rev 2</text>
+  </revision>
+</page>
+<page>
+  <title>Second page</title>
+  <id>2</id>
+  <revision>
+    <id>3</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 2, rev 3</comment>
+    <text>page 2, rev 3</text>
+  </revision>
+</page>
+</mediawiki>
diff --git a/tests/test-prefetch-stub.xml b/tests/test-prefetch-stub.xml
new file mode 100644 (file)
index 0000000..59d43d2
--- /dev/null
@@ -0,0 +1,75 @@
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
+<siteinfo>
+  <sitename>DemoWiki</sitename>
+  <base>http://example.com/wiki/Main_Page</base>
+  <generator>MediaWiki 1.5.0</generator>
+  <case>first-letter</case>
+  <namespaces>
+    <namespace key="-2">Media</namespace>
+    <namespace key="-1">Special</namespace>
+    <namespace key="0"></namespace>
+    <namespace key="1">Talk</namespace>
+    <namespace key="2">User</namespace>
+    <namespace key="3">User talk</namespace>
+    <namespace key="4">DemoWiki</namespace>
+    <namespace key="5">DemoWIki talk</namespace>
+    <namespace key="6">Image</namespace>
+    <namespace key="7">Image talk</namespace>
+    <namespace key="8">MediaWiki</namespace>
+    <namespace key="9">MediaWiki talk</namespace>
+    <namespace key="10">Template</namespace>
+    <namespace key="11">Template talk</namespace>
+    <namespace key="12">Help</namespace>
+    <namespace key="13">Help talk</namespace>
+    <namespace key="14">Category</namespace>
+    <namespace key="15">Category talk</namespace>
+  </namespaces>
+</siteinfo>
+<page>
+  <title>First page</title>
+  <id>1</id>
+  <revision>
+    <id>1</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 1, rev 1</comment>
+    <text id="1" />
+  </revision>
+  <revision>
+    <id>2</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 1, rev 2</comment>
+    <text id="2" />
+  </revision>
+  <revision>
+    <id>4</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 1, rev 4</comment>
+    <text id="4" />
+  </revision>
+</page>
+<page>
+  <title>Second page</title>
+  <id>2</id>
+  <revision>
+    <id>3</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 2, rev 3</comment>
+    <text id="3" />
+  </revision>
+</page>
+<page>
+  <title>Third page</title>
+  <id>3</id>
+  <revision>
+    <id>5</id>
+    <timestamp>2001-01-15T12:00:00Z</timestamp>
+    <contributor><ip>10.0.0.1</ip></contributor>
+    <comment>page 3, rev 5</comment>
+    <text id="5" />
+  </revision>
+</page>
+</mediawiki>