3 require_once dirname( __FILE__
) . "/../../../maintenance/backupTextPass.inc";
/**
 * Tests for page dumps of BackupDumper, covering the text pass (TextPassDumper)
 */
11 class TextPassDumperTest
extends DumpTestCase
{
// Offsets that are added to the page / revision ids for each further
// iteration of the stub content.
private static $numOfPages = 4;
private static $numOfRevs = 8;

// We'll add several pages, revisions and texts. The following variables hold
// the corresponding ids.

// Page ids.
private $pageId1, $pageId2, $pageId3, $pageId4;

// Revision and text ids of page 1 (a single revision).
private $revId1_1, $textId1_1;

// Revision and text ids of page 2 (four revisions).
private $revId2_1, $textId2_1;
private $revId2_2, $textId2_2;
private $revId2_3, $textId2_3;
private $revId2_4, $textId2_4;

// Revision and text ids of page 3 (two revisions).
private $revId3_1, $textId3_1;
private $revId3_2, $textId3_2;

// Revision and text ids of page 4 (a single revision).
private $revId4_1, $textId4_1;
/**
 * Adds the pages, revisions and texts that the dump tests rely on.
 *
 * Four pages are created: a page with a single revision, a page with four
 * revisions, a page with two revisions that gets deleted right away, and a
 * page in the talk namespace. The resulting page, revision and text ids
 * are stored in the corresponding member variables.
 *
 * Exceptions are not propagated, but stored in
 * $this->exceptionFromAddDBData.
 */
public function addDBData() {
	$this->tablesUsed[] = 'page';
	$this->tablesUsed[] = 'revision';
	$this->tablesUsed[] = 'text';

	try {
		// Simple page
		$title = Title::newFromText( 'BackupDumperTestP1' );
		$page = WikiPage::factory( $title );
		list( $this->revId1_1, $this->textId1_1 ) = $this->addRevision( $page,
			"BackupDumperTestP1Text1", "BackupDumperTestP1Summary1" );
		$this->pageId1 = $page->getId();

		// Page with more than one revision
		$title = Title::newFromText( 'BackupDumperTestP2' );
		$page = WikiPage::factory( $title );
		list( $this->revId2_1, $this->textId2_1 ) = $this->addRevision( $page,
			"BackupDumperTestP2Text1", "BackupDumperTestP2Summary1" );
		list( $this->revId2_2, $this->textId2_2 ) = $this->addRevision( $page,
			"BackupDumperTestP2Text2", "BackupDumperTestP2Summary2" );
		list( $this->revId2_3, $this->textId2_3 ) = $this->addRevision( $page,
			"BackupDumperTestP2Text3", "BackupDumperTestP2Summary3" );
		// Text and summary of the last revision carry trailing whitespace;
		// the tests expect them without it in the dump.
		list( $this->revId2_4, $this->textId2_4 ) = $this->addRevision( $page,
			"BackupDumperTestP2Text4 some additional Text ",
			"BackupDumperTestP2Summary4 extra " );
		$this->pageId2 = $page->getId();

		// Deleted page
		$title = Title::newFromText( 'BackupDumperTestP3' );
		$page = WikiPage::factory( $title );
		list( $this->revId3_1, $this->textId3_1 ) = $this->addRevision( $page,
			"BackupDumperTestP3Text1", "BackupDumperTestP2Summary1" );
		list( $this->revId3_2, $this->textId3_2 ) = $this->addRevision( $page,
			"BackupDumperTestP3Text2", "BackupDumperTestP2Summary2" );
		$this->pageId3 = $page->getId();
		$page->doDeleteArticle( "Testing ;)" );

		// Page from non-default namespace
		$title = Title::newFromText( 'BackupDumperTestP1', NS_TALK );
		$page = WikiPage::factory( $title );
		list( $this->revId4_1, $this->textId4_1 ) = $this->addRevision( $page,
			"Talk about BackupDumperTestP1 Text1",
			"Talk BackupDumperTestP1 Summary1" );
		$this->pageId4 = $page->getId();
	} catch ( Exception $e ) {
		// We'd love to pass $e directly. However, ... see
		// documentation of exceptionFromAddDBData in
		// the parent class (DumpTestCase).
		$this->exceptionFromAddDBData = $e;
	}
}
/**
 * Prepares a test run and asserts that the ids of the four fixture pages
 * are consecutive.
 */
public function setUp() {
	parent::setUp();

	// Since we will restrict dumping by page ranges (to allow
	// working tests, even if the db gets prepopulated by a base
	// class), we have to assert, that the page id are consecutively
	// increasing
	$this->assertEquals(
		array( $this->pageId2, $this->pageId3, $this->pageId4 ),
		array( $this->pageId1 + 1, $this->pageId2 + 1, $this->pageId3 + 1 ),
		"Page ids increasing without holes" );
}
/**
 * Runs a full text pass over a single-iteration stub and checks that the
 * dump contains pages 1, 2 and 4 with the texts stored in the database.
 */
public function testPlain() {
	// Setting up the dump
	$nameStub = $this->setUpStub();
	$nameFull = $this->getNewTempFile();
	$dumper = new TextPassDumper( array ( "--stub=file:" . $nameStub,
		"--output=file:" . $nameFull ) );
	$dumper->reporting = false;
	$dumper->setDb( $this->db );

	// Performing the dump
	$dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );

	// Checking for correctness of the dumped data
	$this->assertDumpStart( $nameFull );

	// Page 1
	$this->assertPageStart( $this->pageId1, NS_MAIN, "BackupDumperTestP1" );
	$this->assertRevision( $this->revId1_1, "BackupDumperTestP1Summary1",
		$this->textId1_1, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
		"BackupDumperTestP1Text1" );
	$this->assertPageEnd();

	// Page 2
	$this->assertPageStart( $this->pageId2, NS_MAIN, "BackupDumperTestP2" );
	$this->assertRevision( $this->revId2_1, "BackupDumperTestP2Summary1",
		$this->textId2_1, false, "jprywrymfhysqllua29tj3sc7z39dl2",
		"BackupDumperTestP2Text1", $this->revId2_2 );
	$this->assertRevision( $this->revId2_2, "BackupDumperTestP2Summary2",
		$this->textId2_2, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
		"BackupDumperTestP2Text2", $this->revId2_3 );
	$this->assertRevision( $this->revId2_3, "BackupDumperTestP2Summary3",
		$this->textId2_3, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
		"BackupDumperTestP2Text3", $this->revId2_4 );
	$this->assertRevision( $this->revId2_4, "BackupDumperTestP2Summary4 extra",
		$this->textId2_4, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
		"BackupDumperTestP2Text4 some additional Text" );
	$this->assertPageEnd();

	// Page 3
	// -> Page is marked deleted. Hence not visible

	// Page 4
	$this->assertPageStart( $this->pageId4, NS_TALK, "Talk:BackupDumperTestP1" );
	$this->assertRevision( $this->revId4_1, "Talk BackupDumperTestP1 Summary1",
		$this->textId4_1, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
		"Talk about BackupDumperTestP1 Text1" );
	$this->assertPageEnd();

	$this->assertDumpEnd();
}
/**
 * Runs a full text pass with a mocked prefetch source and checks that the
 * prefetched text is used for the two revisions the mock knows about,
 * while all other revisions get the text stored in the database.
 */
public function testPrefetchPlain() {
	// The mapping between ids and text, for the hits of the prefetch mock
	$prefetchMap = array(
		array( $this->pageId1, $this->revId1_1, "Prefetch_________1Text1" ),
		array( $this->pageId2, $this->revId2_3, "Prefetch_________2Text3" )
	);

	// The mock itself. prefetch() has to be asked once for each of the six
	// revisions of the stub.
	$prefetchMock = $this->getMock( 'BaseDump', array( 'prefetch' ), array(), '', false );
	$prefetchMock->expects( $this->exactly( 6 ) )
		->method( 'prefetch' )
		->will( $this->returnValueMap( $prefetchMap ) );

	// Setting up of the dump
	$nameStub = $this->setUpStub();
	$nameFull = $this->getNewTempFile();
	$dumper = new TextPassDumper( array ( "--stub=file:"
		. $nameStub, "--output=file:" . $nameFull ) );
	$dumper->prefetch = $prefetchMock;
	$dumper->reporting = false;
	$dumper->setDb( $this->db );

	// Performing the dump
	$dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );

	// Checking for correctness of the dumped data
	$this->assertDumpStart( $nameFull );

	// Page 1
	$this->assertPageStart( $this->pageId1, NS_MAIN, "BackupDumperTestP1" );
	// Prefetch kicks in. This is still the SHA-1 of the original text,
	// But the actual text (with different SHA-1) comes from prefetch.
	$this->assertRevision( $this->revId1_1, "BackupDumperTestP1Summary1",
		$this->textId1_1, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
		"Prefetch_________1Text1" );
	$this->assertPageEnd();

	// Page 2
	$this->assertPageStart( $this->pageId2, NS_MAIN, "BackupDumperTestP2" );
	$this->assertRevision( $this->revId2_1, "BackupDumperTestP2Summary1",
		$this->textId2_1, false, "jprywrymfhysqllua29tj3sc7z39dl2",
		"BackupDumperTestP2Text1", $this->revId2_2 );
	$this->assertRevision( $this->revId2_2, "BackupDumperTestP2Summary2",
		$this->textId2_2, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
		"BackupDumperTestP2Text2", $this->revId2_3 );
	// Prefetch kicks in. This is still the SHA-1 of the original text,
	// But the actual text (with different SHA-1) comes from prefetch.
	$this->assertRevision( $this->revId2_3, "BackupDumperTestP2Summary3",
		$this->textId2_3, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
		"Prefetch_________2Text3", $this->revId2_4 );
	$this->assertRevision( $this->revId2_4, "BackupDumperTestP2Summary4 extra",
		$this->textId2_4, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
		"BackupDumperTestP2Text4 some additional Text" );
	$this->assertPageEnd();

	// Page 3
	// -> Page is marked deleted. Hence not visible

	// Page 4
	$this->assertPageStart( $this->pageId4, NS_TALK, "Talk:BackupDumperTestP1" );
	$this->assertRevision( $this->revId4_1, "Talk BackupDumperTestP1 Summary1",
		$this->textId4_1, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
		"Talk about BackupDumperTestP1 Text1" );
	$this->assertPageEnd();

	$this->assertDumpEnd();
}
/**
 * Ensures that checkpoint dumps are used and written, by successively increasing the
 * stub size and dumping until the duration crosses a threshold.
 *
 * Afterwards, every file in the output directory is read in sorted order and
 * checked to contain the expected pages; more than one checkpoint file has
 * to be present.
 *
 * @param $checkpointFormat string: Either "file" for plain text or "gzip" for gzipped
 *           checkpoint files.
 */
private function checkpointHelper( $checkpointFormat = "file" ) {
	// Getting temporary names
	$nameStub = $this->getNewTempFile();
	$nameOutputDir = $this->getNewTempDirectory();

	$stderr = fopen( 'php://output', 'a' );
	if ( $stderr === false ) {
		$this->fail( "Could not open stream for stderr" );
	}

	$iterations = 32; // We'll start with that many iterations of revisions in stub
	$lastDuration = 0; // No dump performed yet, so the loop below runs at least once
	$minDuration = 2; // We want the dump to take at least this many seconds
	$checkpointAfter = 0.5; // Generate checkpoint after this many seconds

	// Until a dump takes at least $minDuration seconds, perform a dump and check
	// duration. If the dump did not take long enough increase the iteration
	// count, to generate a bigger stub file next time.
	while ( $lastDuration < $minDuration ) {

		// Setting up the dump
		wfRecursiveRemoveDir( $nameOutputDir );
		$this->assertTrue( wfMkdirParents( $nameOutputDir ),
			"Creating temporary output directory " );
		$this->setUpStub( $nameStub, $iterations );
		$dumper = new TextPassDumper( array ( "--stub=file:" . $nameStub,
			"--output=" . $checkpointFormat . ":" . $nameOutputDir . "/full",
			"--maxtime=1" /*This is in minutes. Fixup is below*/,
			"--checkpointfile=checkpoint-%s-%s.xml.gz" ) );
		$dumper->setDb( $this->db );
		$dumper->maxTimeAllowed = $checkpointAfter; // Patching maxTime from 1 minute
		$dumper->stderr = $stderr;

		// The actual dump and taking time
		$ts_before = wfTime();
		$dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );
		$ts_after = wfTime();
		$lastDuration = $ts_after - $ts_before;

		// Handling increasing the iteration count for the stubs
		if ( $lastDuration < $minDuration ) {
			$old_iterations = $iterations;
			if ( $lastDuration > 0.2 ) {
				// lastDuration is big enough, to allow an educated guess
				$factor = ( $minDuration + 0.5 ) / $lastDuration;
				if ( ( $factor > 1.1 ) && ( $factor < 100 ) ) {
					// educated guess is reasonable
					$iterations = (int)( $iterations * $factor );
				}
			}

			if ( $old_iterations == $iterations ) {
				// Heuristics were not applied, so we just *2.
				$iterations *= 2;
			}

			$this->assertLessThan( 50000, $iterations,
				"Emergency stop against infinitely increasing iteration "
				. "count ( last duration: $lastDuration )" );
		}
	}

	// The dump (hopefully) did take long enough to produce more than one
	// checkpoint file.
	//
	// We now check all the checkpoint files for validity.

	$files = scandir( $nameOutputDir );
	$this->assertTrue( asort( $files ), "Sorting files in temporary directory" );
	$fileOpened = false;
	$lookingForPage = 1;
	$checkpointFiles = 0;

	// Each run of the following loop body tries to handle exactly 1 /page/ (not
	// iteration of stub content). $i is only increased after having treated page 4.
	for ( $i = 0 ; $i < $iterations ; ) {

		// 1. Assuring a file is opened and ready. Skipping across header if
		//    necessary.
		if ( ! $fileOpened ) {
			$this->assertNotEmpty( $files, "No more existing dump files, "
				. "but not yet all pages found" );
			$fname = array_shift( $files );
			while ( $fname === "." || $fname === ".." ) {
				$this->assertNotEmpty( $files, "No more existing dump"
					. " files, but not yet all pages found" );
				$fname = array_shift( $files );
			}
			if ( $checkpointFormat == "gzip" ) {
				$this->gunzip( $nameOutputDir . "/" . $fname );
			}
			$this->assertDumpStart( $nameOutputDir . "/" . $fname );
			$fileOpened = true;
			$checkpointFiles++;
		}

		// 2. Performing a single page check
		switch ( $lookingForPage ) {
		case 1:
			// Page 1
			$this->assertPageStart( $this->pageId1 + $i * self::$numOfPages, NS_MAIN,
				"BackupDumperTestP1" );
			$this->assertRevision( $this->revId1_1 + $i * self::$numOfRevs, "BackupDumperTestP1Summary1",
				$this->textId1_1, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
				"BackupDumperTestP1Text1" );
			$this->assertPageEnd();

			$lookingForPage = 2;
			break;

		case 2:
			// Page 2
			$this->assertPageStart( $this->pageId2 + $i * self::$numOfPages, NS_MAIN,
				"BackupDumperTestP2" );
			$this->assertRevision( $this->revId2_1 + $i * self::$numOfRevs, "BackupDumperTestP2Summary1",
				$this->textId2_1, false, "jprywrymfhysqllua29tj3sc7z39dl2",
				"BackupDumperTestP2Text1", $this->revId2_2 + $i * self::$numOfRevs );
			$this->assertRevision( $this->revId2_2 + $i * self::$numOfRevs, "BackupDumperTestP2Summary2",
				$this->textId2_2, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
				"BackupDumperTestP2Text2", $this->revId2_3 + $i * self::$numOfRevs );
			$this->assertRevision( $this->revId2_3 + $i * self::$numOfRevs, "BackupDumperTestP2Summary3",
				$this->textId2_3, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
				"BackupDumperTestP2Text3", $this->revId2_4 + $i * self::$numOfRevs );
			$this->assertRevision( $this->revId2_4 + $i * self::$numOfRevs,
				"BackupDumperTestP2Summary4 extra",
				$this->textId2_4, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
				"BackupDumperTestP2Text4 some additional Text" );
			$this->assertPageEnd();

			// Page 3 is deleted and not part of the stub, so page 4 is next.
			$lookingForPage = 4;
			break;

		case 4:
			// Page 4
			$this->assertPageStart( $this->pageId4 + $i * self::$numOfPages, NS_TALK,
				"Talk:BackupDumperTestP1" );
			$this->assertRevision( $this->revId4_1 + $i * self::$numOfRevs,
				"Talk BackupDumperTestP1 Summary1",
				$this->textId4_1, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
				"Talk about BackupDumperTestP1 Text1" );
			$this->assertPageEnd();

			$lookingForPage = 1;

			// We dealt with the whole iteration.
			$i++;
			break;

		default:
			$this->fail( "Bad setting for lookingForPage ($lookingForPage)" );
		}

		// 3. Checking for the end of the current checkpoint file
		if ( $this->xml->nodeType == XMLReader::END_ELEMENT
			&& $this->xml->name == "mediawiki" ) {

			$this->assertDumpEnd();
			$fileOpened = false;
		}
	}

	// Assuring we completely read all files ...
	$this->assertFalse( $fileOpened, "Currently read file still open?" );
	$this->assertEmpty( $files, "Remaining unchecked files" );

	// ... and have dealt with more than one checkpoint file
	$this->assertGreaterThan( 1, $checkpointFiles, "# of checkpoint files" );

	$this->expectETAOutput();
}
/**
 * Tests that checkpoint generation works for plain text checkpoint files.
 *
 * The helper keeps dumping until a dump took at least two seconds, hence
 * the test is marked as large.
 *
 * @group large
 */
public function testCheckpointPlain() {
	$this->checkpointHelper();
}
/**
 * Tests that checkpoint generation works for gzipped checkpoint files.
 *
 * We keep this test in addition to the simpler self::testCheckpointPlain, as there
 * were once problems when the used sinks were DumpPipeOutputs.
 *
 * xmldumps-backup typically uses bzip2 instead of gzip. However, as bzip2 requires
 * PHP extensions, we go for gzip instead, which triggers the same relevant code
 * paths while still being testable on more systems.
 *
 * The helper keeps dumping until a dump took at least two seconds, hence
 * the test is marked as large.
 *
 * @group large
 */
public function testCheckpointGzip() {
	$this->checkpointHelper( "gzip" );
}
/**
 * Creates a stub file that is used for testing the text pass of dumps
 *
 * @param $fname string: (Optional) Absolute name of the file to write
 *           the stub into. If this parameter is null, a new temporary
 *           file is generated that is automatically removed upon
 *           test teardown.
 * @param $iterations integer: (Optional) specifies how often the block
 *           of 3 pages should go into the stub file. The page and
 *           revision id increase further and further, while the text
 *           id of the first iteration is reused. The pages and revision
 *           of iteration > 1 have no corresponding representation in the
 *           database.
 * @return string absolute filename of the stub
 */
429 private function setUpStub( $fname = null, $iterations = 1 ) {
430 if ( $fname === null ) {
431 $fname = $this->getNewTempFile();
433 $header = '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.7/" '
434 . 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
435 . 'xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.7/ '
436 . 'http://www.mediawiki.org/xml/export-0.7.xsd" version="0.7" xml:lang="en">
438 <sitename>wikisvn</sitename>
439 <base>http://localhost/wiki-svn/index.php/Main_Page</base>
440 <generator>MediaWiki 1.20alpha</generator>
441 <case>first-letter</case>
443 <namespace key="-2" case="first-letter">Media</namespace>
444 <namespace key="-1" case="first-letter">Special</namespace>
445 <namespace key="0" case="first-letter" />
446 <namespace key="1" case="first-letter">Talk</namespace>
447 <namespace key="2" case="first-letter">User</namespace>
448 <namespace key="3" case="first-letter">User talk</namespace>
449 <namespace key="4" case="first-letter">Wikisvn</namespace>
450 <namespace key="5" case="first-letter">Wikisvn talk</namespace>
451 <namespace key="6" case="first-letter">File</namespace>
452 <namespace key="7" case="first-letter">File talk</namespace>
453 <namespace key="8" case="first-letter">MediaWiki</namespace>
454 <namespace key="9" case="first-letter">MediaWiki talk</namespace>
455 <namespace key="10" case="first-letter">Template</namespace>
456 <namespace key="11" case="first-letter">Template talk</namespace>
457 <namespace key="12" case="first-letter">Help</namespace>
458 <namespace key="13" case="first-letter">Help talk</namespace>
459 <namespace key="14" case="first-letter">Category</namespace>
460 <namespace key="15" case="first-letter">Category talk</namespace>
464 $tail = '</mediawiki>
468 $iterations = intval( $iterations );
469 for ( $i = 0; $i < $iterations; $i++
) {
472 <title>BackupDumperTestP1</title>
474 <id>' . ( $this->pageId1 +
$i * self
::$numOfPages ) . '</id>
476 <id>' . ( $this->revId1_1 +
$i * self
::$numOfRevs ) . '</id>
477 <timestamp>2012-04-01T16:46:05Z</timestamp>
481 <comment>BackupDumperTestP1Summary1</comment>
482 <sha1>0bolhl6ol7i6x0e7yq91gxgaan39j87</sha1>
483 <text id="' . $this->textId1_1
. '" bytes="23" />
488 <title>BackupDumperTestP2</title>
490 <id>' . ( $this->pageId2 +
$i * self
::$numOfPages ) . '</id>
492 <id>' . ( $this->revId2_1 +
$i * self
::$numOfRevs ) . '</id>
493 <parentid>' . ( $this->revId2_2 +
$i * self
::$numOfRevs ) . '</parentid>
494 <timestamp>2012-04-01T16:46:05Z</timestamp>
498 <comment>BackupDumperTestP2Summary1</comment>
499 <sha1>jprywrymfhysqllua29tj3sc7z39dl2</sha1>
500 <text id="' . $this->textId2_1
. '" bytes="23" />
503 <id>' . ( $this->revId2_2 +
$i * self
::$numOfRevs ) . '</id>
504 <parentid>' . ( $this->revId2_3 +
$i * self
::$numOfRevs ) . '</parentid>
505 <timestamp>2012-04-01T16:46:05Z</timestamp>
509 <comment>BackupDumperTestP2Summary2</comment>
510 <sha1>b7vj5ks32po5m1z1t1br4o7scdwwy95</sha1>
511 <text id="' . $this->textId2_2
. '" bytes="23" />
514 <id>' . ( $this->revId2_3 +
$i * self
::$numOfRevs ) . '</id>
515 <parentid>' . ( $this->revId2_4 +
$i * self
::$numOfRevs ) . '</parentid>
516 <timestamp>2012-04-01T16:46:05Z</timestamp>
520 <comment>BackupDumperTestP2Summary3</comment>
521 <sha1>jfunqmh1ssfb8rs43r19w98k28gg56r</sha1>
522 <text id="' . $this->textId2_3
. '" bytes="23" />
525 <id>' . ( $this->revId2_4 +
$i * self
::$numOfRevs ) . '</id>
526 <timestamp>2012-04-01T16:46:05Z</timestamp>
530 <comment>BackupDumperTestP2Summary4 extra</comment>
531 <sha1>6o1ciaxa6pybnqprmungwofc4lv00wv</sha1>
532 <text id="' . $this->textId2_4
. '" bytes="44" />
536 // page 3 not in stub
539 <title>Talk:BackupDumperTestP1</title>
541 <id>' . ( $this->pageId4 +
$i * self
::$numOfPages ) . '</id>
543 <id>' . ( $this->revId4_1 +
$i * self
::$numOfRevs ) . '</id>
544 <timestamp>2012-04-01T16:46:05Z</timestamp>
548 <comment>Talk BackupDumperTestP1 Summary1</comment>
549 <sha1>nktofwzd0tl192k3zfepmlzxoax1lpe</sha1>
550 <text id="' . $this->textId4_1
. '" bytes="35" />
554 $content .= $page1 . $page2 . $page4;
557 $this->assertEquals( strlen( $content ), file_put_contents(
558 $fname, $content ), "Length of prepared stub" );