3 require_once dirname( __FILE__
) . "/../../../maintenance/backupTextPass.inc";
6 * Tests for the text pass of dumps (TextPassDumper)
11 class TextPassDumperTest
extends DumpTestCase
{
13 // We'll add several pages, revisions and texts. The following variables hold the corresponding ids.
// Ids of the pages created in addDBData().
// NOTE(review): $pageId5 is not assigned or read anywhere in the visible code.
15 private $pageId1, $pageId2, $pageId3, $pageId4, $pageId5;
// Revision and text ids, named <kind><page>_<revision>.
// Page 1: one revision.
16 private $revId1_1, $textId1_1;
// Page 2: four revisions.
17 private $revId2_1, $textId2_1, $revId2_2, $textId2_2;
18 private $revId2_3, $textId2_3, $revId2_4, $textId2_4;
// Page 3: two revisions (the page is deleted again in addDBData()).
19 private $revId3_1, $textId3_1, $revId3_2, $textId3_2;
// Page 4: one revision (the page is created in NS_TALK).
20 private $revId4_1, $textId4_1;
// Creates the fixture pages used by the tests in this class and stores their
// page, revision and text ids in the member variables. An exception raised
// while doing so is stored in $this->exceptionFromAddDBData.
22 function addDBData() {
// Append the tables touched below to $this->tablesUsed
// (presumably so the test framework cleans them up again; confirm in the base class).
23 $this->tablesUsed
[] = 'page';
24 $this->tablesUsed
[] = 'revision';
25 $this->tablesUsed
[] = 'text';
// Page with a single revision.
// addRevision() hands back a pair that is unpacked into ( revision id, text id ).
29 $title = Title
::newFromText( 'BackupDumperTestP1' );
30 $page = WikiPage
::factory( $title );
31 list( $this->revId1_1
, $this->textId1_1
) = $this->addRevision( $page,
32 "BackupDumperTestP1Text1", "BackupDumperTestP1Summary1" );
33 $this->pageId1
= $page->getId();
35 // Page with more than one revision
36 $title = Title
::newFromText( 'BackupDumperTestP2' );
37 $page = WikiPage
::factory( $title );
38 list( $this->revId2_1
, $this->textId2_1
) = $this->addRevision( $page,
39 "BackupDumperTestP2Text1", "BackupDumperTestP2Summary1" );
40 list( $this->revId2_2
, $this->textId2_2
) = $this->addRevision( $page,
41 "BackupDumperTestP2Text2", "BackupDumperTestP2Summary2" );
42 list( $this->revId2_3
, $this->textId2_3
) = $this->addRevision( $page,
43 "BackupDumperTestP2Text3", "BackupDumperTestP2Summary3" );
// The text and summary of this revision end in a space; the tests in this
// class expect them back without the trailing space.
44 list( $this->revId2_4
, $this->textId2_4
) = $this->addRevision( $page,
45 "BackupDumperTestP2Text4 some additional Text ",
46 "BackupDumperTestP2Summary4 extra " );
47 $this->pageId2
= $page->getId();
// Page with two revisions that gets deleted right after its creation.
// NOTE(review): the summaries below reuse the "...P2Summary..." strings of
// page 2; this looks like a copy-paste leftover. Confirm before changing.
50 $title = Title
::newFromText( 'BackupDumperTestP3' );
51 $page = WikiPage
::factory( $title );
52 list( $this->revId3_1
, $this->textId3_1
) = $this->addRevision( $page,
53 "BackupDumperTestP3Text1", "BackupDumperTestP2Summary1" );
54 list( $this->revId3_2
, $this->textId3_2
) = $this->addRevision( $page,
55 "BackupDumperTestP3Text2", "BackupDumperTestP2Summary2" );
56 $this->pageId3
= $page->getId();
57 $page->doDeleteArticle( "Testing ;)" );
59 // Page from non-default namespace
60 $title = Title
::newFromText( 'BackupDumperTestP1', NS_TALK
);
61 $page = WikiPage
::factory( $title );
62 list( $this->revId4_1
, $this->textId4_1
) = $this->addRevision( $page,
63 "Talk about BackupDumperTestP1 Text1",
64 "Talk BackupDumperTestP1 Summary1" );
65 $this->pageId4
= $page->getId();
66 } catch ( Exception
$e ) {
67 // We'd love to pass $e directly. However, ... see
68 // documentation of exceptionFromAddDBData in
70 $this->exceptionFromAddDBData
= $e;
// Per-test set-up: checks the page id layout that the tests in this class
// rely on.
75 public function setUp() {
78 // Since we will restrict dumping by page ranges (to allow
79 // working tests, even if the db gets prepopulated by a base
80 // class), we have to assert that the page ids are consecutively
// increasing: the ids of pages 2, 3 and 4 must each equal the id of the
// preceding page plus one.
83 array( $this->pageId2
, $this->pageId3
, $this->pageId4
),
84 array( $this->pageId1 +
1, $this->pageId2 +
1, $this->pageId3 +
1 ),
85 "Page ids increasing without holes" );
// Runs a TextPassDumper over a stub written by setUpStub() and checks that
// each revision in the output carries the text that addDBData() stored for it.
89 function testPlain() {
90 // Setting up the dump
91 $nameStub = $this->setUpStub();
92 $nameFull = $this->getNewTempFile();
93 $dumper = new TextPassDumper( array ( "--stub=file:" . $nameStub,
94 "--output=file:" . $nameFull ) );
// NOTE(review): presumably switches off the dumper's progress reporting; confirm in TextPassDumper.
95 $dumper->reporting
= false;
96 $dumper->setDb( $this->db
);
98 // Performing the dump
99 $dumper->dump( WikiExporter
::FULL
, WikiExporter
::TEXT
);
101 // Checking for correctness of the dumped data
102 $this->assertDumpStart( $nameFull );
// Page 1: a single revision
105 $this->assertPageStart( $this->pageId1
, NS_MAIN
, "BackupDumperTestP1" );
106 $this->assertRevision( $this->revId1_1
, "BackupDumperTestP1Summary1",
107 $this->textId1_1
, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
108 "BackupDumperTestP1Text1" );
109 $this->assertPageEnd();
// Page 2: four revisions, expected in the order they were added
112 $this->assertPageStart( $this->pageId2
, NS_MAIN
, "BackupDumperTestP2" );
113 $this->assertRevision( $this->revId2_1
, "BackupDumperTestP2Summary1",
114 $this->textId2_1
, false, "jprywrymfhysqllua29tj3sc7z39dl2",
115 "BackupDumperTestP2Text1" );
116 $this->assertRevision( $this->revId2_2
, "BackupDumperTestP2Summary2",
117 $this->textId2_2
, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
118 "BackupDumperTestP2Text2" );
119 $this->assertRevision( $this->revId2_3
, "BackupDumperTestP2Summary3",
120 $this->textId2_3
, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
121 "BackupDumperTestP2Text3" );
122 $this->assertRevision( $this->revId2_4
, "BackupDumperTestP2Summary4 extra",
123 $this->textId2_4
, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
124 "BackupDumperTestP2Text4 some additional Text" );
125 $this->assertPageEnd();
// Page 3: nothing is asserted for it
128 // -> Page is marked deleted. Hence not visible
// Page 4: a single revision in the Talk namespace
131 $this->assertPageStart( $this->pageId4
, NS_TALK
, "Talk:BackupDumperTestP1" );
132 $this->assertRevision( $this->revId4_1
, "Talk BackupDumperTestP1 Summary1",
133 $this->textId4_1
, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
134 "Talk about BackupDumperTestP1 Text1" );
135 $this->assertPageEnd();
137 $this->assertDumpEnd();
// Same dump as testPlain(), but with a mocked prefetch source attached to the
// dumper. For the two revisions listed in $prefetchMap the dumped text is
// expected to be the prefetched one; all other revisions keep their stored text.
140 function testPrefetchPlain() {
141 // The mapping between ids and text, for the hits of the prefetch mock
// Each entry is ( page id, revision id, text handed back by the mock ).
142 $prefetchMap = array(
143 array( $this->pageId1
, $this->revId1_1
, "Prefetch_________1Text1" ),
144 array( $this->pageId2
, $this->revId2_3
, "Prefetch_________2Text3" )
// Only prefetch() is mocked. The trailing FALSE is PHPUnit's
// "call original constructor" flag, so BaseDump's constructor is not run.
148 $prefetchMock = $this->getMock( 'BaseDump', array( 'prefetch' ), array(), '', FALSE );
// Six calls match the number of revisions setUpStub() writes for a single
// iteration (1 for page 1, 4 for page 2, 1 for page 4).
149 $prefetchMock->expects( $this->exactly( 6 ) )
150 ->method( 'prefetch' )
151 ->will( $this->returnValueMap( $prefetchMap ) );
153 // Setting up of the dump
154 $nameStub = $this->setUpStub();
155 $nameFull = $this->getNewTempFile();
156 $dumper = new TextPassDumper( array ( "--stub=file:"
157 . $nameStub, "--output=file:" . $nameFull ) );
// Attach the mock directly to the dumper's prefetch member.
158 $dumper->prefetch
= $prefetchMock;
159 $dumper->reporting
= false;
160 $dumper->setDb( $this->db
);
162 // Performing the dump
163 $dumper->dump( WikiExporter
::FULL
, WikiExporter
::TEXT
);
165 // Checking for correctness of the dumped data
166 $this->assertDumpStart( $nameFull );
// Page 1: its only revision is served by the prefetch mock
169 $this->assertPageStart( $this->pageId1
, NS_MAIN
, "BackupDumperTestP1" );
170 // Prefetch kicks in. This is still the SHA-1 of the original text,
171 // But the actual text (with different SHA-1) comes from prefetch.
172 $this->assertRevision( $this->revId1_1
, "BackupDumperTestP1Summary1",
173 $this->textId1_1
, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
174 "Prefetch_________1Text1" );
175 $this->assertPageEnd();
// Page 2: only the third revision is served by the prefetch mock
178 $this->assertPageStart( $this->pageId2
, NS_MAIN
, "BackupDumperTestP2" );
179 $this->assertRevision( $this->revId2_1
, "BackupDumperTestP2Summary1",
180 $this->textId2_1
, false, "jprywrymfhysqllua29tj3sc7z39dl2",
181 "BackupDumperTestP2Text1" );
182 $this->assertRevision( $this->revId2_2
, "BackupDumperTestP2Summary2",
183 $this->textId2_2
, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
184 "BackupDumperTestP2Text2" );
185 // Prefetch kicks in. This is still the SHA-1 of the original text,
186 // But the actual text (with different SHA-1) comes from prefetch.
187 $this->assertRevision( $this->revId2_3
, "BackupDumperTestP2Summary3",
188 $this->textId2_3
, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
189 "Prefetch_________2Text3" );
190 $this->assertRevision( $this->revId2_4
, "BackupDumperTestP2Summary4 extra",
191 $this->textId2_4
, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
192 "BackupDumperTestP2Text4 some additional Text" );
193 $this->assertPageEnd();
// Page 3: nothing is asserted for it
196 // -> Page is marked deleted. Hence not visible
// Page 4: not listed in $prefetchMap, so the stored text is expected
199 $this->assertPageStart( $this->pageId4
, NS_TALK
, "Talk:BackupDumperTestP1" );
200 $this->assertRevision( $this->revId4_1
, "Talk BackupDumperTestP1 Summary1",
201 $this->textId4_1
, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
202 "Talk about BackupDumperTestP1 Text1" );
203 $this->assertPageEnd();
205 $this->assertDumpEnd();
210 * Ensures that checkpoint dumps are used and written, by successively increasing the
211 * stub size and dumping until the duration crosses a threshold.
213 * @param $checkpointFormat string: Either "file" for plain text or "gzip" for gzipped
216 private function checkpointHelper( $checkpointFormat = "file" ) {
217 // Getting temporary names
218 $nameStub = $this->getNewTempFile();
219 $nameOutputDir = $this->getNewTempDirectory();
// The stream handed to the dumper as its stderr (see $dumper->stderr below)
// is php://output, not the process's real stderr.
221 $stderr = fopen( 'php://output', 'a' );
222 if ( $stderr === FALSE ) {
223 $this->fail( "Could not open stream for stderr" );
226 $iterations = 32; // We'll start with that many iterations of revisions in stub
228 $minDuration = 2; // We want the dump to take at least this many seconds
229 $checkpointAfter = 0.5; // Generate checkpoint after this many seconds
232 // Until a dump takes at least $minDuration seconds, perform a dump and check
233 // duration. If the dump did not take long enough increase the iteration
234 // count, to generate a bigger stub file next time.
235 while ( $lastDuration < $minDuration ) {
237 // Setting up the dump
// Every attempt starts from a freshly created, empty output directory.
238 wfRecursiveRemoveDir( $nameOutputDir );
239 $this->assertTrue( wfMkdirParents( $nameOutputDir ),
240 "Creating temporary output directory " );
241 $this->setUpStub( $nameStub, $iterations );
// NOTE(review): the checkpoint file name pattern ends in ".xml.gz" for
// every $checkpointFormat, including the plain "file" format.
242 $dumper = new TextPassDumper( array ( "--stub=file:" . $nameStub,
243 "--output=" . $checkpointFormat . ":" . $nameOutputDir . "/full",
244 "--maxtime=1" /*This is in minutes. Fixup is below*/,
245 "--checkpointfile=checkpoint-%s-%s.xml.gz" ) );
246 $dumper->setDb( $this->db
);
247 $dumper->maxTimeAllowed
= $checkpointAfter; // Patching maxTime from 1 minute
248 $dumper->stderr
= $stderr;
250 // The actual dump and taking time
251 $ts_before = wfTime();
252 $dumper->dump( WikiExporter
::FULL
, WikiExporter
::TEXT
);
253 $ts_after = wfTime();
254 $lastDuration = $ts_after - $ts_before;
256 // Handling increasing the iteration count for the stubs
257 if ( $lastDuration < $minDuration ) {
258 $old_iterations = $iterations;
259 if ( $lastDuration > 0.2 ) {
260 // lastDuration is big enough, to allow an educated guess
// Scale the iteration count so that the next run aims at
// $minDuration + 0.5 seconds (assuming the duration grows roughly
// linearly with the iteration count).
261 $factor = ( $minDuration +
0.5 ) / $lastDuration;
262 if ( ( $factor > 1.1 ) && ( $factor < 100 ) ) {
263 // educated guess is reasonable
264 $iterations = (int)( $iterations * $factor );
268 if ( $old_iterations == $iterations ) {
269 // Heuristics were not applied, so we just *2.
273 $this->assertLessThan( 50000, $iterations,
274 "Emergency stop against infinitely increasing iteration "
275 . "count ( last duration: $lastDuration )" );
279 // The dump (hopefully) did take long enough to produce more than one
282 // We now check all the checkpoint files for validity.
// asort() sorts by value and keeps the keys; the entries are consumed
// front to back with array_shift() below.
284 $files = scandir( $nameOutputDir );
285 $this->assertTrue( asort( $files ), "Sorting files in temporary directory" );
288 $checkpointFiles = 0;
290 // Each run of the following loop body tries to handle exactly 1 /page/ (not
291 // iteration of stub content). $i is only increased after having treated page 4.
292 for ( $i = 0 ; $i < $iterations ; ) {
294 // 1. Assuring a file is opened and ready. Skipping across header if
296 if ( ! $fileOpened ) {
297 $this->assertNotEmpty( $files, "No more existing dump files, "
298 . "but not yet all pages found" );
299 $fname = array_shift( $files );
// Skip the "." and ".." entries that scandir() returns.
300 while ( $fname == "." ||
$fname == ".." ) {
301 $this->assertNotEmpty( $files, "No more existing dump"
302 . " files, but not yet all pages found" );
303 $fname = array_shift( $files );
// Gzipped checkpoint files are passed through gunzip() before being read.
305 if ( $checkpointFormat == "gzip" ) {
306 $this->gunzip( $nameOutputDir . "/" . $fname );
308 $this->assertDumpStart( $nameOutputDir . "/" . $fname );
313 // 2. Performing a single page check
314 switch ( $lookingForPage ) {
// Page 1. The "+ $i * 4" matches the page id offset setUpStub() applies
// per iteration; revision and text ids are the same in every iteration.
317 $this->assertPageStart( $this->pageId1 +
$i * 4, NS_MAIN
,
318 "BackupDumperTestP1" );
319 $this->assertRevision( $this->revId1_1
, "BackupDumperTestP1Summary1",
320 $this->textId1_1
, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
321 "BackupDumperTestP1Text1" );
322 $this->assertPageEnd();
// Page 2
329 $this->assertPageStart( $this->pageId2 +
$i * 4, NS_MAIN
,
330 "BackupDumperTestP2" );
331 $this->assertRevision( $this->revId2_1
, "BackupDumperTestP2Summary1",
332 $this->textId2_1
, false, "jprywrymfhysqllua29tj3sc7z39dl2",
333 "BackupDumperTestP2Text1" );
334 $this->assertRevision( $this->revId2_2
, "BackupDumperTestP2Summary2",
335 $this->textId2_2
, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
336 "BackupDumperTestP2Text2" );
337 $this->assertRevision( $this->revId2_3
, "BackupDumperTestP2Summary3",
338 $this->textId2_3
, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
339 "BackupDumperTestP2Text3" );
340 $this->assertRevision( $this->revId2_4
,
341 "BackupDumperTestP2Summary4 extra",
342 $this->textId2_4
, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
343 "BackupDumperTestP2Text4 some additional Text" );
344 $this->assertPageEnd();
// Page 4 (page 3 is not part of the stub)
351 $this->assertPageStart( $this->pageId4 +
$i * 4, NS_TALK
,
352 "Talk:BackupDumperTestP1" );
353 $this->assertRevision( $this->revId4_1
,
354 "Talk BackupDumperTestP1 Summary1",
355 $this->textId4_1
, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
356 "Talk about BackupDumperTestP1 Text1" );
357 $this->assertPageEnd();
361 // We dealt with the whole iteration.
366 $this->fail( "Bad setting for lookingForPage ($lookingForPage)" );
369 // 3. Checking for the end of the current checkpoint file
// The file is finished once the reader sits on the closing </mediawiki> element.
370 if ( $this->xml
->nodeType
== XMLReader
::END_ELEMENT
371 && $this->xml
->name
== "mediawiki" ) {
373 $this->assertDumpEnd();
378 // Assuring we completely read all files ...
379 $this->assertFalse( $fileOpened, "Currently read file still open?" );
380 $this->assertEmpty( $files, "Remaining unchecked files" );
382 // ... and have dealt with more than one checkpoint file
383 $this->assertGreaterThan( 1, $checkpointFiles, "# of checkpoint files" );
385 $this->expectETAOutput();
// Runs the checkpoint test with checkpointHelper()'s default format ("file").
391 function testCheckpointPlain() {
392 $this->checkpointHelper();
396 * Tests that checkpoint generation in gzip format works.
398 * We keep this test in addition to the simpler self::testCheckpointPlain, as there
399 * were once problems when the used sinks were DumpPipeOutputs.
401 * xmldumps-backup typically uses bzip2 instead of gzip. However, as bzip2 requires
402 * PHP extensions, we go for gzip instead, which triggers the same relevant code
403 * paths while still being testable on more systems.
407 function testCheckpointGzip() {
// "gzip" is used as the dumper's output format instead of the default "file";
// checkpointHelper() then gunzips each output file before checking it.
408 $this->checkpointHelper( "gzip" );
413 * Creates a stub file that is used for testing the text pass of dumps
415 * @param $fname string: (Optional) Absolute name of the file to write
416 * the stub into. If this parameter is null, a new temporary
417 * file is generated that is automatically removed upon
419 * @param $iterations integer: (Optional) specifies how often the block
420 * of 3 pages should go into the stub file. The page ids
421 * increase with every iteration, while the revision and text
422 * ids of the first iteration are reused. The pages of
423 * iteration > 1 have no corresponding representation in the
425 * @return string absolute filename of the stub
427 private function setUpStub( $fname = null, $iterations = 1 ) {
428 if ( $fname === null ) {
429 $fname = $this->getNewTempFile();
431 $header = '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.7/" '
432 . 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
433 . 'xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.7/ '
434 . 'http://www.mediawiki.org/xml/export-0.7.xsd" version="0.7" xml:lang="en">
436 <sitename>wikisvn</sitename>
437 <base>http://localhost/wiki-svn/index.php/Main_Page</base>
438 <generator>MediaWiki 1.20alpha</generator>
439 <case>first-letter</case>
441 <namespace key="-2" case="first-letter">Media</namespace>
442 <namespace key="-1" case="first-letter">Special</namespace>
443 <namespace key="0" case="first-letter" />
444 <namespace key="1" case="first-letter">Talk</namespace>
445 <namespace key="2" case="first-letter">User</namespace>
446 <namespace key="3" case="first-letter">User talk</namespace>
447 <namespace key="4" case="first-letter">Wikisvn</namespace>
448 <namespace key="5" case="first-letter">Wikisvn talk</namespace>
449 <namespace key="6" case="first-letter">File</namespace>
450 <namespace key="7" case="first-letter">File talk</namespace>
451 <namespace key="8" case="first-letter">MediaWiki</namespace>
452 <namespace key="9" case="first-letter">MediaWiki talk</namespace>
453 <namespace key="10" case="first-letter">Template</namespace>
454 <namespace key="11" case="first-letter">Template talk</namespace>
455 <namespace key="12" case="first-letter">Help</namespace>
456 <namespace key="13" case="first-letter">Help talk</namespace>
457 <namespace key="14" case="first-letter">Category</namespace>
458 <namespace key="15" case="first-letter">Category talk</namespace>
462 $tail = '</mediawiki>
466 $iterations = intval( $iterations );
467 for ( $i = 0; $i < $iterations; $i++
) {
470 <title>BackupDumperTestP1</title>
472 <id>' . ( $this->pageId1 +
$i * 4 ) . '</id>
474 <id>' . $this->revId1_1
. '</id>
475 <timestamp>2012-04-01T16:46:05Z</timestamp>
479 <comment>BackupDumperTestP1Summary1</comment>
480 <sha1>0bolhl6ol7i6x0e7yq91gxgaan39j87</sha1>
481 <text id="' . $this->textId1_1
. '" bytes="23" />
486 <title>BackupDumperTestP2</title>
488 <id>' . ( $this->pageId2 +
$i * 4 ) . '</id>
490 <id>' . $this->revId2_1
. '</id>
491 <timestamp>2012-04-01T16:46:05Z</timestamp>
495 <comment>BackupDumperTestP2Summary1</comment>
496 <sha1>jprywrymfhysqllua29tj3sc7z39dl2</sha1>
497 <text id="' . $this->textId2_1
. '" bytes="23" />
500 <id>' . $this->revId2_2
. '</id>
501 <timestamp>2012-04-01T16:46:05Z</timestamp>
505 <comment>BackupDumperTestP2Summary2</comment>
506 <sha1>b7vj5ks32po5m1z1t1br4o7scdwwy95</sha1>
507 <text id="' . $this->textId2_2
. '" bytes="23" />
510 <id>' . $this->revId2_3
. '</id>
511 <timestamp>2012-04-01T16:46:05Z</timestamp>
515 <comment>BackupDumperTestP2Summary3</comment>
516 <sha1>jfunqmh1ssfb8rs43r19w98k28gg56r</sha1>
517 <text id="' . $this->textId2_3
. '" bytes="23" />
520 <id>' . $this->revId2_4
. '</id>
521 <timestamp>2012-04-01T16:46:05Z</timestamp>
525 <comment>BackupDumperTestP2Summary4 extra</comment>
526 <sha1>6o1ciaxa6pybnqprmungwofc4lv00wv</sha1>
527 <text id="' . $this->textId2_4
. '" bytes="44" />
531 // page 3 not in stub
534 <title>Talk:BackupDumperTestP1</title>
536 <id>' . ( $this->pageId4 +
$i * 4 ) . '</id>
538 <id>' . $this->revId4_1
. '</id>
539 <timestamp>2012-04-01T16:46:05Z</timestamp>
543 <comment>Talk BackupDumperTestP1 Summary1</comment>
544 <sha1>nktofwzd0tl192k3zfepmlzxoax1lpe</sha1>
545 <text id="' . $this->textId4_1
. '" bytes="35" />
549 $content .= $page1 . $page2 . $page4;
552 $this->assertEquals( strlen( $content ), file_put_contents(
553 $fname, $content ), "Length of prepared stub" );