Added support for wikidiff2 and similar external diff engines.
[lhc/web/wiklou.git] / includes / DifferenceEngine.php
1 <?php
2 /**
3 * See diff.doc
4 * @package MediaWiki
5 * @subpackage DifferenceEngine
6 */
7
8 /** */
9 require_once( 'Revision.php' );
10
11 define( 'MAX_DIFF_LINE', 10000 );
12 define( 'MAX_DIFF_XREF_LENGTH', 10000 );
13
14 /**
15 * @todo document
16 * @access public
17 * @package MediaWiki
18 * @subpackage DifferenceEngine
19 */
20 class DifferenceEngine {
21 /**#@+
22 * @access private
23 */
24 var $mOldid, $mNewid, $mTitle;
25 var $mOldtitle, $mNewtitle, $mPagetitle;
26 var $mOldtext, $mNewtext;
27 var $mOldUser, $mNewUser;
28 var $mOldComment, $mNewComment;
29 var $mOldPage, $mNewPage;
30 var $mRcidMarkPatrolled;
31 var $mOldRev, $mNewRev;
32 var $mRevisionsLoaded = false; // Have the revisions been loaded
33 var $mTextLoaded = 0; // How many text blobs have been loaded, 0, 1 or 2?
34 /**#@-*/
35
/**
 * Constructor.
 *
 * Works out which two revision IDs are to be compared and stores them in
 * $this->mOldid and $this->mNewid.
 *
 * @param Title $titleObj Title object that the diff is associated with
 * @param integer $old Old ID we want to show and diff with.
 * @param mixed $new Either 'prev' or 'next', or the ID of the new revision
 *   (anything else is passed through intval()).
 * @param integer $rcid Stored as an integer in $this->mRcidMarkPatrolled
 *   (default 0)
 */
function DifferenceEngine( $titleObj = null, $old = 0, $new = 0, $rcid = 0 ) {
	$this->mTitle = $titleObj;
	wfDebug("DifferenceEngine old '$old' new '$new' rcid '$rcid'\n");

	# Force the rcid to be an integer
	$this->mRcidMarkPatrolled = intval($rcid);

	if ( $new === 'prev' ) {
		# Diff between revision $old and the one before it;
		# the previous revision ID is looked up through the title.
		$newId = intval($old);
		$this->mNewid = $newId;
		$this->mOldid = $this->mTitle->getPreviousRevisionID( $newId );
		return;
	}

	if ( $new === 'next' ) {
		# Diff between revision $old and the one after it;
		# the next revision ID is looked up through the title.
		$oldId = intval($old);
		$this->mOldid = $oldId;
		$nextId = $this->mTitle->getNextRevisionID( $oldId );
		# If there is no result, $old is the newest old revision. The only
		# newer revision is cur, which is "0".
		$this->mNewid = ( $nextId === false ) ? 0 : $nextId;
		return;
	}

	# Plain pair of revision IDs
	$this->mOldid = intval($old);
	$this->mNewid = intval($new);
}
73
/**
 * Output the whole diff page through $wgOut: the page/sub titles, the two
 * revision headers (user, talk, contributions, rollback, patrol and
 * previous/next links), the diff table, and finally the rendered text of
 * the new revision.
 *
 * If external diffs are enabled globally and for the user, a control file
 * for the application/x-external-editor interface is echoed instead and
 * normal output is disabled.
 *
 * If the revisions cannot be loaded, a 'missingarticle' error page is
 * shown. If there is no old revision (mOldid === false), the work is
 * delegated to showFirstRevision(). If the user may not read either page,
 * the login page is output and the script exits.
 */
function showDiffPage() {
	global $wgUser, $wgOut, $wgContLang, $wgOnlySysopsCanPatrol,
		$wgUseExternalEditor, $wgUseRCPatrol;
	$fname = 'DifferenceEngine::showDiffPage';
	wfProfileIn( $fname );

	# If external diffs are enabled both globally and for the user,
	# we'll use the application/x-external-editor interface to call
	# an external diff tool like kompare, kdiff3, etc.
	if($wgUseExternalEditor && $wgUser->getOption('externaldiff')) {
		global $wgInputEncoding,$wgServer,$wgScript,$wgLang;
		$wgOut->disable();
		header ( "Content-type: application/x-external-editor; charset=".$wgInputEncoding );
		$url1=$this->mTitle->getFullURL("action=raw&oldid=".$this->mOldid);
		$url2=$this->mTitle->getFullURL("action=raw&oldid=".$this->mNewid);
		$special=$wgLang->getNsText(NS_SPECIAL);
		$control=<<<CONTROL
[Process]
Type=Diff text
Engine=MediaWiki
Script={$wgServer}{$wgScript}
Special namespace={$special}

[File]
Extension=wiki
URL=$url1

[File 2]
Extension=wiki
URL=$url2
CONTROL;
		echo($control);
		# Balance the wfProfileIn() above; every other exit path of this
		# method already does so.
		wfProfileOut( $fname );
		return;
	}

	$t = $this->mTitle->getPrefixedText() . " (Diff: {$this->mOldid}, " .
		"{$this->mNewid})";
	$mtext = wfMsg( 'missingarticle', "<nowiki>$t</nowiki>" );

	$wgOut->setArticleFlag( false );
	if ( ! $this->loadRevisionData() ) {
		$wgOut->setPagetitle( wfMsg( 'errorpagetitle' ) );
		$wgOut->addWikitext( $mtext );
		wfProfileOut( $fname );
		return;
	}
	if ( $this->mNewRev->isCurrent() ) {
		$wgOut->setArticleFlag( true );
	}

	# mOldid is false if the difference engine is called with a "vague" query for
	# a diff between a version V and its previous version V' AND the version V
	# is the first version of that article. In that case, V' does not exist.
	if ( $this->mOldid === false ) {
		$this->showFirstRevision();
		wfProfileOut( $fname );
		return;
	}

	$wgOut->suppressQuickbar();

	$oldTitle = $this->mOldPage->getPrefixedText();
	$newTitle = $this->mNewPage->getPrefixedText();
	if( $oldTitle == $newTitle ) {
		$wgOut->setPageTitle( $newTitle );
	} else {
		$wgOut->setPageTitle( $oldTitle . ', ' . $newTitle );
	}
	$wgOut->setSubtitle( wfMsg( 'difference' ) );
	$wgOut->setRobotpolicy( 'noindex,follow' );

	# Both pages must be readable by the user, otherwise bail out
	if ( !( $this->mOldPage->userCanRead() && $this->mNewPage->userCanRead() ) ) {
		$wgOut->loginToUse();
		$wgOut->output();
		wfProfileOut( $fname );
		exit;
	}

	$sk = $wgUser->getSkin();
	$talk = $wgContLang->getNsText( NS_TALK );
	$contribs = wfMsg( 'contribslink' );

	# Note: the stored comments are replaced by their formatted versions
	$this->mOldComment = $sk->formatComment($this->mOldComment);
	$this->mNewComment = $sk->formatComment($this->mNewComment);

	$oldUserLink = $sk->makeLinkObj( Title::makeTitleSafe( NS_USER, $this->mOldUser ), $this->mOldUser );
	$newUserLink = $sk->makeLinkObj( Title::makeTitleSafe( NS_USER, $this->mNewUser ), $this->mNewUser );
	$oldUTLink = $sk->makeLinkObj( Title::makeTitleSafe( NS_USER_TALK, $this->mOldUser ), $talk );
	$newUTLink = $sk->makeLinkObj( Title::makeTitleSafe( NS_USER_TALK, $this->mNewUser ), $talk );
	$oldContribs = $sk->makeKnownLinkObj( Title::makeTitle( NS_SPECIAL, 'Contributions' ), $contribs,
		'target=' . urlencode($this->mOldUser) );
	$newContribs = $sk->makeKnownLinkObj( Title::makeTitle( NS_SPECIAL, 'Contributions' ), $contribs,
		'target=' . urlencode($this->mNewUser) );

	# Rollback link: only on the current revision, only with the 'rollback' right
	if ( $this->mNewRev->isCurrent() && $wgUser->isAllowed('rollback') ) {
		$rollback = '&nbsp;&nbsp;&nbsp;<strong>[' . $sk->makeKnownLinkObj( $this->mTitle, wfMsg( 'rollbacklink' ),
			'action=rollback&from=' . urlencode($this->mNewUser) .
			'&token=' . urlencode( $wgUser->editToken( array( $this->mTitle->getPrefixedText(), $this->mNewUser ) ) ) ) .
			']</strong>';
	} else {
		$rollback = '';
	}

	# "Mark as patrolled" link: needs RC patrol enabled, a non-zero rcid
	# and a logged-in user who is allowed to patrol
	if ( $wgUseRCPatrol && $this->mRcidMarkPatrolled != 0 && $wgUser->isLoggedIn() &&
		( $wgUser->isAllowed('rollback') || !$wgOnlySysopsCanPatrol ) )
	{
		$patrol = ' [' . $sk->makeKnownLinkObj( $this->mTitle, wfMsg( 'markaspatrolleddiff' ),
			"action=markpatrolled&rcid={$this->mRcidMarkPatrolled}" ) . ']';
	} else {
		$patrol = '';
	}

	$prevlink = $sk->makeKnownLinkObj( $this->mTitle, wfMsgHtml( 'previousdiff' ),
		'diff=prev&oldid='.$this->mOldid, '', '', 'id="differences-prevlink"' );
	if ( $this->mNewRev->isCurrent() ) {
		$nextlink = '';
	} else {
		$nextlink = $sk->makeKnownLinkObj( $this->mTitle, wfMsgHtml( 'nextdiff' ),
			'diff=next&oldid='.$this->mNewid, '', '', 'id="differences-nextlink"' );
	}

	$oldHeader = "<strong>{$this->mOldtitle}</strong><br />$oldUserLink " .
		"($oldUTLink | $oldContribs)<br />" . $this->mOldComment .
		'<br />' . $prevlink;
	$newHeader = "<strong>{$this->mNewtitle}</strong><br />$newUserLink " .
		"($newUTLink | $newContribs) $rollback<br />" . $this->mNewComment .
		'<br />' . $nextlink . $patrol;

	$this->showDiff( $oldHeader, $newHeader );
	$wgOut->addHTML( "<hr /><h2>{$this->mPagetitle}</h2>\n" );

	# For an old revision, switch the parser's edit-section option off
	# while rendering and restore the previous setting afterwards
	if( !$this->mNewRev->isCurrent() ) {
		$oldEditSectionSetting = $wgOut->mParserOptions->setEditSection( false );
	}

	$this->loadNewText();
	if( is_object( $this->mNewRev ) ) {
		$wgOut->setRevisionId( $this->mNewRev->getId() );
	}
	$wgOut->addSecondaryWikiText( $this->mNewtext );

	if( !$this->mNewRev->isCurrent() ) {
		$wgOut->mParserOptions->setEditSection( $oldEditSectionSetting );
	}

	wfProfileOut( $fname );
}
219
/**
 * Show the first revision of an article. Uses normal diff headers in
 * contrast to normal "old revision" display style.
 *
 * This is used when there is no old revision to diff against. In that case
 * loadRevisionData() leaves mOldRev false and never fills in mOldUser,
 * mOldComment or mOldtitle, so the header is built from the data of the
 * new revision (the one actually being displayed).
 *
 * Shows a 'missingarticle' error page if the text cannot be loaded, and
 * outputs the login page and exits if the user may not read the title.
 */
function showFirstRevision() {
	global $wgOut, $wgUser, $wgLang;

	$fname = 'DifferenceEngine::showFirstRevision';
	wfProfileIn( $fname );

	# Load the text (and revision data) of the revision to show
	#
	if ( ! $this->loadNewText() ) {
		$t = $this->mTitle->getPrefixedText() . " (Diff: {$this->mOldid}, " .
			"{$this->mNewid})";
		$mtext = wfMsg( 'missingarticle', "<nowiki>$t</nowiki>" );
		$wgOut->setPagetitle( wfMsg( 'errorpagetitle' ) );
		$wgOut->addWikitext( $mtext );
		wfProfileOut( $fname );
		return;
	}
	if ( $this->mNewRev->isCurrent() ) {
		$wgOut->setArticleFlag( true );
	}

	# Check if user is allowed to look at this page. If not, bail out.
	#
	if ( !( $this->mTitle->userCanRead() ) ) {
		$wgOut->loginToUse();
		$wgOut->output();
		wfProfileOut( $fname );
		exit;
	}

	# Prepare the header box
	#
	$sk = $wgUser->getSkin();

	# Use the new revision's user, title link and comment; the "old"
	# counterparts are unset when no old revision exists. The comment is
	# run through formatComment() as showDiffPage() does before output.
	$user = $this->mNewUser;
	$comment = $sk->formatComment( $this->mNewComment );

	$uTLink = $sk->makeLinkObj( Title::makeTitleSafe( NS_USER_TALK, $user ), $wgLang->getNsText( NS_TALK ) );
	$userLink = $sk->makeLinkObj( Title::makeTitleSafe( NS_USER, $user ), $user );
	$contribs = $sk->makeKnownLinkObj( Title::makeTitle( NS_SPECIAL, 'Contributions' ), wfMsg( 'contribslink' ),
		'target=' . urlencode($user) );
	$nextlink = $sk->makeKnownLinkObj( $this->mTitle, wfMsgHtml( 'nextdiff' ), 'diff=next&oldid='.$this->mNewid, '', '', 'id="differences-nextlink"' );
	$header = "<div class=\"firstrevisionheader\" style=\"text-align: center\"><strong>{$this->mNewtitle}</strong><br />$userLink " .
		"($uTLink | $contribs)<br />" . $comment .
		'<br />' . $nextlink. "</div>\n";

	$wgOut->addHTML( $header );

	$wgOut->setSubtitle( wfMsg( 'difference' ) );
	$wgOut->setRobotpolicy( 'noindex,follow' );


	# Show current revision
	#
	$wgOut->addHTML( "<hr /><h2>{$this->mPagetitle}</h2>\n" );
	if( is_object( $this->mNewRev ) ) {
		$wgOut->setRevisionId( $this->mNewRev->getId() );
	}
	$wgOut->addSecondaryWikiText( $this->mNewtext );

	wfProfileOut( $fname );
}
283
/**
 * Get the diff text, send it to $wgOut
 * Returns false if the diff could not be generated, otherwise returns true
 *
 * On failure a 'missingarticle' message naming the title and the two
 * revision IDs is added to $wgOut instead of the diff.
 *
 * @param string $otitle HTML for the header cell of the old revision
 * @param string $ntitle HTML for the header cell of the new revision
 * @return bool
 */
function showDiff( $otitle, $ntitle ) {
	global $wgOut;
	$diff = $this->getDiff( $otitle, $ntitle );
	if ( $diff === false ) {
		# Build the description used in the error message, in the same
		# format as showDiffPage(). The title may be absent when the
		# engine was constructed without one.
		$name = is_object( $this->mTitle ) ? $this->mTitle->getPrefixedText() : '';
		$t = $name . " (Diff: {$this->mOldid}, " .
			"{$this->mNewid})";
		$wgOut->addWikitext( wfMsg( 'missingarticle', "<nowiki>$t</nowiki>" ) );
		return false;
	}
	$wgOut->addHTML( $diff );
	return true;
}
299
/**
 * Build the complete diff table: the body from getDiffBody() wrapped in
 * the header produced by addHeader().
 * Note that the interface has changed, it's no longer static.
 *
 * @param string $otitle HTML for the header cell of the old revision
 * @param string $ntitle HTML for the header cell of the new revision
 * @return mixed The table HTML, or false if the body could not be generated
 */
function getDiff( $otitle, $ntitle ) {
	$tableBody = $this->getDiffBody();
	return ( $tableBody === false )
		? false
		: $this->addHeader( $tableBody, $otitle, $ntitle );
}
313
/**
 * Get the diff table body, without header
 * Returns false on error
 *
 * When both revision IDs are non-zero the result is looked up in, and
 * stored to, $wgMemc (for 7 days). The diff itself is produced by the
 * engine selected through $wgExternalDiffEngine:
 *  - 'wikidiff':  the wikidiff PHP extension (input is HTML-escaped first)
 *  - 'wikidiff2': the wikidiff2 PHP extension
 *  - any other non-false value: an external command run through the shell
 *    on two temporary files
 *  - false: the PHP Diff / TableDiffFormatter classes
 *
 * Line number markers in the result are localised before returning.
 *
 * @return mixed Diff body HTML, or false on error
 */
function getDiffBody() {
	global $wgExternalDiffEngine, $wgContLang, $wgMemc, $wgDBname;
	$fname = 'DifferenceEngine::getDiffBody';
	wfProfileIn( $fname );

	// Cacheable?
	$key = false;
	if ( $this->mOldid && $this->mNewid ) {
		// Try cache
		$key = "$wgDBname:diff:oldid:{$this->mOldid}:newid:{$this->mNewid}";
		$difftext = $wgMemc->get( $key );
		if ( $difftext ) {
			wfIncrStats( 'diff_cache_hit' );
			$difftext = $this->localiseLineNumbers( $difftext );
			$difftext .= "\n<!-- diff cache key $key -->\n";
			wfProfileOut( $fname );
			return $difftext;
		}
	}

	if ( !$this->loadText() ) {
		wfProfileOut( $fname );
		return false;
	}

	$otext = $wgContLang->segmentForDiff($this->mOldtext);
	$ntext = $wgContLang->segmentForDiff($this->mNewtext);
	if ( $wgExternalDiffEngine == 'wikidiff' ) {
		# For historical reasons, external diff engine expects
		# input text to be HTML-escaped already
		$otext = str_replace( "\r\n", "\n", htmlspecialchars ( $otext ) );
		$ntext = str_replace( "\r\n", "\n", htmlspecialchars ( $ntext ) );
		if( !function_exists( 'wikidiff_do_diff' ) ) {
			dl('php_wikidiff.so');
		}
		$difftext = wikidiff_do_diff( $otext, $ntext, 2 );
	} elseif ( $wgExternalDiffEngine == 'wikidiff2' ) {
		# Better external diff engine, the 2 may some day be dropped
		# This one does the escaping itself
		$otext = str_replace( "\r\n", "\n", $otext );
		$ntext = str_replace( "\r\n", "\n", $ntext );
		if ( !function_exists( 'wikidiff2_do_diff' ) ) {
			dl('php_wikidiff2.so');
		}
		$difftext = wikidiff2_do_diff( $otext, $ntext, 2 );
	} elseif ( $wgExternalDiffEngine !== false ) {
		# Diff via the shell
		global $wgTmpDirectory;
		$otext = str_replace( "\r\n", "\n", $otext );
		$ntext = str_replace( "\r\n", "\n", $ntext );
		$tempName1 = tempnam( $wgTmpDirectory, 'diff_' );
		$tempName2 = tempnam( $wgTmpDirectory, 'diff_' );

		# tempnam() returns false on failure; only open what was created
		$tempFile1 = ( $tempName1 !== false ) ? fopen( $tempName1, "w" ) : false;
		$tempFile2 = ( $tempName2 !== false ) ? fopen( $tempName2, "w" ) : false;
		if ( !$tempFile1 || !$tempFile2 ) {
			# Do not leave open handles or stray temporary files behind
			if ( $tempFile1 ) {
				fclose( $tempFile1 );
			}
			if ( $tempFile2 ) {
				fclose( $tempFile2 );
			}
			if ( $tempName1 !== false && file_exists( $tempName1 ) ) {
				unlink( $tempName1 );
			}
			if ( $tempName2 !== false && file_exists( $tempName2 ) ) {
				unlink( $tempName2 );
			}
			wfProfileOut( $fname );
			return false;
		}
		fwrite( $tempFile1, $otext );
		fwrite( $tempFile2, $ntext );
		fclose( $tempFile1 );
		fclose( $tempFile2 );
		$cmd = wfEscapeShellArg( $wgExternalDiffEngine, $tempName1, $tempName2 );
		wfProfileIn( "$fname-shellexec" );
		$difftext = wfShellExec( $cmd );
		wfProfileOut( "$fname-shellexec" );
		unlink( $tempName1 );
		unlink( $tempName2 );
	} else {
		# Built-in PHP diff engine. Plain assignment is used rather than
		# "=& new", which is a parse error from PHP 7 on.
		$ota = explode( "\n", str_replace( "\r\n", "\n", $otext ) );
		$nta = explode( "\n", str_replace( "\r\n", "\n", $ntext ) );
		$diffs = new Diff( $ota, $nta );
		$formatter = new TableDiffFormatter();
		$difftext = $formatter->format( $diffs );
	}
	$difftext = $wgContLang->unsegmentForDiff($difftext);

	// Save to cache for 7 days
	if ( $key !== false ) {
		wfIncrStats( 'diff_cache_miss' );
		$wgMemc->set( $key, $difftext, 7*86400 );
	} else {
		wfIncrStats( 'diff_uncacheable' );
	}
	// Replace line numbers with the text in the user's language
	// (done after caching, so the cached copy stays language-neutral)
	$difftext = $this->localiseLineNumbers( $difftext );
	wfProfileOut( $fname );
	return $difftext;
}
412
/**
 * Replace every <!--LINE n--> marker in the diff text with the line
 * number message in the user's language (see localiseLineNumbersCb()).
 *
 * @param string $text Diff text containing the markers
 * @return string
 */
function localiseLineNumbers( $text ) {
	$callback = array( &$this, 'localiseLineNumbersCb' );
	return preg_replace_callback( '/<!--LINE (\d+)-->/', $callback, $text );
}
420
/**
 * preg_replace_callback() handler for localiseLineNumbers().
 *
 * @param array $matches Regex match; element 1 is the captured line number
 * @return string The 'lineno' message for the formatted number
 */
function localiseLineNumbersCb( $matches ) {
	global $wgLang;
	$lineNumber = $wgLang->formatNum( $matches[1] );
	return wfMsg( 'lineno', $lineNumber );
}
425
/**
 * Wrap a diff body in the diff table, preceded by a header row holding
 * the two revision titles.
 *
 * @param string $diff Table rows making up the diff body
 * @param string $otitle HTML for the header cell of the old revision
 * @param string $ntitle HTML for the header cell of the new revision
 * @return string
 */
function addHeader( $diff, $otitle, $ntitle ) {
	$oldCell = "<td colspan='2' width='50%' align='center' class='diff-otitle'>{$otitle}</td>";
	$newCell = "<td colspan='2' width='50%' align='center' class='diff-ntitle'>{$ntitle}</td>";

	return "\n" .
		"<table border='0' width='98%' cellpadding='0' cellspacing='4' class='diff'>\n" .
		"<tr>\n" .
		$oldCell . "\n" .
		$newCell . "\n" .
		"</tr>\n" .
		$diff . "\n" .
		"</table>\n";
}
441
/**
 * Supply both texts directly rather than loading them from the database.
 * Both text blobs are marked as loaded, so loadText() and loadNewText()
 * return immediately afterwards.
 *
 * @param string $oldText Text of the old revision
 * @param string $newText Text of the new revision
 */
function setText( $oldText, $newText ) {
	$this->mTextLoaded = 2;
	$this->mNewtext = $newText;
	$this->mOldtext = $oldText;
}
450
/**
 * Load revision metadata for the two revisions being compared.
 *
 * New revision: if mNewid is non-zero it is loaded by ID, otherwise the
 * revision is obtained from the title via Revision::newFromTitle().
 *
 * Old revision: if mOldid is non-zero it is loaded by ID. If mOldid is 0
 * the revision preceding the new revision is used; when there is none,
 * mOldid and mOldRev are both set to false to mark a first-version-only
 * display. If mOldid is already false, mOldRev is left false.
 *
 * Fills in mNewRev, mNewPage, mPagetitle, mNewtitle, mNewUser and
 * mNewComment, and - only when an old revision exists - mOldRev, mOldPage,
 * mOldtitle, mOldUser and mOldComment.
 *
 * The load is attempted only once: any later call returns true
 * immediately, whether or not the first attempt succeeded.
 *
 * @return bool False if a requested revision could not be loaded
 */
function loadRevisionData() {
	global $wgLang;

	if ( $this->mRevisionsLoaded ) {
		return true;
	}
	// Whether it succeeds or fails, we don't want to try again
	$this->mRevisionsLoaded = true;

	// Load the new revision object
	$this->mNewRev = $this->mNewid
		? Revision::newFromId( $this->mNewid )
		: Revision::newFromTitle( $this->mTitle );
	if( is_null( $this->mNewRev ) ) {
		return false;
	}

	// Set assorted variables
	if( $this->mNewRev->isCurrent() ) {
		$this->mPagetitle = htmlspecialchars( wfMsg( 'currentrev' ) );
		$this->mNewPage = $this->mTitle;
		$newLink = $this->mNewPage->escapeLocalUrl();
	} else {
		$this->mNewPage = $this->mNewRev->getTitle();
		$newLink = $this->mNewPage->escapeLocalUrl ('oldid=' . $this->mNewid );
		$newTimestamp = $wgLang->timeanddate( $this->mNewRev->getTimestamp(), true );
		$this->mPagetitle = htmlspecialchars( wfMsg( 'revisionasof', $newTimestamp ) );
	}
	$this->mNewtitle = "<a href='$newLink'>{$this->mPagetitle}</a>";

	$this->mNewUser = $this->mNewRev->getUserText();
	$this->mNewComment = $this->mNewRev->getComment();

	// Load the old revision object
	$this->mOldRev = false;
	if( $this->mOldid ) {
		$this->mOldRev = Revision::newFromId( $this->mOldid );
	} elseif ( $this->mOldid === 0 ) {
		$previous = $this->mNewRev->getPrevious();
		if( $previous ) {
			$this->mOldid = $previous->getId();
			$this->mOldRev = $previous;
		} else {
			// No previous revision; mark to show as first-version only.
			$this->mOldid = false;
			$this->mOldRev = false;
		}
	}
	// Otherwise mOldid === false: leave mOldRev false.

	if( is_null( $this->mOldRev ) ) {
		return false;
	}

	if ( !$this->mOldRev ) {
		// Nothing more to fill in without an old revision
		return true;
	}

	$this->mOldPage = $this->mOldRev->getTitle();

	$oldTimestamp = $wgLang->timeanddate( $this->mOldRev->getTimestamp(), true );
	$oldLink = $this->mOldPage->escapeLocalUrl( 'oldid=' . $this->mOldid );
	$this->mOldtitle = "<a href='$oldLink'>" . htmlspecialchars( wfMsg( 'revisionasof', $oldTimestamp ) ) . '</a>';

	$this->mOldUser = $this->mOldRev->getUserText();
	$this->mOldComment = $this->mOldRev->getComment();

	return true;
}
532
/**
 * Load the revision data and the text of both revisions (of whichever
 * revision objects exist) into mOldtext / mNewtext.
 *
 * The load is attempted only once: when mTextLoaded is already 2 - after
 * an earlier call or after setText() - true is returned immediately.
 *
 * @return bool False if the revision data or one of the texts could not
 *   be loaded
 */
function loadText() {
	if ( $this->mTextLoaded == 2 ) {
		return true;
	}
	// Whether it succeeds or fails, we don't want to try again
	$this->mTextLoaded = 2;

	if ( !$this->loadRevisionData() ) {
		return false;
	}

	if ( $this->mOldRev ) {
		$oldText = $this->mOldRev->getText();
		$this->mOldtext = $oldText;
		if ( $oldText === false ) {
			return false;
		}
	}

	if ( $this->mNewRev ) {
		$newText = $this->mNewRev->getText();
		$this->mNewtext = $newText;
		if ( $newText === false ) {
			return false;
		}
	}

	return true;
}
561
/**
 * Load the text of the new revision, not the old one
 *
 * Returns true immediately when at least one text blob has already been
 * loaded (mTextLoaded >= 1). Otherwise the revision data is loaded and the
 * text of the new revision is stored in mNewtext.
 *
 * @return bool False if the revision data is unavailable or the text
 *   could not be fetched (getText() returned false, as in loadText())
 */
function loadNewText() {
	if ( $this->mTextLoaded >= 1 ) {
		return true;
	}
	$this->mTextLoaded = 1;

	# loadRevisionData() reports true on a repeated call even if the first
	# attempt failed, so make sure there really is a revision object
	if ( !$this->loadRevisionData() || !is_object( $this->mNewRev ) ) {
		return false;
	}

	$this->mNewtext = $this->mNewRev->getText();
	if ( $this->mNewtext === false ) {
		return false;
	}
	return true;
}
577
578
579 }
580
581 // A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
582 //
583 // Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
584 // You may copy this code freely under the conditions of the GPL.
585 //
586
587 define('USE_ASSERTS', function_exists('assert'));
588
/**
 * Base class of the diff edit operations.
 *
 * An operation has a type name, the affected lines of the original text
 * ($orig) and the affected lines of the final text ($closing); a side that
 * does not apply to the operation holds false.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class _DiffOp {
	var $type;
	var $orig;
	var $closing;

	/**
	 * Return the inverse operation. Must be overridden by subclasses;
	 * the base implementation raises a user error.
	 */
	function reverse() {
		trigger_error('pure virtual', E_USER_ERROR);
	}

	/**
	 * Number of lines on the original side (0 if there is none).
	 */
	function norig() {
		if ( !$this->orig ) {
			return 0;
		}
		return count($this->orig);
	}

	/**
	 * Number of lines on the final side (0 if there is none).
	 */
	function nclosing() {
		if ( !$this->closing ) {
			return 0;
		}
		return count($this->closing);
	}
}
612
/**
 * Edit operation for a run of lines present on both sides.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class _DiffOp_Copy extends _DiffOp {
	var $type = 'copy';

	/**
	 * @param array $orig Lines of the original text
	 * @param mixed $closing Lines of the final text; when this is not an
	 *   array, $orig is used for both sides
	 */
	function _DiffOp_Copy ($orig, $closing = false) {
		$this->orig = $orig;
		$this->closing = is_array($closing) ? $closing : $orig;
	}

	/**
	 * The inverse of a copy is a copy with the two sides swapped.
	 */
	function reverse() {
		$reversed = new _DiffOp_Copy($this->closing, $this->orig);
		return $reversed;
	}
}
633
/**
 * Edit operation for lines that exist only in the original text.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class _DiffOp_Delete extends _DiffOp {
	var $type = 'delete';

	/**
	 * @param array $lines The deleted lines; the final side is set to false
	 */
	function _DiffOp_Delete ($lines) {
		$this->closing = false;
		$this->orig = $lines;
	}

	/**
	 * The inverse of a deletion is an addition of the same lines.
	 */
	function reverse() {
		$inverse = new _DiffOp_Add($this->orig);
		return $inverse;
	}
}
652
/**
 * Edit operation for lines that exist only in the final text.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class _DiffOp_Add extends _DiffOp {
	var $type = 'add';

	/**
	 * @param array $lines The added lines; the original side is set to false
	 */
	function _DiffOp_Add ($lines) {
		$this->orig = false;
		$this->closing = $lines;
	}

	/**
	 * The inverse of an addition is a deletion of the same lines.
	 */
	function reverse() {
		$inverse = new _DiffOp_Delete($this->closing);
		return $inverse;
	}
}
671
/**
 * Edit operation for a run of original lines replaced by a run of
 * different final lines.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class _DiffOp_Change extends _DiffOp {
	var $type = 'change';

	/**
	 * @param array $orig The replaced lines of the original text
	 * @param array $closing The replacing lines of the final text
	 */
	function _DiffOp_Change ($orig, $closing) {
		$this->closing = $closing;
		$this->orig = $orig;
	}

	/**
	 * The inverse of a change is a change with the two sides swapped.
	 */
	function reverse() {
		$inverse = new _DiffOp_Change($this->closing, $this->orig);
		return $inverse;
	}
}
690
691
692 /**
693 * Class used internally by Diff to actually compute the diffs.
694 *
695 * The algorithm used here is mostly lifted from the perl module
696 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
697 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
698 *
699 * More ideas are taken from:
700 * http://www.ics.uci.edu/~eppstein/161/960229.html
701 *
702 * Some ideas (and a bit of code) are from analyze.c, from GNU
703 * diffutils-2.7, which can be found at:
704 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
705 *
706 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
707 * are my own.
708 *
709 * Line length limits for robustness added by Tim Starling, 2005-08-31
710 *
711 * @author Geoffrey T. Dairiki, Tim Starling
712 * @access private
713 * @package MediaWiki
714 * @subpackage DifferenceEngine
715 */
716 class _DiffEngine
717 {
/**
 * Compute the list of edit operations that turns $from_lines into
 * $to_lines.
 *
 * Common leading and trailing lines are skipped, lines that do not occur
 * on both sides are marked as changed straight away, and the remaining
 * lines are handed to _compareseq(). The resulting xchanged / ychanged
 * flags are then adjusted by _shift_boundaries() and turned into
 * _DiffOp_Copy / _DiffOp_Change / _DiffOp_Delete / _DiffOp_Add objects.
 *
 * @param array $from_lines Lines of the original text
 * @param array $to_lines Lines of the final text
 * @return array List of _DiffOp objects
 */
function diff ($from_lines, $to_lines) {
	$fname = '_DiffEngine::diff';
	wfProfileIn( $fname );

	$n_from = count($from_lines);
	$n_to = count($to_lines);

	// Reset all state left over from a previous run.
	$this->xchanged = array();
	$this->ychanged = array();
	$this->xv = array();
	$this->yv = array();
	$this->xind = array();
	$this->yind = array();
	unset($this->seq);
	unset($this->in_seq);
	unset($this->lcs);

	// Skip leading common lines.
	$skip = 0;
	while ($skip < $n_from && $skip < $n_to
			&& $from_lines[$skip] === $to_lines[$skip]) {
		$this->ychanged[$skip] = false;
		$this->xchanged[$skip] = false;
		$skip++;
	}

	// Skip trailing common lines.
	$xi = $n_from;
	$yi = $n_to;
	$endskip = 0;
	while (--$xi > $skip && --$yi > $skip) {
		if ($from_lines[$xi] !== $to_lines[$yi])
			break;
		$this->ychanged[$yi] = false;
		$this->xchanged[$xi] = false;
		$endskip++;
	}

	// Exclusive upper bounds of the part that still has to be compared.
	$x_end = $n_from - $endskip;
	$y_end = $n_to - $endskip;

	// Ignore lines which do not exist in both files.
	$xhash = array();
	for ($xi = $skip; $xi < $x_end; $xi++) {
		$xhash[$this->_line_hash($from_lines[$xi])] = 1;
	}

	$yhash = array();
	for ($yi = $skip; $yi < $y_end; $yi++) {
		$line = $to_lines[$yi];
		$hash = $this->_line_hash($line);
		$this->ychanged[$yi] = empty($xhash[$hash]);
		if ($this->ychanged[$yi])
			continue;
		$yhash[$hash] = 1;
		$this->yv[] = $line;
		$this->yind[] = $yi;
	}
	for ($xi = $skip; $xi < $x_end; $xi++) {
		$line = $from_lines[$xi];
		$this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)]);
		if ($this->xchanged[$xi])
			continue;
		$this->xv[] = $line;
		$this->xind[] = $xi;
	}

	// Find the LCS.
	$this->_compareseq(0, count($this->xv), 0, count($this->yv));

	// Merge edits when possible
	$this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
	$this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);

	// Compute the edit operations.
	$edits = array();
	$xi = 0;
	$yi = 0;
	while ($xi < $n_from || $yi < $n_to) {
		USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
		USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);

		// Skip matching "snake".
		$copy = array();
		while ( $xi < $n_from && $yi < $n_to
				&& !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
			$copy[] = $from_lines[$xi];
			$xi++;
			$yi++;
		}
		if (!empty($copy))
			$edits[] = new _DiffOp_Copy($copy);

		// Find deletes & adds.
		$delete = array();
		while ($xi < $n_from && $this->xchanged[$xi]) {
			$delete[] = $from_lines[$xi];
			$xi++;
		}

		$add = array();
		while ($yi < $n_to && $this->ychanged[$yi]) {
			$add[] = $to_lines[$yi];
			$yi++;
		}

		$has_delete = !empty($delete);
		$has_add = !empty($add);
		if ($has_delete && $has_add)
			$edits[] = new _DiffOp_Change($delete, $add);
		elseif ($has_delete)
			$edits[] = new _DiffOp_Delete($delete);
		elseif ($has_add)
			$edits[] = new _DiffOp_Add($add);
	}
	wfProfileOut( $fname );
	return $edits;
}
810
/**
 * Key used to cross-reference a line: the line itself, or its MD5 hash
 * when the line is longer than MAX_DIFF_XREF_LENGTH bytes.
 *
 * @param string $line
 * @return string
 */
function _line_hash( $line ) {
	$tooLong = strlen( $line ) > MAX_DIFF_XREF_LENGTH;
	return $tooLong ? md5( $line ) : $line;
}
821
822
/* Divide the Largest Common Subsequence (LCS) of the sequences
 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
 * sized segments.
 *
 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
 * array of NCHUNKS+1 (X, Y) indexes giving the dividing points between
 * sub sequences. The first sub-sequence is contained in [X0, X1),
 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
 * that (X0, Y0) == (XOFF, YOFF) and
 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
 *
 * This function assumes that the first lines of the specified portions
 * of the two files do not match, and likewise that the last lines do not
 * match. The caller must trim matching lines from the beginning and end
 * of the portions it is going to specify.
 *
 * The candidate matches of a line are walked with an explicit index
 * rather than each(), which no longer exists as of PHP 8.
 */
function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
	$fname = '_DiffEngine::_diag';
	wfProfileIn( $fname );
	$flip = false;

	if ($xlim - $xoff > $ylim - $yoff) {
		// Things seems faster (I'm not sure I understand why)
		// when the shortest sequence in X.
		$flip = true;
		list ($xoff, $xlim, $yoff, $ylim)
			= array( $yoff, $ylim, $xoff, $xlim);
	}

	// Map each line of the (possibly flipped) Y range to the list of
	// positions where it occurs, highest position first.
	$ymatches = array();
	if ($flip)
		for ($i = $ylim - 1; $i >= $yoff; $i--)
			$ymatches[$this->xv[$i]][] = $i;
	else
		for ($i = $ylim - 1; $i >= $yoff; $i--)
			$ymatches[$this->yv[$i]][] = $i;

	$this->lcs = 0;
	$this->seq[0]= $yoff - 1;
	$this->in_seq = array();
	$ymids[0] = array();

	$numer = $xlim - $xoff + $nchunks - 1;
	$x = $xoff;
	for ($chunk = 0; $chunk < $nchunks; $chunk++) {
		wfProfileIn( "$fname-chunk" );
		if ($chunk > 0)
			for ($i = 0; $i <= $this->lcs; $i++)
				$ymids[$i][$chunk-1] = $this->seq[$i];

		$x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
		for ( ; $x < $x1; $x++) {
			$line = $flip ? $this->yv[$x] : $this->xv[$x];
			if (empty($ymatches[$line]))
				continue;
			// $matches is a plain list (built with []), so it can be
			// walked by index; $m is shared by both loops below.
			$matches = $ymatches[$line];
			$nmatches = count($matches);
			$m = 0;

			// Find the first match that is not yet part of the sequence.
			for ( ; $m < $nmatches; $m++) {
				$y = $matches[$m];
				if (empty($this->in_seq[$y])) {
					$k = $this->_lcs_pos($y);
					USE_ASSERTS && assert($k > 0);
					$ymids[$k] = $ymids[$k-1];
					// Step past this match before leaving the loop
					$m++;
					break;
				}
			}

			// Continue with the matches following the one found above.
			for ( ; $m < $nmatches; $m++) {
				$y = $matches[$m];
				if ($y > $this->seq[$k-1]) {
					USE_ASSERTS && assert($y < $this->seq[$k]);
					// Optimization: this is a common case:
					// next match is just replacing previous match.
					$this->in_seq[$this->seq[$k]] = false;
					$this->seq[$k] = $y;
					$this->in_seq[$y] = 1;
				} else if (empty($this->in_seq[$y])) {
					$k = $this->_lcs_pos($y);
					USE_ASSERTS && assert($k > 0);
					$ymids[$k] = $ymids[$k-1];
				}
			}
		}
		wfProfileOut( "$fname-chunk" );
	}

	// Build the list of dividing points, swapping the coordinates back
	// if the sequences were flipped above.
	$seps = array();
	$seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
	$ymid = $ymids[$this->lcs];
	for ($n = 0; $n < $nchunks - 1; $n++) {
		$x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
		$y1 = $ymid[$n] + 1;
		$seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
	}
	$seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);

	wfProfileOut( $fname );
	return array($this->lcs, $seps);
}
916
/**
 * Record Y position $ypos in the sequence table $this->seq and return the
 * index at which it was stored.
 *
 * If the table is empty or $ypos lies beyond its last entry, the table is
 * extended by one (incrementing $this->lcs). Otherwise a binary search
 * finds the first entry that is not smaller than $ypos; that entry is
 * replaced and its old value is cleared from $this->in_seq.
 *
 * @param integer $ypos
 * @return integer Index into $this->seq
 */
function _lcs_pos ($ypos) {
	$fname = '_DiffEngine::_lcs_pos';
	wfProfileIn( $fname );

	$hi = $this->lcs;
	if ($hi == 0 || $ypos > $this->seq[$hi]) {
		// Append a new entry.
		$pos = ++$this->lcs;
		$this->seq[$pos] = $ypos;
	} else {
		// Binary search for the entry to replace.
		$lo = 1;
		while ($lo < $hi) {
			$mid = (int)(($lo + $hi) / 2);
			if ( $ypos > $this->seq[$mid] ) {
				$lo = $mid + 1;
			} else {
				$hi = $mid;
			}
		}

		USE_ASSERTS && assert($ypos != $this->seq[$hi]);

		$this->in_seq[$this->seq[$hi]] = false;
		$this->seq[$hi] = $ypos;
		$pos = $hi;
	}

	$this->in_seq[$ypos] = 1;
	wfProfileOut( $fname );
	return $pos;
}
946
/* Find LCS of two sequences.
 *
 * The results are recorded in the vectors $this->{x,y}changed[], by
 * storing a 1 in the element for each line that is an insertion
 * or deletion (ie. is not in the LCS).
 *
 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
 *
 * Note that XLIM, YLIM are exclusive bounds.
 * All line numbers are origin-0 and discarded lines are not counted.
 *
 * Matching lines at both ends of the ranges are trimmed first. What is
 * left is split by _diag() and each part is handled recursively; if there
 * is nothing in common, every remaining line is marked as changed.
 */
function _compareseq ($xoff, $xlim, $yoff, $ylim) {
	$fname = '_DiffEngine::_compareseq';
	wfProfileIn( $fname );

	// Slide down the bottom initial diagonal.
	while ($xoff < $xlim && $yoff < $ylim
			&& $this->xv[$xoff] == $this->yv[$yoff]) {
		$xoff++;
		$yoff++;
	}

	// Slide up the top initial diagonal.
	while ($xlim > $xoff && $ylim > $yoff
			&& $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
		$xlim--;
		$ylim--;
	}

	$lcs = 0;
	if ($xoff != $xlim && $yoff != $ylim) {
		// This is ad hoc but seems to work well.
		//$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
		//$nchunks = max(2,min(8,(int)$nchunks));
		$nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
		list ($lcs, $seps)
			= $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
	}

	if ($lcs == 0) {
		// X and Y sequences have no common subsequence:
		// mark all changed.
		for ( ; $yoff < $ylim; $yoff++)
			$this->ychanged[$this->yind[$yoff]] = 1;
		for ( ; $xoff < $xlim; $xoff++)
			$this->xchanged[$this->xind[$xoff]] = 1;
	} else {
		// Use the partitions to split this problem into subproblems:
		// each pair of neighbouring dividing points bounds one of them.
		$nseps = count($seps);
		for ($i = 1; $i < $nseps; $i++) {
			$from = $seps[$i - 1];
			$to = $seps[$i];
			$this->_compareseq ($from[0], $to[0], $from[1], $to[1]);
		}
	}
	wfProfileOut( $fname );
}
1005
1006 /* Adjust inserts/deletes of identical lines to join changes
1007 * as much as possible.
1008 *
1009 * We do something when a run of changed lines include a
1010 * line at one end and has an excluded, identical line at the other.
1011 * We are free to choose which identical line is included.
1012 * `compareseq' usually chooses the one at the beginning,
1013 * but usually it is cleaner to consider the following identical line
1014 * to be the "change".
1015 *
1016 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
1017 */
function _shift_boundaries ($lines, &$changed, $other_changed) {
	/*
	 * Parameters:
	 *   $lines          the sequence whose change flags are being adjusted
	 *                   (expected to be an array of strings)
	 *   $changed        one flag per element of $lines, truthy when the line
	 *                   is part of a change; modified in place
	 *   $other_changed  the same kind of flags for the other sequence;
	 *                   only read here
	 *
	 * Lines are compared with === on purpose. With == PHP compares
	 * numeric-looking strings by value ("10" == "1e1"), which would let a
	 * run of changes slide across lines that are not actually identical
	 * and so mark the wrong line as changed.
	 */
	$fname = '_DiffEngine::_shift_boundaries';
	wfProfileIn( $fname );
	$i = 0;
	$j = 0;

	USE_ASSERTS && assert(sizeof($lines) == sizeof($changed));
	$len = sizeof($lines);
	$other_len = sizeof($other_changed);

	while (1) {
		/*
		 * Scan forwards to find beginning of another run of changes.
		 * Also keep track of the corresponding point in the other file.
		 *
		 * Throughout this code, $i and $j are adjusted together so that
		 * the first $i elements of $changed and the first $j elements
		 * of $other_changed both contain the same number of zeros
		 * (unchanged lines).
		 * Furthermore, $j is always kept so that $j == $other_len or
		 * $other_changed[$j] == false.
		 */
		while ($j < $other_len && $other_changed[$j])
			$j++;

		while ($i < $len && ! $changed[$i]) {
			USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
			$i++; $j++;
			while ($j < $other_len && $other_changed[$j])
				$j++;
		}

		// No further run of changes: done.
		if ($i == $len)
			break;

		$start = $i;

		// Find the end of this run of changes.
		while (++$i < $len && $changed[$i])
			continue;

		do {
			/*
			 * Record the length of this run of changes, so that
			 * we can later determine whether the run has grown.
			 */
			$runlength = $i - $start;

			/*
			 * Move the changed region back, so long as the
			 * previous unchanged line matches the last changed one.
			 * This merges with previous changed regions.
			 */
			while ($start > 0 && $lines[$start - 1] === $lines[$i - 1]) {
				$changed[--$start] = 1;
				$changed[--$i] = false;
				while ($start > 0 && $changed[$start - 1])
					$start--;
				USE_ASSERTS && assert($j > 0);
				while ($other_changed[--$j])
					continue;
				USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
			}

			/*
			 * Set CORRESPONDING to the end of the changed run, at the last
			 * point where it corresponds to a changed run in the other file.
			 * CORRESPONDING == LEN means no such point has been found.
			 */
			$corresponding = $j < $other_len ? $i : $len;

			/*
			 * Move the changed region forward, so long as the
			 * first changed line matches the following unchanged one.
			 * This merges with following changed regions.
			 * Do this second, so that if there are no merges,
			 * the changed region is moved forward as far as possible.
			 */
			while ($i < $len && $lines[$start] === $lines[$i]) {
				$changed[$start++] = false;
				$changed[$i++] = 1;
				while ($i < $len && $changed[$i])
					$i++;

				USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
				$j++;
				if ($j < $other_len && $other_changed[$j]) {
					$corresponding = $i;
					while ($j < $other_len && $other_changed[$j])
						$j++;
				}
			}
			// Repeat while the run keeps growing through merges.
		} while ($runlength != $i - $start);

		/*
		 * If possible, move the fully-merged run of changes
		 * back to a corresponding run in the other file.
		 */
		while ($corresponding < $i) {
			$changed[--$start] = 1;
			$changed[--$i] = 0;
			USE_ASSERTS && assert($j > 0);
			while ($other_changed[--$j])
				continue;
			USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
		}
	}
	wfProfileOut( $fname );
}
1127 }
1128
/**
 * Class representing a 'diff' between two sequences of strings.
 *
 * The result is held in $edits as an ordered list of edit objects. Each
 * edit exposes ->type ('copy' for unchanged runs), ->orig and ->closing
 * (the lines on either side, or a falsy value when a side is absent) and
 * a reverse() method.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class Diff
{
	/** Ordered list of edit objects produced by the diff engine. */
	var $edits;

	/**
	 * Constructor.
	 * Computes diff between sequences of strings.
	 *
	 * @param $from_lines array An array of strings.
	 *		  (Typically these are lines from a file.)
	 * @param $to_lines array An array of strings.
	 */
	function Diff($from_lines, $to_lines) {
		$eng = new _DiffEngine;
		$this->edits = $eng->diff($from_lines, $to_lines);
		// Debugging aid: uncomment to validate every computed diff.
		//$this->_check($from_lines, $to_lines);
	}

	/**
	 * Compute reversed Diff.
	 *
	 * SYNOPSIS:
	 *
	 *	$diff = new Diff($lines1, $lines2);
	 *	$rev = $diff->reverse();
	 *
	 * The receiver is left untouched: the reversed edits are collected
	 * first and then stored on a copy. (Assigning $this to a variable
	 * only copies the object handle, so emptying the "copy" before the
	 * loop would wipe out the very list being iterated.)
	 *
	 * @return object A Diff object representing the inverse of the
	 *				  original diff.
	 */
	function reverse () {
		$reversed = array();
		foreach ($this->edits as $edit) {
			$reversed[] = $edit->reverse();
		}

		$rev = clone $this;
		$rev->edits = $reversed;
		return $rev;
	}

	/**
	 * Check for empty diff.
	 *
	 * @return bool True iff two sequences were identical.
	 */
	function isEmpty () {
		foreach ($this->edits as $edit) {
			if ($edit->type != 'copy')
				return false;
		}
		return true;
	}

	/**
	 * Compute the length of the Longest Common Subsequence (LCS).
	 *
	 * This is mostly for diagnostic purposes. The value is the total
	 * number of lines covered by 'copy' edits.
	 *
	 * @return int The length of the LCS.
	 */
	function lcs () {
		$lcs = 0;
		foreach ($this->edits as $edit) {
			if ($edit->type == 'copy')
				$lcs += sizeof($edit->orig);
		}
		return $lcs;
	}

	/**
	 * Get the original set of lines.
	 *
	 * This reconstructs the $from_lines parameter passed to the
	 * constructor by concatenating the ->orig side of every edit.
	 *
	 * @return array The original sequence of strings.
	 */
	function orig() {
		$lines = array();

		foreach ($this->edits as $edit) {
			// Edits without an original side contribute nothing.
			if ($edit->orig)
				array_splice($lines, sizeof($lines), 0, $edit->orig);
		}
		return $lines;
	}

	/**
	 * Get the closing set of lines.
	 *
	 * This reconstructs the $to_lines parameter passed to the
	 * constructor by concatenating the ->closing side of every edit.
	 *
	 * @return array The sequence of strings.
	 */
	function closing() {
		$lines = array();

		foreach ($this->edits as $edit) {
			// Edits without a closing side contribute nothing.
			if ($edit->closing)
				array_splice($lines, sizeof($lines), 0, $edit->closing);
		}
		return $lines;
	}

	/**
	 * Check a Diff for validity.
	 *
	 * This is here only for debugging purposes. It verifies that both
	 * inputs can be reconstructed from the edits (also through the
	 * reversed diff) and that no two consecutive edits share a type,
	 * then reports the LCS length as a user notice.
	 *
	 * @param $from_lines array The original lines the diff was built from.
	 * @param $to_lines array The closing lines the diff was built from.
	 */
	function _check ($from_lines, $to_lines) {
		$fname = 'Diff::_check';
		wfProfileIn( $fname );
		if (serialize($from_lines) != serialize($this->orig()))
			trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
		if (serialize($to_lines) != serialize($this->closing()))
			trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);

		$rev = $this->reverse();
		if (serialize($to_lines) != serialize($rev->orig()))
			trigger_error("Reversed original doesn't match", E_USER_ERROR);
		if (serialize($from_lines) != serialize($rev->closing()))
			trigger_error("Reversed closing doesn't match", E_USER_ERROR);


		$prevtype = 'none';
		foreach ($this->edits as $edit) {
			if ( $prevtype == $edit->type )
				trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
			$prevtype = $edit->type;
		}

		$lcs = $this->lcs();
		trigger_error('Diff okay: LCS = '.$lcs, E_USER_NOTICE);
		wfProfileOut( $fname );
	}
}
1270
/**
 * A Diff computed on one pair of sequences but reported in terms of
 * another pair of the same lengths.
 *
 * FIXME: bad name.
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class MappedDiff extends Diff
{
	/**
	 * Constructor.
	 *
	 * Computes the diff between the two mapped sequences, then swaps
	 * the lines stored in every edit for the corresponding lines of
	 * the unmapped sequences.
	 *
	 * This can be used to compute things like case-insensitive diffs,
	 * or diffs which ignore changes in white-space.
	 *
	 * @param $from_lines array An array of strings.
	 *	(Typically these are lines from a file.)
	 *
	 * @param $to_lines array An array of strings.
	 *
	 * @param $mapped_from_lines array Must have the same number of
	 *	elements as $from_lines. The elements of $mapped_from_lines
	 *	and $mapped_to_lines are what is actually compared when
	 *	computing the diff.
	 *
	 * @param $mapped_to_lines array Must have the same number of
	 *	elements as $to_lines.
	 */
	function MappedDiff($from_lines, $to_lines,
			$mapped_from_lines, $mapped_to_lines) {
		$fname = 'MappedDiff::MappedDiff';
		wfProfileIn( $fname );

		assert(sizeof($from_lines) == sizeof($mapped_from_lines));
		assert(sizeof($to_lines) == sizeof($mapped_to_lines));

		// Run the parent constructor on the mapped sequences.
		$this->Diff($mapped_from_lines, $mapped_to_lines);

		// Read positions within the unmapped sequences.
		$fromPos = 0;
		$toPos = 0;

		$editCount = sizeof($this->edits);
		for ($n = 0; $n < $editCount; $n++) {
			// Replace each side that holds lines by the same number of
			// unmapped lines, taken from the current read position.
			if (is_array($this->edits[$n]->orig)) {
				$wanted = sizeof($this->edits[$n]->orig);
				$this->edits[$n]->orig = array_slice($from_lines, $fromPos, $wanted);
				$fromPos += sizeof($this->edits[$n]->orig);
			}

			if (is_array($this->edits[$n]->closing)) {
				$wanted = sizeof($this->edits[$n]->closing);
				$this->edits[$n]->closing = array_slice($to_lines, $toPos, $wanted);
				$toPos += sizeof($this->edits[$n]->closing);
			}
		}
		wfProfileOut( $fname );
	}
}
1330
/**
 * A class to format Diffs
 *
 * This class renders a diff in the style of the classic diff format:
 * a header such as "2c2" per block, "<" for removed and ">" for added
 * lines. It is intended that this class be customized via inheritance,
 * to obtain fancier outputs; every step of the rendering is a separate
 * overridable method.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class DiffFormatter
{
	/**
	 * Number of leading context "lines" to preserve.
	 *
	 * This should be left at zero for this class, but subclasses
	 * may want to set this to other values.
	 */
	var $leading_context_lines = 0;

	/**
	 * Number of trailing context "lines" to preserve.
	 *
	 * This should be left at zero for this class, but subclasses
	 * may want to set this to other values.
	 */
	var $trailing_context_lines = 0;

	/**
	 * Format a diff.
	 *
	 * Walks the edits, gathers consecutive changes (plus the configured
	 * amount of surrounding context) into blocks and hands each block
	 * to _block().
	 *
	 * @param $diff object A Diff object.
	 * @return string The formatted output.
	 */
	function format($diff) {
		$fname = 'DiffFormatter::format';
		wfProfileIn( $fname );

		$lead = $this->leading_context_lines;
		$trail = $this->trailing_context_lines;

		// 1-based number of the next line on each side.
		$origLine = 1;
		$closingLine = 1;
		// false while no block is open, otherwise the edits gathered so far.
		$pending = false;
		// Lines of the most recent 'copy' edit, used as leading context.
		$lastCopy = array();

		$this->_start_diff();

		foreach ($diff->edits as $edit) {
			$isCopy = ($edit->type == 'copy');

			if ($isCopy && is_array($pending)) {
				if (sizeof($edit->orig) <= $lead + $trail) {
					// Gap too short to split on: keep it inside the block.
					$pending[] = $edit;
				}
				else {
					// Close the block, with trailing context if requested.
					if ($trail) {
						$lastCopy = array_slice($edit->orig, 0, $trail);
						$pending[] = new _DiffOp_Copy($lastCopy);
					}
					$this->_block($origStart, $trail + $origLine - $origStart,
						$closingStart, $trail + $closingLine - $closingStart,
						$pending);
					$pending = false;
				}
			}

			if ($isCopy) {
				$lastCopy = $edit->orig;
			}
			else {
				if (! is_array($pending)) {
					// Open a new block, starting with the leading context.
					$lastCopy = array_slice($lastCopy, sizeof($lastCopy) - $lead);
					$origStart = $origLine - sizeof($lastCopy);
					$closingStart = $closingLine - sizeof($lastCopy);
					$pending = array();
					if ($lastCopy)
						$pending[] = new _DiffOp_Copy($lastCopy);
				}
				$pending[] = $edit;
			}

			if ($edit->orig)
				$origLine += sizeof($edit->orig);
			if ($edit->closing)
				$closingLine += sizeof($edit->closing);
		}

		// Flush a block that is still open at the end of the diff.
		if (is_array($pending))
			$this->_block($origStart, $origLine - $origStart,
				$closingStart, $closingLine - $closingStart,
				$pending);

		$result = $this->_end_diff();
		wfProfileOut( $fname );
		return $result;
	}

	/**
	 * Render one block: its header followed by each of its edits.
	 *
	 * @param $xbeg int First line of the block on the original side.
	 * @param $xlen int Number of original lines in the block.
	 * @param $ybeg int First line of the block on the closing side.
	 * @param $ylen int Number of closing lines in the block.
	 * @param $edits array The edits that make up the block.
	 */
	function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
		$fname = 'DiffFormatter::_block';
		wfProfileIn( $fname );
		$header = $this->_block_header($xbeg, $xlen, $ybeg, $ylen);
		$this->_start_block($header);
		foreach ($edits as $edit) {
			switch ($edit->type) {
				case 'copy':
					$this->_context($edit->orig);
					break;
				case 'add':
					$this->_added($edit->closing);
					break;
				case 'delete':
					$this->_deleted($edit->orig);
					break;
				case 'change':
					$this->_changed($edit->orig, $edit->closing);
					break;
				default:
					trigger_error('Unknown edit type', E_USER_ERROR);
			}
		}
		$this->_end_block();
		wfProfileOut( $fname );
	}

	/** Begin capturing everything the formatter echoes. */
	function _start_diff() {
		ob_start();
	}

	/**
	 * Stop capturing and hand back what was echoed.
	 *
	 * @return string The captured output.
	 */
	function _end_diff() {
		$captured = ob_get_contents();
		ob_end_clean();
		return $captured;
	}

	/**
	 * Build the block header, e.g. "3,4c3" or "1a1".
	 *
	 * A side spanning more than one line is written as "first,last".
	 * The letter is 'a' when the block has no original lines, 'd' when
	 * it has no closing lines, and 'c' otherwise.
	 *
	 * @return string The header text.
	 */
	function _block_header($xbeg, $xlen, $ybeg, $ylen) {
		$origRange = $xbeg;
		if ($xlen > 1)
			$origRange = $xbeg . "," . ($xbeg + $xlen - 1);

		$closingRange = $ybeg;
		if ($ylen > 1)
			$closingRange = $ybeg . "," . ($ybeg + $ylen - 1);

		if (! $xlen)
			$action = 'a';
		elseif ($ylen)
			$action = 'c';
		else
			$action = 'd';

		return $origRange . $action . $closingRange;
	}

	/** Emit the header of a block. */
	function _start_block($header) {
		echo $header;
	}

	/** Hook called after the last edit of a block; nothing to do here. */
	function _end_block() {
	}

	/**
	 * Emit each line preceded by $prefix and a space.
	 *
	 * @param $lines array The lines to print.
	 * @param $prefix string Marker placed in front of every line.
	 */
	function _lines($lines, $prefix = ' ') {
		foreach ($lines as $text)
			echo $prefix . ' ' . $text . "\n";
	}

	/** Emit unchanged lines. */
	function _context($lines) {
		$this->_lines($lines);
	}

	/** Emit added lines, marked with '>'. */
	function _added($lines) {
		$this->_lines($lines, '>');
	}

	/** Emit deleted lines, marked with '<'. */
	function _deleted($lines) {
		$this->_lines($lines, '<');
	}

	/** Emit a change: the deleted lines, a "---" separator, the added lines. */
	function _changed($orig, $closing) {
		$this->_deleted($orig);
		echo "---\n";
		$this->_added($closing);
	}
}
1494
1495
1496 /**
1497 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
1498 *
1499 */
1500
// HTML numeric character reference for the non-breaking space
// (character 160), used to keep otherwise empty lines visible.
define( 'NBSP', '&#160;' );
1502
/**
 * Collects words into HTML-escaped output lines.
 *
 * Words are appended in groups that share a tag. When a group is
 * flushed it is HTML-escaped and, if its tag is 'mark', wrapped in
 * <span class="diffchange">. A word starting with a newline ends the
 * current output line.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class _HWLDF_WordAccumulator {
	/*
	 * The state is declared with its initial values so that an instance
	 * is usable even where the PHP4-style constructor below is not run
	 * automatically.
	 */
	/** Finished output lines. */
	var $_lines = array();
	/** HTML of the line currently being built. */
	var $_line = '';
	/** Raw (unescaped) text of the group currently being built. */
	var $_group = '';
	/** Tag of the current group ('mark' means highlighted). */
	var $_tag = '';

	/** Constructor: reset the accumulator to its empty state. */
	function _HWLDF_WordAccumulator () {
		$this->_lines = array();
		$this->_line = '';
		$this->_group = '';
		$this->_tag = '';
	}

	/**
	 * Append the pending group to the current line and start a new one.
	 *
	 * @param $new_tag string Tag of the group that starts now.
	 */
	function _flushGroup ($new_tag) {
		if ($this->_group !== '') {
			if ($this->_tag == 'mark')
				$this->_line .= '<span class="diffchange">' .
					htmlspecialchars ( $this->_group ) . '</span>';
			else
				$this->_line .= htmlspecialchars ( $this->_group );
		}
		$this->_group = '';
		$this->_tag = $new_tag;
	}

	/**
	 * Finish the current line and store it.
	 *
	 * @param $new_tag string Tag of the group that starts now.
	 */
	function _flushLine ($new_tag) {
		$this->_flushGroup($new_tag);
		if ($this->_line != '')
			array_push ( $this->_lines, $this->_line );
		else
			# make empty lines visible by inserting an NBSP
			array_push ( $this->_lines, NBSP );
		$this->_line = '';
	}

	/**
	 * Append words to the output.
	 *
	 * @param $words array The words to add; empty strings are skipped.
	 * @param $tag string Tag for these words; 'mark' highlights them.
	 */
	function addWords ($words, $tag = '') {
		if ($tag != $this->_tag)
			$this->_flushGroup($tag);

		foreach ($words as $word) {
			// new-line should only come as first char of word.
			if ($word == '')
				continue;
			if ($word[0] == "\n") {
				$this->_flushLine($tag);
				$word = substr($word, 1);
			}
			assert(!strstr($word, "\n"));
			$this->_group .= $word;
		}
	}

	/**
	 * Flush whatever is pending and return all lines.
	 *
	 * @return array The accumulated HTML lines.
	 */
	function getLines() {
		$this->_flushLine('~done');
		return $this->_lines;
	}
}
1561
/**
 * Word-by-word diff of two groups of lines.
 *
 * Both groups are split into words; the words are compared without
 * their trailing whitespace, while the edits keep the words as they
 * were written. orig() and closing() render each side as HTML lines
 * with the differing words highlighted.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class WordLevelDiff extends MappedDiff
{
	/**
	 * Constructor.
	 *
	 * @param $orig_lines array Lines of the original side.
	 * @param $closing_lines array Lines of the closing side.
	 */
	function WordLevelDiff ($orig_lines, $closing_lines) {
		$fname = 'WordLevelDiff::WordLevelDiff';
		wfProfileIn( $fname );

		list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
		list ($closing_words, $closing_stripped) = $this->_split($closing_lines);

		$this->MappedDiff($orig_words, $closing_words,
			$orig_stripped, $closing_stripped);
		wfProfileOut( $fname );
	}

	/**
	 * Split lines into words.
	 *
	 * @param $lines array The lines to split.
	 * @return array A pair of parallel lists: the words as written
	 *	(possibly with one trailing whitespace character attached) and
	 *	the same words without that trailing character. Consecutive
	 *	lines are separated by a "\n" entry in both lists.
	 */
	function _split($lines) {
		$fname = 'WordLevelDiff::_split';
		wfProfileIn( $fname );

		$words = array();
		$stripped = array();
		$first = true;
		foreach ( $lines as $line ) {
			# Every line but the first is preceded by a newline pseudo-word
			if ( $first ) {
				$first = false;
			} else {
				$words[] = "\n";
				$stripped[] = "\n";
			}
			if ( strlen( $line ) > MAX_DIFF_LINE ) {
				# If the line is too long, just pretend the entire line is one big word
				# This prevents resource exhaustion problems
				$words[] = $line;
				$stripped[] = $line;
			} else {
				# A word is a run of non-newline whitespace, a run of
				# word characters, or any single character; one trailing
				# non-newline whitespace character may be attached to it.
				# NOTE(review): "(?!< \n)" looks like a typo for the
				# look-behind "(?<! \n)"; left as is because changing it
				# would change how words are split.
				if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
					$line, $m))
				{
					# Append instead of array_merge(): merging would copy
					# everything collected so far once per line.
					foreach ( $m[0] as $word ) {
						$words[] = $word;
					}
					foreach ( $m[1] as $bare ) {
						$stripped[] = $bare;
					}
				}
			}
		}
		wfProfileOut( $fname );
		return array($words, $stripped);
	}

	/**
	 * Render the original side.
	 *
	 * @return array HTML-escaped lines; words that are not part of a
	 *	'copy' edit are highlighted.
	 */
	function orig () {
		$fname = 'WordLevelDiff::orig';
		wfProfileIn( $fname );
		$orig = new _HWLDF_WordAccumulator;

		foreach ($this->edits as $edit) {
			if ($edit->type == 'copy')
				$orig->addWords($edit->orig);
			elseif ($edit->orig)
				$orig->addWords($edit->orig, 'mark');
		}
		$lines = $orig->getLines();
		wfProfileOut( $fname );
		return $lines;
	}

	/**
	 * Render the closing side.
	 *
	 * @return array HTML-escaped lines; words that are not part of a
	 *	'copy' edit are highlighted.
	 */
	function closing () {
		$fname = 'WordLevelDiff::closing';
		wfProfileIn( $fname );
		$closing = new _HWLDF_WordAccumulator;

		foreach ($this->edits as $edit) {
			if ($edit->type == 'copy')
				$closing->addWords($edit->closing);
			elseif ($edit->closing)
				$closing->addWords($edit->closing, 'mark');
		}
		$lines = $closing->getLines();
		wfProfileOut( $fname );
		return $lines;
	}
}
1646
/**
 * Wikipedia Table style diff formatter.
 *
 * Renders every block as HTML table rows with four cells: a marker and
 * a text cell for the original side, then the same for the closing
 * side. Changed lines are compared word by word so that only the
 * differing words are highlighted.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class TableDiffFormatter extends DiffFormatter
{
	/*
	 * Two lines of context on either side of a change. Declared as
	 * defaults so they also apply where the PHP4-style constructor
	 * below is not run automatically.
	 */
	var $leading_context_lines = 2;
	var $trailing_context_lines = 2;

	/** Constructor: show two lines of context around each change. */
	function TableDiffFormatter() {
		$this->leading_context_lines = 2;
		$this->trailing_context_lines = 2;
	}

	/**
	 * Build the header row of a block. The start line of each side is
	 * placed in an HTML comment of the form <!--LINE n-->.
	 *
	 * @return string The header row.
	 */
	function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
		$r = '<tr><td colspan="2" align="left"><strong><!--LINE '.$xbeg."--></strong></td>\n" .
			'<td colspan="2" align="left"><strong><!--LINE '.$ybeg."--></strong></td></tr>\n";
		return $r;
	}

	/** Emit the header row of a block. */
	function _start_block( $header ) {
		echo $header;
	}

	/** Nothing to emit at the end of a block. */
	function _end_block() {
	}

	/** Unused by this formatter; rows are built by the methods below. */
	function _lines( $lines, $prefix=' ', $color='white' ) {
	}

	# HTML-escape parameter before calling this
	function addedLine( $line ) {
		return "<td>+</td><td class='diff-addedline'>{$line}</td>";
	}

	# HTML-escape parameter before calling this
	function deletedLine( $line ) {
		return "<td>-</td><td class='diff-deletedline'>{$line}</td>";
	}

	# HTML-escape parameter before calling this
	function contextLine( $line ) {
		return "<td> </td><td class='diff-context'>{$line}</td>";
	}

	/** Placeholder cell pair for the side that has no line in a row. */
	function emptyLine() {
		return '<td colspan="2">&nbsp;</td>';
	}

	/** Emit one row per added line: empty on the left, the line on the right. */
	function _added( $lines ) {
		foreach ($lines as $line) {
			echo '<tr>' . $this->emptyLine() .
				$this->addedLine( htmlspecialchars ( $line ) ) . "</tr>\n";
		}
	}

	/** Emit one row per deleted line: the line on the left, empty on the right. */
	function _deleted($lines) {
		foreach ($lines as $line) {
			echo '<tr>' . $this->deletedLine( htmlspecialchars ( $line ) ) .
				$this->emptyLine() . "</tr>\n";
		}
	}

	/** Emit one row per unchanged line, showing it on both sides. */
	function _context( $lines ) {
		foreach ($lines as $line) {
			echo '<tr>' .
				$this->contextLine( htmlspecialchars ( $line ) ) .
				$this->contextLine( htmlspecialchars ( $line ) ) . "</tr>\n";
		}
	}

	/**
	 * Emit the rows of a changed block, pairing the n-th deleted line
	 * with the n-th added line.
	 *
	 * @param $orig array The lines that were replaced.
	 * @param $closing array The lines that replaced them.
	 */
	function _changed( $orig, $closing ) {
		$fname = 'TableDiffFormatter::_changed';
		wfProfileIn( $fname );

		$diff = new WordLevelDiff( $orig, $closing );
		$del = $diff->orig();
		$add = $diff->closing();

		# Notice that WordLevelDiff returns HTML-escaped output.
		# Hence, we will be calling addedLine/deletedLine without HTML-escaping.

		# Walk both sides by index rather than testing each shifted line
		# for truthiness: a line that renders as "0" is falsy and would
		# otherwise cut the output short. Whichever side runs out first
		# is padded with the empty placeholder.
		$ndel = sizeof( $del );
		$nadd = sizeof( $add );
		$rows = max( $ndel, $nadd );
		for ( $i = 0; $i < $rows; $i++ ) {
			$left = ( $i < $ndel ) ? $this->deletedLine( $del[$i] ) : $this->emptyLine();
			$right = ( $i < $nadd ) ? $this->addedLine( $add[$i] ) : $this->emptyLine();
			echo '<tr>' . $left . $right . "</tr>\n";
		}
		wfProfileOut( $fname );
	}
}
1742
1743 ?>