Live fix: profiling points for wikidiff2
[lhc/web/wiklou.git] / includes / DifferenceEngine.php
1 <?php
2 /**
3 * See diff.doc
4 * @package MediaWiki
5 * @subpackage DifferenceEngine
6 */
7
8 /** */
9 require_once( 'Revision.php' );
10
// NOTE(review): MAX_DIFF_LINE is not referenced in the visible part of this
// file; presumably a per-line length limit used further down -- confirm.
define( 'MAX_DIFF_LINE', 10000 );
// Lines longer than this many bytes are cross-referenced by their MD5 hash
// instead of by their full content (see _DiffEngine::_line_hash()).
define( 'MAX_DIFF_XREF_LENGTH', 10000 );
13
14 /**
15 * @todo document
16 * @access public
17 * @package MediaWiki
18 * @subpackage DifferenceEngine
19 */
20 class DifferenceEngine {
21 /**#@+
22 * @access private
23 */
24 var $mOldid, $mNewid, $mTitle;
25 var $mOldtitle, $mNewtitle, $mPagetitle;
26 var $mOldtext, $mNewtext;
27 var $mOldPage, $mNewPage;
28 var $mRcidMarkPatrolled;
29 var $mOldRev, $mNewRev;
30 var $mRevisionsLoaded = false; // Have the revisions been loaded
31 var $mTextLoaded = 0; // How many text blobs have been loaded, 0, 1 or 2?
32 /**#@-*/
33
34 /**
35 * Constructor
36 * @param Title $titleObj Title object that the diff is associated with
37 * @param integer $old Old ID we want to show and diff with.
38 * @param mixed $new Either 'prev' or 'next', or the ID of the newer revision.
39 * @param integer $rcid Recent changes ID used for the "mark as patrolled" link, 0 for none (default 0)
40 */
function DifferenceEngine( $titleObj = null, $old = 0, $new = 0, $rcid = 0 ) {
	$this->mTitle = $titleObj;
	wfDebug("DifferenceEngine old '$old' new '$new' rcid '$rcid'\n");

	if ( $new === 'prev' ) {
		# $old is the newer side of the diff; the older side is whatever
		# revision precedes it according to the title object.
		$newerId = intval( $old );
		$olderId = $this->mTitle->getPreviousRevisionID( $newerId );
	} elseif ( $new === 'next' ) {
		# $old is the older side of the diff; the newer side is whatever
		# revision follows it according to the title object.
		$olderId = intval( $old );
		$newerId = $this->mTitle->getNextRevisionID( $olderId );
		if ( $newerId === false ) {
			# No following revision was found, so $old is the newest old
			# revision. The only newer revision is cur, which is "0".
			$newerId = 0;
		}
	} else {
		# Both sides were given explicitly.
		$olderId = intval( $old );
		$newerId = intval( $new );
	}

	$this->mOldid = $olderId;
	$this->mNewid = $newerId;
	# force the rcid to be an integer
	$this->mRcidMarkPatrolled = intval( $rcid );
}
71
/**
 * Output the whole diff page to $wgOut: the header cells for both
 * revisions, the diff table, and the rendered text of the new revision.
 *
 * If external diffs are enabled globally and for the user, a control file
 * for the external tool is echoed instead and normal output is disabled.
 * If the revision data cannot be loaded, a 'missingarticle' error page is
 * shown. If there is no old revision (mOldid === false), the work is
 * delegated to showFirstRevision().
 */
function showDiffPage() {
	global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol;
	$fname = 'DifferenceEngine::showDiffPage';
	wfProfileIn( $fname );

	# If external diffs are enabled both globally and for the user,
	# we'll use the application/x-external-editor interface to call
	# an external diff tool like kompare, kdiff3, etc.
	if($wgUseExternalEditor && $wgUser->getOption('externaldiff')) {
		global $wgInputEncoding,$wgServer,$wgScript,$wgLang;
		$wgOut->disable();
		header ( "Content-type: application/x-external-editor; charset=".$wgInputEncoding );
		$url1=$this->mTitle->getFullURL("action=raw&oldid=".$this->mOldid);
		$url2=$this->mTitle->getFullURL("action=raw&oldid=".$this->mNewid);
		$special=$wgLang->getNsText(NS_SPECIAL);
		$control=<<<CONTROL
[Process]
Type=Diff text
Engine=MediaWiki
Script={$wgServer}{$wgScript}
Special namespace={$special}

[File]
Extension=wiki
URL=$url1

[File 2]
Extension=wiki
URL=$url2
CONTROL;
		echo($control);
		# Close the profiling section opened above; every other exit from
		# this method does the same.
		wfProfileOut( $fname );
		return;
	}

	# Error text, used only if the revisions cannot be loaded
	$t = $this->mTitle->getPrefixedText() . " (Diff: {$this->mOldid}, " .
		"{$this->mNewid})";
	$mtext = wfMsg( 'missingarticle', "<nowiki>$t</nowiki>" );

	$wgOut->setArticleFlag( false );
	if ( ! $this->loadRevisionData() ) {
		$wgOut->setPagetitle( wfMsg( 'errorpagetitle' ) );
		$wgOut->addWikitext( $mtext );
		wfProfileOut( $fname );
		return;
	}
	if ( $this->mNewRev->isCurrent() ) {
		$wgOut->setArticleFlag( true );
	}

	# mOldid is false if the difference engine is called with a "vague" query for
	# a diff between a version V and its previous version V' AND the version V
	# is the first version of that article. In that case, V' does not exist.
	if ( $this->mOldid === false ) {
		$this->showFirstRevision();
		wfProfileOut( $fname );
		return;
	}

	$wgOut->suppressQuickbar();

	# Page title: one title if both revisions belong to the same page,
	# otherwise both titles
	$oldTitle = $this->mOldPage->getPrefixedText();
	$newTitle = $this->mNewPage->getPrefixedText();
	if( $oldTitle == $newTitle ) {
		$wgOut->setPageTitle( $newTitle );
	} else {
		$wgOut->setPageTitle( $oldTitle . ', ' . $newTitle );
	}
	$wgOut->setSubtitle( wfMsg( 'difference' ) );
	$wgOut->setRobotpolicy( 'noindex,follow' );

	# Both pages must be readable by the user, otherwise bail out
	if ( !( $this->mOldPage->userCanRead() && $this->mNewPage->userCanRead() ) ) {
		$wgOut->loginToUse();
		$wgOut->output();
		wfProfileOut( $fname );
		exit;
	}

	$sk = $wgUser->getSkin();

	# Rollback link: only on the current revision and only with the right
	if ( $this->mNewRev->isCurrent() && $wgUser->isAllowed('rollback') ) {
		$username = $this->mNewRev->getUserText();
		$rollback = '&nbsp;&nbsp;&nbsp;<strong>[' . $sk->makeKnownLinkObj( $this->mTitle, wfMsg( 'rollbacklink' ),
			'action=rollback&from=' . urlencode( $username ) .
			'&token=' . urlencode( $wgUser->editToken( array( $this->mTitle->getPrefixedText(), $username ) ) ) ) .
			']</strong>';
	} else {
		$rollback = '';
	}
	# Patrol link: only when an rcid was passed in and patrolling is enabled
	if( $wgUseRCPatrol && $this->mRcidMarkPatrolled != 0 && $wgUser->isAllowed( 'patrol' ) ) {
		$patrol = ' [' . $sk->makeKnownLinkObj( $this->mTitle, wfMsg( 'markaspatrolleddiff' ), "action=markpatrolled&rcid={$this->mRcidMarkPatrolled}" ) . ']';
	} else {
		$patrol = '';
	}

	$prevlink = $sk->makeKnownLinkObj( $this->mTitle, wfMsgHtml( 'previousdiff' ),
		'diff=prev&oldid='.$this->mOldid, '', '', 'id="differences-prevlink"' );
	if ( $this->mNewRev->isCurrent() ) {
		# Nothing newer than the current revision
		$nextlink = '';
	} else {
		$nextlink = $sk->makeKnownLinkObj( $this->mTitle, wfMsgHtml( 'nextdiff' ),
			'diff=next&oldid='.$this->mNewid, '', '', 'id="differences-nextlink"' );
	}

	$oldHeader = "<strong>{$this->mOldtitle}</strong><br />" .
		$sk->revUserTools( $this->mOldRev ) . "<br />" .
		$sk->revComment( $this->mOldRev ) . "<br />" .
		$prevlink;
	$newHeader = "<strong>{$this->mNewtitle}</strong><br />" .
		$sk->revUserTools( $this->mNewRev ) . " $rollback<br />" .
		$sk->revComment( $this->mNewRev ) . "<br />" .
		$nextlink . $patrol;

	$this->showDiff( $oldHeader, $newHeader );
	$wgOut->addHTML( "<hr /><h2>{$this->mPagetitle}</h2>\n" );

	# Section edit links are switched off while rendering a non-current
	# revision; the previous setting is restored afterwards.
	if( !$this->mNewRev->isCurrent() ) {
		$oldEditSectionSetting = $wgOut->mParserOptions->setEditSection( false );
	}

	$this->loadNewText();
	if( is_object( $this->mNewRev ) ) {
		$wgOut->setRevisionId( $this->mNewRev->getId() );
	}
	$wgOut->addSecondaryWikiText( $this->mNewtext );

	if( !$this->mNewRev->isCurrent() ) {
		$wgOut->mParserOptions->setEditSection( $oldEditSectionSetting );
	}

	wfProfileOut( $fname );
}
205
206 /**
207 * Show the first revision of an article. Uses normal diff headers in
208 * contrast to normal "old revision" display style.
209 */
function showFirstRevision() {
	global $wgOut, $wgUser;

	$fname = 'DifferenceEngine::showFirstRevision';
	wfProfileIn( $fname );

	# Get article text from the DB
	#
	if ( ! $this->loadNewText() ) {
		$t = $this->mTitle->getPrefixedText() . " (Diff: {$this->mOldid}, " .
			"{$this->mNewid})";
		$mtext = wfMsg( 'missingarticle', "<nowiki>$t</nowiki>" );
		$wgOut->setPagetitle( wfMsg( 'errorpagetitle' ) );
		$wgOut->addWikitext( $mtext );
		wfProfileOut( $fname );
		return;
	}
	if ( $this->mNewRev->isCurrent() ) {
		$wgOut->setArticleFlag( true );
	}

	# Check if user is allowed to look at this page. If not, bail out.
	#
	if ( !( $this->mTitle->userCanRead() ) ) {
		$wgOut->loginToUse();
		$wgOut->output();
		wfProfileOut( $fname );
		exit;
	}

	# Prepare the header box
	#
	$sk = $wgUser->getSkin();

	$nextlink = $sk->makeKnownLinkObj( $this->mTitle, wfMsgHtml( 'nextdiff' ), 'diff=next&oldid='.$this->mNewid, '', '', 'id="differences-nextlink"' );
	# There is no old revision on this code path, so mOldtitle is never
	# filled in by loadRevisionData(); label the box with the title link of
	# the revision actually being shown instead of an empty <strong>.
	$header = "<div class=\"firstrevisionheader\" style=\"text-align: center\"><strong>{$this->mNewtitle}</strong><br />" .
		$sk->revUserTools( $this->mNewRev ) . "<br />" .
		$sk->revComment( $this->mNewRev ) . "<br />" .
		$nextlink . "</div>\n";

	$wgOut->addHTML( $header );

	$wgOut->setSubtitle( wfMsg( 'difference' ) );
	$wgOut->setRobotpolicy( 'noindex,nofollow' );


	# Show current revision
	#
	$wgOut->addHTML( "<hr /><h2>{$this->mPagetitle}</h2>\n" );
	if( is_object( $this->mNewRev ) ) {
		$wgOut->setRevisionId( $this->mNewRev->getId() );
	}
	$wgOut->addSecondaryWikiText( $this->mNewtext );

	wfProfileOut( $fname );
}
266
267 /**
268 * Get the diff text, send it to $wgOut
269 * Returns false if the diff could not be generated, otherwise returns true
270 */
function showDiff( $otitle, $ntitle ) {
	global $wgOut;

	$html = $this->getDiff( $otitle, $ntitle );
	if ( $html !== false ) {
		$wgOut->addHTML( $html );
		return true;
	}

	// The diff could not be generated: show an error message instead
	$wgOut->addWikitext( wfMsg( 'missingarticle', "<nowiki>(fixme, bug)</nowiki>" ) );
	return false;
}
282
283 /**
284 * Get diff table, including header
285 * Note that the interface has changed, it's no longer static.
286 * Returns false on error
287 */
function getDiff( $otitle, $ntitle ) {
	$tableBody = $this->getDiffBody();
	// Pass a failure straight through; otherwise wrap the body in the table header
	return ( $tableBody === false )
		? false
		: $this->addHeader( $tableBody, $otitle, $ntitle );
}
296
297 /**
298 * Get the diff table body, without header
299 * Results are cached
300 * Returns false on error
301 */
function getDiffBody() {
	global $wgMemc, $wgDBname;
	$fname = 'DifferenceEngine::getDiffBody';
	wfProfileIn( $fname );

	// Only diffs between two explicit revision IDs get a cache key
	$cacheKey = false;
	if ( $this->mOldid && $this->mNewid ) {
		$cacheKey = "$wgDBname:diff:oldid:{$this->mOldid}:newid:{$this->mNewid}";
		$cached = $wgMemc->get( $cacheKey );
		if ( $cached ) {
			wfIncrStats( 'diff_cache_hit' );
			$cached = $this->localiseLineNumbers( $cached );
			$cached .= "\n<!-- diff cache key $cacheKey -->\n";
			wfProfileOut( $fname );
			return $cached;
		}
	}

	// Nothing usable in the cache: load both texts and diff them
	if ( !$this->loadText() ) {
		wfProfileOut( $fname );
		return false;
	}
	$body = $this->generateDiffBody( $this->mOldtext, $this->mNewtext );
	$generated = ( $body !== false );

	if ( $generated && $cacheKey !== false ) {
		// Keep the raw (not yet localised) body for 7 days
		wfIncrStats( 'diff_cache_miss' );
		$wgMemc->set( $cacheKey, $body, 7*86400 );
	} else {
		wfIncrStats( 'diff_uncacheable' );
	}

	// Line number markers are localised after caching, for the current user's language
	if ( $generated ) {
		$body = $this->localiseLineNumbers( $body );
	}

	wfProfileOut( $fname );
	return $body;
}
343
344 /**
345 * Generate a diff, no caching
346 * $otext and $ntext are passed in unsegmented; segmentation is applied here for the wikidiff and native PHP engines
347 */
function generateDiffBody( $otext, $ntext ) {
	global $wgExternalDiffEngine, $wgContLang;
	$fname = 'DifferenceEngine::generateDiffBody';
	// Open the section that the early returns below close; every return
	// path in this method must call wfProfileOut( $fname ).
	wfProfileIn( $fname );

	// Normalise line endings so CRLF vs LF alone is never a difference
	$otext = str_replace( "\r\n", "\n", $otext );
	$ntext = str_replace( "\r\n", "\n", $ntext );

	if ( $wgExternalDiffEngine == 'wikidiff' ) {
		# For historical reasons, external diff engine expects
		# input text to be HTML-escaped already
		$otext = htmlspecialchars ( $wgContLang->segmentForDiff( $otext ) );
		$ntext = htmlspecialchars ( $wgContLang->segmentForDiff( $ntext ) );
		if( !function_exists( 'wikidiff_do_diff' ) ) {
			dl('php_wikidiff.so');
		}
		// Same method name as the native PHP path at the end of this function
		$difftext = $wgContLang->unsegmentForDiff( wikidiff_do_diff( $otext, $ntext, 2 ) );
		wfProfileOut( $fname );
		return $difftext;
	}

	if ( $wgExternalDiffEngine == 'wikidiff2' ) {
		# Better external diff engine, the 2 may some day be dropped
		# This one does the escaping and segmenting itself
		if ( !function_exists( 'wikidiff2_do_diff' ) ) {
			wfProfileIn( "$fname-dl" );
			@dl('php_wikidiff2.so');
			wfProfileOut( "$fname-dl" );
		}
		if ( function_exists( 'wikidiff2_do_diff' ) ) {
			wfProfileIn( 'wikidiff2_do_diff' );
			$text = wikidiff2_do_diff( $otext, $ntext, 2 );
			wfProfileOut( 'wikidiff2_do_diff' );
			wfProfileOut( $fname );
			return $text;
		}
		// NOTE(review): if the extension could not be loaded, control falls
		// through to the shell branch with 'wikidiff2' as the command name.
	}
	if ( $wgExternalDiffEngine !== false ) {
		# Diff via the shell
		global $wgTmpDirectory;
		$tempName1 = tempnam( $wgTmpDirectory, 'diff_' );
		$tempName2 = tempnam( $wgTmpDirectory, 'diff_' );

		$tempFile1 = fopen( $tempName1, "w" );
		$tempFile2 = $tempFile1 ? fopen( $tempName2, "w" ) : false;
		if ( !$tempFile1 || !$tempFile2 ) {
			// Do not leak the handle that did open, nor the files that
			// tempnam() already created on disk.
			if ( $tempFile1 ) {
				fclose( $tempFile1 );
			}
			if ( $tempName1 !== false ) {
				unlink( $tempName1 );
			}
			if ( $tempName2 !== false ) {
				unlink( $tempName2 );
			}
			wfProfileOut( $fname );
			return false;
		}
		fwrite( $tempFile1, $otext );
		fwrite( $tempFile2, $ntext );
		fclose( $tempFile1 );
		fclose( $tempFile2 );
		$cmd = wfEscapeShellArg( $wgExternalDiffEngine, $tempName1, $tempName2 );
		wfProfileIn( "$fname-shellexec" );
		$difftext = wfShellExec( $cmd );
		wfProfileOut( "$fname-shellexec" );
		unlink( $tempName1 );
		unlink( $tempName2 );
		wfProfileOut( $fname );
		return $difftext;
	}

	# Native PHP diff
	$ota = explode( "\n", $wgContLang->segmentForDiff( $otext ) );
	$nta = explode( "\n", $wgContLang->segmentForDiff( $ntext ) );
	$diffs =& new Diff( $ota, $nta );
	$formatter =& new TableDiffFormatter();
	$difftext = $wgContLang->unsegmentForDiff( $formatter->format( $diffs ) );
	wfProfileOut( $fname );
	return $difftext;
}
417
418
419 /**
420 * Replace line numbers with the text in the user's language
421 */
function localiseLineNumbers( $text ) {
	// Marker comments of the form <!--LINE n--> are handed to the callback one by one
	$markerPattern = '/<!--LINE (\d+)-->/';
	$callback = array( &$this, 'localiseLineNumbersCb' );
	return preg_replace_callback( $markerPattern, $callback, $text );
}
426
/**
 * preg_replace_callback() handler for localiseLineNumbers().
 * $matches[1] is the captured line number; it is formatted with $wgLang
 * and inserted into the 'lineno' message.
 */
function localiseLineNumbersCb( $matches ) {
	global $wgLang;
	$formattedNumber = $wgLang->formatNum( $matches[1] );
	return wfMsg( 'lineno', $formattedNumber );
}
431
432 /**
433 * Add the header to a diff body
434 */
function addHeader( $diff, $otitle, $ntitle ) {
	// $diff, $otitle and $ntitle are interpolated as-is (no escaping is done
	// here). The table has four columns: each title cell spans two of them.
	$out = "
<table border='0' width='98%' cellpadding='0' cellspacing='4' class='diff'>
<tr>
<td colspan='2' width='50%' align='center' class='diff-otitle'>{$otitle}</td>
<td colspan='2' width='50%' align='center' class='diff-ntitle'>{$ntitle}</td>
</tr>
$diff
</table>
";
	return $out;
}
447
448 /**
449 * Use specified text instead of loading from the database
450 */
function setText( $oldText, $newText ) {
	// Flag both text blobs as loaded so that loadText() and loadNewText()
	// return immediately without touching the revisions.
	$this->mTextLoaded = 2;
	$this->mNewtext = $newText;
	$this->mOldtext = $oldText;
}
456
457 /**
458 * Load revision metadata for the specified articles. If newid is 0, then compare
459 * the old article in oldid to the current article; if oldid is 0, then
460 * compare the new revision (newid, or the current one when newid is 0) to
461 * the revision immediately preceding it.
462 *
463 * If oldid is false, leave the corresponding revision object set
464 * to false. This is impossible via ordinary user input, and is provided for
465 * API convenience.
466 */
function loadRevisionData() {
	global $wgLang;

	// Whether it succeeds or fails, we don't want to try again
	if ( $this->mRevisionsLoaded ) {
		return true;
	}
	$this->mRevisionsLoaded = true;

	// New side: an explicit revision ID, or else the title's revision
	$this->mNewRev = $this->mNewid
		? Revision::newFromId( $this->mNewid )
		: Revision::newFromTitle( $this->mTitle );
	if ( $this->mNewRev === null ) {
		return false;
	}

	// Page heading text and link target for the new side
	if ( $this->mNewRev->isCurrent() ) {
		$this->mPagetitle = htmlspecialchars( wfMsg( 'currentrev' ) );
		$this->mNewPage = $this->mTitle;
		$newLink = $this->mNewPage->escapeLocalUrl();
	} else {
		$this->mNewPage = $this->mNewRev->getTitle();
		$newLink = $this->mNewPage->escapeLocalUrl ('oldid=' . $this->mNewid );
		$stamp = $wgLang->timeanddate( $this->mNewRev->getTimestamp(), true );
		$this->mPagetitle = htmlspecialchars( wfMsg( 'revisionasof', $stamp ) );
	}
	$this->mNewtitle = "<a href='$newLink'>{$this->mPagetitle}</a>";

	// Old side: stays false unless a revision is found below. An mOldid of
	// false (no old revision requested) skips both branches.
	$this->mOldRev = false;
	if ( $this->mOldid ) {
		$this->mOldRev = Revision::newFromId( $this->mOldid );
	} elseif ( $this->mOldid === 0 ) {
		$previous = $this->mNewRev->getPrevious();
		if ( !$previous ) {
			// No previous revision; mark to show as first-version only.
			$this->mOldid = false;
			$this->mOldRev = false;
		} else {
			$this->mOldid = $previous->getId();
			$this->mOldRev = $previous;
		}
	}

	// A null here means the explicit old ID could not be loaded
	if ( $this->mOldRev === null ) {
		return false;
	}

	if ( $this->mOldRev ) {
		$this->mOldPage = $this->mOldRev->getTitle();

		$stamp = $wgLang->timeanddate( $this->mOldRev->getTimestamp(), true );
		$oldLink = $this->mOldPage->escapeLocalUrl( 'oldid=' . $this->mOldid );
		$this->mOldtitle = "<a href='$oldLink'>" . htmlspecialchars( wfMsg( 'revisionasof', $stamp ) ) . '</a>';
	}

	return true;
}
531
532 /**
533 * Load the text of the revisions, as well as revision data.
534 */
function loadText() {
	// Whether it succeeds or fails, we don't want to try again
	if ( $this->mTextLoaded == 2 ) {
		return true;
	}
	$this->mTextLoaded = 2;

	if ( !$this->loadRevisionData() ) {
		return false;
	}

	if ( $this->mOldRev ) {
		// FIXME: permission tests
		$oldText = $this->mOldRev->getText();
		$this->mOldtext = $oldText;
		if ( $oldText === false ) {
			return false;
		}
	}

	if ( $this->mNewRev ) {
		$newText = $this->mNewRev->getText();
		$this->mNewtext = $newText;
		if ( $newText === false ) {
			return false;
		}
	}

	return true;
}
561
562 /**
563 * Load the text of the new revision, not the old one
564 */
function loadNewText() {
	// As in loadText(), only one attempt is made: later calls return true
	// without reloading, whatever the outcome of the first attempt was.
	if ( $this->mTextLoaded >= 1 ) {
		return true;
	} else {
		$this->mTextLoaded = 1;
	}
	if ( !$this->loadRevisionData() ) {
		return false;
	}
	$this->mNewtext = $this->mNewRev->getText();
	// Report an unloadable text as failure, the same way loadText() does,
	// so callers can show an error instead of rendering a false value.
	if ( $this->mNewtext === false ) {
		return false;
	}
	return true;
}
577
578
579 }
580
581 // A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
582 //
583 // Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
584 // You may copy this code freely under the conditions of the GPL.
585 //
586
// True when assert() exists. Used below as a guard of the form
// "USE_ASSERTS && assert(...)" so the assertion is skipped otherwise.
define('USE_ASSERTS', function_exists('assert'));
588
589 /**
590 * @todo document
591 * @access private
592 * @package MediaWiki
593 * @subpackage DifferenceEngine
594 */
class _DiffOp {
	// Operation name; subclasses set 'copy', 'delete', 'add' or 'change'
	var $type;
	// Lines on the original side, or false when the operation has none
	var $orig;
	// Lines on the closing (new) side, or false when the operation has none
	var $closing;

	/**
	 * Return the inverse operation. Subclasses must override this; the
	 * base implementation only raises an E_USER_ERROR.
	 */
	function reverse() {
		trigger_error('pure virtual', E_USER_ERROR);
	}

	/**
	 * Number of lines on the original side (0 if there are none).
	 */
	function norig() {
		if ( !$this->orig ) {
			return 0;
		}
		return count($this->orig);
	}

	/**
	 * Number of lines on the closing side (0 if there are none).
	 */
	function nclosing() {
		if ( !$this->closing ) {
			return 0;
		}
		return count($this->closing);
	}
}
612
613 /**
614 * @todo document
615 * @access private
616 * @package MediaWiki
617 * @subpackage DifferenceEngine
618 */
class _DiffOp_Copy extends _DiffOp {
	var $type = 'copy';

	/**
	 * Constructor. When $closing is not an array, the closing side is
	 * taken to be the same lines as $orig.
	 */
	function _DiffOp_Copy ($orig, $closing = false) {
		$this->orig = $orig;
		$this->closing = is_array($closing) ? $closing : $orig;
	}

	/**
	 * The inverse of a copy is a copy with both sides swapped.
	 */
	function reverse() {
		$swapped = new _DiffOp_Copy($this->closing, $this->orig);
		return $swapped;
	}
}
633
634 /**
635 * @todo document
636 * @access private
637 * @package MediaWiki
638 * @subpackage DifferenceEngine
639 */
class _DiffOp_Delete extends _DiffOp {
	var $type = 'delete';

	/**
	 * Constructor. $lines are the deleted lines; there is no closing side.
	 */
	function _DiffOp_Delete ($lines) {
		$this->closing = false;
		$this->orig = $lines;
	}

	/**
	 * The inverse of deleting lines is adding those same lines.
	 */
	function reverse() {
		$inverse = new _DiffOp_Add($this->orig);
		return $inverse;
	}
}
652
653 /**
654 * @todo document
655 * @access private
656 * @package MediaWiki
657 * @subpackage DifferenceEngine
658 */
class _DiffOp_Add extends _DiffOp {
	var $type = 'add';

	/**
	 * Constructor. $lines are the added lines; there is no original side.
	 */
	function _DiffOp_Add ($lines) {
		$this->orig = false;
		$this->closing = $lines;
	}

	/**
	 * The inverse of adding lines is deleting those same lines.
	 */
	function reverse() {
		$inverse = new _DiffOp_Delete($this->closing);
		return $inverse;
	}
}
671
672 /**
673 * @todo document
674 * @access private
675 * @package MediaWiki
676 * @subpackage DifferenceEngine
677 */
class _DiffOp_Change extends _DiffOp {
	var $type = 'change';

	/**
	 * Constructor. $orig are the replaced lines, $closing the lines that
	 * replace them.
	 */
	function _DiffOp_Change ($orig, $closing) {
		$this->closing = $closing;
		$this->orig = $orig;
	}

	/**
	 * The inverse of a change is the same change with both sides swapped.
	 */
	function reverse() {
		$inverse = new _DiffOp_Change($this->closing, $this->orig);
		return $inverse;
	}
}
690
691
692 /**
693 * Class used internally by Diff to actually compute the diffs.
694 *
695 * The algorithm used here is mostly lifted from the perl module
696 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
697 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
698 *
699 * More ideas are taken from:
700 * http://www.ics.uci.edu/~eppstein/161/960229.html
701 *
702 * Some ideas (and a bit of code) are from analyze.c, from GNU
703 * diffutils-2.7, which can be found at:
704 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
705 *
706 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
707 * are my own.
708 *
709 * Line length limits for robustness added by Tim Starling, 2005-08-31
710 *
711 * @author Geoffrey T. Dairiki, Tim Starling
712 * @access private
713 * @package MediaWiki
714 * @subpackage DifferenceEngine
715 */
716 class _DiffEngine
717 {
/**
 * Compute the list of edit operations that turns $from_lines into
 * $to_lines.
 *
 * Works in four steps: trim common leading/trailing lines, drop lines
 * that occur in only one of the inputs (they are changed by definition),
 * run the LCS search on what is left, then walk the per-line "changed"
 * flags to build the operations.
 *
 * @param array $from_lines Lines of the original text
 * @param array $to_lines Lines of the new text
 * @return array List of _DiffOp_Copy, _DiffOp_Delete, _DiffOp_Add and
 *   _DiffOp_Change objects, in order
 */
function diff ($from_lines, $to_lines) {
	$fname = '_DiffEngine::diff';
	wfProfileIn( $fname );

	$n_from = sizeof($from_lines);
	$n_to = sizeof($to_lines);

	// Reset all working state left over from a previous run.
	// xchanged/ychanged: per-line changed flags, indexed by line number.
	// xv/yv: the lines that take part in the LCS search.
	// xind/yind: for each entry of xv/yv, its line number in the input.
	$this->xchanged = $this->ychanged = array();
	$this->xv = $this->yv = array();
	$this->xind = $this->yind = array();
	unset($this->seq);
	unset($this->in_seq);
	unset($this->lcs);

	// Skip leading common lines.
	for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
		if ($from_lines[$skip] !== $to_lines[$skip])
			break;
		$this->xchanged[$skip] = $this->ychanged[$skip] = false;
	}
	// Skip trailing common lines.
	$xi = $n_from; $yi = $n_to;
	for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
		if ($from_lines[$xi] !== $to_lines[$yi])
			break;
		$this->xchanged[$xi] = $this->ychanged[$yi] = false;
	}

	// Ignore lines which do not exist in both files.
	// $xhash / $yhash are sets keyed by _line_hash() of the line. They are
	// created implicitly by the first assignment; empty() below copes with
	// them being unset when a loop body never runs.
	for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
		$xhash[$this->_line_hash($from_lines[$xi])] = 1;
	}

	for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
		$line = $to_lines[$yi];
		// A line absent from the other side is flagged changed right away
		// and kept out of yv.
		if ( ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) )
			continue;
		$yhash[$this->_line_hash($line)] = 1;
		$this->yv[] = $line;
		$this->yind[] = $yi;
	}
	for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
		$line = $from_lines[$xi];
		if ( ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) )
			continue;
		$this->xv[] = $line;
		$this->xind[] = $xi;
	}

	// Find the LCS.
	$this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));

	// Merge edits when possible
	$this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
	$this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);

	// Compute the edit operations.
	$edits = array();
	$xi = $yi = 0;
	while ($xi < $n_from || $yi < $n_to) {
		USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
		USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);

		// Skip matching "snake".
		$copy = array();
		while ( $xi < $n_from && $yi < $n_to
				&& !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
			$copy[] = $from_lines[$xi++];
			++$yi;
		}
		if ($copy)
			$edits[] = new _DiffOp_Copy($copy);

		// Find deletes & adds.
		$delete = array();
		while ($xi < $n_from && $this->xchanged[$xi])
			$delete[] = $from_lines[$xi++];

		$add = array();
		while ($yi < $n_to && $this->ychanged[$yi])
			$add[] = $to_lines[$yi++];

		// A delete run directly followed by an add run is one "change"
		if ($delete && $add)
			$edits[] = new _DiffOp_Change($delete, $add);
		elseif ($delete)
			$edits[] = new _DiffOp_Delete($delete);
		elseif ($add)
			$edits[] = new _DiffOp_Add($add);
	}
	wfProfileOut( $fname );
	return $edits;
}
810
811 /**
812 * Returns the whole line if it's small enough, or the MD5 hash otherwise
813 */
function _line_hash( $line ) {
	// Only lines above the length limit are replaced by their MD5 digest
	$tooLong = strlen( $line ) > MAX_DIFF_XREF_LENGTH;
	return $tooLong ? md5( $line ) : $line;
}
821
822
823 /* Divide the Largest Common Subsequence (LCS) of the sequences
824 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
825 * sized segments.
826 *
827 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
828 * array of NCHUNKS+1 (X, Y) indexes giving the dividing points between
829 * sub sequences. The first sub-sequence is contained in [X0, X1),
830 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
831 * that (X0, Y0) == (XOFF, YOFF) and
832 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
833 *
834 * This function assumes that the first lines of the specified portions
835 * of the two files do not match, and likewise that the last lines do not
836 * match. The caller must trim matching lines from the beginning and end
837 * of the portions it is going to specify.
838 */
function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
	$fname = '_DiffEngine::_diag';
	wfProfileIn( $fname );
	$flip = false;

	if ($xlim - $xoff > $ylim - $yoff) {
		// Things seem faster (I'm not sure I understand why)
		// when the shortest sequence is in X.
		$flip = true;
		list ($xoff, $xlim, $yoff, $ylim)
			= array( $yoff, $ylim, $xoff, $xlim);
	}

	// Index the (possibly flipped) Y range: line content => list of
	// positions. The range is walked backwards, so each list is in
	// descending position order.
	if ($flip)
		for ($i = $ylim - 1; $i >= $yoff; $i--)
			$ymatches[$this->xv[$i]][] = $i;
	else
		for ($i = $ylim - 1; $i >= $yoff; $i--)
			$ymatches[$this->yv[$i]][] = $i;

	// seq[k] holds the Y position that currently ends a common
	// subsequence of length k; seq[0] is a sentinel just below the range.
	// in_seq flags the Y positions currently stored in seq.
	$this->lcs = 0;
	$this->seq[0]= $yoff - 1;
	$this->in_seq = array();
	$ymids[0] = array();

	$numer = $xlim - $xoff + $nchunks - 1;
	$x = $xoff;
	for ($chunk = 0; $chunk < $nchunks; $chunk++) {
		wfProfileIn( "$fname-chunk" );
		// At each chunk boundary, record for every subsequence length
		// the Y position reached so far.
		if ($chunk > 0)
			for ($i = 0; $i <= $this->lcs; $i++)
				$ymids[$i][$chunk-1] = $this->seq[$i];

		$x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
		for ( ; $x < $x1; $x++) {
			$line = $flip ? $this->yv[$x] : $this->xv[$x];
			if (empty($ymatches[$line]))
				continue;
			$matches = $ymatches[$line];
			reset($matches);
			// The two loops below share the internal array pointer of
			// $matches: the first stops at the first position not yet in
			// the sequence, the second carries on with the elements after it.
			// NOTE(review): each() is deprecated since PHP 7.2 and removed
			// in PHP 8.0; any replacement must keep this shared-cursor
			// behaviour.
			while (list ($junk, $y) = each($matches))
				if (empty($this->in_seq[$y])) {
					$k = $this->_lcs_pos($y);
					USE_ASSERTS && assert($k > 0);
					$ymids[$k] = $ymids[$k-1];
					break;
				}
			while (list ($junk, $y) = each($matches)) {
				if ($y > $this->seq[$k-1]) {
					USE_ASSERTS && assert($y < $this->seq[$k]);
					// Optimization: this is a common case:
					// next match is just replacing previous match.
					$this->in_seq[$this->seq[$k]] = false;
					$this->seq[$k] = $y;
					$this->in_seq[$y] = 1;
				} else if (empty($this->in_seq[$y])) {
					$k = $this->_lcs_pos($y);
					USE_ASSERTS && assert($k > 0);
					$ymids[$k] = $ymids[$k-1];
				}
			}
		}
		wfProfileOut( "$fname-chunk" );
	}

	// Build the list of separator points, undoing the flip so that each
	// pair is always (x, y) from the caller's point of view.
	$seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
	$ymid = $ymids[$this->lcs];
	for ($n = 0; $n < $nchunks - 1; $n++) {
		$x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
		$y1 = $ymid[$n] + 1;
		$seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
	}
	$seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);

	wfProfileOut( $fname );
	return array($this->lcs, $seps);
}
916
/**
 * Insert Y position $ypos into the sorted $this->seq table and return the
 * index it was stored at. If $ypos is larger than every stored entry the
 * table grows by one (and $this->lcs with it); otherwise the first entry
 * that is not smaller than $ypos is replaced.
 */
function _lcs_pos ($ypos) {
	$fname = '_DiffEngine::_lcs_pos';
	wfProfileIn( $fname );

	$hi = $this->lcs;
	if ($hi == 0 || $ypos > $this->seq[$hi]) {
		// Append: $ypos extends the longest sequence found so far
		$this->lcs++;
		$this->seq[$this->lcs] = $ypos;
		$this->in_seq[$ypos] = 1;
		wfProfileOut( $fname );
		return $this->lcs;
	}

	// Binary search in seq[1..$hi] for the slot to overwrite
	$lo = 1;
	while ($lo < $hi) {
		$mid = (int)(($lo + $hi) / 2);
		if ( $this->seq[$mid] < $ypos ) {
			$lo = $mid + 1;
		} else {
			$hi = $mid;
		}
	}

	USE_ASSERTS && assert($ypos != $this->seq[$hi]);

	// Evict the old occupant of the slot and store $ypos in its place
	$evicted = $this->seq[$hi];
	$this->in_seq[$evicted] = false;
	$this->seq[$hi] = $ypos;
	$this->in_seq[$ypos] = 1;
	wfProfileOut( $fname );
	return $hi;
}
946
947 /* Find LCS of two sequences.
948 *
949 * The results are recorded in the vectors $this->{x,y}changed[], by
950 * storing a 1 in the element for each line that is an insertion
951 * or deletion (ie. is not in the LCS).
952 *
953 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
954 *
955 * Note that XLIM, YLIM are exclusive bounds.
956 * All line numbers are origin-0 and discarded lines are not counted.
957 */
function _compareseq ($xoff, $xlim, $yoff, $ylim) {
	$fname = '_DiffEngine::_compareseq';
	wfProfileIn( $fname );

	// Trim matching entries off the front of both ranges.
	while ($xoff < $xlim && $yoff < $ylim) {
		if ($this->xv[$xoff] != $this->yv[$yoff]) {
			break;
		}
		++$xoff;
		++$yoff;
	}

	// Trim matching entries off the back of both ranges.
	while ($xlim > $xoff && $ylim > $yoff) {
		if ($this->xv[$xlim - 1] != $this->yv[$ylim - 1]) {
			break;
		}
		--$xlim;
		--$ylim;
	}

	// An empty range on either side means there is nothing in common.
	$lcs = 0;
	if ($xoff != $xlim && $yoff != $ylim) {
		// This is ad hoc but seems to work well.
		$nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
		list ($lcs, $seps)
			= $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
	}

	if ($lcs == 0) {
		// X and Y sequences have no common subsequence:
		// mark all changed.
		for ( ; $yoff < $ylim; $yoff++) {
			$this->ychanged[$this->yind[$yoff]] = 1;
		}
		for ( ; $xoff < $xlim; $xoff++) {
			$this->xchanged[$this->xind[$xoff]] = 1;
		}
	} else {
		// Use the partitions to split this problem into subproblems:
		// recurse on every pair of neighbouring separator points.
		$lastIndex = count($seps) - 1;
		for ($i = 0; $i < $lastIndex; $i++) {
			$from = $seps[$i];
			$to = $seps[$i + 1];
			$this->_compareseq ($from[0], $to[0], $from[1], $to[1]);
		}
	}
	wfProfileOut( $fname );
}
1005
1006 /* Adjust inserts/deletes of identical lines to join changes
1007 * as much as possible.
1008 *
1009 * We do something when a run of changed lines include a
1010 * line at one end and has an excluded, identical line at the other.
1011 * We are free to choose which identical line is included.
1012 * `compareseq' usually chooses the one at the beginning,
1013 * but usually it is cleaner to consider the following identical line
1014 * to be the "change".
1015 *
1016 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
1017 */
function _shift_boundaries ($lines, &$changed, $other_changed) {
	/*
	 * $lines          the lines of the file whose change flags are adjusted
	 * $changed        one flag per entry of $lines; rewritten in place
	 * $other_changed  the change flags of the other file; only read here
	 */
	$fname = '_DiffEngine::_shift_boundaries';
	wfProfileIn( $fname );

	USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
	$len = sizeof($lines);
	$other_len = sizeof($other_changed);

	$i = 0;
	$j = 0;

	for (;;) {
		/*
		 * Advance to the start of the next run of changes while tracking
		 * the matching position in the other file.
		 *
		 * $i and $j always move together, so that the first $i entries
		 * of $changed and the first $j entries of $other_changed hold
		 * the same number of unchanged lines. In addition $j is kept at
		 * $other_len or at an unchanged entry of $other_changed.
		 */
		while ($j < $other_len && $other_changed[$j]) {
			$j++;
		}

		while ($i < $len && ! $changed[$i]) {
			USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
			$i++;
			$j++;
			while ($j < $other_len && $other_changed[$j]) {
				$j++;
			}
		}

		if ($i == $len) {
			// No further run of changes.
			break;
		}

		$run_start = $i;

		// Step past the last line of this run of changes.
		do {
			$i++;
		} while ($i < $len && $changed[$i]);

		do {
			// Remember the run length so growth can be detected below.
			$run_len = $i - $run_start;

			/*
			 * Slide the run backwards for as long as the unchanged line
			 * just before it equals the last changed line; this joins
			 * the run with any changed region preceding it.
			 */
			while ($run_start > 0 && $lines[$run_start - 1] == $lines[$i - 1]) {
				$run_start--;
				$changed[$run_start] = 1;
				$i--;
				$changed[$i] = false;
				while ($run_start > 0 && $changed[$run_start - 1]) {
					$run_start--;
				}
				USE_ASSERTS && assert('$j > 0');
				do {
					$j--;
				} while ($other_changed[$j]);
				USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
			}

			/*
			 * $sync_end is the end of the run at the last point where it
			 * lines up with a changed run in the other file; $len means
			 * that no such point has been found.
			 */
			if ($j < $other_len) {
				$sync_end = $i;
			} else {
				$sync_end = $len;
			}

			/*
			 * Slide the run forwards for as long as its first changed
			 * line equals the unchanged line following it; this joins
			 * the run with any changed region after it. Doing this
			 * second leaves an unmerged run as far forward as possible.
			 */
			while ($i < $len && $lines[$run_start] == $lines[$i]) {
				$changed[$run_start] = false;
				$run_start++;
				$changed[$i] = 1;
				$i++;
				while ($i < $len && $changed[$i]) {
					$i++;
				}

				USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
				$j++;
				if ($j < $other_len && $other_changed[$j]) {
					$sync_end = $i;
					while ($j < $other_len && $other_changed[$j]) {
						$j++;
					}
				}
			}
		} while ($run_len != $i - $run_start);

		/*
		 * Where possible, pull the fully merged run back so that it ends
		 * at the recorded corresponding point of the other file.
		 */
		while ($sync_end < $i) {
			$run_start--;
			$changed[$run_start] = 1;
			$i--;
			$changed[$i] = 0;
			USE_ASSERTS && assert('$j > 0');
			do {
				$j--;
			} while ($other_changed[$j]);
			USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
		}
	}
	wfProfileOut( $fname );
}
1127 }
1128
/**
 * An edit script describing how one sequence of strings becomes another.
 *
 * The script is kept in $edits as an ordered list of edit operations.
 * This class relies on each operation exposing ->type ('copy' marks an
 * unchanged run), ->orig, ->closing and a reverse() method.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class Diff
{
	/** Ordered list of edit operations, as returned by _DiffEngine::diff(). */
	var $edits;

	/**
	 * Constructor.
	 * Computes diff between sequences of strings.
	 *
	 * @param $from_lines array An array of strings.
	 *		  (Typically these are lines from a file.)
	 * @param $to_lines array An array of strings.
	 */
	function Diff($from_lines, $to_lines) {
		$eng = new _DiffEngine;
		$this->edits = $eng->diff($from_lines, $to_lines);
		//$this->_check($from_lines, $to_lines);
	}

	/**
	 * Compute reversed Diff.
	 *
	 * SYNOPSIS:
	 *
	 *	$diff = new Diff($lines1, $lines2);
	 *	$rev = $diff->reverse();
	 *
	 * The receiver is left untouched; the result is an independent object
	 * of the same class.
	 *
	 * @return object A Diff object representing the inverse of the
	 *				  original diff.
	 */
	function reverse () {
		// Invert every operation while the original list is still intact.
		$reversed = array();
		foreach ($this->edits as $edit) {
			$reversed[] = $edit->reverse();
		}

		// A plain "$rev = $this" copies the object under PHP 4 but only
		// aliases it from PHP 5 on, where resetting $rev->edits used to
		// wipe the receiver's own list before it was iterated. The
		// serialize round-trip yields a separate object on every version
		// without needing PHP 5-only syntax.
		$rev = unserialize(serialize($this));
		$rev->edits = $reversed;
		return $rev;
	}

	/**
	 * Check for empty diff.
	 *
	 * @return bool True iff two sequences were identical.
	 */
	function isEmpty () {
		foreach ($this->edits as $edit) {
			if ($edit->type != 'copy') {
				return false;
			}
		}
		return true;
	}

	/**
	 * Compute the length of the Longest Common Subsequence (LCS).
	 *
	 * This is mostly for diagnostic purposes.
	 *
	 * @return int The length of the LCS.
	 */
	function lcs () {
		$lcs = 0;
		foreach ($this->edits as $edit) {
			if ($edit->type == 'copy') {
				$lcs += sizeof($edit->orig);
			}
		}
		return $lcs;
	}

	/**
	 * Get the original set of lines.
	 *
	 * This reconstructs the $from_lines parameter passed to the
	 * constructor.
	 *
	 * @return array The original sequence of strings.
	 */
	function orig() {
		$lines = array();

		foreach ($this->edits as $edit) {
			// Operations without an original side carry a falsy ->orig.
			if ($edit->orig) {
				array_splice($lines, sizeof($lines), 0, $edit->orig);
			}
		}
		return $lines;
	}

	/**
	 * Get the closing set of lines.
	 *
	 * This reconstructs the $to_lines parameter passed to the
	 * constructor.
	 *
	 * @return array The sequence of strings.
	 */
	function closing() {
		$lines = array();

		foreach ($this->edits as $edit) {
			// Operations without a closing side carry a falsy ->closing.
			if ($edit->closing) {
				array_splice($lines, sizeof($lines), 0, $edit->closing);
			}
		}
		return $lines;
	}

	/**
	 * Check a Diff for validity.
	 *
	 * This is here only for debugging purposes. Every failed check raises
	 * an E_USER_ERROR; success is reported with an E_USER_NOTICE.
	 *
	 * @param $from_lines array The sequence the diff was computed from.
	 * @param $to_lines array The sequence the diff was computed against.
	 */
	function _check ($from_lines, $to_lines) {
		$fname = 'Diff::_check';
		wfProfileIn( $fname );

		// The script must reproduce both inputs ...
		if (serialize($from_lines) != serialize($this->orig())) {
			trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
		}
		if (serialize($to_lines) != serialize($this->closing())) {
			trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
		}

		// ... and its reversal must reproduce them with the roles swapped.
		$rev = $this->reverse();
		if (serialize($to_lines) != serialize($rev->orig())) {
			trigger_error("Reversed original doesn't match", E_USER_ERROR);
		}
		if (serialize($from_lines) != serialize($rev->closing())) {
			trigger_error("Reversed closing doesn't match", E_USER_ERROR);
		}

		// Two neighbouring operations of one type could have been merged.
		$prevtype = 'none';
		foreach ($this->edits as $edit) {
			if ( $prevtype == $edit->type ) {
				trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
			}
			$prevtype = $edit->type;
		}

		$lcs = $this->lcs();
		trigger_error('Diff okay: LCS = '.$lcs, E_USER_NOTICE);
		wfProfileOut( $fname );
	}
}
1270
/**
 * A Diff that is computed on one pair of sequences but reports another.
 *
 * FIXME: bad name.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class MappedDiff extends Diff
{
	/**
	 * Constructor.
	 *
	 * Computes diff between sequences of strings.
	 *
	 * The comparison itself runs on the two "mapped" sequences; afterwards
	 * the contents of every edit operation are swapped for the elements at
	 * the same positions of $from_lines and $to_lines. This can be used to
	 * compute things like case-insensitive diffs, or diffs which ignore
	 * changes in white-space.
	 *
	 * @param $from_lines array An array of strings.
	 *	(Typically these are lines from a file.)
	 *
	 * @param $to_lines array An array of strings.
	 *
	 * @param $mapped_from_lines array This array should
	 *	have the same number of elements as $from_lines.
	 *	The elements in $mapped_from_lines and
	 *	$mapped_to_lines are what is actually compared
	 *	when computing the diff.
	 *
	 * @param $mapped_to_lines array This array should
	 *	have the same number of elements as $to_lines.
	 */
	function MappedDiff($from_lines, $to_lines,
			$mapped_from_lines, $mapped_to_lines) {
		$fname = 'MappedDiff::MappedDiff';
		wfProfileIn( $fname );

		assert(sizeof($from_lines) == sizeof($mapped_from_lines));
		assert(sizeof($to_lines) == sizeof($mapped_to_lines));

		// Let the parent build the edit list from the mapped sequences.
		$this->Diff($mapped_from_lines, $mapped_to_lines);

		// Read positions within the unmapped sequences.
		$from_pos = 0;
		$to_pos = 0;

		$edit_count = sizeof($this->edits);
		for ($n = 0; $n < $edit_count; $n++) {
			if (is_array($this->edits[$n]->orig)) {
				$wanted = sizeof($this->edits[$n]->orig);
				$this->edits[$n]->orig = array_slice($from_lines, $from_pos, $wanted);
				$from_pos += sizeof($this->edits[$n]->orig);
			}

			if (is_array($this->edits[$n]->closing)) {
				$wanted = sizeof($this->edits[$n]->closing);
				$this->edits[$n]->closing = array_slice($to_lines, $to_pos, $wanted);
				$to_pos += sizeof($this->edits[$n]->closing);
			}
		}
		wfProfileOut( $fname );
	}
}
1330
/**
 * A class to format Diffs
 *
 * This class formats the diff in classic diff format.
 * It is intended that this class be customized via inheritance,
 * to obtain fancier outputs: format() drives the underscore-prefixed
 * hook methods below, and subclasses override whichever they need.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class DiffFormatter
{
	/**
	 * Number of leading context "lines" to preserve.
	 *
	 * This should be left at zero for this class, but subclasses
	 * may want to set this to other values.
	 */
	var $leading_context_lines = 0;

	/**
	 * Number of trailing context "lines" to preserve.
	 *
	 * This should be left at zero for this class, but subclasses
	 * may want to set this to other values.
	 */
	var $trailing_context_lines = 0;

	/**
	 * Format a diff.
	 *
	 * Walks the edit list and groups each stretch of non-copy operations,
	 * together with the configured amount of surrounding context, into a
	 * block that is handed to _block(). A copy run lying between two
	 * changes is absorbed into the open block when it is no longer than
	 * the leading plus trailing context.
	 *
	 * @param $diff object A Diff object.
	 * @return string The formatted output.
	 */
	function format($diff) {
		$fname = 'DiffFormatter::format';
		wfProfileIn( $fname );

		// 1-based numbers of the next line on either side.
		$xi = $yi = 1;
		// false while no block is open, otherwise the operations collected.
		$block = false;
		// Lines of the most recent copy operation.
		$context = array();

		$nlead = $this->leading_context_lines;
		$ntrail = $this->trailing_context_lines;

		$this->_start_diff();

		foreach ($diff->edits as $edit) {
			if ($edit->type == 'copy') {
				if (is_array($block)) {
					if (sizeof($edit->orig) <= $nlead + $ntrail) {
						// Short gap: keep the block open across it.
						$block[] = $edit;
					} else {
						// Long gap: add trailing context and emit the block.
						if ($ntrail) {
							$context = array_slice($edit->orig, 0, $ntrail);
							$block[] = new _DiffOp_Copy($context);
						}
						$this->_block($x0, $ntrail + $xi - $x0,
							$y0, $ntrail + $yi - $y0,
							$block);
						$block = false;
					}
				}
				$context = $edit->orig;
			} else {
				if (! is_array($block)) {
					// Open a block, preceded by the leading context.
					$context = array_slice($context, sizeof($context) - $nlead);
					$x0 = $xi - sizeof($context);
					$y0 = $yi - sizeof($context);
					$block = array();
					if ($context) {
						$block[] = new _DiffOp_Copy($context);
					}
				}
				$block[] = $edit;
			}

			if ($edit->orig) {
				$xi += sizeof($edit->orig);
			}
			if ($edit->closing) {
				$yi += sizeof($edit->closing);
			}
		}

		// Emit a block that is still open at the end of the edit list.
		if (is_array($block)) {
			$this->_block($x0, $xi - $x0,
				$y0, $yi - $y0,
				$block);
		}

		$end = $this->_end_diff();
		wfProfileOut( $fname );
		return $end;
	}

	/**
	 * Emit one block: its header followed by every operation in it.
	 *
	 * @param $xbeg int First line of the block on the original side.
	 * @param $xlen int Number of original lines covered.
	 * @param $ybeg int First line of the block on the closing side.
	 * @param $ylen int Number of closing lines covered.
	 * @param $edits array The operations making up the block.
	 */
	function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
		$fname = 'DiffFormatter::_block';
		wfProfileIn( $fname );
		$this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
		foreach ($edits as $edit) {
			if ($edit->type == 'copy') {
				$this->_context($edit->orig);
			} elseif ($edit->type == 'add') {
				$this->_added($edit->closing);
			} elseif ($edit->type == 'delete') {
				$this->_deleted($edit->orig);
			} elseif ($edit->type == 'change') {
				$this->_changed($edit->orig, $edit->closing);
			} else {
				trigger_error('Unknown edit type', E_USER_ERROR);
			}
		}
		$this->_end_block();
		wfProfileOut( $fname );
	}

	/** Begin capturing everything the hook methods echo. */
	function _start_diff() {
		ob_start();
	}

	/**
	 * Stop capturing and hand back what was echoed.
	 *
	 * @return string The captured output.
	 */
	function _end_diff() {
		$val = ob_get_contents();
		ob_end_clean();
		return $val;
	}

	/**
	 * Build the header of a block in the normal diff notation, such as
	 * "3c3", "1a2" or "2,4d1".
	 *
	 * @param $xbeg int First line of the block on the original side.
	 * @param $xlen int Number of original lines covered.
	 * @param $ybeg int First line of the block on the closing side.
	 * @param $ylen int Number of closing lines covered.
	 * @return string The header, without a line terminator.
	 */
	function _block_header($xbeg, $xlen, $ybeg, $ylen) {
		if ($xlen > 1) {
			$xbeg .= "," . ($xbeg + $xlen - 1);
		}
		if ($ylen > 1) {
			$ybeg .= "," . ($ybeg + $ylen - 1);
		}

		// The side that contributes no lines names the line *after which*
		// the change applies, which is one less than the computed start.
		// This matches the GNU diff behaviour.
		if ($xlen && !$ylen) {
			$ybeg = $ybeg - 1;
		} elseif (!$xlen) {
			$xbeg = $xbeg - 1;
		}

		return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
	}

	/**
	 * Called at the start of a block; writes the header on its own line.
	 *
	 * @param $header string The result of _block_header().
	 */
	function _start_block($header) {
		echo $header . "\n";
	}

	/** Called at the end of a block; nothing to do in this format. */
	function _end_block() {
	}

	/**
	 * Echo each line preceded by a marker and one space.
	 *
	 * @param $lines array The lines to write.
	 * @param $prefix string The marker put in front of every line.
	 */
	function _lines($lines, $prefix = ' ') {
		foreach ($lines as $line) {
			echo "$prefix $line\n";
		}
	}

	/** Write unchanged lines. */
	function _context($lines) {
		$this->_lines($lines);
	}

	/** Write lines present only on the closing side. */
	function _added($lines) {
		$this->_lines($lines, '>');
	}

	/** Write lines present only on the original side. */
	function _deleted($lines) {
		$this->_lines($lines, '<');
	}

	/** Write a replacement: old lines, a "---" separator, new lines. */
	function _changed($orig, $closing) {
		$this->_deleted($orig);
		echo "---\n";
		$this->_added($closing);
	}
}
1494
1495
1496 /**
1497 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
1498 *
1499 */
1500
// HTML numeric character reference for the non-breaking space (code point 160).
define( 'NBSP', '&#160;' );
1502
/**
 * Collects a stream of words into HTML-escaped output lines.
 *
 * Words are gathered into groups sharing one tag; a group tagged 'mark'
 * is wrapped in a "diffchange" span when it is flushed. A word starting
 * with a newline closes the current output line.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class _HWLDF_WordAccumulator {
	/** Output lines finished so far. */
	var $_lines = array();
	/** HTML of the line currently being assembled. */
	var $_line = '';
	/** Raw, not yet escaped text of the current group. */
	var $_group = '';
	/** Tag of the current group ('' or 'mark'). */
	var $_tag = '';

	/**
	 * Constructor; resets the accumulator to its empty state.
	 *
	 * The same values are also the declared property defaults, so an
	 * instance is usable even where this old-style constructor is not
	 * invoked automatically.
	 */
	function _HWLDF_WordAccumulator () {
		$this->_lines = array();
		$this->_line = '';
		$this->_group = '';
		$this->_tag = '';
	}

	/**
	 * Append the pending group to the current line and start a new group.
	 *
	 * @param $new_tag string Tag of the group that begins now.
	 */
	function _flushGroup ($new_tag) {
		if ($this->_group !== '') {
			$escaped = htmlspecialchars ( $this->_group );
			if ($this->_tag == 'mark') {
				$this->_line .= '<span class="diffchange">' . $escaped . '</span>';
			} else {
				$this->_line .= $escaped;
			}
		}
		$this->_group = '';
		$this->_tag = $new_tag;
	}

	/**
	 * Finish the current line and begin an empty one.
	 *
	 * @param $new_tag string Tag of the group that begins now.
	 */
	function _flushLine ($new_tag) {
		$this->_flushGroup($new_tag);
		if ($this->_line != '') {
			array_push ( $this->_lines, $this->_line );
		} else {
			# make empty lines visible by inserting an NBSP
			array_push ( $this->_lines, NBSP );
		}
		$this->_line = '';
	}

	/**
	 * Feed words into the accumulator.
	 *
	 * @param $words array The words to add; a newline may only appear as
	 *	the first character of a word.
	 * @param $tag string '' for plain text, 'mark' for highlighted text.
	 */
	function addWords ($words, $tag = '') {
		if ($tag != $this->_tag) {
			$this->_flushGroup($tag);
		}

		foreach ($words as $word) {
			// new-line should only come as first char of word.
			if ($word == '') {
				continue;
			}
			if ($word[0] == "\n") {
				$this->_flushLine($tag);
				$word = substr($word, 1);
			}
			assert(!strstr($word, "\n"));
			$this->_group .= $word;
		}
	}

	/**
	 * Flush whatever is pending and return all lines.
	 *
	 * @return array The accumulated lines, as HTML strings.
	 */
	function getLines() {
		$this->_flushLine('~done');
		return $this->_lines;
	}
}
1561
/**
 * Diff of two groups of lines computed word by word.
 *
 * Both inputs are cut into words by _split(); orig() and closing() then
 * return the two sides as HTML lines with the differing words marked.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class WordLevelDiff extends MappedDiff
{
	/**
	 * Constructor.
	 *
	 * @param $orig_lines array Lines of the original side.
	 * @param $closing_lines array Lines of the closing side.
	 */
	function WordLevelDiff ($orig_lines, $closing_lines) {
		$fname = 'WordLevelDiff::WordLevelDiff';
		wfProfileIn( $fname );

		// Each result is array( words, stripped words ).
		$from = $this->_split($orig_lines);
		$to = $this->_split($closing_lines);

		// Compare the stripped words, but report the full ones.
		$this->MappedDiff($from[0], $to[0], $from[1], $to[1]);
		wfProfileOut( $fname );
	}

	/**
	 * Cut lines into words.
	 *
	 * @param $lines array The lines to split.
	 * @return array A pair: the full matches of the word pattern, and the
	 *	contents of its first capture group. A "\n" entry is put into
	 *	both lists between consecutive lines.
	 */
	function _split($lines) {
		$fname = 'WordLevelDiff::_split';
		wfProfileIn( $fname );

		$words = array();
		$stripped = array();
		$seen = 0;
		foreach ( $lines as $line ) {
			// Every line but the first is introduced by a newline token.
			if ( $seen > 0 ) {
				$words[] = "\n";
				$stripped[] = "\n";
			}
			$seen++;

			# If the line is too long, just pretend the entire line is one big word
			# This prevents resource exhaustion problems
			if ( strlen( $line ) > MAX_DIFF_LINE ) {
				$words[] = $line;
				$stripped[] = $line;
				continue;
			}

			// NOTE(review): "(?!< \n)" is a negative lookahead for "<\n"
			// (whitespace is ignored under /x); a negative lookbehind
			// "(?<! \n)" may have been intended -- confirm before changing.
			$found = preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
				$line, $m);
			if ( $found ) {
				foreach ( $m[0] as $token ) {
					$words[] = $token;
				}
				foreach ( $m[1] as $token ) {
					$stripped[] = $token;
				}
			}
		}
		wfProfileOut( $fname );
		return array($words, $stripped);
	}

	/**
	 * Render the original side.
	 *
	 * @return array HTML lines; words outside copy operations are marked.
	 */
	function orig () {
		$fname = 'WordLevelDiff::orig';
		wfProfileIn( $fname );
		$acc = new _HWLDF_WordAccumulator;

		foreach ($this->edits as $edit) {
			$unchanged = ($edit->type == 'copy');
			if (!$unchanged && !$edit->orig) {
				// Nothing on this side for the operation.
				continue;
			}
			$acc->addWords($edit->orig, $unchanged ? '' : 'mark');
		}
		$lines = $acc->getLines();
		wfProfileOut( $fname );
		return $lines;
	}

	/**
	 * Render the closing side.
	 *
	 * @return array HTML lines; words outside copy operations are marked.
	 */
	function closing () {
		$fname = 'WordLevelDiff::closing';
		wfProfileIn( $fname );
		$acc = new _HWLDF_WordAccumulator;

		foreach ($this->edits as $edit) {
			$unchanged = ($edit->type == 'copy');
			if (!$unchanged && !$edit->closing) {
				// Nothing on this side for the operation.
				continue;
			}
			$acc->addWords($edit->closing, $unchanged ? '' : 'mark');
		}
		$lines = $acc->getLines();
		wfProfileOut( $fname );
		return $lines;
	}
}
1646
/**
 * Wikipedia Table style diff formatter.
 *
 * Emits one HTML table row per line, with the original side in the first
 * two cells and the closing side in the last two, using two lines of
 * context on either side of each change.
 *
 * @access private
 * @package MediaWiki
 * @subpackage DifferenceEngine
 */
class TableDiffFormatter extends DiffFormatter
{
	/** Constructor; asks for two lines of leading and trailing context. */
	function TableDiffFormatter() {
		$this->leading_context_lines = 2;
		$this->trailing_context_lines = 2;
	}

	/**
	 * Build the header row of a block: an HTML comment of the form
	 * "LINE n" in each half, carrying the block's first line number.
	 * The two length arguments are not used.
	 */
	function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
		$r = '<tr><td colspan="2" align="left"><strong><!--LINE '.$xbeg."--></strong></td>\n" .
		  '<td colspan="2" align="left"><strong><!--LINE '.$ybeg."--></strong></td></tr>\n";
		return $r;
	}

	/** Write the header row exactly as built, with nothing appended. */
	function _start_block( $header ) {
		echo $header;
	}

	/** Nothing closes a block in the table layout. */
	function _end_block() {
	}

	/** Intentionally empty: this formatter never writes plain text lines. */
	function _lines( $lines, $prefix=' ', $color='white' ) {
	}

	# HTML-escape parameter before calling this
	function addedLine( $line ) {
		return "<td>+</td><td class='diff-addedline'>{$line}</td>";
	}

	# HTML-escape parameter before calling this
	function deletedLine( $line ) {
		return "<td>-</td><td class='diff-deletedline'>{$line}</td>";
	}

	# HTML-escape parameter before calling this
	function contextLine( $line ) {
		return "<td> </td><td class='diff-context'>{$line}</td>";
	}

	/** Cells filling the side of a row that has no line. */
	function emptyLine() {
		return '<td colspan="2">&nbsp;</td>';
	}

	/** Rows for added lines: left half empty, right half escaped text. */
	function _added( $lines ) {
		foreach ($lines as $line) {
			echo '<tr>' . $this->emptyLine() .
			  $this->addedLine( htmlspecialchars ( $line ) ) . "</tr>\n";
		}
	}

	/** Rows for deleted lines: left half escaped text, right half empty. */
	function _deleted($lines) {
		foreach ($lines as $line) {
			echo '<tr>' . $this->deletedLine( htmlspecialchars ( $line ) ) .
			  $this->emptyLine() . "</tr>\n";
		}
	}

	/** Rows for unchanged lines: the same escaped text in both halves. */
	function _context( $lines ) {
		foreach ($lines as $line) {
			echo '<tr>' .
			  $this->contextLine( htmlspecialchars ( $line ) ) .
			  $this->contextLine( htmlspecialchars ( $line ) ) . "</tr>\n";
		}
	}

	/**
	 * Rows for a replaced group of lines, with word-level highlighting.
	 *
	 * Old and new lines are paired row by row; whichever side runs out
	 * first is padded with empty cells.
	 *
	 * @param $orig array The replaced lines.
	 * @param $closing array The lines replacing them.
	 */
	function _changed( $orig, $closing ) {
		$fname = 'TableDiffFormatter::_changed';
		wfProfileIn( $fname );

		$diff = new WordLevelDiff( $orig, $closing );
		$del = $diff->orig();
		$add = $diff->closing();

		# Notice that WordLevelDiff returns HTML-escaped output.
		# Hence, we will be calling addedLine/deletedLine without HTML-escaping.

		# Walk both lists by index. Looping on the truthiness of the shifted
		# line would stop at a line such as "0" and drop every deleted line
		# after it.
		$ndel = sizeof( $del );
		$nadd = sizeof( $add );
		$rows = $ndel > $nadd ? $ndel : $nadd;
		for ( $n = 0; $n < $rows; $n++ ) {
			$left = $n < $ndel ? $this->deletedLine( $del[$n] ) : $this->emptyLine();
			$right = $n < $nadd ? $this->addedLine( $add[$n] ) : $this->emptyLine();
			echo '<tr>' . $left . $right . "</tr>\n";
		}
		wfProfileOut( $fname );
	}
}
1741
1742 ?>