3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
5 * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
6 * You may copy this code freely under the conditions of the GPL.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
24 * @ingroup DifferenceEngine
25 * @defgroup DifferenceEngine DifferenceEngine
29 * The base class for all other DiffOp classes.
31 * The classes that extend DiffOp are: DiffOpCopy, DiffOpDelete, DiffOpAdd and
32 * DiffOpChange. FakeDiffOp also extends DiffOp, but it is not located in this file.
35 * @ingroup DifferenceEngine
abstract class DiffOp {

	/**
	 * Operation kind. The subclasses in this file set it to
	 * 'copy', 'delete', 'add' or 'change'.
	 *
	 * @var string
	 */
	public $type;

	/**
	 * Lines on the original side, or false when the operation has none.
	 *
	 * @var string[]|false
	 */
	public $orig;

	/**
	 * Lines on the closing side, or false when the operation has none.
	 *
	 * @var string[]|false
	 */
	public $closing;

	/**
	 * NOTE(review): body reconstructed; the listing only showed the signature.
	 *
	 * @return string
	 */
	public function getType() {
		return $this->type;
	}

	/**
	 * NOTE(review): body reconstructed; the listing only showed the signature.
	 *
	 * @return string[]|false
	 */
	public function getOrig() {
		return $this->orig;
	}

	/**
	 * Get the closing lines, or a single closing line by index.
	 *
	 * @param int|null $i Index of the wanted line; null returns the whole set.
	 * @return string[]|string|false|null The whole set when $i is null, the
	 *  line at $i when it exists, null otherwise.
	 */
	public function getClosing( $i = null ) {
		if ( $i === null ) {
			return $this->closing;
		}
		// $closing is false for delete operations; array_key_exists() must
		// only ever be handed an array.
		if ( is_array( $this->closing ) && array_key_exists( $i, $this->closing ) ) {
			return $this->closing[$i];
		}
		return null;
	}

	/**
	 * Build the operation that undoes this one.
	 *
	 * @return DiffOp
	 */
	abstract public function reverse();

	/**
	 * @return int Number of lines on the original side (0 when there are none).
	 */
	public function norig() {
		return $this->orig ? count( $this->orig ) : 0;
	}

	/**
	 * @return int Number of lines on the closing side (0 when there are none).
	 */
	public function nclosing() {
		return $this->closing ? count( $this->closing ) : 0;
	}
}
100 * Extends DiffOp. Used to mark strings that have been
101 * copied from one string array to the other.
104 * @ingroup DifferenceEngine
class DiffOpCopy extends DiffOp {
	public $type = 'copy';

	/**
	 * @param string[] $orig Lines present in both sequences.
	 * @param string[]|bool $closing Closing-side lines. When this is not an
	 *  array (the default), $orig is used for the closing side as well.
	 */
	public function __construct( $orig, $closing = false ) {
		if ( !is_array( $closing ) ) {
			// DiffEngine::diff() constructs copies with a single argument,
			// so the closing side has to default to the original lines.
			$closing = $orig;
		}
		$this->orig = $orig;
		$this->closing = $closing;
	}

	/**
	 * @return DiffOpCopy A copy operation with both sides swapped.
	 */
	public function reverse() {
		return new DiffOpCopy( $this->closing, $this->orig );
	}
}
126 * Extends DiffOp. Used to mark strings that have been
127 * deleted from the first string array.
130 * @ingroup DifferenceEngine
class DiffOpDelete extends DiffOp {
	public $type = 'delete';

	/**
	 * @param string[] $lines Lines that exist only on the original side.
	 */
	public function __construct( $lines ) {
		$this->orig = $lines;
		// A deletion contributes nothing to the closing side.
		$this->closing = false;
	}

	/**
	 * @return DiffOpAdd An addition of the same lines.
	 */
	public function reverse() {
		return new DiffOpAdd( $this->orig );
	}
}
149 * Extends DiffOp. Used to mark strings that have been
150 * added in the second string array (absent from the first).
153 * @ingroup DifferenceEngine
class DiffOpAdd extends DiffOp {
	public $type = 'add';

	/**
	 * @param string[] $lines Lines that exist only on the closing side.
	 */
	public function __construct( $lines ) {
		$this->closing = $lines;
		// An addition contributes nothing to the original side; this
		// mirrors DiffOpDelete, which sets its closing side to false.
		$this->orig = false;
	}

	/**
	 * @return DiffOpDelete A deletion of the same lines.
	 */
	public function reverse() {
		return new DiffOpDelete( $this->closing );
	}
}
172 * Extends DiffOp. Used to mark strings that have been
173 * changed from the first string array (both added and subtracted).
176 * @ingroup DifferenceEngine
class DiffOpChange extends DiffOp {
	public $type = 'change';

	/**
	 * @param string[] $orig Lines removed from the original side.
	 * @param string[] $closing Lines that replace them on the closing side.
	 */
	public function __construct( $orig, $closing ) {
		$this->orig = $orig;
		$this->closing = $closing;
	}

	/**
	 * @return DiffOpChange A change operation with both sides swapped.
	 */
	public function reverse() {
		return new DiffOpChange( $this->closing, $this->orig );
	}
}
195 * Class used internally by Diff to actually compute the diffs.
197 * The algorithm used here is mostly lifted from the perl module
198 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
199 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
201 * More ideas are taken from:
202 * http://www.ics.uci.edu/~eppstein/161/960229.html
204 * Some ideas (and a bit of code) are from analyze.c, from GNU
205 * diffutils-2.7, which can be found at:
206 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
208 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
211 * Line length limits for robustness added by Tim Starling, 2005-08-31
212 * Alternative implementation added by Guy Van den Broeck, 2008-07-30
214 * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
216 * @ingroup DifferenceEngine
// Per-line change flags filled in by diffLocal(): $xchanged is indexed like the
// "from" lines (taken from WikiDiff3::$removed), $ychanged like the "to" lines
// (taken from WikiDiff3::$added). diff() reads them to build the edit list.
219 protected $xchanged, $ychanged;
222 * @param string[] $from_lines
223 * @param string[] $to_lines
/**
 * Compute the edit operations that turn $from_lines into $to_lines.
 *
 * NOTE(review): the loop initialisers, the `++$yi` advance, the
 * `if ( $copy )` / `elseif ( $add )` guards and the return statement were
 * missing from the listing and have been reconstructed — confirm against
 * the upstream file.
 *
 * @param string[] $from_lines
 * @param string[] $to_lines
 * @return DiffOp[] Copy, delete, add and change operations, in order.
 */
public function diff( $from_lines, $to_lines ) {
	// Diff and store locally
	$this->diffLocal( $from_lines, $to_lines );

	// Merge edits when possible
	$this->shiftBoundaries( $from_lines, $this->xchanged, $this->ychanged );
	$this->shiftBoundaries( $to_lines, $this->ychanged, $this->xchanged );

	// Compute the edit operations.
	$n_from = count( $from_lines );
	$n_to = count( $to_lines );

	$edits = [];
	$xi = 0;
	$yi = 0;
	while ( $xi < $n_from || $yi < $n_to ) {
		assert( $yi < $n_to || $this->xchanged[$xi] );
		assert( $xi < $n_from || $this->ychanged[$yi] );

		// Skip matching "snake".
		$copy = [];
		while ( $xi < $n_from && $yi < $n_to
			&& !$this->xchanged[$xi] && !$this->ychanged[$yi]
		) {
			$copy[] = $from_lines[$xi++];
			// An unchanged line consumes one line from each side.
			++$yi;
		}
		if ( $copy ) {
			$edits[] = new DiffOpCopy( $copy );
		}

		// Find deletes & adds.
		$delete = [];
		while ( $xi < $n_from && $this->xchanged[$xi] ) {
			$delete[] = $from_lines[$xi++];
		}

		$add = [];
		while ( $yi < $n_to && $this->ychanged[$yi] ) {
			$add[] = $to_lines[$yi++];
		}

		if ( $delete && $add ) {
			$edits[] = new DiffOpChange( $delete, $add );
		} elseif ( $delete ) {
			$edits[] = new DiffOpDelete( $delete );
		} elseif ( $add ) {
			$edits[] = new DiffOpAdd( $add );
		}
	}

	return $edits;
}
282 * @param string[] $from_lines
283 * @param string[] $to_lines
/**
 * Run WikiDiff3 on the two sequences and keep its per-line results:
 * its `removed` flags become $xchanged and its `added` flags become
 * $ychanged.
 *
 * @param string[] $from_lines
 * @param string[] $to_lines
 */
private function diffLocal( $from_lines, $to_lines ) {
	$wikidiff3 = new WikiDiff3();
	$wikidiff3->diff( $from_lines, $to_lines );
	$this->xchanged = $wikidiff3->removed;
	$this->ychanged = $wikidiff3->added;
}
293 * Adjust inserts/deletes of identical lines to join changes
294 * as much as possible.
296 * We do something when a run of changed lines includes a
297 * line at one end and has an excluded, identical line at the other.
298 * We are free to choose which identical line is included.
299 * `compareseq' usually chooses the one at the beginning,
300 * but usually it is cleaner to consider the following identical line
301 * to be the "change".
303 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
// shiftBoundaries( $lines, &$changed, $other_changed ): slides each run of
// changed lines backwards and forwards across neighbouring lines that compare
// equal (==), so that adjacent runs can merge. $changed is taken by reference
// and rewritten in place (entries are set to 1 or false); in the visible lines
// $other_changed is only read, to keep $j in step with $i.
// NOTE(review): this listing is incomplete. The initialisation of $i, $j and
// $start, the increments inside several loops, the opener of the do/while
// loop and most closing braces are not visible, so the block is not runnable
// as shown. Restore it from the upstream file before changing any logic.
305 private function shiftBoundaries( $lines, &$changed, $other_changed ) {
309 assert( count( $lines ) == count( $changed ) );
310 $len = count( $lines );
311 $other_len = count( $other_changed );
315 * Scan forwards to find beginning of another run of changes.
316 * Also keep track of the corresponding point in the other file.
318 * Throughout this code, $i and $j are adjusted together so that
319 * the first $i elements of $changed and the first $j elements
320 * of $other_changed both contain the same number of zeros
322 * Furthermore, $j is always kept so that $j == $other_len or
323 * $other_changed[$j] == false.
325 while ( $j < $other_len && $other_changed[$j] ) {
329 while ( $i < $len && !$changed[$i] ) {
330 assert( $j < $other_len && ! $other_changed[$j] );
333 while ( $j < $other_len && $other_changed[$j] ) {
344 // Find the end of this run of changes.
345 while ( ++
$i < $len && $changed[$i] ) {
351 * Record the length of this run of changes, so that
352 * we can later determine whether the run has grown.
354 $runlength = $i - $start;
357 * Move the changed region back, so long as the
358 * previous unchanged line matches the last changed one.
359 * This merges with previous changed regions.
361 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
362 $changed[--$start] = 1;
363 $changed[--$i] = false;
364 while ( $start > 0 && $changed[$start - 1] ) {
368 while ( $other_changed[--$j] ) {
371 assert( $j >= 0 && !$other_changed[$j] );
375 * Set CORRESPONDING to the end of the changed run, at the last
376 * point where it corresponds to a changed run in the other file.
377 * CORRESPONDING == LEN means no such point has been found.
379 $corresponding = $j < $other_len ?
$i : $len;
382 * Move the changed region forward, so long as the
383 * first changed line matches the following unchanged one.
384 * This merges with following changed regions.
385 * Do this second, so that if there are no merges,
386 * the changed region is moved forward as far as possible.
388 while ( $i < $len && $lines[$start] == $lines[$i] ) {
389 $changed[$start++
] = false;
391 while ( $i < $len && $changed[$i] ) {
395 assert( $j < $other_len && ! $other_changed[$j] );
397 if ( $j < $other_len && $other_changed[$j] ) {
399 while ( $j < $other_len && $other_changed[$j] ) {
404 } while ( $runlength != $i - $start );
407 * If possible, move the fully-merged run of changes
408 * back to a corresponding run in the other file.
410 while ( $corresponding < $i ) {
411 $changed[--$start] = 1;
414 while ( $other_changed[--$j] ) {
417 assert( $j >= 0 && !$other_changed[$j] );
424 * Class representing a 'diff' between two sequences of strings.
427 * @ingroup DifferenceEngine
438 * Computes diff between sequences of strings.
440 * @param string[] $from_lines An array of strings.
441 * Typically these are lines from a file.
442 * @param string[] $to_lines An array of strings.
/**
 * Compute the diff between two sequences of strings and store the
 * resulting edit operations in $this->edits.
 *
 * @param string[] $from_lines An array of strings.
 *  Typically these are lines from a file.
 * @param string[] $to_lines An array of strings.
 */
public function __construct( $from_lines, $to_lines ) {
	$eng = new DiffEngine();
	$this->edits = $eng->diff( $from_lines, $to_lines );
}
/**
 * NOTE(review): the body was missing from the listing; returning the edit
 * list stored by the constructor is the presumed behaviour — confirm.
 *
 * @return DiffOp[]
 */
public function getEdits() {
	return $this->edits;
}
457 * Compute reversed Diff.
461 * $diff = new Diff($lines1, $lines2);
462 * $rev = $diff->reverse();
464 * @return Object A Diff object representing the inverse of the
/**
 * Compute the reversed diff: every edit operation is replaced by its own
 * reverse(), in the same order.
 *
 * NOTE(review): the listing does not show how $rev is created or returned.
 * A clone is used here because objects are handles: aliasing $this and then
 * emptying the alias's edit list would also empty the list being iterated.
 *
 * @return Diff A Diff object holding the reversed edit operations.
 */
public function reverse() {
	$rev = clone $this;
	$rev->edits = [];
	/** @var DiffOp $edit */
	foreach ( $this->edits as $edit ) {
		$rev->edits[] = $edit->reverse();
	}

	return $rev;
}
479 * Check for empty diff.
481 * @return bool True if two sequences were identical.
/**
 * Check for an empty diff.
 *
 * @return bool True if every edit operation is a copy, i.e. the two
 *  sequences were identical.
 */
public function isEmpty() {
	foreach ( $this->edits as $edit ) {
		if ( $edit->type !== 'copy' ) {
			return false;
		}
	}

	return true;
}
494 * Compute the length of the Longest Common Subsequence (LCS).
496 * This is mostly for diagnostic purposes.
498 * @return int The length of the LCS.
/**
 * Compute the length of the Longest Common Subsequence (LCS) by summing
 * the number of original-side lines over all copy operations.
 *
 * @return int The length of the LCS.
 */
public function lcs() {
	$lcs = 0;
	foreach ( $this->edits as $edit ) {
		if ( $edit->type === 'copy' ) {
			$lcs += count( $edit->orig );
		}
	}

	return $lcs;
}
512 * Get the original set of lines.
514 * This reconstructs the $from_lines parameter passed to the
517 * @return string[] The original sequence of strings.
/**
 * Get the original set of lines by concatenating the original side of
 * every edit operation that has one.
 *
 * @return string[] The original sequence of strings.
 */
public function orig() {
	$lines = [];
	foreach ( $this->edits as $edit ) {
		// Additions carry no original lines; same guard as closing().
		if ( $edit->orig ) {
			array_splice( $lines, count( $lines ), 0, $edit->orig );
		}
	}

	return $lines;
}
532 * Get the closing set of lines.
534 * This reconstructs the $to_lines parameter passed to the
537 * @return string[] The sequence of strings.
/**
 * Get the closing set of lines by concatenating the closing side of
 * every edit operation that has one.
 *
 * @return string[] The sequence of strings.
 */
public function closing() {
	$lines = [];
	foreach ( $this->edits as $edit ) {
		// Deletions set their closing side to false and are skipped.
		if ( $edit->closing ) {
			array_splice( $lines, count( $lines ), 0, $edit->closing );
		}
	}

	return $lines;
}
553 * @deprecated Alias for WordAccumulator, to be removed soon
class HWLDFWordAccumulator extends MediaWiki\Diff\WordAccumulator {
	// Intentionally empty: this class only keeps the old name available as
	// an alias of MediaWiki\Diff\WordAccumulator.
}