New implementation of wikidiff3 algorithm replacing the previous one and basic HTML...
authorGuy Van den Broeck <guyvdb@users.mediawiki.org>
Fri, 15 Aug 2008 11:46:46 +0000 (11:46 +0000)
committerGuy Van den Broeck <guyvdb@users.mediawiki.org>
Fri, 15 Aug 2008 11:46:46 +0000 (11:46 +0000)
includes/Diff.php
includes/DifferenceEngine.php
includes/HTMLDiff.php [new file with mode: 0644]
skins/common/diff.css
skins/common/diff.js
skins/common/images/diffunderline.gif [new file with mode: 0644]

index 7c4ebcd..df8293b 100644 (file)
  */
 
 /**
- * Implementation of the Diff algorithm.
- * 
- * DO NOT USE ON PRODUCTION SYSTEMS
- * 
- * The algorithm is based on the O(NP) LCS algorithm as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm"
- * and extended with my own ideas.
+ * This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which
+ * in turn is based on Myers' "An O(ND) difference algorithm and its variations"
+ * (http://citeseer.ist.psu.edu/myers86ond.html) with range compression (see Wu et al.'s
+ * "An O(NP) Sequence Comparison Algorithm").
  *
- * @return array($from_removed, $to_added)
- *                TRUE if the token was removed or added.
+ * This implementation supports an upper bound on the excution time.
+ *
+ * Complexity: O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M + N) space
  *
  * @author Guy Van den Broeck
+ *
  */
-function wikidiff3_diff( /*array*/ $from, /*array*/ $to, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
-       wfProfileIn( __METHOD__ );
-
-       $m = sizeof($from);
-       $n = sizeof($to);
-
-       $result_from =  array_fill(0,$m,TRUE);
-       $result_to =  array_fill(0,$n,TRUE);
-
-       //reduce the complexity for the next step (intentionally done twice)
-       //remove common tokens at the start
-       $i=0;
-       while($i<$m && $i<$n && $from[$i]===$to[$i]){
-               $result_from[$i] = $result_to[$i] = FALSE;
-               unset($from[$i],$to[$i]);
-               ++$i;
-       }
+class WikiDiff3{
 
-       //remove common tokens at the end
-       $j=1;
-       while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
-               $result_from[$m-$j] = $result_to[$n-$j] = FALSE;
-               unset($from[$m-$j],$to[$n-$j]);
-               ++$j;
+       //Input variables
+       private $from;
+       private $to;
+       private $m;
+       private $n;
+
+       private $tooLong;
+       private $powLimit;
+
+       //State variables
+       private $maxDifferences;
+       
+       //Output variables
+       public $length;
+       public $added;
+       public $removed;
+       public $heuristicUsed;
+       
+       function __construct($tooLong = 2000000, $powLimit = 1.45){
+               $this->tooLong = $tooLong;
+               $this->powLimit = $powLimit;
        }
 
-       $newFrom = $newFromIndex = $newTo = $newToIndex = array();
+       public function diff(/*array*/ $from, /*array*/ $to){
+               $m = sizeof($from);
+               $n = sizeof($to);
+               
+               $this->heuristicUsed = FALSE;
+
+               $result_from =  $m>0?array_fill(0,$m,true):array();
+               $result_to =  $n>0?array_fill(0,$n,true):array();
+
+               //reduce the complexity for the next step (intentionally done twice)
+               //remove common tokens at the start
+               $i=0;
+               while($i<$m && $i<$n && $from[$i]===$to[$i]){
+                       $result_from[$i] = $result_to[$i] = false;
+                       unset($from[$i],$to[$i]);
+                       ++$i;
+               }
 
-       //remove tokens not in both sequences
-       $shared = array_fill_keys($from,FALSE);
-       foreach($to as $index => $el){
-               if(array_key_exists($el,$shared)){
-                       //keep it
-                       $shared[$el] = TRUE;
-                       $newTo[] = $el;
-                       $newToIndex[] = $index;
+               //remove common tokens at the end
+               $j=1;
+               while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
+                       $result_from[$m-$j] = $result_to[$n-$j] = false;
+                       unset($from[$m-$j],$to[$n-$j]);
+                       ++$j;
                }
-       }
-       foreach($from as $index => $el){
-               if($shared[$el]){
-                       //keep it
-                       $newFrom[] = $el;
-                       $newFromIndex[] = $index;
+
+               $newFrom = $newFromIndex = $newTo = $newToIndex = array();
+
+               //remove tokens not in both sequences
+               $shared = array_fill_keys($from,false);
+               foreach($to as $index => $el){
+                       if(array_key_exists($el,$shared)){
+                               //keep it
+                               $this->to[] = $el;
+                               $shared[$el] = true;
+                               $newToIndex[] = $index;
+                       }
+               }
+               foreach($from as $index => $el){
+                       if($shared[$el]){
+                               //keep it
+                               $this->from[] = $el;
+                               $newFromIndex[] = $index;
+                       }
                }
-       }
 
-       unset($from, $to, $shared);
+               unset($shared);
 
-       $m2 = sizeof($newFrom);
-       $n2 = sizeof($newTo);
-       $offsetx = $offsety = 0;
-       $from_inLcs = new InLcs($m2);
-       $to_inLcs = new InLcs($n2);
+               $this->m = sizeof($this->from);
+               $this->n = sizeof($this->to);
+               $this->removed =  $this->m>0?array_fill(0,$this->m,true):array();
+               $this->added =  $this->n>0?array_fill(0,$this->n,true):array();
 
-       wikidiff3_diffPart($newFrom, $newTo, $from_inLcs, $to_inLcs, $m2, $n2, $offsetx, $offsety, $m2, $boundRunningTime, $max_NP_before_bound);
+               $this->longestCommonSubsequence();
 
-       foreach($from_inLcs->inLcs as $key => $in){
-               if($in){
-                       $result_from[$newFromIndex[$key]] = FALSE;
+               $this->m = $m;
+               $this->n = $n;
+               $this->length += $i+$j-1;
+
+               foreach($this->removed as $key => $removed){
+                       if(!$removed){
+                               $result_from[$newFromIndex[$key]] = false;
+                       }
                }
-       }
-       foreach($to_inLcs->inLcs as $key => $in){
-               if($in){
-                       $result_to[$newToIndex[$key]] = FALSE;
+               foreach($this->added as $key => $added){
+                       if(!$added){
+                               $result_to[$newToIndex[$key]] = false;
+                       }
                }
+               unset($this->added, $this->removed);
+               return array($result_from, $result_to);
        }
-       
-       wfProfileOut( __METHOD__ );
-       return array($result_from, $result_to);
-}
 
-function wikidiff3_diffPart( /*array*/ $a, /*array*/ $b, InLcs $a_inLcs, InLcs $b_inLcs, $m, $n, $offsetx, $offsety, $bestKnownLcs, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
-       if($bestKnownLcs==0 || $m==0 || $n==0){
-               return;
-       }
-       
-       if($m>$n){
-               return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
-       }
+       public function longestCommonSubsequence() {
+               if ($this->m == 0 || $this->n == 0) {
+                       $this->length = 0;
+                       return;
+               }
 
-       $a_inLcs_sym = &$a_inLcs->inLcs;
-       $b_inLcs_sym = &$b_inLcs->inLcs;
-
-       //remove common tokens at the start
-       while($m>0 && $n>0 && $a[0]===$b[0]){
-               $a_inLcs_sym[$offsetx] = $b_inLcs_sym[$offsety] = TRUE;
-               ++$offsetx;     ++$offsety;
-               --$m; --$n;
-               --$bestKnownLcs;
-               array_shift($a);
-               array_shift($b);
-       }
+               $this->maxDifferences = ceil(($this->m + $this->n) / 2.0);
+               if ($this->m * $this->n > $this->tooLong) {
+                       // limit complexity to D^POW_LIMIT for long sequences
+                       $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0));
+                       wfDebug("Limiting max number of differences to $this->maxDifferences");
+               }
 
-       //remove common tokens at the end
-       while($m>0 && $n>0 && $a[$m-1]===$b[$n-1]){
-               $a_inLcs_sym[$offsetx+$m-1] = $b_inLcs_sym[$offsety+$n-1] = TRUE;
-               --$m; --$n;
-               --$bestKnownLcs;
-               array_pop($a);
-               array_pop($b);
-       }
+               /*
+                * The common prefixes and suffixes are always part of some LCS, include
+                * them now to reduce our search space
+                */
+               $max = min($this->m, $this->n);
+               for ($forwardBound = 0; $forwardBound < $max
+               && $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) {
+                       $this->removed[$forwardBound] = $this->added[$forwardBound] = false;
+               }
 
-       if($bestKnownLcs==0 || $m==0 || $n==0){
-               return;
-       }
-       if($m>$n){
-               return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
+               $backBoundL1 = $this->m - 1;
+               $backBoundL2 = $this->n - 1;
+
+               while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound
+               && $this->from[$backBoundL1] === $this->to[$backBoundL2]) {
+                       $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
+               }
+
+               $temp = array_fill(0,$this->m + $this->n + 1, 0);
+               $V = array($temp,$temp);
+               $snake = array(0,0,0);
+
+               $this->length = $forwardBound + $this->m - $backBoundL1 - 1
+               + $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2,
+               $V, $snake);
        }
 
-       $delta = $n-$m;
-       $delta_plus_1 = $delta+1;
-       $delta_min_1 = $delta-1;
-
-       $fpForw = $snakeBeginForw = $snakeEndForw = array_fill( 0, $delta_plus_1, -1);
-       $lcsSizeForw = $snakekForw = $lcsSizeBack = $snakekBack = array_fill( 0, $delta_plus_1, 0);
-       $fpBack = $snakeBeginBack = $snakeEndBack = array_fill( 0, $delta_plus_1, $n);
-       $overlap = $delta>1 ? array_fill( 1, $delta_min_1, FALSE) : array();
-       $bestLcsLength = $bestLcsLengthTop = $bestLcsLengthBottom = -1;
-
-       if($boundRunningTime){
-               $maxp_before_bound = max((-$delta+sqrt($delta*$delta+($max_NP_before_bound<<2)))>>1,2);
-               if($maxp_before_bound>=$m){
-                       $boundRunningTime = false;
-                       unset($maxp_before_bound);
+       private function lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) {
+               // check that both sequences are non-empty
+               if ($bottoml1 > $topl1 || $bottoml2 > $topl2) {
+                       return 0;
                }
+
+               $d = $this->find_middle_snake($bottoml1, $topl1, $bottoml2, $topl2, $V, $snake);
+
+               // need to store these so we don't lose them when they're overwritten by
+               // the recursion
+               $len = $snake[2];
+               $startx = $snake[0];
+               $starty = $snake[1];
+
+               // the middle snake is part of the LCS, store it
+               for ($i = 0; $i < $len; ++$i) {
+                       $this->removed[$startx + $i] = $this->added[$starty + $i] = false;
+               }
+
+               if ($d > 1) {
+                       return $len
+                       + $this->lcs_rec($bottoml1, $startx - 1, $bottoml2, $starty - 1, $V, $snake)
+                       + $this->lcs_rec($startx + $len, $topl1, $starty + $len, $topl2, $V, $snake);
+               } else if ($d == 1) {
+                       /*
+                        * In this case the sequences differ by exactly 1 line. We have
+                        * already saved all the lines after the difference in the for loop
+                        * above, now we need to save all the lines before the difference.
+                        */
+                       $max = min($startx - $bottoml1, $starty - $bottoml2);
+                       for ($i = 0; $i < $max; ++$i) {
+                               $this->removed[$bottoml1 + $i] = $this->added[$bottoml2 + $i] = false;
+                       }
+                       return $max + $len;
+               }
+               return $len;
        }
 
-       $p=-1;
-       $m_min_1 = $m-1;
-       $maxp=$m_min_1-$bestLcsLength;
-
-       while($p<$maxp){
-
-               if($boundRunningTime && $p>$maxp_before_bound){
-                       // bound the running time by stopping early
-                       if($bestLcsLength>=0){
-                               // accept the best LCS thusfar
-                               break;
-                       }else{
-                               $bestLcsProgressForw = $bestkForw = 0;
-                               foreach($lcsSizeForw as $k => $localLcsProgress){
-                                       if($localLcsProgress>$bestLcsProgressForw || ($localLcsProgress==$bestLcsProgressForw && $bestkForw > $k)){
-                                               $bestLcsProgressForw = $localLcsProgress;
-                                               $bestkForw = $k;
+       private function find_middle_snake($bottoml1, $topl1, $bottoml2,$topl2, &$V, &$snake) {
+               $from = &$this->from;
+               $to = &$this->to;
+               $V0 = &$V[0];
+               $V1 = &$V[1];
+               $snake0 = &$snake[0];
+               $snake1 = &$snake[1];
+               $snake2 = &$snake[2];
+               $bottoml1_min_1 = $bottoml1-1;
+               $bottoml2_min_1 = $bottoml2-1;
+               $N = $topl1 - $bottoml1_min_1;
+               $M = $topl2 - $bottoml2_min_1;
+               $delta = $N - $M;
+               $maxabsx = $N+$bottoml1;
+               $maxabsy = $M+$bottoml2;
+               $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2)
+
+               //value_to_add_forward: a 0 or 1 that we add to the start
+               // offset
+               // to make it odd/even
+               if (($M & 1) == 1) {
+                       $value_to_add_forward = 1;
+               } else {
+                       $value_to_add_forward = 0;
+               }
+
+               if (($N & 1) == 1) {
+                       $value_to_add_backward = 1;
+               } else {
+                       $value_to_add_backward = 0;
+               }
+
+               $start_forward = -$M;
+               $end_forward = $N;
+               $start_backward = -$N;
+               $end_backward = $M;
+
+               $limit_min_1 = $limit-1;
+               $limit_plus_1 = $limit+1;
+
+               $V0[$limit_plus_1] = 0;
+               $V1[$limit_min_1] = $N;
+               $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2)
+                       
+               if (($delta & 1) == 1) {
+                       for ($d = 0; $d <= $limit; ++$d) {
+                               $start_diag = max($value_to_add_forward + $start_forward, -$d);
+                               $end_diag = min($end_forward, $d);
+                               $value_to_add_forward = 1 - $value_to_add_forward;
+
+                               // compute forward furthest reaching paths
+                               for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+                                       if ($k == -$d
+                                       || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
+                                               $x = $V0[$limit_plus_1 + $k];
+                                       } else {
+                                               $x = $V0[$limit_min_1 + $k] + 1;
                                        }
-                               }
-                               $bestLcsProgressBack = $bestkBack = 0;
-                               foreach($lcsSizeBack as $k => $localLcsProgress){
-                                       if($localLcsProgress>$bestLcsProgressBack || ($localLcsProgress==$bestLcsProgressBack && $bestkBack < $k)){
-                                               $bestLcsProgressBack = $localLcsProgress;
-                                               $bestkBack = $k;
+
+                                       $absx = $snake0 = $x + $bottoml1;
+                                       $absy = $snake1 = $x - $k + $bottoml2;
+                                               
+                                       while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
+                                               ++$absx;
+                                               ++$absy;
                                        }
-                               }
-                               if($fpForw[$bestkForw]-$bestkForw>$fpBack[$bestkBack]-$bestkBack){
-                                       // This is hard, maybe try some more? Can this even happen? A solution will be found soon anyway.
-                               }else{
-                                       // lets do this
-                                       $topSnakeStart = $snakeBeginForw[$bestkForw];
-                                       $topSnakeEnd = $snakeEndForw[$bestkForw];
-                                       $topSnakek = $snakekForw[$bestkForw];
-                                       $bestLcsLengthTop = $lcsSizeForw[$bestkBack] + $topSnakeStart - $topSnakeEnd;
-
-                                       $bottomSnakeStart = $snakeEndBack[$bestkBack]+1;
-                                       $bottomSnakeEnd = $snakeBeginBack[$bestkBack]+1;
-                                       $bottomSnakek = $snakekBack[$bestkBack];
-                                       $bestLcsLengthBottom =  $lcsSizeBack[$bestkBack] + $bottomSnakeStart - $bottomSnakeEnd;
-
-                                       if($bottomSnakeStart-$topSnakeEnd>$n*0.6 && ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek)>$m*0.6){
-                                               // cut the sequence from both sides (there isn't enough progress, 60% of the sequence is not covered)
-                                               // also diff the middle now
-                                               if($bottomSnakeEnd>($fpForw[$bestkForw]>>1)){
-                                                       // do the middle diff between the snakes
-                                                       wfDebug("  Limiting diff run time from both sides, middle between snakes\n");
-                                                       $m_t = ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek);
-                                                       $n_t = $bottomSnakeStart-$topSnakeEnd;
-                                                       $a_t = array_slice($a, $topSnakeEnd-$topSnakek, $m_t);
-                                                       $b_t = array_slice($b, $topSnakeEnd, $n_t);
-                                                       $offsetx_t = $offsetx+($topSnakeEnd-$topSnakek);
-                                                       $offsety_t = $offsety+$topSnakeEnd;
-                                               }else{
-                                                       // the snake is too early in the sequence, do the middle diff between endpoints of progress
-                                                       wfDebug("  Limiting diff run time from both sides, middle between endpoints\n");
-                                                       $m_t = ($fpBack[$bestkBack]+1-$bestkBack)-($fpForw[$bestkForw]-$bestkForw);
-                                                       $n_t = $fpBack[$bestkBack]+1-$fpForw[$bestkForw];
-                                                       $a_t = array_slice($a, $fpForw[$bestkForw]-$bestkForw, $m_t);
-                                                       $b_t = array_slice($b, $fpForw[$bestkForw], $n_t);
-                                                       $offsetx_t = $offsetx+($fpForw[$bestkForw]-$bestkForw);
-                                                       $offsety_t = $offsety+$fpForw[$bestkForw];
-                                               }
-                                               wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $n_t, $offsetx_t, $offsety_t, $m_t, $boundRunningTime, $max_NP_before_bound);
-                                       }else if(min($n-$bottomSnakeStart, $m-($bottomSnakeStart-$bottomSnakek))>min($topSnakeEnd, $topSnakeEnd-$topSnakek)){
-                                               // the bottom snake is more interesting
-                                               wfDebug("Limiting diff run time from bottom side\n");
-                                               $topSnakeStart = $bottomSnakeStart;
-                                               $topSnakeEnd = $bottomSnakeEnd;
-                                               $topSnakek = $bottomSnakek;
-                                               $bestLcsLengthTop = $topSnakeEnd-$topSnakek;
-                                               $bottomSnakeStart = $bottomSnakeEnd;
-                                       }else{
-                                               // the top snake is more interesting
-                                               wfDebug("  Limiting diff run time from top side\n");
-                                               $bottomSnakeStart = $topSnakeEnd;
-                                               $bottomSnakeEnd = $topSnakeEnd;
-                                               $bottomSnakek = $topSnakek;
-                                               $bestLcsLengthBottom =  $m-($bottomSnakeEnd-$bottomSnakek);
+                                       $x = $absx-$bottoml1;
+                                               
+                                       $snake2 = $absx -$snake0;
+                                       $V0[$limit + $k] = $x;
+                                       if ($k >= $delta - $d + 1 && $k <= $delta + $d - 1
+                                       && $x >= $V1[$limit + $k - $delta]) {
+                                               return 2 * $d - 1;
+                                       }
+
+                                       // check to see if we can cut down the diagonal range
+                                       if ($x >= $N && $end_forward > $k - 1) {
+                                               $end_forward = $k - 1;
+                                       } else if ($absy-$bottoml2 >= $M) {
+                                               $start_forward = $k + 1;
+                                               $value_to_add_forward = 0;
                                        }
-                                       break;
-                               }
-                       }
-               }
-               ++$p;
-               $overlap[-$p] = $overlap[$delta+$p] = FALSE;
-
-               $min_p_min_1 = -$p-1;
-               $delta_plus_1_plus_p = $delta_plus_1+$p;
-
-               // forward
-               $fpForw[$min_p_min_1] = $snakeEndForw[$min_p_min_1] = $snakekForw[$min_p_min_1] = $snakeBeginForw[$min_p_min_1] = $fpForw[$delta_plus_1_plus_p] =
-               $snakeBeginForw[$delta_plus_1_plus_p] = $snakeEndForw[$delta_plus_1_plus_p] = $snakekForw[$delta_plus_1_plus_p] = -1;
-               $lcsSizeForw[$min_p_min_1] = $lcsSizeForw[$delta_plus_1_plus_p] = 0;
-
-               $k=-$p;
-               do {
-                       $k_plus_1 = $k+1;
-                       $k_min_1 = $k-1;
-
-                       $fpBelow = $fpForw[$k_min_1]+1;
-                       $fpAbove = $fpForw[$k_plus_1];
-                       $y = &$fpForw[$k];
-                       if($fpBelow>$fpAbove){
-                               $oldy = $y = $fpBelow;
-                               $lcsSizeForw[$k] = $lcsSizeForw[$k_min_1];
-                               $snakeBeginForw[$k] = $snakeBeginForw[$k_min_1];
-                               $snakeEndForw[$k] = $snakeEndForw[$k_min_1];
-                               $snakekForw[$k] = $snakekForw[$k_min_1];
-                       }else{
-                               $oldy = $y = $fpAbove;
-                               $lcsSizeForw[$k] = $lcsSizeForw[$k_plus_1];
-                               $snakeBeginForw[$k] = $snakeBeginForw[$k_plus_1];
-                               $snakeEndForw[$k] = $snakeEndForw[$k_plus_1];
-                               $snakekForw[$k] = $snakekForw[$k_plus_1];
-                       }
-                       $x = $y-$k;
-                       while($x < $m && $y < $n && $a[$x]===$b[$y]){
-                               ++$x;
-                               ++$y;
-                       }
-                       if($y-$oldy>0){
-                               $lcsSizeForw[$k] += $y-$oldy;
-                               $snakeBeginForw[$k] = $oldy;
-                               $snakeEndForw[$k]= $y;
-                               $snakekForw[$k] = $k;
-                       }
-                       if($y>=$fpBack[$k] && !$overlap[$k]){
-                               // there is overlap
-                               $overlap[$k]=TRUE;
-                               $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
-                               if($y>$fpBack[$k]+1){
-                                       $snakeoverlap = $y-$fpBack[$k]-1;
-                                       $lcsLength -= $snakeoverlap;
                                }
-                               if($lcsLength>$bestLcsLength){
-                                       // a better sequence found!
-                                       $bestLcsLength = $lcsLength;
-
-                                       $topSnakeStart = $snakeBeginForw[$k];
-                                       $topSnakeEnd = $snakeEndForw[$k];
-                                       $topSnakek = $snakekForw[$k];
-
-                                       // aligned snakes bite (\   )
-                                       //                     ( \ \)
-                                       $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
-                                       $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
-                                       $bottomSnakek = $snakekBack[$k];
-
-                                       if($bottomSnakeEnd<$y){
-                                               $bottomSnakeStart = $y;
-                                               $bottomSnakeEnd = $y;
-                                               $bottomSnakek = $k;
+
+                               $start_diag = max($value_to_add_backward + $start_backward, -$d);
+                               $end_diag = min($end_backward, $d);
+                               $value_to_add_backward = 1 - $value_to_add_backward;
+
+                               // compute backward furthest reaching paths
+                               for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+                                       if ($k == $d
+                                       || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
+                                               $x = $V1[$limit_min_1 + $k];
+                                       } else {
+                                               $x = $V1[$limit_plus_1 + $k] - 1;
                                        }
 
-                                       $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
-                                       $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
-                                       if($bestKnownLcs==$lcsLength){
-                                               $fpForw[$k]=$y;
-                                               break 2;
+                                       $y = $x - $k - $delta;
+
+                                       $snake2 = 0;
+                                       while ($x > 0 && $y > 0
+                                       && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
+                                               --$x;
+                                               --$y;
+                                               ++$snake2;
+                                       }
+                                       $V1[$limit + $k] = $x;
+
+                                       // check to see if we can cut down our diagonal range
+                                       if ($x <= 0) {
+                                               $start_backward = $k + 1;
+                                               $value_to_add_backward = 0;
+                                       } else if ($y <= 0 && $end_backward > $k - 1) {
+                                               $end_backward = $k - 1;
                                        }
-                                       $maxp=$m_min_1-$bestLcsLength;
                                }
                        }
-                       if($k<$delta_min_1){
-                               ++$k;
-                       }else if($k>$delta){
-                               --$k;
-                       }else if($k==$delta_min_1){
-                               $k = $delta+$p;
-                       }else{
-                               break;
-                       }
-               } while(TRUE);
-
-               // backward
-               $fpBack[$min_p_min_1] = $snakeBeginBack[$min_p_min_1] = $snakeEndBack[$min_p_min_1] = $snakekBack[$min_p_min_1] =
-               $fpBack[$delta_plus_1_plus_p] = $snakeBeginBack[$delta_plus_1_plus_p] = $snakeEndBack[$delta_plus_1_plus_p] = $snakekBack[$delta_plus_1_plus_p] = $n;
-               $lcsSizeBack[$min_p_min_1] = $lcsSizeBack[$delta_plus_1_plus_p] = 0;
-
-               $k=$delta+$p;
-               do {
-                       $k_plus_1 = $k+1;
-                       $k_min_1 = $k-1;
-
-                       $fpBelow = $fpBack[$k_min_1];
-                       $fpAbove = $fpBack[$k_plus_1]-1;
-                       $y = &$fpBack[$k];
-                       if($fpBelow<$fpAbove){
-                               $oldy = $y = $fpBelow;
-                               $lcsSizeBack[$k] = $lcsSizeBack[$k_min_1];
-                               $snakeBeginBack[$k] = $snakeBeginBack[$k_min_1];
-                               $snakeEndBack[$k] = $snakeEndBack[$k_min_1];
-                               $snakekBack[$k] = $snakekBack[$k_min_1];
-                       }else{
-                               $oldy = $y = $fpAbove;
-                               $lcsSizeBack[$k] = $lcsSizeBack[$k_plus_1];
-                               $snakeBeginBack[$k] = $snakeBeginBack[$k_plus_1];
-                               $snakeEndBack[$k] = $snakeEndBack[$k_plus_1];
-                               $snakekBack[$k] = $snakekBack[$k_plus_1];
-                       }
-                       $x = $y-$k;
-                       while($x > -1 && $y > -1 && $a[$x]===$b[$y]){
-                               --$x;
-                               --$y;
-                       }
-                       if($oldy-$y>0){
-                               $lcsSizeBack[$k] += $oldy-$y;
-                               $snakeBeginBack[$k] = $oldy;
-                               $snakeEndBack[$k] = $y;
-                               $snakekBack[$k] = $k;
-                       }
-                       if($fpForw[$k]>=$y && !$overlap[$k]){
-                               // there is overlap
-                               $overlap[$k]=TRUE;
-                               $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
-                               if($fpForw[$k]>$y+1){
-                                       $snakeoverlap = $fpForw[$k]-$y-1;
-                                       $lcsLength -= $snakeoverlap;
+               } else {
+                       for ($d = 0; $d <= $limit; ++$d) {
+                               $start_diag = max($value_to_add_forward + $start_forward, -$d);
+                               $end_diag = min($end_forward, $d);
+                               $value_to_add_forward = 1 - $value_to_add_forward;
+
+                               // compute forward furthest reaching paths
+                               for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+                                       if ($k == -$d
+                                       || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
+                                               $x = $V0[$limit_plus_1 + $k];
+                                       } else {
+                                               $x = $V0[$limit_min_1 + $k] + 1;
+                                       }
+
+                                       $absx = $snake0 = $x + $bottoml1;
+                                       $absy = $snake1 = $x - $k + $bottoml2;
+                                               
+                                       while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
+                                               ++$absx;
+                                               ++$absy;
+                                       }
+                                       $x = $absx-$bottoml1;
+                                       $snake2 = $absx -$snake0;
+                                       $V0[$limit + $k] = $x;
+
+                                       // check to see if we can cut down the diagonal range
+                                       if ($x >= $N && $end_forward > $k - 1) {
+                                               $end_forward = $k - 1;
+                                       } else if ($absy-$bottoml2 >= $M) {
+                                               $start_forward = $k + 1;
+                                               $value_to_add_forward = 0;
+                                       }
                                }
-                               if($lcsLength>$bestLcsLength){
-                                       // a better sequence found!
-                                       $bestLcsLength = $lcsLength;
-
-                                       $topSnakeStart = $snakeBeginForw[$k];
-                                       $topSnakeEnd = $snakeEndForw[$k];
-                                       $topSnakek = $snakekForw[$k];
-
-                                       // aligned snakes bite (\   )
-                                       //                     ( \ \)
-                                       $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
-                                       $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
-                                       $bottomSnakek = $snakekBack[$k];
-
-                                       if($bottomSnakeEnd<$fpForw[$k]){
-                                               $bottomSnakeStart = $fpForw[$k];
-                                               $bottomSnakeEnd = $fpForw[$k];
-                                               $bottomSnakek = $k;
+
+                               $start_diag = max($value_to_add_backward + $start_backward, -$d);
+                               $end_diag = min($end_backward, $d);
+                               $value_to_add_backward = 1 - $value_to_add_backward;
+
+                               // compute backward furthest reaching paths
+                               for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+                                       if ($k == $d
+                                       || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
+                                               $x = $V1[$limit_min_1 + $k];
+                                       } else {
+                                               $x = $V1[$limit_plus_1 + $k] - 1;
+                                       }
+
+                                       $y = $x - $k - $delta;
+
+                                       $snake2 = 0;
+                                       while ($x > 0 && $y > 0
+                                       && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
+                                               --$x;
+                                               --$y;
+                                               ++$snake2;
                                        }
+                                       $V1[$limit + $k] = $x;
 
-                                       $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
-                                       $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
-                                       if($bestKnownLcs==$lcsLength){
-                                               $fpBack[$k] = $y;
-                                               break 2;
+                                       if ($k >= -$delta - $d && $k <= $d - $delta
+                                       && $x <= $V0[$limit + $k + $delta]) {
+                                               $snake0 = $bottoml1 + $x;
+                                               $snake1 = $bottoml2 + $y;
+                                               return 2 * $d;
+                                       }
+
+                                       // check to see if we can cut down our diagonal range
+                                       if ($x <= 0) {
+                                               $start_backward = $k + 1;
+                                               $value_to_add_backward = 0;
+                                       } else if ($y <= 0 && $end_backward > $k - 1) {
+                                               $end_backward = $k - 1;
                                        }
-                                       $maxp=$m_min_1-$bestLcsLength;
                                }
                        }
-                       if($k>1){
-                               --$k;
-                       }else if($k<0){
-                               ++$k;
-                       }else if($k==1){
-                               $k = -$p;
-                       }else{
-                               break;
-                       }
-               } while(TRUE);
+               }
+               /*
+                * computing the true LCS is too expensive, instead find the diagonal
+                * with the most progress and pretend a midle snake of length 0 occurs
+                * there.
+                */
+
+               $most_progress = self::findMostProgress($M, $N, $limit, $V);
+
+               $snake0 = $bottoml1 + $most_progress[0];
+               $snake1 = $bottoml2 + $most_progress[1];
+               $snake2 = 0;
+               wfDebug('Computing the LCS is too expensive. Using a heuristic.');
+               $this->heuristicUsed = true;
+               return 5; /*
+               * HACK: since we didn't really finish the LCS computation
+               * we don't really know the length of the SES. We don't do
+               * anything with the result anyway, unless it's <=1. We know
+               * for a fact SES > 1 so 5 is as good a number as any to
+               * return here
+               */
        }
 
-       unset($fpForw, $fpBack, $lcsSizeForw, $lcsSizeBack);
-       unset($snakeBeginForw, $snakeBeginBack, $snakeEndForw, $snakeEndBack, $snakekForw, $snakekBack);
-       unset($overlap);
+       private static function findMostProgress($M, $N, $limit, $V) {
+               $delta = $N - $M;
 
-       // Mark snakes as in LCS
-       $maxi = $offsetx+$topSnakeEnd-$topSnakek;
-       for($i=$offsetx+$topSnakeStart-$topSnakek;$i<$maxi;++$i){
-               $a_inLcs_sym[$i] = TRUE;
-       }
-       $maxi = $offsety+$topSnakeEnd;
-       for($i=$offsety+$topSnakeStart;$i<$maxi;++$i){
-               $b_inLcs_sym[$i] = TRUE;
-       }
-       $maxi = $offsetx+$bottomSnakeEnd-$bottomSnakek;
-       for($i=$offsetx+$bottomSnakeStart-$bottomSnakek;$i<$maxi;++$i){
-               $a_inLcs_sym[$i] = TRUE;
-       }
-       $maxi = $offsety+$bottomSnakeEnd;
-       for($i=$offsety+$bottomSnakeStart;$i<$maxi;++$i){
-               $b_inLcs_sym[$i] = TRUE;
-       }
+               if (($M & 1) == ($limit & 1)) {
+                       $forward_start_diag = max(-$M, -$limit);
+               } else {
+                       $forward_start_diag = max(1 - $M, -$limit);
+               }
 
-       $m_t = $topSnakeStart-$topSnakek;
-       $a_t = array_slice($a, 0, $m_t);
-       $b_t = array_slice($b, 0, $topSnakeStart);
+               $forward_end_diag = min($N, $limit);
 
-       $m_b = $m-($bottomSnakeEnd-$bottomSnakek);
-       $n_b = $n-$bottomSnakeEnd;
-       $a_b = array_slice($a, $bottomSnakeEnd-$bottomSnakek, $m_b);
-       $b_b = array_slice($b, $bottomSnakeEnd, $n_b);
+               if (($N & 1) == ($limit & 1)) {
+                       $backward_start_diag = max(-$N, -$limit);
+               } else {
+                       $backward_start_diag = max(1 - $N, -$limit);
+               }
 
-       wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $topSnakeStart, $offsetx, $offsety, $bestLcsLengthTop, $boundRunningTime, $max_NP_before_bound);
+               $backward_end_diag = -min($M, $limit);
 
-       wikidiff3_diffPart($a_b, $b_b, $a_inLcs, $b_inLcs, $m_b, $n_b, $offsetx+($bottomSnakeEnd-$bottomSnakek), $offsety+$bottomSnakeEnd, $bestLcsLengthBottom, $boundRunningTime, $max_NP_before_bound);
-}
+               $temp = array(0,0,0);
 
-class InLcs {
 
-       public $inLcs;
+               $max_progress = array_fill(0,ceil(max($forward_end_diag
+               - $forward_start_diag, $backward_end_diag - $backward_start_diag) / 2), $temp);
+               $num_progress = 0; // the 1st entry is current, it is initialized
+               // with 0s
 
-       function __construct($length){
-               $this->inLcs = $length>0 ? array_fill(0,$length,FALSE): array();
-       }
+               // first search the forward diagonals
+               for ($k = $forward_start_diag; $k <= $forward_end_diag; $k += 2) {
+                       $x = $V[0][$limit + $k];
+                       $y = $x - $k;
+                       if ($x > $N || $y > $M) {
+                               continue;
+                       }
+
+                       $progress = $x + $y;
+                       if ($progress > $max_progress[0][2]) {
+                               $num_progress = 0;
+                               $max_progress[0][0] = $x;
+                               $max_progress[0][1] = $y;
+                               $max_progress[0][2] = $progress;
+                       } else if ($progress == $max_progress[0][2]) {
+                               ++$num_progress;
+                               $max_progress[$num_progress][0] = $x;
+                               $max_progress[$num_progress][1] = $y;
+                               $max_progress[$num_progress][2] = $progress;
+                       }
+               }
+
+               $max_progress_forward = true; // initially the maximum
+               // progress is in the forward
+               // direction
 
-       /**
-        * Get the length of the Longest Common Subsequence (computed)
-        */
-       public function getLcsLength(){
-               return array_sum($this->inLcs);
+               // now search the backward diagonals
+               for ($k = $backward_start_diag; $k <= $backward_end_diag; $k += 2) {
+                       $x = $V[1][$limit + $k];
+                       $y = $x - $k - $delta;
+                       if ($x < 0 || $y < 0) {
+                               continue;
+                       }
+
+                       $progress = $N - $x + $M - $y;
+                       if ($progress > $max_progress[0][2]) {
+                               $num_progress = 0;
+                               $max_progress_forward = false;
+                               $max_progress[0][0] = $x;
+                               $max_progress[0][1] = $y;
+                               $max_progress[0][2] = $progress;
+                       } else if ($progress == $max_progress[0][2] && !$max_progress_forward) {
+                               ++$num_progress;
+                               $max_progress[$num_progress][0] = $x;
+                               $max_progress[$num_progress][1] = $y;
+                               $max_progress[$num_progress][2] = $progress;
+                       }
+               }
+
+               // return the middle diagonal with maximal progress.
+               return $max_progress[floor($num_progress / 2)];
        }
 
 }
+
index 1df4fea..c02eab2 100644 (file)
@@ -74,9 +74,10 @@ class DifferenceEngine {
        }
 
        function showDiffPage( $diffOnly = false ) {
-               global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol;
+               global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol, $wgEnableHtmlDiff;
                wfProfileIn( __METHOD__ );
 
+
                # If external diffs are enabled both globally and for the user,
                # we'll use the application/x-external-editor interface to call
                # an external diff tool like kompare, kdiff3, etc.
@@ -265,11 +266,16 @@ CONTROL;
                        '<div id="mw-diff-ntitle3">' . $newminor . $sk->revComment( $this->mNewRev, !$diffOnly, true ) . $rdel . "</div>" .
                        '<div id="mw-diff-ntitle4">' . $nextlink . $patrol . '</div>';
 
-               $this->showDiff( $oldHeader, $newHeader );
+               if($wgEnableHtmlDiff){
+                       $this->renderHtmlDiff();
+               }else{
 
-               if ( !$diffOnly )
-               $this->renderNewRevision();
+                       $this->showDiff( $oldHeader, $newHeader );
 
+                       if ( !$diffOnly ){
+                               $this->renderNewRevision();
+                       }
+               }
                wfProfileOut( __METHOD__ );
        }
 
@@ -319,6 +325,65 @@ CONTROL;
                wfProfileOut( __METHOD__ );
        }
 
+
+       function renderHtmlDiff() {
+               global $wgOut, $IP;
+               wfProfileIn( __METHOD__ );
+
+               $this->showDiffStyle();
+
+               require_once( "$IP/includes/HTMLDiff.php" );
+
+               #add deleted rev tag if needed
+               if( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) {
+                       $wgOut->addWikiMsg( 'rev-deleted-text-permission' );
+               } else if( $this->mNewRev->isDeleted(Revision::DELETED_TEXT) ) {
+                       $wgOut->addWikiMsg( 'rev-deleted-text-view' );
+               }
+
+               if( !$this->mNewRev->isCurrent() ) {
+                       $oldEditSectionSetting = $wgOut->parserOptions()->setEditSection( false );
+               }
+
+               $this->loadText();
+
+               if( is_object( $this->mOldRev ) ) {
+                       $wgOut->setRevisionId( $this->mOldRev->getId() );
+               }
+
+               global $wgTitle, $wgParser, $wgTitle;
+               $popts = $wgOut->parserOptions();
+               $oldTidy = $popts->setTidy( TRUE );
+
+               $parserOutput = $wgParser->parse($this->mOldtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId );
+               $popts->setTidy( $oldTidy );
+
+               //only for new?
+               //$wgOut->addParserOutputNoText( $parserOutput );
+               $oldHtml =      $parserOutput->getText();
+               wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$oldHtml ) );
+
+               if( is_object( $this->mNewRev ) ) {
+                       $wgOut->setRevisionId( $this->mNewRev->getId() );
+               }
+
+               $popts = $wgOut->parserOptions();
+               $oldTidy = $popts->setTidy( TRUE );
+
+               $parserOutput = $wgParser->parse($this->mNewtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId );
+               $popts->setTidy( $oldTidy );
+
+               //only for new?
+               $wgOut->addParserOutputNoText( $parserOutput );
+               $newHtml =      $parserOutput->getText();
+               wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$newHtml ) );
+
+               $differ = new HTMLDiffer(new DelegatingContentHandler($wgOut));
+               $differ->htmlDiff($oldHtml, $newHtml);
+
+               wfProfileOut( __METHOD__ );
+       }
+
        /**
         * Show the first revision of an article. Uses normal diff headers in
         * contrast to normal "old revision" display style.
@@ -892,6 +957,30 @@ class _DiffOp_Change extends _DiffOp {
        }
 }
 
+/**
+ * Alternative representation of a set of changes, by the index
+ * ranges that are changed.
+ */
+class RangeDifference {
+
+       public $leftstart;
+       public $leftend;
+       public $leftlength;
+
+       public $rightstart;
+       public $rightend;
+       public $rightlength;
+
+       function __construct($leftstart, $leftend, $rightstart, $rightend){
+               $this->leftstart = $leftstart;
+               $this->leftend = $leftend;
+               $this->leftlength = $leftend-$leftstart;
+               $this->rightstart = $rightstart;
+               $this->rightend = $rightend;
+               $this->rightlength = $rightend-$rightstart;
+       }
+}
+
 /**
  * Class used internally by Diff to actually compute the diffs.
  *
@@ -920,22 +1009,108 @@ class _DiffEngine {
 
        const MAX_XREF_LENGTH =  10000;
 
-       function diff ($from_lines, $to_lines) {
+       function diff ($from_lines, $to_lines){
                wfProfileIn( __METHOD__ );
 
-               global $wgExternalDiffEngine;
+               // Diff and store locally
+               $this->diff_local($from_lines, $to_lines);
+
+               // Merge edits when possible
+               $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
+               $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
+
+               // Compute the edit operations.
+               $n_from = sizeof($from_lines);
+               $n_to = sizeof($to_lines);
+
+               $edits = array();
+               $xi = $yi = 0;
+               while ($xi < $n_from || $yi < $n_to) {
+                       USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
+                       USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
+
+                       // Skip matching "snake".
+                       $copy = array();
+                       while ( $xi < $n_from && $yi < $n_to
+                       && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
+                               $copy[] = $from_lines[$xi++];
+                               ++$yi;
+                       }
+                       if ($copy)
+                       $edits[] = new _DiffOp_Copy($copy);
+
+                       // Find deletes & adds.
+                       $delete = array();
+                       while ($xi < $n_from && $this->xchanged[$xi])
+                       $delete[] = $from_lines[$xi++];
+
+                       $add = array();
+                       while ($yi < $n_to && $this->ychanged[$yi])
+                       $add[] = $to_lines[$yi++];
 
+                       if ($delete && $add)
+                       $edits[] = new _DiffOp_Change($delete, $add);
+                       elseif ($delete)
+                       $edits[] = new _DiffOp_Delete($delete);
+                       elseif ($add)
+                       $edits[] = new _DiffOp_Add($add);
+               }
+               wfProfileOut( __METHOD__ );
+               return $edits;
+       }
+
+       function diff_range ($from_lines, $to_lines){
+               wfProfileIn( __METHOD__ );
+
+               // Diff and store locally
+               $this->diff_local($from_lines, $to_lines);
+
+               // Compute the ranges operations.
                $n_from = sizeof($from_lines);
                $n_to = sizeof($to_lines);
 
+               $ranges = array();
+               $xi = $yi = 0;
+               while ($xi < $n_from || $yi < $n_to) {
+                       // Matching "snake".
+                       while ( $xi < $n_from && $yi < $n_to
+                       && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
+                               ++$xi;
+                               ++$yi;
+                       }
+                       // Find deletes & adds.
+                       $xstart = $xi;
+                       while ($xi < $n_from && $this->xchanged[$xi]){
+                               ++$xi;
+                       }
+
+                       $ystart = $yi;
+                       while ($yi < $n_to && $this->ychanged[$yi]){
+                               ++$yi;
+                       }
+
+                       if ($xi>$xstart || $yi>$ystart){
+                               $ranges[] = new RangeDifference($xstart,$xi,$ystart,$yi);
+                       }
+               }
+               wfProfileOut( __METHOD__ );
+               return $ranges;
+       }
+
+       function diff_local ($from_lines, $to_lines) {
+               global $wgExternalDiffEngine;
+               wfProfileIn( __METHOD__);
+
                if($wgExternalDiffEngine == 'wikidiff3'){
                        // wikidiff3
                        global $IP;
                        require_once( "$IP/includes/Diff.php" );
-                       list($this->xchanged, $this->ychanged) = wikidiff3_diff($from_lines, $to_lines, TRUE, 100000);
+                       $wikidiff3 = new WikiDiff3();
+                       list($this->xchanged, $this->ychanged) = $wikidiff3->diff($from_lines, $to_lines);
                }else{
                        // old diff
-                       wfProfileIn( __METHOD__ ." - basicdiff");
+                       $n_from = sizeof($from_lines);
+                       $n_to = sizeof($to_lines);
                        $this->xchanged = $this->ychanged = array();
                        $this->xv = $this->yv = array();
                        $this->xind = $this->yind = array();
@@ -980,48 +1155,8 @@ class _DiffEngine {
 
                        // Find the LCS.
                        $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
-                       wfProfileOut( __METHOD__ ." - basicdiff");
-               }
-
-               // Merge edits when possible
-               $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
-               $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
-
-               // Compute the edit operations.
-               $edits = array();
-               $xi = $yi = 0;
-               while ($xi < $n_from || $yi < $n_to) {
-                       USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
-                       USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
-
-                       // Skip matching "snake".
-                       $copy = array();
-                       while ( $xi < $n_from && $yi < $n_to
-                       && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
-                               $copy[] = $from_lines[$xi++];
-                               ++$yi;
-                       }
-                       if ($copy)
-                       $edits[] = new _DiffOp_Copy($copy);
-
-                       // Find deletes & adds.
-                       $delete = array();
-                       while ($xi < $n_from && $this->xchanged[$xi])
-                       $delete[] = $from_lines[$xi++];
-
-                       $add = array();
-                       while ($yi < $n_to && $this->ychanged[$yi])
-                       $add[] = $to_lines[$yi++];
-
-                       if ($delete && $add)
-                       $edits[] = new _DiffOp_Change($delete, $add);
-                       elseif ($delete)
-                       $edits[] = new _DiffOp_Delete($delete);
-                       elseif ($add)
-                       $edits[] = new _DiffOp_Add($add);
                }
                wfProfileOut( __METHOD__ );
-               return $edits;
        }
 
        /**
@@ -1077,7 +1212,6 @@ class _DiffEngine {
                $numer = $xlim - $xoff + $nchunks - 1;
                $x = $xoff;
                for ($chunk = 0; $chunk < $nchunks; $chunk++) {
-                       wfProfileIn( __METHOD__ . "-chunk" );
                        if ($chunk > 0)
                        for ($i = 0; $i <= $this->lcs; $i++)
                        $ymids[$i][$chunk-1] = $this->seq[$i];
@@ -1130,7 +1264,6 @@ class _DiffEngine {
                if ($end == 0 || $ypos > $this->seq[$end]) {
                        $this->seq[++$this->lcs] = $ypos;
                        $this->in_seq[$ypos] = 1;
-                       wfProfileOut( __METHOD__ );
                        return $this->lcs;
                }
 
diff --git a/includes/HTMLDiff.php b/includes/HTMLDiff.php
new file mode 100644 (file)
index 0000000..7e1cc16
--- /dev/null
@@ -0,0 +1,1997 @@
+<?php
+/* Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * or see http://www.gnu.org/
+ */
+
+
+/**
+ * Any element in the DOM tree of an HTML document.
+ */
+abstract class Node{
+
+       protected $parent;
+
+
+       function __construct($parent){
+               $this->parent = $parent;
+               if(!is_null($parent)){
+                       $parent->addChild($this);
+               }
+       }
+
+       public function getParent(){
+               return $this->parent;
+       }
+
+       public function getParentTree(){
+               if(!is_null($this->parent)){
+                       $parentTree = $this->parent->getParentTree();
+                       $parentTree[] = $this->parent;
+                       return $parentTree;
+               }else{
+                       return array();
+               }
+       }
+
+       public abstract function getMinimalDeletedSet($id);
+
+       public function detectIgnorableWhiteSpace(){
+               // no op
+       }
+
+       public function getLastCommonParent(Node $other){
+               if(is_null($other))
+               throw new Exception('The given node is NULL');
+
+               $result = new LastCommonParentResult();
+
+               $myParents = $this->getParentTree();
+               $otherParents = $other->getParentTree();
+
+               $i = 1;
+               $isSame = true;
+               while ($isSame && $i < sizeof($myParents) && $i < sizeof($otherParents)) {
+                       if (!$myParents[$i]->isSameTag($otherParents[$i])) {
+                               $isSame = false;
+                       } else {
+                               // After the while, the index i-1 must be the last common parent
+                               $i++;
+                       }
+               }
+
+               $result->setLastCommonParentDepth($i - 1);
+               $result->setLastCommonParent($myParents[$i - 1]);
+
+               if (!$isSame) {
+                       $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
+                       $result->setSplittingNeeded();
+               } else if (sizeof($myParents) < sizeof($otherParents)) {
+                       $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
+               } else if (sizeof($myParents) > sizeof($otherParents)) {
+                       // All tags matched but there are tags left in this tree
+                       $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
+                       $result->setSplittingNeeded();
+               } else {
+                       // All tags matched untill the very last one in both trees
+                       // or there were no tags besides the BODY
+                       $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
+               }
+               return $result;
+       }
+
+       public function setParent($parent) {
+               $this->parent = $parent;
+       }
+
+       public abstract function copyTree();
+
+       public function inPre() {
+               $tree = $this->getParentTree();
+               foreach ($tree as $ancestor) {
+                       if ($ancestor->isPre()) {
+                               return true;
+                       }
+               }
+               return false;
+       }
+
+       private $whiteBefore = false;
+
+       private $whiteAfter = false;
+
+       public function isWhiteBefore() {
+               return $this->whiteBefore;
+       }
+
+       public function setWhiteBefore($whiteBefore) {
+               $this->whiteBefore = $whiteBefore;
+       }
+
+       public function isWhiteAfter() {
+               return $this->whiteAfter;
+       }
+
+       public function setWhiteAfter($whiteAfter) {
+               $this->whiteAfter = $whiteAfter;
+       }
+
+       public abstract function getLeftMostChild();
+
+       public abstract function getRightMostChild();
+}
+
+/**
+ * Node that can contain other nodes. Represents an HTML tag.
+ */
+class TagNode extends Node{
+
+       public $children = array();
+
+       protected $qName;
+
+       protected $attributes = array();
+
+       function __construct($parent, $qName, /*array*/ $attributes) {
+               parent::__construct($parent);
+               $this->qName = strtolower($qName);
+               foreach($attributes as $key => $value){
+                       $this->attributes[strtolower($key)] = $value;
+               }
+       }
+
+       public function addChild(Node $node, $index=-1) {
+               if ($node->getParent() !== $this)
+               throw new Exception(
+                    'The new child must have this node as a parent.');
+               if($index == -1){
+                       $this->children[] = $node;
+               }else{
+                       array_splice($this->children,$index,0,array($node));
+               }
+       }
+
+       public function getIndexOf(Node $child) {
+               // don't trust array_search with objects
+               foreach($this->children as $key=>$value){
+                       if($value === $child){
+                               return $key;
+                       }
+               }
+               return NULL;
+       }
+
+       public function getChild($i) {
+               return $this->children[$i];
+       }
+
+       public function getNbChildren() {
+               return count($this->children);
+       }
+
+       public function getQName() {
+               return $this->qName;
+       }
+
+       public function getAttributes() {
+               return $this->attributes;
+       }
+
+       public function isSameTag(TagNode $other) {
+               if (is_null($other))
+               return false;
+               return $this->getOpeningTag() === $other->getOpeningTag();
+       }
+
+       public function getOpeningTag() {
+               $s = '<'.$this->getQName();
+               foreach ($this->attributes as $attribute => $value) {
+                       $s .= ' ' . $attribute . '="' . $value . '"';
+               }
+               return $s .= '>';
+       }
+
+       public function getEndTag() {
+               return '</' . $this->getQName() + '>"';
+       }
+
+       public function getMinimalDeletedSet($id) {
+               $nodes = array();
+
+               if ($this->getNbChildren() == 0)
+               return $nodes;
+
+               $hasNotDeletedDescendant = false;
+
+               foreach ($this->children as $child) {
+                       $childrenChildren = $child->getMinimalDeletedSet($id);
+                       $nodes = array_merge($nodes, $childrenChildren);
+                       if (!$hasNotDeletedDescendant
+                       && !(count($childrenChildren) == 1 && $childrenChildren[0]===$child)) {
+                               // This child is not entirely deleted
+                               $hasNotDeletedDescendant = true;
+                       }
+               }
+               if (!$hasNotDeletedDescendant) {
+                       $nodes = array($this);
+               }
+               return $nodes;
+       }
+
+       public function toString() {
+               return $this->getOpeningTag();
+       }
+
+       public function splitUntill(TagNode $parent, Node $split, $includeLeft) {
+               $splitOccured = false;
+               if ($parent !== $this) {
+                       $part1 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+                       $part2 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+                       $part1->setParent($this->getParent());
+                       $part2->setParent($this->getParent());
+
+                       $i = 0;
+                       $nbChildren = $this->getNbChildren();
+                       while ($i < $nbChildren && $this->children[$i] !== $split) {
+                               $this->children[$i]->setParent($part1);
+                               $part1->addChild($this->children[$i]);
+                               ++$i;
+                       }
+                       if ($i < $nbChildren) {
+                               if ($includeLeft) {
+                                       $this->children[$i]->setParent($part1);
+                                       $part1->addChild($this->children[$i]);
+                               } else {
+                                       $this->children[$i]->setParent($part2);
+                                       $part2->addChild($this->children[$i]);
+                               }
+                               ++$i;
+                       }
+                       while ($i < $nbChildren) {
+                               $this->children[$i]->setParent($part2);
+                               $part2->addChild($this->children[$i]);
+                               ++$i;
+                       }
+                       $myindexinparent = $this->parent->getIndexOf($this);
+                       if ($part1->getNbChildren() > 0)
+                       $this->parent->addChild($part1,$myindexinparent);
+
+                       if ($part2->getNbChildren() > 0)
+                       $this->parent->addChild($part2,$myindexinparent);
+
+                       if ($part1->getNbChildren() > 0 && $part2->getNbChildren() > 0) {
+                               $splitOccured = true;
+                       }
+
+                       $this->parent->removeChild($myindexinparent);
+
+                       if ($includeLeft)
+                       $this->parent->splitUntill($parent, $part1, $includeLeft);
+                       else
+                       $this->parent->splitUntill($parent, $part2, $includeLeft);
+               }
+               return $splitOccured;
+
+       }
+
+       private function removeChild($index) {
+               unset($this->children[$index]);
+               $this->children = array_values($this->children);
+       }
+
+       public static $blocks = array('html'=>TRUE,'body'=>TRUE,'p'=>TRUE,'blockquote'=>TRUE,
+       'h1'=>TRUE,'h2'=>TRUE,'h3'=>TRUE,'h4'=>TRUE,'h5'=>TRUE,'pre'=>TRUE,'div'=>TRUE,'ul'=>TRUE,'ol'=>TRUE,'li'=>TRUE,
+       'table'=>TRUE,'tbody'=>TRUE,'tr'=>TRUE,'td'=>TRUE,'th'=>TRUE,'br'=>TRUE);
+
+       public static function isBlockLevelName($qName) {
+               return array_key_exists(strtolower($qName),self::$blocks);
+       }
+
+       public static function isBlockLevelNode(Node $node) {
+               if(! $node instanceof TagNode)
+               return false;
+               return self::isBlockLevelName($node->getQName());
+       }
+
+       public function isBlockLevel() {
+               return Â self::isBlockLevelNode($this);
+       }
+
+       public static function isInlineName($qName) {
+               return !self::isBlockLevelName($qName);
+       }
+
+       public static function isInlineNode(Node $node) {
+               return !self::isBlockLevelNode($node);
+       }
+
+       public function isInline() {
+               return self::isInlineNode($this);
+       }
+
+       public function copyTree() {
+               $newThis = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+               $newThis->setWhiteBefore($this->isWhiteBefore());
+               $newThis->setWhiteAfter($this->isWhiteAfter());
+               foreach($this->children as $child) {
+                       $newChild = $child->copyTree();
+                       $newChild->setParent($newThis);
+                       $newThis->addChild($newChild);
+               }
+               return $newThis;
+       }
+
+       public function getMatchRatio(TagNode $other) {
+               $txtComp = new TextOnlyComparator($other);
+               return $txtComp->getMatchRatio(new TextOnlyComparator($this));
+       }
+
+       public function expandWhiteSpace() {
+               $shift = 0;
+               $spaceAdded = false;
+
+               $nbOriginalChildren = $this->getNbChildren();
+               for ($i = 0; $i < $nbOriginalChildren; ++$i) {
+                       $child = $this->getChild($i + $shift);
+
+                       if($child instanceof TagNode){
+                               if (!$child->isPre()) {
+                                       $child->expandWhiteSpace();
+                               }
+                       }
+                       if (!$spaceAdded && $child->isWhiteBefore()) {
+                               $ws = new WhiteSpaceNode(NULL, ' ', $child->getLeftMostChild());
+                               $ws->setParent($this);
+                               $this->addChild($ws,$i + ($shift++));
+                       }
+                       if ($child->isWhiteAfter()) {
+                               $ws = new WhiteSpaceNode(NULL, ' ', $child->getRightMostChild());
+                               $ws->setParent($this);
+                               $this->addChild($ws,$i + 1 + ($shift++));
+                               $spaceAdded = true;
+                       } else {
+                               $spaceAdded = false;
+                       }
+
+               }
+       }
+
+       public function getLeftMostChild() {
+               if ($this->getNbChildren() < 1)
+               return $this;
+               return $this->getChild(0)->getLeftMostChild();
+
+       }
+
+       public function getRightMostChild() {
+               if ($this->getNbChildren() < 1)
+               return $this;
+               return $this->getChild($this->getNbChildren() - 1)->getRightMostChild();
+       }
+
+       public function isPre() {
+               return 0 == strcasecmp($this->getQName(),'pre');
+       }
+
+       public static function toDiffLine(TagNode $node){
+               return $node->getOpeningTag();
+       }
+}
+
+/**
+ * Represents a piece of text in the HTML file.
+ */
+class TextNode extends Node{
+
+       private $s;
+
+       private $modification;
+
+       function __construct($parent, $s) {
+               parent::__construct($parent);
+               $this->modification = new Modification(Modification::NONE);
+               $this->s = $s;
+       }
+
+       public function copyTree() {
+               $clone = clone $this;
+               $clone->setParent(NULL);
+               return $clone;
+       }
+
+       public function getLeftMostChild() {
+               return $this;
+       }
+
+       public function getRightMostChild() {
+               return $this;
+       }
+
+       public function getMinimalDeletedSet($id) {
+               if ($this->getModification()->getType() == Modification::REMOVED
+               && $this->getModification()->getID() == $id){
+                       return array($this);
+               }
+               return array();
+       }
+
+       public function getModification() {
+               return $this->modification;
+       }
+
+       public function getText() {
+               return $this->s;
+       }
+
+       public function isSameText($other) {
+               if (is_null($other) || ! $other instanceof TextNode){
+                       return false;
+               }
+               return str_replace('\n', ' ',$this->getText()) === str_replace('\n', ' ',$other->getText());
+       }
+
+       public function setModification(Modification $m) {
+               //wfDebug("Registered modification for node '$this->s' as ".Modification::typeToString($m->getType()));
+               $this->modification = $m;
+       }
+
+       public function toString() {
+               return $this->getText();
+       }
+
+       public static function toDiffLine(TextNode $node){
+               return str_replace('\n', ' ',$node->getText());
+       }
+}
+
+class WhiteSpaceNode extends TextNode {
+
+       function __construct($parent, $s, Node $like = NULL) {
+               parent::__construct($parent, $s);
+               if(!is_null($like) && $like instanceof TextNode){
+                       $newModification = clone $like->getModification();
+                       $newModification->setFirstOfID(false);
+                       $this->setModification($newModification);
+               }
+       }
+
+       public static function isWhiteSpace($c) {
+               switch ($c) {
+                       case ' ':
+                       case '\t':
+                       case '\r':
+                       case '\n':
+                               return true;
+                       default:
+                               return false;
+               }
+       }
+}
+
+/**
+ * Represents the root of a HTML document.
+ */
+class BodyNode extends TagNode {
+
+       function __construct() {
+               parent::__construct(NULL, 'body', array());
+       }
+
+       public function copyTree() {
+               $newThis = new BodyNode();
+               foreach ($this->children as $child) {
+                       $newChild = $child->copyTree();
+                       $newChild->setParent($newThis);
+                       $newThis->addChild($newChild);
+               }
+               return $newThis;
+       }
+
+       public function getMinimalDeletedSet($id) {
+               $nodes = array();
+               foreach ($this->children as $child) {
+                       $childrenChildren = $child->getMinimalDeletedSet($id);
+                       $nodes = array_merge($nodes, $childrenChildren);
+               }
+               return $nodes;
+       }
+
+}
+
+/**
+ * Represents an image in HTML. Even though images do not contain any text they
+ * are independent visible objects on the page. They are logically a TextNode.
+ */
+class ImageNode extends TextNode {
+
+       private $attributes;
+
+       function __construct(TagNode $parent, /*array*/ $attrs) {
+               if(!array_key_exists('src',$attrs)){
+                       //wfDebug('Image without a source:');
+                       foreach($attrs as $key => $value){
+                               //wfDebug("$key = $value");
+                       }
+                       parent::__construct($parent, '<img></img>');
+               }else{
+                       parent::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
+               }
+               $this->attributes = $attrs;
+       }
+
+       public function isSameText($other) {
+               if (is_null($other) || ! $other instanceof ImageNode)
+               return false;
+               return $this->getText() === $other->getText();
+       }
+
+       public function getAttributes() {
+               return $this->attributes;
+       }
+
+}
+
+/**
+ * When detecting the last common parent of two nodes, all results are stored as
+ * a LastCommonParentResult.
+ */
+class LastCommonParentResult {
+
+       // Parent
+       private $parent;
+
+       public function getLastCommonParent() {
+               return $this->parent;
+       }
+
+       public function setLastCommonParent(TagNode $parent) {
+               $this->parent = $parent;
+       }
+
+       // Splitting
+       private $splittingNeeded = false;
+
+       public function isSplittingNeeded() {
+               return $this->splittingNeeded;
+       }
+
+       public function setSplittingNeeded() {
+               $this->splittingNeeded = true;
+       }
+
+       // Depth
+       private $lastCommonParentDepth = -1;
+
+       public function getLastCommonParentDepth() {
+               return $this->lastCommonParentDepth;
+       }
+
+       public function setLastCommonParentDepth($depth) {
+               $this->lastCommonParentDepth = $depth;
+       }
+
+       // Index
+       private $indexInLastCommonParent = -1;
+
+       public function getIndexInLastCommonParent() {
+               return $this->indexInLastCommonParent;
+       }
+
+       public function setIndexInLastCommonParent($index) {
+               $this->indexInLastCommonParent = $index;
+       }
+}
+
+class Modification{
+
+       const NONE = 1;
+       const REMOVED = 2;
+       const ADDED = 4;
+       const CHANGED = 8;
+
+       private $type;
+
+       private $id = -1;
+
+       private $prevMod;
+
+       private $nextMod;
+
+       private $firstOfID = false;
+
+       function __construct($type) {
+               $this->type = $type;
+       }
+
+       public function copy() {
+               $newM = new Modification($this->getType());
+               $newM->setID($this->getID());
+               $newM->setChanges($this->getChanges());
+               $newM->setFirstOfID($this->isFirstOfID());
+               $newM->setNext($this->getNext());
+               $newM->setPrevious($this->getPrevious());
+               return $newM;
+       }
+
+       public function getType() {
+               return $this->type;
+       }
+
+       public function setID($id) {
+               $this->id = $id;
+       }
+
+       public function getID() {
+               return $this->id;
+       }
+
+       public function setPrevious($m) {
+               $this->prevMod = $m;
+       }
+
+       public function getPrevious() {
+               return $this->prevMod;
+       }
+
+       public function setNext($m) {
+               $this->nextMod = $m;
+       }
+
+       public function getNext() {
+               return $this->nextMod;
+       }
+
+       private $changes;
+
+       public function setChanges($changes) {
+               $this->changes = $changes;
+       }
+
+       public function getChanges() {
+               return $this->changes;
+       }
+
+       public function isFirstOfID() {
+               return $this->firstOfID;
+       }
+
+       public function setFirstOfID($firstOfID) {
+               $this->firstOfID = $firstOfID;
+       }
+
+       public static function typeToString($type){
+               switch($type){
+                       case self::NONE: return 'none';
+                       case self::REMOVED: return 'removed';
+                       case self::ADDED: return 'added';
+                       case self::CHANGED: return 'changed';
+               }
+       }
+}
+
+class DomTreeBuilder {
+
+       private $textNodes = array();
+
+       private $bodyNode;
+
+       private $currentParent;
+
+       private $newWord = "";
+
+       protected $bodyStarted = false;
+
+       protected $bodyEnded = false;
+
+       private $whiteSpaceBeforeThis = false;
+
+       private $lastSibling;
+
+       function __construct(){
+               $this->bodyNode = $this->currentParent = new BodyNode();
+       }
+
+       public function getBodyNode() {
+               return $this->bodyNode;
+       }
+
+       public function getTextNodes() {
+               return $this->textNodes;
+       }
+
+       /**
+        * Must be called manually
+        */
+       public function endDocument() {
+               $this->endWord();
+               //wfDebug(sizeof($this->textNodes) . ' text nodes in document.');
+       }
+
+       public function startElement($parser, $name, /*array*/ $attributes) {
+               if(!strcasecmp($name, 'body')==0){
+                       //wfDebug("Starting $name node.");
+                       $this->endWord();
+
+                       $newTagNode = new TagNode($this->currentParent, $name, $attributes);
+                       $this->currentParent = $newTagNode;
+                       $this->lastSibling = NULL;
+                       if ($this->whiteSpaceBeforeThis && $newTagNode->isInline()) {
+                               $newTagNode->setWhiteBefore(true);
+                       }
+                       $this->whiteSpaceBeforeThis = false;
+               }
+       }
+
+       public function endElement($parser, $name) {
+               if(!strcasecmp($name, 'body')==0){
+                       //wfDebug("Ending $name node.");
+                       if (0 == strcasecmp($name,'img')) {
+                               // Insert a dummy leaf for the image
+                               $img = new ImageNode($this->currentParent, $this->currentParent->getAttributes());
+                               $img->setWhiteBefore($this->whiteSpaceBeforeThis);
+                               $this->lastSibling = $img;
+                               $this->textNodes[] = $img;
+                       }
+                       $this->endWord();
+                       if ($this->currentParent->isInline()) {
+                               $this->lastSibling = $this->currentParent;
+                       } else {
+                               $this->lastSibling = NULL;
+                       }
+                       $this->currentParent = $this->currentParent->getParent();
+                       $this->whiteSpaceBeforeThis = false;
+               }else{
+                       $this->endDocument();
+               }
+       }
+
+       public function characters($parser, $data){
+               //wfDebug('Parsing '. strlen($data).' characters.');
+               $array = str_split($data);
+               foreach($array as $c) {
+                       if (self::isDelimiter($c)) {
+                               $this->endWord();
+                               if (WhiteSpaceNode::isWhiteSpace($c) && !$this->currentParent->isPre()
+                               && !$this->currentParent->inPre()) {
+                                       if (!is_null($this->lastSibling)){
+                                               $this->lastSibling->setWhiteAfter(true);
+                                       }
+                                       $this->whiteSpaceBeforeThis = true;
+                               } else {
+                                       $textNode = new TextNode($this->currentParent, $c);
+                                       $textNode->setWhiteBefore($this->whiteSpaceBeforeThis);
+                                       $this->whiteSpaceBeforeThis = false;
+                                       $this->lastSibling = $textNode;
+                                       $this->textNodes[] = $textNode;
+                               }
+                       } else {
+                               $this->newWord .= $c;
+                       }
+
+               }
+       }
+
+       private function endWord() {
+               if (strlen($this->newWord) > 0) {
+                       $node = new TextNode($this->currentParent, $this->newWord);
+                       $node->setWhiteBefore($this->whiteSpaceBeforeThis);
+                       $this->whiteSpaceBeforeThis = false;
+                       $this->lastSibling = $node;
+                       $this->textNodes[] = $node;
+                       $this->newWord = "";
+               }
+       }
+
+       public static function isDelimiter($c) {
+               switch ($c) {
+                       // Basic Delimiters
+                       case '/':
+                       case '.':
+                       case '!':
+                       case ',':
+                       case ';':
+                       case '?':
+                       case '=':
+                       case '\'':
+                       case '"':
+                               // Extra Delimiters
+                       case '[':
+                       case ']':
+                       case '{':
+                       case '}':
+                       case '(':
+                       case ')':
+                       case '&':
+                       case '|':
+                       case '\\':
+                       case '-':
+                       case '_':
+                       case '+':
+                       case '*':
+                       case ':':
+                               return true;
+                       default:
+                               return WhiteSpaceNode::isWhiteSpace($c);
+               }
+       }
+
+       public function getDiffLines(){
+               return array_map(array('TextNode','toDiffLine'), $this->textNodes);
+       }
+}
+
+class TextNodeDiffer{
+
+       private $textNodes;
+       private $bodyNode;
+
+       private $oldTextNodes;
+       private $oldBodyNode;
+
+       private $lastModified = array();
+
+       function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
+               $this->textNodes = $tree->getTextNodes();
+               $this->bodyNode = $tree->getBodyNode();
+               $this->oldTextNodes = $oldTree->getTextNodes();
+               $this->oldBodyNode = $oldTree->getBodyNode();
+       }
+
+       public function getBodyNode() {
+               return $this->bodyNode;
+       }
+
+       private $newID = 0;
+
+       public function markAsNew($start, $end) {
+               if ($end <= $start)
+               return;
+
+               if ($this->whiteAfterLastChangedPart)
+               $this->textNodes[$start]->setWhiteBefore(false);
+
+               $nextLastModified = array();
+
+               for ($i = $start; $i < $end; ++$i) {
+                       $mod = new Modification(Modification::ADDED);
+                       $mod->setID($this->newID);
+                       if (sizeof($this->lastModified) > 0) {
+                               $mod->setPrevious($this->lastModified[0]);
+                               if (is_null($this->lastModified[0]->getNext())) {
+                                       foreach ($this->lastModified as $lastMod) {
+                                               $lastMod->setNext($mod);
+                                       }
+                               }
+                       }
+                       $nextLastModified[] = $mod;
+                       $this->textNodes[$i]->setModification($mod);
+               }
+               if ($start < $end) {
+                       $this->textNodes[$start]->getModification()->setFirstOfID(true);
+               }
+               ++$this->newID;
+               $this->lastModified = $nextLastModified;
+       }
+
+       private $changedID = 0;
+
+       private $changedIDUsed = false;
+
+       public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
+               $i = $rightstart;
+               $j = $leftstart;
+
+               if ($this->changedIDUsed) {
+                       ++$this->changedID;
+                       $this->changedIDUsed = false;
+               }
+
+               $nextLastModified = array();
+
+               $changes;
+               while ($i < $rightend) {
+                       $acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
+                       $acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
+                       $result = $acthis->getResult($acother);
+                       unset($acthis, $acother);
+
+                       $nbLastModified = sizeof($this->lastModified);
+                       if ($result->isChanged()) {
+                               $mod = new Modification(Modification::CHANGED);
+
+                               if (!$this->changedIDUsed) {
+                                       $mod->setFirstOfID(true);
+                                       if (sizeof($nextLastModified) > 0) {
+                                               $this->lastModified = $nextLastModified;
+                                               $nextLastModified = array();
+                                       }
+                               } else if (!is_null($result->getChanges()) && $result->getChanges() != $this->changes) {
+                                       ++$this->changedID;
+                                       $mod->setFirstOfID(true);
+                                       if (sizeof($nextLastModified) > 0) {
+                                               $this->lastModified = $nextLastModified;
+                                               $nextLastModified = array();
+                                       }
+                               }
+
+                               if ($nbLastModified > 0) {
+                                       $mod->setPrevious($this->lastModified[0]);
+                                       if (is_null($this->lastModified[0]->getNext())) {
+                                               foreach ($this->lastModified as $lastMod) {
+                                                       $lastMod->setNext($mod);
+                                               }
+                                       }
+                               }
+                               $nextLastModified[] = $mod;
+
+                               $mod->setChanges($result->getChanges());
+                               $mod->setID($this->changedID);
+
+                               $this->textNodes[$i]->setModification($mod);
+                               $this->changes = $result->getChanges();
+                               $this->changedIDUsed = true;
+                       } else if ($this->changedIDUsed) {
+                               ++$this->changedID;
+                               $this->changedIDUsed = false;
+                       }
+                       ++$i;
+                       ++$j;
+               }
+               if (sizeof($nextLastModified) > 0){
+                       $this->lastModified = $nextLastModified;
+               }
+       }
+
+       // used to remove the whitespace between a red and green block
+       private $whiteAfterLastChangedPart = false;
+
+       private $deletedID = 0;
+
+       public function markAsDeleted($start, $end, $before) {
+
+               if ($end <= $start)
+               return;
+
+               if ($before > 0 && $this->textNodes[$before - 1]->isWhiteAfter()) {
+                       $this->whiteAfterLastChangedPart = true;
+               } else {
+                       $this->whiteAfterLastChangedPart = false;
+               }
+
+               $nextLastModified = array();
+
+               for ($i = $start; $i < $end; ++$i) {
+                       $mod = new Modification(Modification::REMOVED);
+                       $mod->setID($this->deletedID);
+                       if (sizeof($this->lastModified) > 0) {
+                               $mod->setPrevious($this->lastModified[0]);
+                               if (is_null($this->lastModified[0]->getNext())) {
+                                       foreach ($this->lastModified as $lastMod) {
+                                               $lastMod->setNext($mod);
+                                       }
+                               }
+                       }
+                       $nextLastModified[] = $mod;
+
+                       // oldTextNodes is used here because we're going to move its deleted
+                       // elements
+                       // to this tree!
+                       $this->oldTextNodes[$i]->setModification($mod);
+               }
+               $this->oldTextNodes[$start]->getModification()->setFirstOfID(true);
+
+               $deletedNodes = $this->oldBodyNode->getMinimalDeletedSet($this->deletedID);
+
+               //wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes));
+
+               // Set prevLeaf to the leaf after which the old HTML needs to be
+               // inserted
+               if ($before > 0){
+                       $prevLeaf = $this->textNodes[$before - 1];
+               }
+               // Set nextLeaf to the leaf before which the old HTML needs to be
+               // inserted
+               if ($before < sizeof($this->textNodes)){
+                       $nextLeaf = $this->textNodes[$before];
+               }
+
+               while (sizeof($deletedNodes) > 0) {
+                       if (isset($prevLeaf)) {
+                               $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]);
+                       } else {
+                               $prevResult = new LastCommonParentResult();
+                               $prevResult->setLastCommonParent($this->getBodyNode());
+                               $prevResult->setIndexInLastCommonParent(0);
+                       }
+                       if (isset($nextleaf)) {
+                               $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[sizeof($deletedNodes) - 1]);
+                       } else {
+                               $nextResult = new LastCommonParentResult();
+                               $nextResult->setLastCommonParent($this->getBodyNode());
+                               $nextResult->setIndexInLastCommonParent($this->getBodyNode()->getNbChildren());
+                       }
+
+                       if ($prevResult->getLastCommonParentDepth() == $nextResult->getLastCommonParentDepth()) {
+                               // We need some metric to choose which way to add-...
+                               if ($deletedNodes[0]->getParent() === $deletedNodes[sizeof($deletedNodes) - 1]->getParent()
+                               && $prevResult->getLastCommonParent() === $nextResult->getLastCommonParent()) {
+                                       // The difference is not in the parent
+                                       $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
+                               } else {
+                                       // The difference is in the parent, so compare them
+                                       // now THIS is tricky
+                                       $distancePrev = $deletedNodes[0]->getParent()->getMatchRatio($prevResult->getLastCommonParent());
+                                       $distanceNext = $deletedNodes[sizeof($deletedNodes) - 1]->getParent()->getMatchRatio($nextResult->getLastCommonParent());
+
+                                       if ($distancePrev <= $distanceNext) {
+                                               $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
+                                       } else {
+                                               $nextResult->setLastCommonParentDepth($nextResult->getLastCommonParentDepth() + 1);
+                                       }
+                               }
+
+                       }
+
+                       if ($prevResult->getLastCommonParentDepth() > $nextResult->getLastCommonParentDepth()) {
+                               // Inserting at the front
+                               if ($prevResult->isSplittingNeeded()) {
+                                       $prevLeaf->getParent()->splitUntill($prevResult->getLastCommonParent(), $prevLeaf, true);
+                               }
+                               $prevLeaf = $deletedNodes[0]->copyTree();
+                               unset($deletedNodes[0]);
+                               $deletedNodes = array_values($deletedNodes);
+                               $prevLeaf->setParent($prevResult->getLastCommonParent());
+                               $prevResult->getLastCommonParent()->addChild($prevLeaf,$prevResult->getIndexInLastCommonParent() + 1);
+                       } else if ($prevResult->getLastCommonParentDepth() < $nextResult->getLastCommonParentDepth()) {
+                               // Inserting at the back
+                               if ($nextResult->isSplittingNeeded()) {
+                                       $splitOccured = $nextLeaf->getParent()->splitUntill($nextResult->getLastCommonParent(), $nextLeaf, false);
+                                       if ($splitOccured) {
+                                               // The place where to insert is shifted one place to the
+                                               // right
+                                               $nextResult->setIndexInLastCommonParent($nextResult->getIndexInLastCommonParent() + 1);
+                                       }
+                               }
+                               $nextLeaf = $deletedNodes[sizeof(deletedNodes) - 1]->copyTree();
+                               unset($deletedNodes[sizeof(deletedNodes) - 1]);
+                               $deletedNodes = array_values($deletedNodes);
+                               $nextLeaf->setParent($nextResult->getLastCommonParent());
+                               $nextResult->getLastCommonParent()->addChild($nextLeaf,$nextResult->getIndexInLastCommonParent());
+                       } else
+                       throw new Exception("Uh?");
+               }
+               $this->lastModified = $nextLastModified;
+               ++$this->deletedID;
+       }
+
+       public function expandWhiteSpace() {
+               $this->getBodyNode()->expandWhiteSpace();
+       }
+
+       public function lengthNew(){
+               return sizeof($this->textNodes);
+       }
+
+       public function lengthOld(){
+               return sizeof($this->oldTextNodes);
+       }
+}
+
+class HTMLDiffer{
+       
+       private $output;
+       
+       function __construct($output){
+               $this->output = $output;
+       }
+
+       function htmlDiff($from, $to){
+               // Create an XML parser
+               $xml_parser = xml_parser_create('');
+
+               $domfrom = new DomTreeBuilder();
+
+               // Set the functions to handle opening and closing tags
+               xml_set_element_handler($xml_parser, array($domfrom,"startElement"), array($domfrom,"endElement"));
+
+               // Set the function to handle blocks of character data
+               xml_set_character_data_handler($xml_parser, array($domfrom,"characters"));
+
+               ;
+               //wfDebug('Parsing '.strlen($from)." characters worth of HTML");
+               if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
+               || !xml_parse($xml_parser, $from, FALSE)
+               || !xml_parse($xml_parser, '</body>', TRUE)){
+                       wfDebug(sprintf("XML error: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
+               }
+               xml_parser_free($xml_parser);
+               unset($from);
+
+               $xml_parser = xml_parser_create('');
+
+               $domto = new DomTreeBuilder();
+
+               // Set the functions to handle opening and closing tags
+               xml_set_element_handler($xml_parser, array($domto,"startElement"), array($domto,"endElement"));
+
+               // Set the function to handle blocks of character data
+               xml_set_character_data_handler($xml_parser, array($domto,"characters"));
+
+               //wfDebug('Parsing '.strlen($to)." characters worth of HTML");
+               if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
+               || !xml_parse($xml_parser, $to, FALSE)
+               || !xml_parse($xml_parser, '</body>', TRUE)){
+                       wfDebug(sprintf("XML error in HTML diff: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
+               }
+               xml_parser_free($xml_parser);
+               unset($to);
+
+               $diffengine = new _DiffEngine();
+               $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
+               unset($xml_parser,$diffengine);
+
+               $domdiffer = new TextNodeDiffer($domto, $domfrom);
+
+               $currentIndexLeft = 0;
+               $currentIndexRight = 0;
+               foreach ($differences as $d) {
+                       if ($d->leftstart > $currentIndexLeft) {
+                               $domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart,
+                               $currentIndexRight, $d->rightstart);
+                       }
+                       if ($d->leftlength > 0) {
+                               $domdiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart);
+                       }
+                       $domdiffer->markAsNew($d->rightstart, $d->rightend);
+
+                       $currentIndexLeft = $d->leftend;
+                       $currentIndexRight = $d->rightend;
+               }
+               if ($currentIndexLeft < $domdiffer->lengthOld()) {
+                       $domdiffer->handlePossibleChangedPart($currentIndexLeft,$domdiffer->lengthOld(), $currentIndexRight,$domdiffer->lengthNew());
+               }
+
+               $domdiffer->expandWhiteSpace();
+               $output = new HTMLOutput('htmldiff', $this->output);
+               $output->parse($domdiffer->getBodyNode());
+       }
+
+       private function preProcess(/*array*/ $differences){
+               $newRanges = array();
+
+               $nbDifferences = sizeof($differences);
+               for ($i = 0; $i < $nbDifferences; ++$i) {
+                       $leftStart = $differences[$i]->leftstart;
+                       $leftEnd = $differences[$i]->leftend;
+                       $rightStart = $differences[$i]->rightstart;
+                       $rightEnd = $differences[$i]->rightend;
+
+                       $leftLength = $leftEnd - $leftStart;
+                       $rightLength = $rightEnd - $rightStart;
+
+                       while ($i + 1 < $nbDifferences && self::score($leftLength, $differences[$i + 1]->leftlength,
+                       $rightLength, $differences[$i + 1]->rightlength) > ($differences[$i + 1]->leftstart - $leftEnd)) {
+                               $leftEnd = $differences[$i + 1]->leftend;
+                               $rightEnd = $differences[$i + 1]->rightend;
+                               $leftLength = $leftEnd - $leftStart;
+                               $rightLength = $rightEnd - $rightStart;
+                               ++$i;
+                       }
+                       $newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
+               }
+               return $newRanges;
+       }
+
+       /**
+        * Heuristic to merge differences for readability.
+        */
+       public static function score($ll, $nll, $rl, $nrl) {
+               if (($ll == 0 && $nll == 0)
+               || ($rl == 0 && $nrl == 0)){
+                       return 0;
+               }
+               $numbers = array($ll, $nll, $rl, $nrl);
+               $d = 0;
+               foreach ($numbers as $number) {
+                       while ($number > 3) {
+                               $d += 3;
+                               $number -= 3;
+                               $number *= 0.5;
+                       }
+                       $d += $number;
+
+               }
+               return $d / (1.5 * sizeof($numbers));
+       }
+
+}
+
+class TextOnlyComparator{
+
+       public $leafs = array();
+
+       function _construct(TagNode $tree) {
+               $this->addRecursive($tree);
+               $this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs);
+       }
+
+       private function addRecursive(TagNode $tree) {
+               foreach ($tree->children as $child) {
+                       if ($child instanceof TagNode) {
+                               $this->addRecursive($child);
+                       } else if ($child instanceof TextNode) {
+                               $this->leafs[] = $node;
+                       }
+               }
+       }
+
+       public function getMatchRatio(TextOnlyComparator $other) {
+               $nbOthers = sizeof($other->leafs);
+               $nbThis = sizeof($this->leafs);
+               if($nbOthers == 0 || $nbThis == 0){
+                       return -log(0);
+               }
+               
+               $diffengine = new _DiffEngine();
+               $diffengine->diff_local($this->leafs, $other->leafs);
+
+               $distanceThis = array_sum($diffengine->xchanged);
+               $distanceOther = array_sum($diffengine->ychanged);
+
+               return ((0.0 + $distanceOther) / $nbOthers + (0.0 + $distanceThis)
+               / $nbThis) / 2.0;
+       }
+}
+
+class AncestorComparatorResult {
+
+       private $changed = false;
+
+       private $changes = "";
+
+       public function isChanged() {
+               return $this->changed;
+       }
+
+       public function setChanged($changed) {
+               $this->changed = $changed;
+       }
+
+       public function getChanges() {
+               return $this->changes;
+       }
+
+       public function setChanges($changes) {
+               $this->changes = $changes;
+       }
+}
+
+/**
+ * A comparator used when calculating the difference in ancestry of two Nodes.
+ */
+class AncestorComparator{
+
+       public $ancestors;
+       public $ancestorsText;
+
+       function __construct(/*array*/ $ancestors) {
+               $this->ancestors = $ancestors;
+               $this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors);
+       }
+
+       private $compareTxt = "";
+
+       public function getCompareTxt() {
+               return $this->compareTxt;
+       }
+
+       public function getResult(AncestorComparator $other) {
+               $result = new AncestorComparatorResult();
+
+               $diffengine = new _DiffEngine();
+               $differences = $diffengine->diff_range($this->ancestorsText, $other->ancestorsText);
+
+               if (sizeof($differences) == 0){
+                       return $result;
+               }
+               $changeTxt = new ChangeTextGenerator($this, $other);
+
+               $result->setChanged(true);
+               $result->setChanges($changeTxt->getChanged($differences)->toString());
+
+               return $result;
+       }
+}
+
+class ChangeTextGenerator {
+
+       private $new;
+       private $old;
+
+       private $factory;
+
+       function __construct(AncestorComparator $old, AncestorComparator $new) {
+               $this->new = $new;
+               $this->old = $old;
+               $this->factory = new TagToStringFactory();
+       }
+
+       public function getChanged(/*array*/ $differences) {
+               $txt = new ChangeText;
+
+               $rootlistopened = false;
+
+               if (sizeof($differences) > 1) {
+                       $txt->addHtml('<ul class="changelist">');
+                       $rootlistopened = true;
+               }
+
+               $nbDifferences = sizeof($differences);
+               for ($j = 0; $j < $nbDifferences; ++$j) {
+                       $d = $differences[$j];
+
+                       $lvl1listopened = false;
+
+                       if ($rootlistopened) {
+                               $txt->addHtml('<li>');
+                       }
+
+                       if ($d->leftlength + $d->rightlength > 1) {
+                               $txt->addHtml('<ul class="changelist">');
+                               $lvl1listopened = true;
+                       }
+
+                       // left are the old ones
+                       for ($i = $d->leftstart; $i < $d->leftend; ++$i) {
+                               if ($lvl1listopened){
+                                       $txt->addHtml('<li>');
+                               }
+                               // add a bullet for a old tag
+                               $this->addTagOld($txt, $this->old->ancestors[$i]);
+
+                               if ($lvl1listopened){
+                                       $txt->addHtml('</li>');
+                               }
+                       }
+
+                       // right are the new ones
+                       for ($i = $d->rightstart; $i < $d->rightend; ++$i) {
+                               if ($lvl1listopened){
+                                       $txt->addHtml('<li>');
+                               }
+
+                               // add a bullet for a new tag
+                               $this->addTagNew($txt, $this->new->ancestors[$i]);
+
+                               if ($lvl1listopened){
+                                       $txt->addHtml('</li>');
+                               }
+
+                       }
+
+                       if ($lvl1listopened) {
+                               $txt->addHtml('</ul>');
+                       }
+
+                       if ($rootlistopened) {
+                               $txt->addHtml('</li>');
+                       }
+               }
+
+               if ($rootlistopened) {
+                       $txt->addHtml('</ul>');
+               }
+
+               return $txt;
+
+       }
+
+       private function addTagOld(ChangeText $txt, TagNode $ancestor) {
+               $this->factory->create($ancestor)->getRemovedDescription($txt);
+       }
+
+       private function addTagNew(ChangeText $txt, TagNode $ancestor) {
+               $this->factory->create($ancestor)->getAddedDescription($txt);
+       }
+}
+
+class ChangeText {
+
+       private $txt = "";
+
+       const newLine = "<br/>";
+
+       public function addText($s) {
+               $s = $this->clean($s);
+               $this->txt .= $s;
+       }
+
+       public function addHtml($s) {
+               $this->txt .= $s;
+       }
+
+       public function addNewLine() {
+               $this->addHtml(self::newLine);
+       }
+
+       public function toString() {
+               return $this->txt;
+       }
+
+       private function clean($s) {
+               return htmlspecialchars($s);
+       }
+}
+
+class TagToStringFactory {
+
+       private static $containerTags = array(
+        'html' => TRUE, 
+        'body' => TRUE, 
+        'p'  => TRUE, 
+        'blockquote' => TRUE, 
+        'h1' => TRUE, 
+        'h2' => TRUE, 
+        'h3' => TRUE, 
+        'h4' => TRUE, 
+        'h5' => TRUE, 
+        'pre' => TRUE, 
+        'div' => TRUE, 
+        'ul' => TRUE, 
+        'ol' => TRUE, 
+        'li' => TRUE, 
+        'table' => TRUE, 
+        'tbody' => TRUE, 
+        'tr' => TRUE, 
+        'td' => TRUE, 
+        'th' => TRUE, 
+        'br' => TRUE, 
+        'hr' => TRUE, 
+        'code' => TRUE, 
+        'dl' => TRUE, 
+        'dt' => TRUE, 
+        'dd' => TRUE, 
+        'input' => TRUE, 
+        'form' => TRUE, 
+        'img' => TRUE,
+       // in-line tags that can be considered containers not styles
+        'span' => TRUE, 
+        'a' => TRUE
+       );
+
+       private static $styleTags = array(
+        'i' => TRUE, 
+        'b' => TRUE, 
+        'strong' => TRUE, 
+        'em' => TRUE, 
+        'font' => TRUE, 
+        'big' => TRUE, 
+        'del' => TRUE, 
+        'tt' => TRUE, 
+        'sub' => TRUE, 
+        'sup' => TRUE, 
+        'strike' => TRUE
+       );
+
+       const MOVED = 1;
+       const STYLE = 2;
+       const UNKNOWN = 4;
+
+       public function create(TagNode $node) {
+               $sem = $this->getChangeSemantic($node->getQName());
+               if (0 == strcasecmp($node->getQName(),'a')){
+                       return new AnchorToString($node, $sem);
+               }
+               if (0 == strcasecmp($node->getQName(),'img')){
+                       return new NoContentTagToString($node, $sem);
+               }
+               return new TagToString($node, $sem);
+       }
+
+       protected function getChangeSemantic($qname) {
+               if (array_key_exists(strtolower($qname),self::$containerTags)){
+                       return self::MOVED;
+               }
+               if (array_key_exists(strtolower($qname),self::$styleTags)){
+                       return self::STYLE;
+               }
+               return self::UNKNOWN;
+       }
+}
+
+class TagToString {
+
+       protected $node;
+
+       protected $sem;
+
+       function __construct(TagNode $node, $sem) {
+               $this->node = $node;
+               $this->sem = $sem;
+       }
+
+       public function getDescription() {
+               return $this->getString('diff-' . $this->node->getQName());
+       }
+
+       public function getRemovedDescription(ChangeText $txt) {
+
+               if ($this->sem == TagToStringFactory::MOVED) {
+                       $txt->addText($this->getMovedOutOf() . ' ' . strtolower($this->getArticle()) . ' ');
+                       $txt->addHtml('<b>');
+                       $txt->addText(strtolower($this->getDescription()));
+                       $txt->addHtml('</b>');
+               } else if ($this->sem == TagToStringFactory::STYLE) {
+                       $txt->addHtml('<b>');
+                       $txt->addText($this->getDescription());
+                       $txt->addHtml('</b>');
+                       $txt->addText(' ' . strtolower($this->getStyleRemoved()));
+               } else {
+                       $txt->addHtml('<b>');
+                       $txt->addText($this->getDescription());
+                       $txt->addHtml('</b>');
+                       $txt->addText(' ' . strtolower($this->getRemoved()));
+               }
+               $this->addAttributes($txt, $this->node->getAttributes());
+               $txt->addText('.');
+       }
+
+       public function getAddedDescription(ChangeText $txt) {
+
+               if ($this->sem == TagToStringFactory::MOVED) {
+                       $txt->addText($this->getMovedTo() . ' ' . strtolower($this->getArticle()) . ' ');
+                       $txt->addHtml('<b>');
+                       $txt->addText(strtolower($this->getDescription()));
+                       $txt->addHtml('</b>');
+               } else if ($this->sem == TagToStringFactory::STYLE) {
+                       $txt->addHtml('<b>');
+                       $txt->addText($this->getDescription());
+                       $txt->addHtml('</b>');
+                       $txt->addText(' ' . strtolower($this->getStyleAdded()));
+               } else {
+                       $txt->addHtml('<b>');
+                       $txt->addText($this->getDescription());
+                       $txt->addHtml('</b>');
+                       $txt->addText(' ' . strtolower($this->getAdded()));
+               }
+               $this->addAttributes($txt, $this->node->getAttributes());
+               $txt->addText('.');
+       }
+
+       protected function getMovedTo() {
+               return $this->getString('diff-movedto');
+       }
+
+       protected function getStyleAdded() {
+               return $this->getString('diff-styleadded');
+       }
+
+       protected function getAdded() {
+               return $this->getString('diff-added');
+       }
+
+       protected function getMovedOutOf() {
+               return $this->getString('diff-movedoutof');
+       }
+
+       protected function getStyleRemoved() {
+               return $this->getString('diff-styleremoved');
+       }
+
+       protected function getRemoved() {
+               return $this->getString('diff-removed');
+       }
+
+       protected function addAttributes(ChangeText $txt, array $attributes) {
+               if (sizeof($attributes) < 1)
+               return;
+
+               $keys = array_keys($attributes);
+
+               $txt->addText(' ' . strtolower($this->getWith()) . ' '
+               . $this->translateArgument($keys[0]) . ' '
+               . $attributes[$keys[0]]);
+               for ($i = 1; $i < sizeof($attributes) - 1; $i++) {
+                       $txt->addText(', ' . $this->translateArgument($keys[$i]) . ' '
+                       . $attributes[$keys[$i]]);
+               }
+               if (sizeof($attributes) > 1) {
+                       $txt->addText(' '
+                       . strtolower($this->getAnd())
+                       . ' '
+                       . $this->translateArgument($keys[sizeof($attributes) - 1]) . ' '
+                       . $attributes[$keys[sizeof($attributes) - 1]]);
+               }
+       }
+
+       private function getAnd() {
+               return $this->getString('diff-and');
+       }
+
+       private function getWith() {
+               return $this->getString('diff-with');
+       }
+
+       protected function translateArgument($name) {
+               if (0 == strcasecmp($name,'src'))
+               return strtolower($this->getSource());
+               if (0 == strcasecmp($name,'width'))
+               return strtolower($this->getWidth());
+               if (0 == strcasecmp($name,'height'))
+               return strtolower($this->getHeight());
+               return $name;
+       }
+
+       private function getHeight() {
+               return $this->getString('diff-height');
+       }
+
+       private function getWidth() {
+               return $this->getString('diff-width');
+       }
+
+       protected function getSource() {
+               return $this->getString('diff-source');
+       }
+
+       protected function getArticle() {
+               return $this->getString('diff-' . $this->node->getQName() . '-article');
+       }
+
+       public static $bundle = array(
+    'diff-movedto' => 'Moved to',
+       'diff-styleadded' => 'Style added',
+       'diff-added' => 'Added',
+       'diff-changedto' => 'Changed to',
+       'diff-movedoutof' => 'Moved out of',
+       'diff-styleremoved' => 'Style removed',
+       'diff-removed' => 'Removed',
+       'diff-changedfrom' => 'Changed from',
+       'diff-source' => 'Source',
+       'diff-withdestination' => 'With destination',
+       'diff-and' => 'And',
+       'diff-with' => 'With',
+       'diff-width' => 'Width',
+       'diff-height' => 'Height',
+       'diff-html-article' => 'A',
+       'diff-html' => 'Html page',
+       'diff-body-article' => 'A',
+       'diff-body' => 'Html document',
+       'diff-p-article' => 'A',
+       'diff-p' => 'Paragraph',
+       'diff-blockquote-article' => 'A',
+       'diff-blockquote' => 'Quote',
+       'diff-h1-article' => 'A',
+       'diff-h1' => 'Heading (level 1)',
+       'diff-h2-article' => 'A',
+       'diff-h2' => 'Heading (level 2)',
+       'diff-h3-article' => 'A',
+       'diff-h3' => 'Heading (level 3)',
+       'diff-h4-article' => 'A',
+       'diff-h4' => 'Heading (level 4)',
+       'diff-h5-article' => 'A',
+       'diff-h5' => 'Heading (level 5)',
+       'diff-pre-article' => 'A',
+       'diff-pre' => 'Preformatted block',
+       'diff-div-article' => 'A',
+       'diff-div' => 'Division',
+       'diff-ul-article' => 'An',
+       'diff-ul' => 'Unordered list',
+       'diff-ol-article' => 'An',
+       'diff-ol' => 'Ordered list',
+       'diff-li-article' => 'A',
+       'diff-li' => 'List item',
+       'diff-table-article' => 'A',
+       'diff-table' => 'Table',
+       'diff-tbody-article' => 'A',
+       'diff-tbody' => "Table's content",
+       'diff-tr-article' => 'A',
+       'diff-tr' => 'Row',
+       'diff-td-article' => 'A',
+       'diff-td' => 'Cell',
+       'diff-th-article' => 'A',
+       'diff-th' => 'Header',
+       'diff-br-article' => 'A',
+       'diff-br' => 'Break',
+       'diff-hr-article' => 'A',
+       'diff-hr' => 'Horizontal rule',
+       'diff-code-article' => 'A',
+       'diff-code' => 'Computer code block',
+       'diff-dl-article' => 'A',
+       'diff-dl' => 'Definition list',
+       'diff-dt-article' => 'A',
+       'diff-dt' => 'Definition term',
+       'diff-dd-article' => 'A',
+       'diff-dd' => 'Definition',
+       'diff-input-article' => 'An',
+       'diff-input' => 'Input',
+       'diff-form-article' => 'A',
+       'diff-form' => 'Form',
+       'diff-img-article' => 'An',
+       'diff-img' => 'Image',
+       'diff-span-article' => 'A',
+       'diff-span' => 'Span',
+       'diff-a-article' => 'A',
+       'diff-a' => 'Link',
+       'diff-i' => 'Italics',
+       'diff-b' => 'Bold',
+       'diff-strong' => 'Strong',
+       'diff-em' => 'Emphasis',
+       'diff-font' => 'Font',
+       'diff-big' => 'Big',
+       'diff-del' => 'Deleted',
+       'diff-tt' => 'Fixed width',
+       'diff-sub' => 'Subscript',
+       'diff-sup' => 'Superscript',
+       'diff-strike' => 'Strikethrough'
+       );
+
+       public function getString($key) {
+               return self::$bundle[$key];
+       }
+}
+
+class NoContentTagToString extends TagToString {
+
+       function __construct(TagNode $node, $sem) {
+               parent::__construct($node, $sem);
+       }
+
+       public function getAddedDescription(ChangeText $txt) {
+               $txt.addText($this->getChangedTo() . ' ' + strtolower($this->getArticle()) . ' ');
+               $txt.addHtml('<b>');
+               $txt.addText(strtolower($this->getDescription()));
+               $txt.addHtml('</b>');
+
+               $this->addAttributes($txt, $this->node->getAttributes());
+               $txt.addText('.');
+       }
+
+       private function getChangedTo() {
+               return $this->getString('diff-changedto');
+       }
+
+       public function getRemovedDescription(ChangeText $txt) {
+               $txt.addText($this->getChangedFrom() . ' ' + strtolower($this->getArticle()) . ' ');
+               $txt.addHtml('<b>');
+               $txt.addText(strtolower($this->getDescription()));
+               $txt.addHtml('</b>');
+
+               $this->addAttributes($txt, $this->node->getAttributes());
+               $txt.addText('.');
+       }
+
+       private function getChangedFrom() {
+               return $this->getString('diff-changedfrom');
+       }
+}
+
+class AnchorToString extends TagToString {
+
+       function __construct(TagNode $node, $sem) {
+               parent::__construct($node, $sem);
+       }
+
+       protected function addAttributes(ChangeText $txt, array $attributes) {
+               if (array_key_exists('href',$attributes)) {
+                       $txt->addText(' ' . strtolower($this->getWithDestination()) . ' ' . $attributes['href']);
+                       unset($attributes['href']);
+               }
+               parent::addAttributes($txt, $attributes);
+       }
+
+       private function getWithDestination() {
+               return $this->getString('diff-withdestination');
+       }
+
+}
+
+/**
+ * Takes a branch root and creates an HTML file for it.
+ */
+class HTMLOutput{
+
+       private $prefix;
+       private $handler;
+
+       function __construct($prefix, $handler) {
+               $this->prefix = $prefix;
+               $this->handler = $handler;
+       }
+
+       public function parse(TagNode $node) {
+
+               if (0 != strcasecmp($node->getQName(),'img') && 0 != strcasecmp($node->getQName(),'body')) {
+                       $this->handler->startElement($node->getQName(), $node->getAttributes());
+               }
+
+               $newStarted = false;
+               $remStarted = false;
+               $changeStarted = false;
+               $changeTXT = '';
+
+               foreach ($node->children as $child) {
+                       if ($child instanceof TagNode) {
+                               if ($newStarted) {
+                                       $this->handler->endElement('span');
+                                       $newStarted = false;
+                               } else if ($changeStarted) {
+                                       $this->handler->endElement('span');
+                                       $changeStarted = false;
+                               } else if ($remStarted) {
+                                       $this->handler->endElement('span');
+                                       $remStarted = false;
+                               }
+                               $this->parse($child);
+                       } else if ($child instanceof TextNode) {
+                               $mod = $child->getModification();
+
+                               if ($newStarted && ($mod->getType() != Modification::ADDED || $mod->isFirstOfID())) {
+                                       $this->handler->endElement('span');
+                                       $newStarted = false;
+                               } else if ($changeStarted && ($mod->getType() != Modification::CHANGED || $mod->getChanges() != $changeTXT || $mod->isFirstOfID())) {
+                                       $this->handler->endElement('span');
+                                       $changeStarted = false;
+                               } else if ($remStarted && ($mod->getType() != Modification::REMOVED || $mod ->isFirstOfID())) {
+                                       $this->handler->endElement('span');
+                                       $remStarted = false;
+                               }
+
+                               // no else because a removed part can just be closed and a new
+                               // part can start
+                               if (!$newStarted && $mod->getType() == Modification::ADDED) {
+                                       $attrs = array('class'=>'diff-html-added');
+                                       if ($mod->isFirstOfID()) {
+                                               $attrs['id'] = 'added-' . $this->prefix . '-' . $mod->getID();
+                                       }
+                                       $this->addAttributes($mod, $attrs);
+                                       $attrs['onclick'] = 'return tipA(constructToolTipA(this));';
+                                       $this->handler->startElement('span', $attrs);
+                                       $newStarted = true;
+                               } else if (!$changeStarted && $mod->getType() == Modification::CHANGED) {
+                                       $attrs = array('class'=>'diff-html-changed');
+                                       if ($mod->isFirstOfID()) {
+                                               $attrs['id'] = 'changed-' . $this->prefix . '-' . $mod->getID();
+                                       }
+                                       $this->addAttributes($mod, $attrs);
+                                       $attrs['onclick'] = 'return tipC(constructToolTipC(this));';
+                                       $this->handler->startElement('span', $attrs);
+                                       
+                                       //tooltip
+                                       $this->handler->startElement('span', array('class'=>'tip'));
+                                       $this->handler->characters($mod->getChanges());
+                                       $this->handler->endElement('span');
+                       
+                                       $changeStarted = true;
+                                       $changeTXT = $mod->getChanges();
+                               } else if (!$remStarted && $mod->getType() == Modification::REMOVED) {
+                                       $attrs = array('class'=>'diff-html-removed');
+                                       if ($mod->isFirstOfID()) {
+                                               $attrs['id'] = 'removed-' . $this->prefix . '-' . $mod->getID();
+                                       }
+                                       $this->addAttributes($mod, $attrs);
+                                       $attrs['onclick'] = 'return tipR(constructToolTipR(this));';
+                                       $this->handler->startElement('span', $attrs);
+                                       $remStarted = true;
+                               }
+
+                               $chars = $child->getText();
+
+                               if ($child instanceof ImageNode) {
+                                       $this->writeImage($child);
+                               } else {
+                                       $this->handler->characters($chars);
+                               }
+
+                       }
+               }
+
+               if ($newStarted) {
+                       $this->handler->endElement('span');
+                       $newStarted = false;
+               } else if ($changeStarted) {
+                       $this->handler->endElement('span');
+                       $changeStarted = false;
+               } else if ($remStarted) {
+                       $this->handler->endElement('span');
+                       $remStarted = false;
+               }
+
+               if (0 != strcasecmp($node->getQName(),'img')
+               && 0 != strcasecmp($node->getQName(),'body'))
+               $this->handler->endElement($node->getQName());
+       }
+
+       private function writeImage(ImageNode  $imgNode){
+               $attrs = $imgNode->getAttributes();
+               if ($imgNode->getModification()->getType() == Modification::REMOVED)
+               $attrs['changeType']='diff-removed-image';
+               else if ($imgNode->getModification()->getType() == Modification::ADDED)
+               $attrs['changeType'] = 'diff-added-image';
+               $attrs['onload'] = 'updateOverlays()';
+               $attrs['onError'] = 'updateOverlays()';
+               $attrs['onAbort'] = 'updateOverlays()';
+               $this->handler->startElement('img', $attrs);
+               $this->handler->endElement('img');
+       }
+
+       private function addAttributes(Modification $mod, /*array*/ &$attrs) {
+               if (is_null($mod->getPrevious())) {
+                       $previous = 'first-' . $this->prefix;
+               } else {
+                       $previous = Modification::typeToString($mod->getPrevious()->getType()) . '-' . $this->prefix . '-'
+                       . $mod->getPrevious()->getID();
+               }
+               $attrs['previous'] = $previous;
+
+               $changeId = Modification::typeToString($mod->getType()) . '-' + $this->prefix . '-' . $mod->getID();
+               $attrs['changeId'] = $changeId;
+
+               if (is_null($mod->getNext())) {
+                       $next = 'last-' . $this->prefix;
+               } else {
+                       $next = Modification::typeToString($mod->getNext()->getType()) . '-' . $this->prefix . '-'
+                       . $mod->getNext()->getID();
+               }
+               $attrs['next'] = $next;
+       }
+}
+
+class EchoingContentHandler{
+
+       function startElement($qname, /*array*/ $arguments){
+               echo '<'.$qname;
+               foreach($arguments as $key => $value){
+                       echo ' '.$key.'="'.Sanitizer::encodeAttribute($value).'"';
+               }
+               echo '>';
+       }
+
+       function endElement($qname){
+               echo '</'.$qname.'>';
+       }
+
+       function characters($chars){
+               echo $chars;
+       }
+
+}
+
+class DelegatingContentHandler{
+       
+       private $delegate;
+       
+       function __construct($delegate){
+               $this->delegate=$delegate;
+       }
+
+       function startElement($qname, /*array*/ $arguments){
+               $this->delegate->addHtml('<'.$qname) ;
+               foreach($arguments as $key => $value){
+                       $this->delegate->addHtml(' '.$key.'="'. Sanitizer::encodeAttribute($value) .'"');
+               }
+               $this->delegate->addHtml('>');
+       }
+
+       function endElement($qname){
+               $this->delegate->addHtml('</'.$qname.'>');
+       }
+
+       function characters($chars){
+               $this->delegate->addHtml( $chars );
+       }
+
+}
index b262222..d2075ba 100644 (file)
@@ -74,3 +74,66 @@ table.diff td div {
           table: */
        /* overflow: visible; */
 }
+
+/*
+ * Styles for the HTML Diff
+ */
+span.diff-html-added {
+  font-size: 100%;
+  background-color: #20ff20
+}
+
+span.diff-html-removed {
+  font-size: 100%;
+  text-decoration: line-through;
+  background-color: #ff2020
+}
+
+span.diff-html-changed {
+  background: url(images/diffunderline.gif) bottom repeat-x;
+  *background-color: #c6c6fd; /* light blue */
+}
+
+span.diff-html-added img{
+ border: 5px solid #ccffcc;
+}
+
+span.diff-html-removed img{
+ border: 5px solid #fdc6c6;
+}
+
+span.diff-html-changed img{
+ border: 5px dotted #000099;
+}
+
+span.diff-html-changed  {
+  position: relative;   /* this is key */
+  cursor: help;
+}
+span.diff-html-changed span.tip {
+  display: none;        /* so is this */
+}
+
+/* tooltip will display on :hover event */
+span.diff-html-changed:hover span.tip {
+  display: block;
+  z-index: 95;
+  position: absolute;
+  top: 2.5em;
+  left: 0;
+  width: auto;
+  line-height: 1.2em;
+  padding: 3px 7px 4px 6px;
+  border: 1px solid #336;
+  background-color: #f7f7ee;
+  font-family: arial, helvetica, sans-serif;
+  font-size: 10px;
+  font-weight: normal;
+  color: #000;
+  text-align: left;
+}
+
+
index e80a895..3362f48 100644 (file)
@@ -17,4 +17,16 @@ if (navigator && navigator.product == "Gecko" && navigator.productSub < "2002113
        lastSheet.insertRule(
                "table.diff td div { overflow: visible; }",
                lastSheet.cssRules.length);
-}
\ No newline at end of file
+}
+    
+function tipA(content){
+    return false;
+}
+
+function tipR(content){
+    return false;
+}
+
+function tipC(content){
+    return false;
+}
diff --git a/skins/common/images/diffunderline.gif b/skins/common/images/diffunderline.gif
new file mode 100644 (file)
index 0000000..e062c56
Binary files /dev/null and b/skins/common/images/diffunderline.gif differ