From 89f8141a7f08f3da7c3fad86cbf70dc407693f60 Mon Sep 17 00:00:00 2001 From: Guy Van den Broeck Date: Fri, 15 Aug 2008 11:46:46 +0000 Subject: [PATCH] New implementation of wikidiff3 algorithm replacing the previous one and basic HTML diffing support, both integrated with DifferenceEngine --- includes/Diff.php | 826 +++++----- includes/DifferenceEngine.php | 233 ++- includes/HTMLDiff.php | 1997 +++++++++++++++++++++++++ skins/common/diff.css | 63 + skins/common/diff.js | 14 +- skins/common/images/diffunderline.gif | Bin 0 -> 52 bytes 6 files changed, 2686 insertions(+), 447 deletions(-) create mode 100644 includes/HTMLDiff.php create mode 100644 skins/common/images/diffunderline.gif diff --git a/includes/Diff.php b/includes/Diff.php index 7c4ebcdefa..df8293b5a0 100644 --- a/includes/Diff.php +++ b/includes/Diff.php @@ -18,454 +18,488 @@ */ /** - * Implementation of the Diff algorithm. - * - * DO NOT USE ON PRODUCTION SYSTEMS - * - * The algorithm is based on the O(NP) LCS algorithm as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm" - * and extended with my own ideas. + * This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which + * in turn is based on Myers' "An O(ND) difference algorithm and its variations" + * (http://citeseer.ist.psu.edu/myers86ond.html) with range compression (see Wu et al.'s + * "An O(NP) Sequence Comparison Algorithm"). * - * @return array($from_removed, $to_added) - * TRUE if the token was removed or added. + * This implementation supports an upper bound on the excution time. + * + * Complexity: O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M + N) space * * @author Guy Van den Broeck + * */ -function wikidiff3_diff( /*array*/ $from, /*array*/ $to, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){ - wfProfileIn( __METHOD__ ); - - $m = sizeof($from); - $n = sizeof($to); - - $result_from = array_fill(0,$m,TRUE); - $result_to = array_fill(0,$n,TRUE); - - //reduce the complexity for the next step (intentionally done twice) - //remove common tokens at the start - $i=0; - while($i<$m && $i<$n && $from[$i]===$to[$i]){ - $result_from[$i] = $result_to[$i] = FALSE; - unset($from[$i],$to[$i]); - ++$i; - } +class WikiDiff3{ - //remove common tokens at the end - $j=1; - while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){ - $result_from[$m-$j] = $result_to[$n-$j] = FALSE; - unset($from[$m-$j],$to[$n-$j]); - ++$j; + //Input variables + private $from; + private $to; + private $m; + private $n; + + private $tooLong; + private $powLimit; + + //State variables + private $maxDifferences; + + //Output variables + public $length; + public $added; + public $removed; + public $heuristicUsed; + + function __construct($tooLong = 2000000, $powLimit = 1.45){ + $this->tooLong = $tooLong; + $this->powLimit = $powLimit; } - $newFrom = $newFromIndex = $newTo = $newToIndex = array(); + public function diff(/*array*/ $from, /*array*/ $to){ + $m = sizeof($from); + $n = sizeof($to); + + $this->heuristicUsed = FALSE; + + $result_from = $m>0?array_fill(0,$m,true):array(); + $result_to = $n>0?array_fill(0,$n,true):array(); + + //reduce the complexity for the next step (intentionally done twice) + //remove common tokens at the start + $i=0; + while($i<$m && $i<$n && $from[$i]===$to[$i]){ + $result_from[$i] = $result_to[$i] = false; + unset($from[$i],$to[$i]); + ++$i; + } - //remove tokens not in both sequences - $shared = array_fill_keys($from,FALSE); - foreach($to as $index => $el){ - if(array_key_exists($el,$shared)){ - //keep it - $shared[$el] = TRUE; - $newTo[] = $el; - $newToIndex[] = $index; + //remove common tokens at the end + $j=1; + while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){ + $result_from[$m-$j] = $result_to[$n-$j] = false; + unset($from[$m-$j],$to[$n-$j]); + ++$j; } - } - foreach($from as $index => $el){ - if($shared[$el]){ - //keep it - $newFrom[] = $el; - $newFromIndex[] = $index; + + $newFrom = $newFromIndex = $newTo = $newToIndex = array(); + + //remove tokens not in both sequences + $shared = array_fill_keys($from,false); + foreach($to as $index => $el){ + if(array_key_exists($el,$shared)){ + //keep it + $this->to[] = $el; + $shared[$el] = true; + $newToIndex[] = $index; + } + } + foreach($from as $index => $el){ + if($shared[$el]){ + //keep it + $this->from[] = $el; + $newFromIndex[] = $index; + } } - } - unset($from, $to, $shared); + unset($shared); - $m2 = sizeof($newFrom); - $n2 = sizeof($newTo); - $offsetx = $offsety = 0; - $from_inLcs = new InLcs($m2); - $to_inLcs = new InLcs($n2); + $this->m = sizeof($this->from); + $this->n = sizeof($this->to); + $this->removed = $this->m>0?array_fill(0,$this->m,true):array(); + $this->added = $this->n>0?array_fill(0,$this->n,true):array(); - wikidiff3_diffPart($newFrom, $newTo, $from_inLcs, $to_inLcs, $m2, $n2, $offsetx, $offsety, $m2, $boundRunningTime, $max_NP_before_bound); + $this->longestCommonSubsequence(); - foreach($from_inLcs->inLcs as $key => $in){ - if($in){ - $result_from[$newFromIndex[$key]] = FALSE; + $this->m = $m; + $this->n = $n; + $this->length += $i+$j-1; + + foreach($this->removed as $key => $removed){ + if(!$removed){ + $result_from[$newFromIndex[$key]] = false; + } } - } - foreach($to_inLcs->inLcs as $key => $in){ - if($in){ - $result_to[$newToIndex[$key]] = FALSE; + foreach($this->added as $key => $added){ + if(!$added){ + $result_to[$newToIndex[$key]] = false; + } } + unset($this->added, $this->removed); + return array($result_from, $result_to); } - - wfProfileOut( __METHOD__ ); - return array($result_from, $result_to); -} -function wikidiff3_diffPart( /*array*/ $a, /*array*/ $b, InLcs $a_inLcs, InLcs $b_inLcs, $m, $n, $offsetx, $offsety, $bestKnownLcs, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){ - if($bestKnownLcs==0 || $m==0 || $n==0){ - return; - } - - if($m>$n){ - return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound); - } + public function longestCommonSubsequence() { + if ($this->m == 0 || $this->n == 0) { + $this->length = 0; + return; + } - $a_inLcs_sym = &$a_inLcs->inLcs; - $b_inLcs_sym = &$b_inLcs->inLcs; - - //remove common tokens at the start - while($m>0 && $n>0 && $a[0]===$b[0]){ - $a_inLcs_sym[$offsetx] = $b_inLcs_sym[$offsety] = TRUE; - ++$offsetx; ++$offsety; - --$m; --$n; - --$bestKnownLcs; - array_shift($a); - array_shift($b); - } + $this->maxDifferences = ceil(($this->m + $this->n) / 2.0); + if ($this->m * $this->n > $this->tooLong) { + // limit complexity to D^POW_LIMIT for long sequences + $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0)); + wfDebug("Limiting max number of differences to $this->maxDifferences"); + } - //remove common tokens at the end - while($m>0 && $n>0 && $a[$m-1]===$b[$n-1]){ - $a_inLcs_sym[$offsetx+$m-1] = $b_inLcs_sym[$offsety+$n-1] = TRUE; - --$m; --$n; - --$bestKnownLcs; - array_pop($a); - array_pop($b); - } + /* + * The common prefixes and suffixes are always part of some LCS, include + * them now to reduce our search space + */ + $max = min($this->m, $this->n); + for ($forwardBound = 0; $forwardBound < $max + && $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) { + $this->removed[$forwardBound] = $this->added[$forwardBound] = false; + } - if($bestKnownLcs==0 || $m==0 || $n==0){ - return; - } - if($m>$n){ - return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound); + $backBoundL1 = $this->m - 1; + $backBoundL2 = $this->n - 1; + + while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound + && $this->from[$backBoundL1] === $this->to[$backBoundL2]) { + $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false; + } + + $temp = array_fill(0,$this->m + $this->n + 1, 0); + $V = array($temp,$temp); + $snake = array(0,0,0); + + $this->length = $forwardBound + $this->m - $backBoundL1 - 1 + + $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2, + $V, $snake); } - $delta = $n-$m; - $delta_plus_1 = $delta+1; - $delta_min_1 = $delta-1; - - $fpForw = $snakeBeginForw = $snakeEndForw = array_fill( 0, $delta_plus_1, -1); - $lcsSizeForw = $snakekForw = $lcsSizeBack = $snakekBack = array_fill( 0, $delta_plus_1, 0); - $fpBack = $snakeBeginBack = $snakeEndBack = array_fill( 0, $delta_plus_1, $n); - $overlap = $delta>1 ? array_fill( 1, $delta_min_1, FALSE) : array(); - $bestLcsLength = $bestLcsLengthTop = $bestLcsLengthBottom = -1; - - if($boundRunningTime){ - $maxp_before_bound = max((-$delta+sqrt($delta*$delta+($max_NP_before_bound<<2)))>>1,2); - if($maxp_before_bound>=$m){ - $boundRunningTime = false; - unset($maxp_before_bound); + private function lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) { + // check that both sequences are non-empty + if ($bottoml1 > $topl1 || $bottoml2 > $topl2) { + return 0; } + + $d = $this->find_middle_snake($bottoml1, $topl1, $bottoml2, $topl2, $V, $snake); + + // need to store these so we don't lose them when they're overwritten by + // the recursion + $len = $snake[2]; + $startx = $snake[0]; + $starty = $snake[1]; + + // the middle snake is part of the LCS, store it + for ($i = 0; $i < $len; ++$i) { + $this->removed[$startx + $i] = $this->added[$starty + $i] = false; + } + + if ($d > 1) { + return $len + + $this->lcs_rec($bottoml1, $startx - 1, $bottoml2, $starty - 1, $V, $snake) + + $this->lcs_rec($startx + $len, $topl1, $starty + $len, $topl2, $V, $snake); + } else if ($d == 1) { + /* + * In this case the sequences differ by exactly 1 line. We have + * already saved all the lines after the difference in the for loop + * above, now we need to save all the lines before the difference. + */ + $max = min($startx - $bottoml1, $starty - $bottoml2); + for ($i = 0; $i < $max; ++$i) { + $this->removed[$bottoml1 + $i] = $this->added[$bottoml2 + $i] = false; + } + return $max + $len; + } + return $len; } - $p=-1; - $m_min_1 = $m-1; - $maxp=$m_min_1-$bestLcsLength; - - while($p<$maxp){ - - if($boundRunningTime && $p>$maxp_before_bound){ - // bound the running time by stopping early - if($bestLcsLength>=0){ - // accept the best LCS thusfar - break; - }else{ - $bestLcsProgressForw = $bestkForw = 0; - foreach($lcsSizeForw as $k => $localLcsProgress){ - if($localLcsProgress>$bestLcsProgressForw || ($localLcsProgress==$bestLcsProgressForw && $bestkForw > $k)){ - $bestLcsProgressForw = $localLcsProgress; - $bestkForw = $k; + private function find_middle_snake($bottoml1, $topl1, $bottoml2,$topl2, &$V, &$snake) { + $from = &$this->from; + $to = &$this->to; + $V0 = &$V[0]; + $V1 = &$V[1]; + $snake0 = &$snake[0]; + $snake1 = &$snake[1]; + $snake2 = &$snake[2]; + $bottoml1_min_1 = $bottoml1-1; + $bottoml2_min_1 = $bottoml2-1; + $N = $topl1 - $bottoml1_min_1; + $M = $topl2 - $bottoml2_min_1; + $delta = $N - $M; + $maxabsx = $N+$bottoml1; + $maxabsy = $M+$bottoml2; + $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2) + + //value_to_add_forward: a 0 or 1 that we add to the start + // offset + // to make it odd/even + if (($M & 1) == 1) { + $value_to_add_forward = 1; + } else { + $value_to_add_forward = 0; + } + + if (($N & 1) == 1) { + $value_to_add_backward = 1; + } else { + $value_to_add_backward = 0; + } + + $start_forward = -$M; + $end_forward = $N; + $start_backward = -$N; + $end_backward = $M; + + $limit_min_1 = $limit-1; + $limit_plus_1 = $limit+1; + + $V0[$limit_plus_1] = 0; + $V1[$limit_min_1] = $N; + $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2) + + if (($delta & 1) == 1) { + for ($d = 0; $d <= $limit; ++$d) { + $start_diag = max($value_to_add_forward + $start_forward, -$d); + $end_diag = min($end_forward, $d); + $value_to_add_forward = 1 - $value_to_add_forward; + + // compute forward furthest reaching paths + for ($k = $start_diag; $k <= $end_diag; $k += 2) { + if ($k == -$d + || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) { + $x = $V0[$limit_plus_1 + $k]; + } else { + $x = $V0[$limit_min_1 + $k] + 1; } - } - $bestLcsProgressBack = $bestkBack = 0; - foreach($lcsSizeBack as $k => $localLcsProgress){ - if($localLcsProgress>$bestLcsProgressBack || ($localLcsProgress==$bestLcsProgressBack && $bestkBack < $k)){ - $bestLcsProgressBack = $localLcsProgress; - $bestkBack = $k; + + $absx = $snake0 = $x + $bottoml1; + $absy = $snake1 = $x - $k + $bottoml2; + + while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) { + ++$absx; + ++$absy; } - } - if($fpForw[$bestkForw]-$bestkForw>$fpBack[$bestkBack]-$bestkBack){ - // This is hard, maybe try some more? Can this even happen? A solution will be found soon anyway. - }else{ - // lets do this - $topSnakeStart = $snakeBeginForw[$bestkForw]; - $topSnakeEnd = $snakeEndForw[$bestkForw]; - $topSnakek = $snakekForw[$bestkForw]; - $bestLcsLengthTop = $lcsSizeForw[$bestkBack] + $topSnakeStart - $topSnakeEnd; - - $bottomSnakeStart = $snakeEndBack[$bestkBack]+1; - $bottomSnakeEnd = $snakeBeginBack[$bestkBack]+1; - $bottomSnakek = $snakekBack[$bestkBack]; - $bestLcsLengthBottom = $lcsSizeBack[$bestkBack] + $bottomSnakeStart - $bottomSnakeEnd; - - if($bottomSnakeStart-$topSnakeEnd>$n*0.6 && ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek)>$m*0.6){ - // cut the sequence from both sides (there isn't enough progress, 60% of the sequence is not covered) - // also diff the middle now - if($bottomSnakeEnd>($fpForw[$bestkForw]>>1)){ - // do the middle diff between the snakes - wfDebug(" Limiting diff run time from both sides, middle between snakes\n"); - $m_t = ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek); - $n_t = $bottomSnakeStart-$topSnakeEnd; - $a_t = array_slice($a, $topSnakeEnd-$topSnakek, $m_t); - $b_t = array_slice($b, $topSnakeEnd, $n_t); - $offsetx_t = $offsetx+($topSnakeEnd-$topSnakek); - $offsety_t = $offsety+$topSnakeEnd; - }else{ - // the snake is too early in the sequence, do the middle diff between endpoints of progress - wfDebug(" Limiting diff run time from both sides, middle between endpoints\n"); - $m_t = ($fpBack[$bestkBack]+1-$bestkBack)-($fpForw[$bestkForw]-$bestkForw); - $n_t = $fpBack[$bestkBack]+1-$fpForw[$bestkForw]; - $a_t = array_slice($a, $fpForw[$bestkForw]-$bestkForw, $m_t); - $b_t = array_slice($b, $fpForw[$bestkForw], $n_t); - $offsetx_t = $offsetx+($fpForw[$bestkForw]-$bestkForw); - $offsety_t = $offsety+$fpForw[$bestkForw]; - } - wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $n_t, $offsetx_t, $offsety_t, $m_t, $boundRunningTime, $max_NP_before_bound); - }else if(min($n-$bottomSnakeStart, $m-($bottomSnakeStart-$bottomSnakek))>min($topSnakeEnd, $topSnakeEnd-$topSnakek)){ - // the bottom snake is more interesting - wfDebug("Limiting diff run time from bottom side\n"); - $topSnakeStart = $bottomSnakeStart; - $topSnakeEnd = $bottomSnakeEnd; - $topSnakek = $bottomSnakek; - $bestLcsLengthTop = $topSnakeEnd-$topSnakek; - $bottomSnakeStart = $bottomSnakeEnd; - }else{ - // the top snake is more interesting - wfDebug(" Limiting diff run time from top side\n"); - $bottomSnakeStart = $topSnakeEnd; - $bottomSnakeEnd = $topSnakeEnd; - $bottomSnakek = $topSnakek; - $bestLcsLengthBottom = $m-($bottomSnakeEnd-$bottomSnakek); + $x = $absx-$bottoml1; + + $snake2 = $absx -$snake0; + $V0[$limit + $k] = $x; + if ($k >= $delta - $d + 1 && $k <= $delta + $d - 1 + && $x >= $V1[$limit + $k - $delta]) { + return 2 * $d - 1; + } + + // check to see if we can cut down the diagonal range + if ($x >= $N && $end_forward > $k - 1) { + $end_forward = $k - 1; + } else if ($absy-$bottoml2 >= $M) { + $start_forward = $k + 1; + $value_to_add_forward = 0; } - break; - } - } - } - ++$p; - $overlap[-$p] = $overlap[$delta+$p] = FALSE; - - $min_p_min_1 = -$p-1; - $delta_plus_1_plus_p = $delta_plus_1+$p; - - // forward - $fpForw[$min_p_min_1] = $snakeEndForw[$min_p_min_1] = $snakekForw[$min_p_min_1] = $snakeBeginForw[$min_p_min_1] = $fpForw[$delta_plus_1_plus_p] = - $snakeBeginForw[$delta_plus_1_plus_p] = $snakeEndForw[$delta_plus_1_plus_p] = $snakekForw[$delta_plus_1_plus_p] = -1; - $lcsSizeForw[$min_p_min_1] = $lcsSizeForw[$delta_plus_1_plus_p] = 0; - - $k=-$p; - do { - $k_plus_1 = $k+1; - $k_min_1 = $k-1; - - $fpBelow = $fpForw[$k_min_1]+1; - $fpAbove = $fpForw[$k_plus_1]; - $y = &$fpForw[$k]; - if($fpBelow>$fpAbove){ - $oldy = $y = $fpBelow; - $lcsSizeForw[$k] = $lcsSizeForw[$k_min_1]; - $snakeBeginForw[$k] = $snakeBeginForw[$k_min_1]; - $snakeEndForw[$k] = $snakeEndForw[$k_min_1]; - $snakekForw[$k] = $snakekForw[$k_min_1]; - }else{ - $oldy = $y = $fpAbove; - $lcsSizeForw[$k] = $lcsSizeForw[$k_plus_1]; - $snakeBeginForw[$k] = $snakeBeginForw[$k_plus_1]; - $snakeEndForw[$k] = $snakeEndForw[$k_plus_1]; - $snakekForw[$k] = $snakekForw[$k_plus_1]; - } - $x = $y-$k; - while($x < $m && $y < $n && $a[$x]===$b[$y]){ - ++$x; - ++$y; - } - if($y-$oldy>0){ - $lcsSizeForw[$k] += $y-$oldy; - $snakeBeginForw[$k] = $oldy; - $snakeEndForw[$k]= $y; - $snakekForw[$k] = $k; - } - if($y>=$fpBack[$k] && !$overlap[$k]){ - // there is overlap - $overlap[$k]=TRUE; - $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k]; - if($y>$fpBack[$k]+1){ - $snakeoverlap = $y-$fpBack[$k]-1; - $lcsLength -= $snakeoverlap; } - if($lcsLength>$bestLcsLength){ - // a better sequence found! - $bestLcsLength = $lcsLength; - - $topSnakeStart = $snakeBeginForw[$k]; - $topSnakeEnd = $snakeEndForw[$k]; - $topSnakek = $snakekForw[$k]; - - // aligned snakes bite (\ ) - // ( \ \) - $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k])); - $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart); - $bottomSnakek = $snakekBack[$k]; - - if($bottomSnakeEnd<$y){ - $bottomSnakeStart = $y; - $bottomSnakeEnd = $y; - $bottomSnakek = $k; + + $start_diag = max($value_to_add_backward + $start_backward, -$d); + $end_diag = min($end_backward, $d); + $value_to_add_backward = 1 - $value_to_add_backward; + + // compute backward furthest reaching paths + for ($k = $start_diag; $k <= $end_diag; $k += 2) { + if ($k == $d + || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) { + $x = $V1[$limit_min_1 + $k]; + } else { + $x = $V1[$limit_plus_1 + $k] - 1; } - $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]); - $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]); - if($bestKnownLcs==$lcsLength){ - $fpForw[$k]=$y; - break 2; + $y = $x - $k - $delta; + + $snake2 = 0; + while ($x > 0 && $y > 0 + && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) { + --$x; + --$y; + ++$snake2; + } + $V1[$limit + $k] = $x; + + // check to see if we can cut down our diagonal range + if ($x <= 0) { + $start_backward = $k + 1; + $value_to_add_backward = 0; + } else if ($y <= 0 && $end_backward > $k - 1) { + $end_backward = $k - 1; } - $maxp=$m_min_1-$bestLcsLength; } } - if($k<$delta_min_1){ - ++$k; - }else if($k>$delta){ - --$k; - }else if($k==$delta_min_1){ - $k = $delta+$p; - }else{ - break; - } - } while(TRUE); - - // backward - $fpBack[$min_p_min_1] = $snakeBeginBack[$min_p_min_1] = $snakeEndBack[$min_p_min_1] = $snakekBack[$min_p_min_1] = - $fpBack[$delta_plus_1_plus_p] = $snakeBeginBack[$delta_plus_1_plus_p] = $snakeEndBack[$delta_plus_1_plus_p] = $snakekBack[$delta_plus_1_plus_p] = $n; - $lcsSizeBack[$min_p_min_1] = $lcsSizeBack[$delta_plus_1_plus_p] = 0; - - $k=$delta+$p; - do { - $k_plus_1 = $k+1; - $k_min_1 = $k-1; - - $fpBelow = $fpBack[$k_min_1]; - $fpAbove = $fpBack[$k_plus_1]-1; - $y = &$fpBack[$k]; - if($fpBelow<$fpAbove){ - $oldy = $y = $fpBelow; - $lcsSizeBack[$k] = $lcsSizeBack[$k_min_1]; - $snakeBeginBack[$k] = $snakeBeginBack[$k_min_1]; - $snakeEndBack[$k] = $snakeEndBack[$k_min_1]; - $snakekBack[$k] = $snakekBack[$k_min_1]; - }else{ - $oldy = $y = $fpAbove; - $lcsSizeBack[$k] = $lcsSizeBack[$k_plus_1]; - $snakeBeginBack[$k] = $snakeBeginBack[$k_plus_1]; - $snakeEndBack[$k] = $snakeEndBack[$k_plus_1]; - $snakekBack[$k] = $snakekBack[$k_plus_1]; - } - $x = $y-$k; - while($x > -1 && $y > -1 && $a[$x]===$b[$y]){ - --$x; - --$y; - } - if($oldy-$y>0){ - $lcsSizeBack[$k] += $oldy-$y; - $snakeBeginBack[$k] = $oldy; - $snakeEndBack[$k] = $y; - $snakekBack[$k] = $k; - } - if($fpForw[$k]>=$y && !$overlap[$k]){ - // there is overlap - $overlap[$k]=TRUE; - $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k]; - if($fpForw[$k]>$y+1){ - $snakeoverlap = $fpForw[$k]-$y-1; - $lcsLength -= $snakeoverlap; + } else { + for ($d = 0; $d <= $limit; ++$d) { + $start_diag = max($value_to_add_forward + $start_forward, -$d); + $end_diag = min($end_forward, $d); + $value_to_add_forward = 1 - $value_to_add_forward; + + // compute forward furthest reaching paths + for ($k = $start_diag; $k <= $end_diag; $k += 2) { + if ($k == -$d + || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) { + $x = $V0[$limit_plus_1 + $k]; + } else { + $x = $V0[$limit_min_1 + $k] + 1; + } + + $absx = $snake0 = $x + $bottoml1; + $absy = $snake1 = $x - $k + $bottoml2; + + while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) { + ++$absx; + ++$absy; + } + $x = $absx-$bottoml1; + $snake2 = $absx -$snake0; + $V0[$limit + $k] = $x; + + // check to see if we can cut down the diagonal range + if ($x >= $N && $end_forward > $k - 1) { + $end_forward = $k - 1; + } else if ($absy-$bottoml2 >= $M) { + $start_forward = $k + 1; + $value_to_add_forward = 0; + } } - if($lcsLength>$bestLcsLength){ - // a better sequence found! - $bestLcsLength = $lcsLength; - - $topSnakeStart = $snakeBeginForw[$k]; - $topSnakeEnd = $snakeEndForw[$k]; - $topSnakek = $snakekForw[$k]; - - // aligned snakes bite (\ ) - // ( \ \) - $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k])); - $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart); - $bottomSnakek = $snakekBack[$k]; - - if($bottomSnakeEnd<$fpForw[$k]){ - $bottomSnakeStart = $fpForw[$k]; - $bottomSnakeEnd = $fpForw[$k]; - $bottomSnakek = $k; + + $start_diag = max($value_to_add_backward + $start_backward, -$d); + $end_diag = min($end_backward, $d); + $value_to_add_backward = 1 - $value_to_add_backward; + + // compute backward furthest reaching paths + for ($k = $start_diag; $k <= $end_diag; $k += 2) { + if ($k == $d + || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) { + $x = $V1[$limit_min_1 + $k]; + } else { + $x = $V1[$limit_plus_1 + $k] - 1; + } + + $y = $x - $k - $delta; + + $snake2 = 0; + while ($x > 0 && $y > 0 + && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) { + --$x; + --$y; + ++$snake2; } + $V1[$limit + $k] = $x; - $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]); - $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]); - if($bestKnownLcs==$lcsLength){ - $fpBack[$k] = $y; - break 2; + if ($k >= -$delta - $d && $k <= $d - $delta + && $x <= $V0[$limit + $k + $delta]) { + $snake0 = $bottoml1 + $x; + $snake1 = $bottoml2 + $y; + return 2 * $d; + } + + // check to see if we can cut down our diagonal range + if ($x <= 0) { + $start_backward = $k + 1; + $value_to_add_backward = 0; + } else if ($y <= 0 && $end_backward > $k - 1) { + $end_backward = $k - 1; } - $maxp=$m_min_1-$bestLcsLength; } } - if($k>1){ - --$k; - }else if($k<0){ - ++$k; - }else if($k==1){ - $k = -$p; - }else{ - break; - } - } while(TRUE); + } + /* + * computing the true LCS is too expensive, instead find the diagonal + * with the most progress and pretend a midle snake of length 0 occurs + * there. + */ + + $most_progress = self::findMostProgress($M, $N, $limit, $V); + + $snake0 = $bottoml1 + $most_progress[0]; + $snake1 = $bottoml2 + $most_progress[1]; + $snake2 = 0; + wfDebug('Computing the LCS is too expensive. Using a heuristic.'); + $this->heuristicUsed = true; + return 5; /* + * HACK: since we didn't really finish the LCS computation + * we don't really know the length of the SES. We don't do + * anything with the result anyway, unless it's <=1. We know + * for a fact SES > 1 so 5 is as good a number as any to + * return here + */ } - unset($fpForw, $fpBack, $lcsSizeForw, $lcsSizeBack); - unset($snakeBeginForw, $snakeBeginBack, $snakeEndForw, $snakeEndBack, $snakekForw, $snakekBack); - unset($overlap); + private static function findMostProgress($M, $N, $limit, $V) { + $delta = $N - $M; - // Mark snakes as in LCS - $maxi = $offsetx+$topSnakeEnd-$topSnakek; - for($i=$offsetx+$topSnakeStart-$topSnakek;$i<$maxi;++$i){ - $a_inLcs_sym[$i] = TRUE; - } - $maxi = $offsety+$topSnakeEnd; - for($i=$offsety+$topSnakeStart;$i<$maxi;++$i){ - $b_inLcs_sym[$i] = TRUE; - } - $maxi = $offsetx+$bottomSnakeEnd-$bottomSnakek; - for($i=$offsetx+$bottomSnakeStart-$bottomSnakek;$i<$maxi;++$i){ - $a_inLcs_sym[$i] = TRUE; - } - $maxi = $offsety+$bottomSnakeEnd; - for($i=$offsety+$bottomSnakeStart;$i<$maxi;++$i){ - $b_inLcs_sym[$i] = TRUE; - } + if (($M & 1) == ($limit & 1)) { + $forward_start_diag = max(-$M, -$limit); + } else { + $forward_start_diag = max(1 - $M, -$limit); + } - $m_t = $topSnakeStart-$topSnakek; - $a_t = array_slice($a, 0, $m_t); - $b_t = array_slice($b, 0, $topSnakeStart); + $forward_end_diag = min($N, $limit); - $m_b = $m-($bottomSnakeEnd-$bottomSnakek); - $n_b = $n-$bottomSnakeEnd; - $a_b = array_slice($a, $bottomSnakeEnd-$bottomSnakek, $m_b); - $b_b = array_slice($b, $bottomSnakeEnd, $n_b); + if (($N & 1) == ($limit & 1)) { + $backward_start_diag = max(-$N, -$limit); + } else { + $backward_start_diag = max(1 - $N, -$limit); + } - wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $topSnakeStart, $offsetx, $offsety, $bestLcsLengthTop, $boundRunningTime, $max_NP_before_bound); + $backward_end_diag = -min($M, $limit); - wikidiff3_diffPart($a_b, $b_b, $a_inLcs, $b_inLcs, $m_b, $n_b, $offsetx+($bottomSnakeEnd-$bottomSnakek), $offsety+$bottomSnakeEnd, $bestLcsLengthBottom, $boundRunningTime, $max_NP_before_bound); -} + $temp = array(0,0,0); -class InLcs { - public $inLcs; + $max_progress = array_fill(0,ceil(max($forward_end_diag + - $forward_start_diag, $backward_end_diag - $backward_start_diag) / 2), $temp); + $num_progress = 0; // the 1st entry is current, it is initialized + // with 0s - function __construct($length){ - $this->inLcs = $length>0 ? array_fill(0,$length,FALSE): array(); - } + // first search the forward diagonals + for ($k = $forward_start_diag; $k <= $forward_end_diag; $k += 2) { + $x = $V[0][$limit + $k]; + $y = $x - $k; + if ($x > $N || $y > $M) { + continue; + } + + $progress = $x + $y; + if ($progress > $max_progress[0][2]) { + $num_progress = 0; + $max_progress[0][0] = $x; + $max_progress[0][1] = $y; + $max_progress[0][2] = $progress; + } else if ($progress == $max_progress[0][2]) { + ++$num_progress; + $max_progress[$num_progress][0] = $x; + $max_progress[$num_progress][1] = $y; + $max_progress[$num_progress][2] = $progress; + } + } + + $max_progress_forward = true; // initially the maximum + // progress is in the forward + // direction - /** - * Get the length of the Longest Common Subsequence (computed) - */ - public function getLcsLength(){ - return array_sum($this->inLcs); + // now search the backward diagonals + for ($k = $backward_start_diag; $k <= $backward_end_diag; $k += 2) { + $x = $V[1][$limit + $k]; + $y = $x - $k - $delta; + if ($x < 0 || $y < 0) { + continue; + } + + $progress = $N - $x + $M - $y; + if ($progress > $max_progress[0][2]) { + $num_progress = 0; + $max_progress_forward = false; + $max_progress[0][0] = $x; + $max_progress[0][1] = $y; + $max_progress[0][2] = $progress; + } else if ($progress == $max_progress[0][2] && !$max_progress_forward) { + ++$num_progress; + $max_progress[$num_progress][0] = $x; + $max_progress[$num_progress][1] = $y; + $max_progress[$num_progress][2] = $progress; + } + } + + // return the middle diagonal with maximal progress. + return $max_progress[floor($num_progress / 2)]; } } + diff --git a/includes/DifferenceEngine.php b/includes/DifferenceEngine.php index 1df4fea583..c02eab2a33 100644 --- a/includes/DifferenceEngine.php +++ b/includes/DifferenceEngine.php @@ -74,9 +74,10 @@ class DifferenceEngine { } function showDiffPage( $diffOnly = false ) { - global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol; + global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol, $wgEnableHtmlDiff; wfProfileIn( __METHOD__ ); + # If external diffs are enabled both globally and for the user, # we'll use the application/x-external-editor interface to call # an external diff tool like kompare, kdiff3, etc. @@ -265,11 +266,16 @@ CONTROL; '
' . $newminor . $sk->revComment( $this->mNewRev, !$diffOnly, true ) . $rdel . "
" . '
' . $nextlink . $patrol . '
'; - $this->showDiff( $oldHeader, $newHeader ); + if($wgEnableHtmlDiff){ + $this->renderHtmlDiff(); + }else{ - if ( !$diffOnly ) - $this->renderNewRevision(); + $this->showDiff( $oldHeader, $newHeader ); + if ( !$diffOnly ){ + $this->renderNewRevision(); + } + } wfProfileOut( __METHOD__ ); } @@ -319,6 +325,65 @@ CONTROL; wfProfileOut( __METHOD__ ); } + + function renderHtmlDiff() { + global $wgOut, $IP; + wfProfileIn( __METHOD__ ); + + $this->showDiffStyle(); + + require_once( "$IP/includes/HTMLDiff.php" ); + + #add deleted rev tag if needed + if( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) { + $wgOut->addWikiMsg( 'rev-deleted-text-permission' ); + } else if( $this->mNewRev->isDeleted(Revision::DELETED_TEXT) ) { + $wgOut->addWikiMsg( 'rev-deleted-text-view' ); + } + + if( !$this->mNewRev->isCurrent() ) { + $oldEditSectionSetting = $wgOut->parserOptions()->setEditSection( false ); + } + + $this->loadText(); + + if( is_object( $this->mOldRev ) ) { + $wgOut->setRevisionId( $this->mOldRev->getId() ); + } + + global $wgTitle, $wgParser, $wgTitle; + $popts = $wgOut->parserOptions(); + $oldTidy = $popts->setTidy( TRUE ); + + $parserOutput = $wgParser->parse($this->mOldtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId ); + $popts->setTidy( $oldTidy ); + + //only for new? + //$wgOut->addParserOutputNoText( $parserOutput ); + $oldHtml = $parserOutput->getText(); + wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$oldHtml ) ); + + if( is_object( $this->mNewRev ) ) { + $wgOut->setRevisionId( $this->mNewRev->getId() ); + } + + $popts = $wgOut->parserOptions(); + $oldTidy = $popts->setTidy( TRUE ); + + $parserOutput = $wgParser->parse($this->mNewtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId ); + $popts->setTidy( $oldTidy ); + + //only for new? + $wgOut->addParserOutputNoText( $parserOutput ); + $newHtml = $parserOutput->getText(); + wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$newHtml ) ); + + $differ = new HTMLDiffer(new DelegatingContentHandler($wgOut)); + $differ->htmlDiff($oldHtml, $newHtml); + + wfProfileOut( __METHOD__ ); + } + /** * Show the first revision of an article. Uses normal diff headers in * contrast to normal "old revision" display style. @@ -892,6 +957,30 @@ class _DiffOp_Change extends _DiffOp { } } +/** + * Alternative representation of a set of changes, by the index + * ranges that are changed. + */ +class RangeDifference { + + public $leftstart; + public $leftend; + public $leftlength; + + public $rightstart; + public $rightend; + public $rightlength; + + function __construct($leftstart, $leftend, $rightstart, $rightend){ + $this->leftstart = $leftstart; + $this->leftend = $leftend; + $this->leftlength = $leftend-$leftstart; + $this->rightstart = $rightstart; + $this->rightend = $rightend; + $this->rightlength = $rightend-$rightstart; + } +} + /** * Class used internally by Diff to actually compute the diffs. * @@ -920,22 +1009,108 @@ class _DiffEngine { const MAX_XREF_LENGTH = 10000; - function diff ($from_lines, $to_lines) { + function diff ($from_lines, $to_lines){ wfProfileIn( __METHOD__ ); - global $wgExternalDiffEngine; + // Diff and store locally + $this->diff_local($from_lines, $to_lines); + + // Merge edits when possible + $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); + $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); + + // Compute the edit operations. + $n_from = sizeof($from_lines); + $n_to = sizeof($to_lines); + + $edits = array(); + $xi = $yi = 0; + while ($xi < $n_from || $yi < $n_to) { + USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]); + USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]); + + // Skip matching "snake". + $copy = array(); + while ( $xi < $n_from && $yi < $n_to + && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { + $copy[] = $from_lines[$xi++]; + ++$yi; + } + if ($copy) + $edits[] = new _DiffOp_Copy($copy); + + // Find deletes & adds. + $delete = array(); + while ($xi < $n_from && $this->xchanged[$xi]) + $delete[] = $from_lines[$xi++]; + + $add = array(); + while ($yi < $n_to && $this->ychanged[$yi]) + $add[] = $to_lines[$yi++]; + if ($delete && $add) + $edits[] = new _DiffOp_Change($delete, $add); + elseif ($delete) + $edits[] = new _DiffOp_Delete($delete); + elseif ($add) + $edits[] = new _DiffOp_Add($add); + } + wfProfileOut( __METHOD__ ); + return $edits; + } + + function diff_range ($from_lines, $to_lines){ + wfProfileIn( __METHOD__ ); + + // Diff and store locally + $this->diff_local($from_lines, $to_lines); + + // Compute the ranges operations. $n_from = sizeof($from_lines); $n_to = sizeof($to_lines); + $ranges = array(); + $xi = $yi = 0; + while ($xi < $n_from || $yi < $n_to) { + // Matching "snake". + while ( $xi < $n_from && $yi < $n_to + && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { + ++$xi; + ++$yi; + } + // Find deletes & adds. + $xstart = $xi; + while ($xi < $n_from && $this->xchanged[$xi]){ + ++$xi; + } + + $ystart = $yi; + while ($yi < $n_to && $this->ychanged[$yi]){ + ++$yi; + } + + if ($xi>$xstart || $yi>$ystart){ + $ranges[] = new RangeDifference($xstart,$xi,$ystart,$yi); + } + } + wfProfileOut( __METHOD__ ); + return $ranges; + } + + function diff_local ($from_lines, $to_lines) { + global $wgExternalDiffEngine; + wfProfileIn( __METHOD__); + if($wgExternalDiffEngine == 'wikidiff3'){ // wikidiff3 global $IP; require_once( "$IP/includes/Diff.php" ); - list($this->xchanged, $this->ychanged) = wikidiff3_diff($from_lines, $to_lines, TRUE, 100000); + $wikidiff3 = new WikiDiff3(); + list($this->xchanged, $this->ychanged) = $wikidiff3->diff($from_lines, $to_lines); }else{ // old diff - wfProfileIn( __METHOD__ ." - basicdiff"); + $n_from = sizeof($from_lines); + $n_to = sizeof($to_lines); $this->xchanged = $this->ychanged = array(); $this->xv = $this->yv = array(); $this->xind = $this->yind = array(); @@ -980,48 +1155,8 @@ class _DiffEngine { // Find the LCS. $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv)); - wfProfileOut( __METHOD__ ." - basicdiff"); - } - - // Merge edits when possible - $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); - $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); - - // Compute the edit operations. - $edits = array(); - $xi = $yi = 0; - while ($xi < $n_from || $yi < $n_to) { - USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]); - USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]); - - // Skip matching "snake". - $copy = array(); - while ( $xi < $n_from && $yi < $n_to - && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { - $copy[] = $from_lines[$xi++]; - ++$yi; - } - if ($copy) - $edits[] = new _DiffOp_Copy($copy); - - // Find deletes & adds. - $delete = array(); - while ($xi < $n_from && $this->xchanged[$xi]) - $delete[] = $from_lines[$xi++]; - - $add = array(); - while ($yi < $n_to && $this->ychanged[$yi]) - $add[] = $to_lines[$yi++]; - - if ($delete && $add) - $edits[] = new _DiffOp_Change($delete, $add); - elseif ($delete) - $edits[] = new _DiffOp_Delete($delete); - elseif ($add) - $edits[] = new _DiffOp_Add($add); } wfProfileOut( __METHOD__ ); - return $edits; } /** @@ -1077,7 +1212,6 @@ class _DiffEngine { $numer = $xlim - $xoff + $nchunks - 1; $x = $xoff; for ($chunk = 0; $chunk < $nchunks; $chunk++) { - wfProfileIn( __METHOD__ . "-chunk" ); if ($chunk > 0) for ($i = 0; $i <= $this->lcs; $i++) $ymids[$i][$chunk-1] = $this->seq[$i]; @@ -1130,7 +1264,6 @@ class _DiffEngine { if ($end == 0 || $ypos > $this->seq[$end]) { $this->seq[++$this->lcs] = $ypos; $this->in_seq[$ypos] = 1; - wfProfileOut( __METHOD__ ); return $this->lcs; } diff --git a/includes/HTMLDiff.php b/includes/HTMLDiff.php new file mode 100644 index 0000000000..7e1cc16547 --- /dev/null +++ b/includes/HTMLDiff.php @@ -0,0 +1,1997 @@ + + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * or see http://www.gnu.org/ + */ + + +/** + * Any element in the DOM tree of an HTML document. + */ +abstract class Node{ + + protected $parent; + + + function __construct($parent){ + $this->parent = $parent; + if(!is_null($parent)){ + $parent->addChild($this); + } + } + + public function getParent(){ + return $this->parent; + } + + public function getParentTree(){ + if(!is_null($this->parent)){ + $parentTree = $this->parent->getParentTree(); + $parentTree[] = $this->parent; + return $parentTree; + }else{ + return array(); + } + } + + public abstract function getMinimalDeletedSet($id); + + public function detectIgnorableWhiteSpace(){ + // no op + } + + public function getLastCommonParent(Node $other){ + if(is_null($other)) + throw new Exception('The given node is NULL'); + + $result = new LastCommonParentResult(); + + $myParents = $this->getParentTree(); + $otherParents = $other->getParentTree(); + + $i = 1; + $isSame = true; + while ($isSame && $i < sizeof($myParents) && $i < sizeof($otherParents)) { + if (!$myParents[$i]->isSameTag($otherParents[$i])) { + $isSame = false; + } else { + // After the while, the index i-1 must be the last common parent + $i++; + } + } + + $result->setLastCommonParentDepth($i - 1); + $result->setLastCommonParent($myParents[$i - 1]); + + if (!$isSame) { + $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i])); + $result->setSplittingNeeded(); + } else if (sizeof($myParents) < sizeof($otherParents)) { + $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this)); + } else if (sizeof($myParents) > sizeof($otherParents)) { + // All tags matched but there are tags left in this tree + $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i])); + $result->setSplittingNeeded(); + } else { + // All tags matched untill the very last one in both trees + // or there were no tags besides the BODY + $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this)); + } + return $result; + } + + public function setParent($parent) { + $this->parent = $parent; + } + + public abstract function copyTree(); + + public function inPre() { + $tree = $this->getParentTree(); + foreach ($tree as $ancestor) { + if ($ancestor->isPre()) { + return true; + } + } + return false; + } + + private $whiteBefore = false; + + private $whiteAfter = false; + + public function isWhiteBefore() { + return $this->whiteBefore; + } + + public function setWhiteBefore($whiteBefore) { + $this->whiteBefore = $whiteBefore; + } + + public function isWhiteAfter() { + return $this->whiteAfter; + } + + public function setWhiteAfter($whiteAfter) { + $this->whiteAfter = $whiteAfter; + } + + public abstract function getLeftMostChild(); + + public abstract function getRightMostChild(); +} + +/** + * Node that can contain other nodes. Represents an HTML tag. + */ +class TagNode extends Node{ + + public $children = array(); + + protected $qName; + + protected $attributes = array(); + + function __construct($parent, $qName, /*array*/ $attributes) { + parent::__construct($parent); + $this->qName = strtolower($qName); + foreach($attributes as $key => $value){ + $this->attributes[strtolower($key)] = $value; + } + } + + public function addChild(Node $node, $index=-1) { + if ($node->getParent() !== $this) + throw new Exception( + 'The new child must have this node as a parent.'); + if($index == -1){ + $this->children[] = $node; + }else{ + array_splice($this->children,$index,0,array($node)); + } + } + + public function getIndexOf(Node $child) { + // don't trust array_search with objects + foreach($this->children as $key=>$value){ + if($value === $child){ + return $key; + } + } + return NULL; + } + + public function getChild($i) { + return $this->children[$i]; + } + + public function getNbChildren() { + return count($this->children); + } + + public function getQName() { + return $this->qName; + } + + public function getAttributes() { + return $this->attributes; + } + + public function isSameTag(TagNode $other) { + if (is_null($other)) + return false; + return $this->getOpeningTag() === $other->getOpeningTag(); + } + + public function getOpeningTag() { + $s = '<'.$this->getQName(); + foreach ($this->attributes as $attribute => $value) { + $s .= ' ' . $attribute . '="' . $value . '"'; + } + return $s .= '>'; + } + + public function getEndTag() { + return 'getQName() + '>"'; + } + + public function getMinimalDeletedSet($id) { + $nodes = array(); + + if ($this->getNbChildren() == 0) + return $nodes; + + $hasNotDeletedDescendant = false; + + foreach ($this->children as $child) { + $childrenChildren = $child->getMinimalDeletedSet($id); + $nodes = array_merge($nodes, $childrenChildren); + if (!$hasNotDeletedDescendant + && !(count($childrenChildren) == 1 && $childrenChildren[0]===$child)) { + // This child is not entirely deleted + $hasNotDeletedDescendant = true; + } + } + if (!$hasNotDeletedDescendant) { + $nodes = array($this); + } + return $nodes; + } + + public function toString() { + return $this->getOpeningTag(); + } + + public function splitUntill(TagNode $parent, Node $split, $includeLeft) { + $splitOccured = false; + if ($parent !== $this) { + $part1 = new TagNode(NULL, $this->getQName(), $this->getAttributes()); + $part2 = new TagNode(NULL, $this->getQName(), $this->getAttributes()); + $part1->setParent($this->getParent()); + $part2->setParent($this->getParent()); + + $i = 0; + $nbChildren = $this->getNbChildren(); + while ($i < $nbChildren && $this->children[$i] !== $split) { + $this->children[$i]->setParent($part1); + $part1->addChild($this->children[$i]); + ++$i; + } + if ($i < $nbChildren) { + if ($includeLeft) { + $this->children[$i]->setParent($part1); + $part1->addChild($this->children[$i]); + } else { + $this->children[$i]->setParent($part2); + $part2->addChild($this->children[$i]); + } + ++$i; + } + while ($i < $nbChildren) { + $this->children[$i]->setParent($part2); + $part2->addChild($this->children[$i]); + ++$i; + } + $myindexinparent = $this->parent->getIndexOf($this); + if ($part1->getNbChildren() > 0) + $this->parent->addChild($part1,$myindexinparent); + + if ($part2->getNbChildren() > 0) + $this->parent->addChild($part2,$myindexinparent); + + if ($part1->getNbChildren() > 0 && $part2->getNbChildren() > 0) { + $splitOccured = true; + } + + $this->parent->removeChild($myindexinparent); + + if ($includeLeft) + $this->parent->splitUntill($parent, $part1, $includeLeft); + else + $this->parent->splitUntill($parent, $part2, $includeLeft); + } + return $splitOccured; + + } + + private function removeChild($index) { + unset($this->children[$index]); + $this->children = array_values($this->children); + } + + public static $blocks = array('html'=>TRUE,'body'=>TRUE,'p'=>TRUE,'blockquote'=>TRUE, + 'h1'=>TRUE,'h2'=>TRUE,'h3'=>TRUE,'h4'=>TRUE,'h5'=>TRUE,'pre'=>TRUE,'div'=>TRUE,'ul'=>TRUE,'ol'=>TRUE,'li'=>TRUE, + 'table'=>TRUE,'tbody'=>TRUE,'tr'=>TRUE,'td'=>TRUE,'th'=>TRUE,'br'=>TRUE); + + public static function isBlockLevelName($qName) { + return array_key_exists(strtolower($qName),self::$blocks); + } + + public static function isBlockLevelNode(Node $node) { + if(! $node instanceof TagNode) + return false; + return self::isBlockLevelName($node->getQName()); + } + + public function isBlockLevel() { + return  self::isBlockLevelNode($this); + } + + public static function isInlineName($qName) { + return !self::isBlockLevelName($qName); + } + + public static function isInlineNode(Node $node) { + return !self::isBlockLevelNode($node); + } + + public function isInline() { + return self::isInlineNode($this); + } + + public function copyTree() { + $newThis = new TagNode(NULL, $this->getQName(), $this->getAttributes()); + $newThis->setWhiteBefore($this->isWhiteBefore()); + $newThis->setWhiteAfter($this->isWhiteAfter()); + foreach($this->children as $child) { + $newChild = $child->copyTree(); + $newChild->setParent($newThis); + $newThis->addChild($newChild); + } + return $newThis; + } + + public function getMatchRatio(TagNode $other) { + $txtComp = new TextOnlyComparator($other); + return $txtComp->getMatchRatio(new TextOnlyComparator($this)); + } + + public function expandWhiteSpace() { + $shift = 0; + $spaceAdded = false; + + $nbOriginalChildren = $this->getNbChildren(); + for ($i = 0; $i < $nbOriginalChildren; ++$i) { + $child = $this->getChild($i + $shift); + + if($child instanceof TagNode){ + if (!$child->isPre()) { + $child->expandWhiteSpace(); + } + } + if (!$spaceAdded && $child->isWhiteBefore()) { + $ws = new WhiteSpaceNode(NULL, ' ', $child->getLeftMostChild()); + $ws->setParent($this); + $this->addChild($ws,$i + ($shift++)); + } + if ($child->isWhiteAfter()) { + $ws = new WhiteSpaceNode(NULL, ' ', $child->getRightMostChild()); + $ws->setParent($this); + $this->addChild($ws,$i + 1 + ($shift++)); + $spaceAdded = true; + } else { + $spaceAdded = false; + } + + } + } + + public function getLeftMostChild() { + if ($this->getNbChildren() < 1) + return $this; + return $this->getChild(0)->getLeftMostChild(); + + } + + public function getRightMostChild() { + if ($this->getNbChildren() < 1) + return $this; + return $this->getChild($this->getNbChildren() - 1)->getRightMostChild(); + } + + public function isPre() { + return 0 == strcasecmp($this->getQName(),'pre'); + } + + public static function toDiffLine(TagNode $node){ + return $node->getOpeningTag(); + } +} + +/** + * Represents a piece of text in the HTML file. + */ +class TextNode extends Node{ + + private $s; + + private $modification; + + function __construct($parent, $s) { + parent::__construct($parent); + $this->modification = new Modification(Modification::NONE); + $this->s = $s; + } + + public function copyTree() { + $clone = clone $this; + $clone->setParent(NULL); + return $clone; + } + + public function getLeftMostChild() { + return $this; + } + + public function getRightMostChild() { + return $this; + } + + public function getMinimalDeletedSet($id) { + if ($this->getModification()->getType() == Modification::REMOVED + && $this->getModification()->getID() == $id){ + return array($this); + } + return array(); + } + + public function getModification() { + return $this->modification; + } + + public function getText() { + return $this->s; + } + + public function isSameText($other) { + if (is_null($other) || ! $other instanceof TextNode){ + return false; + } + return str_replace('\n', ' ',$this->getText()) === str_replace('\n', ' ',$other->getText()); + } + + public function setModification(Modification $m) { + //wfDebug("Registered modification for node '$this->s' as ".Modification::typeToString($m->getType())); + $this->modification = $m; + } + + public function toString() { + return $this->getText(); + } + + public static function toDiffLine(TextNode $node){ + return str_replace('\n', ' ',$node->getText()); + } +} + +class WhiteSpaceNode extends TextNode { + + function __construct($parent, $s, Node $like = NULL) { + parent::__construct($parent, $s); + if(!is_null($like) && $like instanceof TextNode){ + $newModification = clone $like->getModification(); + $newModification->setFirstOfID(false); + $this->setModification($newModification); + } + } + + public static function isWhiteSpace($c) { + switch ($c) { + case ' ': + case '\t': + case '\r': + case '\n': + return true; + default: + return false; + } + } +} + +/** + * Represents the root of a HTML document. + */ +class BodyNode extends TagNode { + + function __construct() { + parent::__construct(NULL, 'body', array()); + } + + public function copyTree() { + $newThis = new BodyNode(); + foreach ($this->children as $child) { + $newChild = $child->copyTree(); + $newChild->setParent($newThis); + $newThis->addChild($newChild); + } + return $newThis; + } + + public function getMinimalDeletedSet($id) { + $nodes = array(); + foreach ($this->children as $child) { + $childrenChildren = $child->getMinimalDeletedSet($id); + $nodes = array_merge($nodes, $childrenChildren); + } + return $nodes; + } + +} + +/** + * Represents an image in HTML. Even though images do not contain any text they + * are independent visible objects on the page. They are logically a TextNode. + */ +class ImageNode extends TextNode { + + private $attributes; + + function __construct(TagNode $parent, /*array*/ $attrs) { + if(!array_key_exists('src',$attrs)){ + //wfDebug('Image without a source:'); + foreach($attrs as $key => $value){ + //wfDebug("$key = $value"); + } + parent::__construct($parent, ''); + }else{ + parent::__construct($parent, '' . strtolower($attrs['src']) . ''); + } + $this->attributes = $attrs; + } + + public function isSameText($other) { + if (is_null($other) || ! $other instanceof ImageNode) + return false; + return $this->getText() === $other->getText(); + } + + public function getAttributes() { + return $this->attributes; + } + +} + +/** + * When detecting the last common parent of two nodes, all results are stored as + * a LastCommonParentResult. + */ +class LastCommonParentResult { + + // Parent + private $parent; + + public function getLastCommonParent() { + return $this->parent; + } + + public function setLastCommonParent(TagNode $parent) { + $this->parent = $parent; + } + + // Splitting + private $splittingNeeded = false; + + public function isSplittingNeeded() { + return $this->splittingNeeded; + } + + public function setSplittingNeeded() { + $this->splittingNeeded = true; + } + + // Depth + private $lastCommonParentDepth = -1; + + public function getLastCommonParentDepth() { + return $this->lastCommonParentDepth; + } + + public function setLastCommonParentDepth($depth) { + $this->lastCommonParentDepth = $depth; + } + + // Index + private $indexInLastCommonParent = -1; + + public function getIndexInLastCommonParent() { + return $this->indexInLastCommonParent; + } + + public function setIndexInLastCommonParent($index) { + $this->indexInLastCommonParent = $index; + } +} + +class Modification{ + + const NONE = 1; + const REMOVED = 2; + const ADDED = 4; + const CHANGED = 8; + + private $type; + + private $id = -1; + + private $prevMod; + + private $nextMod; + + private $firstOfID = false; + + function __construct($type) { + $this->type = $type; + } + + public function copy() { + $newM = new Modification($this->getType()); + $newM->setID($this->getID()); + $newM->setChanges($this->getChanges()); + $newM->setFirstOfID($this->isFirstOfID()); + $newM->setNext($this->getNext()); + $newM->setPrevious($this->getPrevious()); + return $newM; + } + + public function getType() { + return $this->type; + } + + public function setID($id) { + $this->id = $id; + } + + public function getID() { + return $this->id; + } + + public function setPrevious($m) { + $this->prevMod = $m; + } + + public function getPrevious() { + return $this->prevMod; + } + + public function setNext($m) { + $this->nextMod = $m; + } + + public function getNext() { + return $this->nextMod; + } + + private $changes; + + public function setChanges($changes) { + $this->changes = $changes; + } + + public function getChanges() { + return $this->changes; + } + + public function isFirstOfID() { + return $this->firstOfID; + } + + public function setFirstOfID($firstOfID) { + $this->firstOfID = $firstOfID; + } + + public static function typeToString($type){ + switch($type){ + case self::NONE: return 'none'; + case self::REMOVED: return 'removed'; + case self::ADDED: return 'added'; + case self::CHANGED: return 'changed'; + } + } +} + +class DomTreeBuilder { + + private $textNodes = array(); + + private $bodyNode; + + private $currentParent; + + private $newWord = ""; + + protected $bodyStarted = false; + + protected $bodyEnded = false; + + private $whiteSpaceBeforeThis = false; + + private $lastSibling; + + function __construct(){ + $this->bodyNode = $this->currentParent = new BodyNode(); + } + + public function getBodyNode() { + return $this->bodyNode; + } + + public function getTextNodes() { + return $this->textNodes; + } + + /** + * Must be called manually + */ + public function endDocument() { + $this->endWord(); + //wfDebug(sizeof($this->textNodes) . ' text nodes in document.'); + } + + public function startElement($parser, $name, /*array*/ $attributes) { + if(!strcasecmp($name, 'body')==0){ + //wfDebug("Starting $name node."); + $this->endWord(); + + $newTagNode = new TagNode($this->currentParent, $name, $attributes); + $this->currentParent = $newTagNode; + $this->lastSibling = NULL; + if ($this->whiteSpaceBeforeThis && $newTagNode->isInline()) { + $newTagNode->setWhiteBefore(true); + } + $this->whiteSpaceBeforeThis = false; + } + } + + public function endElement($parser, $name) { + if(!strcasecmp($name, 'body')==0){ + //wfDebug("Ending $name node."); + if (0 == strcasecmp($name,'img')) { + // Insert a dummy leaf for the image + $img = new ImageNode($this->currentParent, $this->currentParent->getAttributes()); + $img->setWhiteBefore($this->whiteSpaceBeforeThis); + $this->lastSibling = $img; + $this->textNodes[] = $img; + } + $this->endWord(); + if ($this->currentParent->isInline()) { + $this->lastSibling = $this->currentParent; + } else { + $this->lastSibling = NULL; + } + $this->currentParent = $this->currentParent->getParent(); + $this->whiteSpaceBeforeThis = false; + }else{ + $this->endDocument(); + } + } + + public function characters($parser, $data){ + //wfDebug('Parsing '. strlen($data).' characters.'); + $array = str_split($data); + foreach($array as $c) { + if (self::isDelimiter($c)) { + $this->endWord(); + if (WhiteSpaceNode::isWhiteSpace($c) && !$this->currentParent->isPre() + && !$this->currentParent->inPre()) { + if (!is_null($this->lastSibling)){ + $this->lastSibling->setWhiteAfter(true); + } + $this->whiteSpaceBeforeThis = true; + } else { + $textNode = new TextNode($this->currentParent, $c); + $textNode->setWhiteBefore($this->whiteSpaceBeforeThis); + $this->whiteSpaceBeforeThis = false; + $this->lastSibling = $textNode; + $this->textNodes[] = $textNode; + } + } else { + $this->newWord .= $c; + } + + } + } + + private function endWord() { + if (strlen($this->newWord) > 0) { + $node = new TextNode($this->currentParent, $this->newWord); + $node->setWhiteBefore($this->whiteSpaceBeforeThis); + $this->whiteSpaceBeforeThis = false; + $this->lastSibling = $node; + $this->textNodes[] = $node; + $this->newWord = ""; + } + } + + public static function isDelimiter($c) { + switch ($c) { + // Basic Delimiters + case '/': + case '.': + case '!': + case ',': + case ';': + case '?': + case '=': + case '\'': + case '"': + // Extra Delimiters + case '[': + case ']': + case '{': + case '}': + case '(': + case ')': + case '&': + case '|': + case '\\': + case '-': + case '_': + case '+': + case '*': + case ':': + return true; + default: + return WhiteSpaceNode::isWhiteSpace($c); + } + } + + public function getDiffLines(){ + return array_map(array('TextNode','toDiffLine'), $this->textNodes); + } +} + +class TextNodeDiffer{ + + private $textNodes; + private $bodyNode; + + private $oldTextNodes; + private $oldBodyNode; + + private $lastModified = array(); + + function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) { + $this->textNodes = $tree->getTextNodes(); + $this->bodyNode = $tree->getBodyNode(); + $this->oldTextNodes = $oldTree->getTextNodes(); + $this->oldBodyNode = $oldTree->getBodyNode(); + } + + public function getBodyNode() { + return $this->bodyNode; + } + + private $newID = 0; + + public function markAsNew($start, $end) { + if ($end <= $start) + return; + + if ($this->whiteAfterLastChangedPart) + $this->textNodes[$start]->setWhiteBefore(false); + + $nextLastModified = array(); + + for ($i = $start; $i < $end; ++$i) { + $mod = new Modification(Modification::ADDED); + $mod->setID($this->newID); + if (sizeof($this->lastModified) > 0) { + $mod->setPrevious($this->lastModified[0]); + if (is_null($this->lastModified[0]->getNext())) { + foreach ($this->lastModified as $lastMod) { + $lastMod->setNext($mod); + } + } + } + $nextLastModified[] = $mod; + $this->textNodes[$i]->setModification($mod); + } + if ($start < $end) { + $this->textNodes[$start]->getModification()->setFirstOfID(true); + } + ++$this->newID; + $this->lastModified = $nextLastModified; + } + + private $changedID = 0; + + private $changedIDUsed = false; + + public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) { + $i = $rightstart; + $j = $leftstart; + + if ($this->changedIDUsed) { + ++$this->changedID; + $this->changedIDUsed = false; + } + + $nextLastModified = array(); + + $changes; + while ($i < $rightend) { + $acthis = new AncestorComparator($this->textNodes[$i]->getParentTree()); + $acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree()); + $result = $acthis->getResult($acother); + unset($acthis, $acother); + + $nbLastModified = sizeof($this->lastModified); + if ($result->isChanged()) { + $mod = new Modification(Modification::CHANGED); + + if (!$this->changedIDUsed) { + $mod->setFirstOfID(true); + if (sizeof($nextLastModified) > 0) { + $this->lastModified = $nextLastModified; + $nextLastModified = array(); + } + } else if (!is_null($result->getChanges()) && $result->getChanges() != $this->changes) { + ++$this->changedID; + $mod->setFirstOfID(true); + if (sizeof($nextLastModified) > 0) { + $this->lastModified = $nextLastModified; + $nextLastModified = array(); + } + } + + if ($nbLastModified > 0) { + $mod->setPrevious($this->lastModified[0]); + if (is_null($this->lastModified[0]->getNext())) { + foreach ($this->lastModified as $lastMod) { + $lastMod->setNext($mod); + } + } + } + $nextLastModified[] = $mod; + + $mod->setChanges($result->getChanges()); + $mod->setID($this->changedID); + + $this->textNodes[$i]->setModification($mod); + $this->changes = $result->getChanges(); + $this->changedIDUsed = true; + } else if ($this->changedIDUsed) { + ++$this->changedID; + $this->changedIDUsed = false; + } + ++$i; + ++$j; + } + if (sizeof($nextLastModified) > 0){ + $this->lastModified = $nextLastModified; + } + } + + // used to remove the whitespace between a red and green block + private $whiteAfterLastChangedPart = false; + + private $deletedID = 0; + + public function markAsDeleted($start, $end, $before) { + + if ($end <= $start) + return; + + if ($before > 0 && $this->textNodes[$before - 1]->isWhiteAfter()) { + $this->whiteAfterLastChangedPart = true; + } else { + $this->whiteAfterLastChangedPart = false; + } + + $nextLastModified = array(); + + for ($i = $start; $i < $end; ++$i) { + $mod = new Modification(Modification::REMOVED); + $mod->setID($this->deletedID); + if (sizeof($this->lastModified) > 0) { + $mod->setPrevious($this->lastModified[0]); + if (is_null($this->lastModified[0]->getNext())) { + foreach ($this->lastModified as $lastMod) { + $lastMod->setNext($mod); + } + } + } + $nextLastModified[] = $mod; + + // oldTextNodes is used here because we're going to move its deleted + // elements + // to this tree! + $this->oldTextNodes[$i]->setModification($mod); + } + $this->oldTextNodes[$start]->getModification()->setFirstOfID(true); + + $deletedNodes = $this->oldBodyNode->getMinimalDeletedSet($this->deletedID); + + //wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes)); + + // Set prevLeaf to the leaf after which the old HTML needs to be + // inserted + if ($before > 0){ + $prevLeaf = $this->textNodes[$before - 1]; + } + // Set nextLeaf to the leaf before which the old HTML needs to be + // inserted + if ($before < sizeof($this->textNodes)){ + $nextLeaf = $this->textNodes[$before]; + } + + while (sizeof($deletedNodes) > 0) { + if (isset($prevLeaf)) { + $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]); + } else { + $prevResult = new LastCommonParentResult(); + $prevResult->setLastCommonParent($this->getBodyNode()); + $prevResult->setIndexInLastCommonParent(0); + } + if (isset($nextleaf)) { + $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[sizeof($deletedNodes) - 1]); + } else { + $nextResult = new LastCommonParentResult(); + $nextResult->setLastCommonParent($this->getBodyNode()); + $nextResult->setIndexInLastCommonParent($this->getBodyNode()->getNbChildren()); + } + + if ($prevResult->getLastCommonParentDepth() == $nextResult->getLastCommonParentDepth()) { + // We need some metric to choose which way to add-... + if ($deletedNodes[0]->getParent() === $deletedNodes[sizeof($deletedNodes) - 1]->getParent() + && $prevResult->getLastCommonParent() === $nextResult->getLastCommonParent()) { + // The difference is not in the parent + $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1); + } else { + // The difference is in the parent, so compare them + // now THIS is tricky + $distancePrev = $deletedNodes[0]->getParent()->getMatchRatio($prevResult->getLastCommonParent()); + $distanceNext = $deletedNodes[sizeof($deletedNodes) - 1]->getParent()->getMatchRatio($nextResult->getLastCommonParent()); + + if ($distancePrev <= $distanceNext) { + $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1); + } else { + $nextResult->setLastCommonParentDepth($nextResult->getLastCommonParentDepth() + 1); + } + } + + } + + if ($prevResult->getLastCommonParentDepth() > $nextResult->getLastCommonParentDepth()) { + // Inserting at the front + if ($prevResult->isSplittingNeeded()) { + $prevLeaf->getParent()->splitUntill($prevResult->getLastCommonParent(), $prevLeaf, true); + } + $prevLeaf = $deletedNodes[0]->copyTree(); + unset($deletedNodes[0]); + $deletedNodes = array_values($deletedNodes); + $prevLeaf->setParent($prevResult->getLastCommonParent()); + $prevResult->getLastCommonParent()->addChild($prevLeaf,$prevResult->getIndexInLastCommonParent() + 1); + } else if ($prevResult->getLastCommonParentDepth() < $nextResult->getLastCommonParentDepth()) { + // Inserting at the back + if ($nextResult->isSplittingNeeded()) { + $splitOccured = $nextLeaf->getParent()->splitUntill($nextResult->getLastCommonParent(), $nextLeaf, false); + if ($splitOccured) { + // The place where to insert is shifted one place to the + // right + $nextResult->setIndexInLastCommonParent($nextResult->getIndexInLastCommonParent() + 1); + } + } + $nextLeaf = $deletedNodes[sizeof(deletedNodes) - 1]->copyTree(); + unset($deletedNodes[sizeof(deletedNodes) - 1]); + $deletedNodes = array_values($deletedNodes); + $nextLeaf->setParent($nextResult->getLastCommonParent()); + $nextResult->getLastCommonParent()->addChild($nextLeaf,$nextResult->getIndexInLastCommonParent()); + } else + throw new Exception("Uh?"); + } + $this->lastModified = $nextLastModified; + ++$this->deletedID; + } + + public function expandWhiteSpace() { + $this->getBodyNode()->expandWhiteSpace(); + } + + public function lengthNew(){ + return sizeof($this->textNodes); + } + + public function lengthOld(){ + return sizeof($this->oldTextNodes); + } +} + +class HTMLDiffer{ + + private $output; + + function __construct($output){ + $this->output = $output; + } + + function htmlDiff($from, $to){ + // Create an XML parser + $xml_parser = xml_parser_create(''); + + $domfrom = new DomTreeBuilder(); + + // Set the functions to handle opening and closing tags + xml_set_element_handler($xml_parser, array($domfrom,"startElement"), array($domfrom,"endElement")); + + // Set the function to handle blocks of character data + xml_set_character_data_handler($xml_parser, array($domfrom,"characters")); + + ; + //wfDebug('Parsing '.strlen($from)." characters worth of HTML"); + if (!xml_parse($xml_parser, ''.Sanitizer::hackDocType().'', FALSE) + || !xml_parse($xml_parser, $from, FALSE) + || !xml_parse($xml_parser, '', TRUE)){ + wfDebug(sprintf("XML error: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser))); + } + xml_parser_free($xml_parser); + unset($from); + + $xml_parser = xml_parser_create(''); + + $domto = new DomTreeBuilder(); + + // Set the functions to handle opening and closing tags + xml_set_element_handler($xml_parser, array($domto,"startElement"), array($domto,"endElement")); + + // Set the function to handle blocks of character data + xml_set_character_data_handler($xml_parser, array($domto,"characters")); + + //wfDebug('Parsing '.strlen($to)." characters worth of HTML"); + if (!xml_parse($xml_parser, ''.Sanitizer::hackDocType().'', FALSE) + || !xml_parse($xml_parser, $to, FALSE) + || !xml_parse($xml_parser, '', TRUE)){ + wfDebug(sprintf("XML error in HTML diff: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser))); + } + xml_parser_free($xml_parser); + unset($to); + + $diffengine = new _DiffEngine(); + $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines())); + unset($xml_parser,$diffengine); + + $domdiffer = new TextNodeDiffer($domto, $domfrom); + + $currentIndexLeft = 0; + $currentIndexRight = 0; + foreach ($differences as $d) { + if ($d->leftstart > $currentIndexLeft) { + $domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart, + $currentIndexRight, $d->rightstart); + } + if ($d->leftlength > 0) { + $domdiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart); + } + $domdiffer->markAsNew($d->rightstart, $d->rightend); + + $currentIndexLeft = $d->leftend; + $currentIndexRight = $d->rightend; + } + if ($currentIndexLeft < $domdiffer->lengthOld()) { + $domdiffer->handlePossibleChangedPart($currentIndexLeft,$domdiffer->lengthOld(), $currentIndexRight,$domdiffer->lengthNew()); + } + + $domdiffer->expandWhiteSpace(); + $output = new HTMLOutput('htmldiff', $this->output); + $output->parse($domdiffer->getBodyNode()); + } + + private function preProcess(/*array*/ $differences){ + $newRanges = array(); + + $nbDifferences = sizeof($differences); + for ($i = 0; $i < $nbDifferences; ++$i) { + $leftStart = $differences[$i]->leftstart; + $leftEnd = $differences[$i]->leftend; + $rightStart = $differences[$i]->rightstart; + $rightEnd = $differences[$i]->rightend; + + $leftLength = $leftEnd - $leftStart; + $rightLength = $rightEnd - $rightStart; + + while ($i + 1 < $nbDifferences && self::score($leftLength, $differences[$i + 1]->leftlength, + $rightLength, $differences[$i + 1]->rightlength) > ($differences[$i + 1]->leftstart - $leftEnd)) { + $leftEnd = $differences[$i + 1]->leftend; + $rightEnd = $differences[$i + 1]->rightend; + $leftLength = $leftEnd - $leftStart; + $rightLength = $rightEnd - $rightStart; + ++$i; + } + $newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd); + } + return $newRanges; + } + + /** + * Heuristic to merge differences for readability. + */ + public static function score($ll, $nll, $rl, $nrl) { + if (($ll == 0 && $nll == 0) + || ($rl == 0 && $nrl == 0)){ + return 0; + } + $numbers = array($ll, $nll, $rl, $nrl); + $d = 0; + foreach ($numbers as $number) { + while ($number > 3) { + $d += 3; + $number -= 3; + $number *= 0.5; + } + $d += $number; + + } + return $d / (1.5 * sizeof($numbers)); + } + +} + +class TextOnlyComparator{ + + public $leafs = array(); + + function _construct(TagNode $tree) { + $this->addRecursive($tree); + $this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs); + } + + private function addRecursive(TagNode $tree) { + foreach ($tree->children as $child) { + if ($child instanceof TagNode) { + $this->addRecursive($child); + } else if ($child instanceof TextNode) { + $this->leafs[] = $node; + } + } + } + + public function getMatchRatio(TextOnlyComparator $other) { + $nbOthers = sizeof($other->leafs); + $nbThis = sizeof($this->leafs); + if($nbOthers == 0 || $nbThis == 0){ + return -log(0); + } + + $diffengine = new _DiffEngine(); + $diffengine->diff_local($this->leafs, $other->leafs); + + $distanceThis = array_sum($diffengine->xchanged); + $distanceOther = array_sum($diffengine->ychanged); + + return ((0.0 + $distanceOther) / $nbOthers + (0.0 + $distanceThis) + / $nbThis) / 2.0; + } +} + +class AncestorComparatorResult { + + private $changed = false; + + private $changes = ""; + + public function isChanged() { + return $this->changed; + } + + public function setChanged($changed) { + $this->changed = $changed; + } + + public function getChanges() { + return $this->changes; + } + + public function setChanges($changes) { + $this->changes = $changes; + } +} + +/** + * A comparator used when calculating the difference in ancestry of two Nodes. + */ +class AncestorComparator{ + + public $ancestors; + public $ancestorsText; + + function __construct(/*array*/ $ancestors) { + $this->ancestors = $ancestors; + $this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors); + } + + private $compareTxt = ""; + + public function getCompareTxt() { + return $this->compareTxt; + } + + public function getResult(AncestorComparator $other) { + $result = new AncestorComparatorResult(); + + $diffengine = new _DiffEngine(); + $differences = $diffengine->diff_range($this->ancestorsText, $other->ancestorsText); + + if (sizeof($differences) == 0){ + return $result; + } + $changeTxt = new ChangeTextGenerator($this, $other); + + $result->setChanged(true); + $result->setChanges($changeTxt->getChanged($differences)->toString()); + + return $result; + } +} + +class ChangeTextGenerator { + + private $new; + private $old; + + private $factory; + + function __construct(AncestorComparator $old, AncestorComparator $new) { + $this->new = $new; + $this->old = $old; + $this->factory = new TagToStringFactory(); + } + + public function getChanged(/*array*/ $differences) { + $txt = new ChangeText; + + $rootlistopened = false; + + if (sizeof($differences) > 1) { + $txt->addHtml(''); + } + + return $txt; + + } + + private function addTagOld(ChangeText $txt, TagNode $ancestor) { + $this->factory->create($ancestor)->getRemovedDescription($txt); + } + + private function addTagNew(ChangeText $txt, TagNode $ancestor) { + $this->factory->create($ancestor)->getAddedDescription($txt); + } +} + +class ChangeText { + + private $txt = ""; + + const newLine = "
"; + + public function addText($s) { + $s = $this->clean($s); + $this->txt .= $s; + } + + public function addHtml($s) { + $this->txt .= $s; + } + + public function addNewLine() { + $this->addHtml(self::newLine); + } + + public function toString() { + return $this->txt; + } + + private function clean($s) { + return htmlspecialchars($s); + } +} + +class TagToStringFactory { + + private static $containerTags = array( + 'html' => TRUE, + 'body' => TRUE, + 'p' => TRUE, + 'blockquote' => TRUE, + 'h1' => TRUE, + 'h2' => TRUE, + 'h3' => TRUE, + 'h4' => TRUE, + 'h5' => TRUE, + 'pre' => TRUE, + 'div' => TRUE, + 'ul' => TRUE, + 'ol' => TRUE, + 'li' => TRUE, + 'table' => TRUE, + 'tbody' => TRUE, + 'tr' => TRUE, + 'td' => TRUE, + 'th' => TRUE, + 'br' => TRUE, + 'hr' => TRUE, + 'code' => TRUE, + 'dl' => TRUE, + 'dt' => TRUE, + 'dd' => TRUE, + 'input' => TRUE, + 'form' => TRUE, + 'img' => TRUE, + // in-line tags that can be considered containers not styles + 'span' => TRUE, + 'a' => TRUE + ); + + private static $styleTags = array( + 'i' => TRUE, + 'b' => TRUE, + 'strong' => TRUE, + 'em' => TRUE, + 'font' => TRUE, + 'big' => TRUE, + 'del' => TRUE, + 'tt' => TRUE, + 'sub' => TRUE, + 'sup' => TRUE, + 'strike' => TRUE + ); + + const MOVED = 1; + const STYLE = 2; + const UNKNOWN = 4; + + public function create(TagNode $node) { + $sem = $this->getChangeSemantic($node->getQName()); + if (0 == strcasecmp($node->getQName(),'a')){ + return new AnchorToString($node, $sem); + } + if (0 == strcasecmp($node->getQName(),'img')){ + return new NoContentTagToString($node, $sem); + } + return new TagToString($node, $sem); + } + + protected function getChangeSemantic($qname) { + if (array_key_exists(strtolower($qname),self::$containerTags)){ + return self::MOVED; + } + if (array_key_exists(strtolower($qname),self::$styleTags)){ + return self::STYLE; + } + return self::UNKNOWN; + } +} + +class TagToString { + + protected $node; + + protected $sem; + + function __construct(TagNode $node, $sem) { + $this->node = $node; + $this->sem = $sem; + } + + public function getDescription() { + return $this->getString('diff-' . $this->node->getQName()); + } + + public function getRemovedDescription(ChangeText $txt) { + + if ($this->sem == TagToStringFactory::MOVED) { + $txt->addText($this->getMovedOutOf() . ' ' . strtolower($this->getArticle()) . ' '); + $txt->addHtml(''); + $txt->addText(strtolower($this->getDescription())); + $txt->addHtml(''); + } else if ($this->sem == TagToStringFactory::STYLE) { + $txt->addHtml(''); + $txt->addText($this->getDescription()); + $txt->addHtml(''); + $txt->addText(' ' . strtolower($this->getStyleRemoved())); + } else { + $txt->addHtml(''); + $txt->addText($this->getDescription()); + $txt->addHtml(''); + $txt->addText(' ' . strtolower($this->getRemoved())); + } + $this->addAttributes($txt, $this->node->getAttributes()); + $txt->addText('.'); + } + + public function getAddedDescription(ChangeText $txt) { + + if ($this->sem == TagToStringFactory::MOVED) { + $txt->addText($this->getMovedTo() . ' ' . strtolower($this->getArticle()) . ' '); + $txt->addHtml(''); + $txt->addText(strtolower($this->getDescription())); + $txt->addHtml(''); + } else if ($this->sem == TagToStringFactory::STYLE) { + $txt->addHtml(''); + $txt->addText($this->getDescription()); + $txt->addHtml(''); + $txt->addText(' ' . strtolower($this->getStyleAdded())); + } else { + $txt->addHtml(''); + $txt->addText($this->getDescription()); + $txt->addHtml(''); + $txt->addText(' ' . strtolower($this->getAdded())); + } + $this->addAttributes($txt, $this->node->getAttributes()); + $txt->addText('.'); + } + + protected function getMovedTo() { + return $this->getString('diff-movedto'); + } + + protected function getStyleAdded() { + return $this->getString('diff-styleadded'); + } + + protected function getAdded() { + return $this->getString('diff-added'); + } + + protected function getMovedOutOf() { + return $this->getString('diff-movedoutof'); + } + + protected function getStyleRemoved() { + return $this->getString('diff-styleremoved'); + } + + protected function getRemoved() { + return $this->getString('diff-removed'); + } + + protected function addAttributes(ChangeText $txt, array $attributes) { + if (sizeof($attributes) < 1) + return; + + $keys = array_keys($attributes); + + $txt->addText(' ' . strtolower($this->getWith()) . ' ' + . $this->translateArgument($keys[0]) . ' ' + . $attributes[$keys[0]]); + for ($i = 1; $i < sizeof($attributes) - 1; $i++) { + $txt->addText(', ' . $this->translateArgument($keys[$i]) . ' ' + . $attributes[$keys[$i]]); + } + if (sizeof($attributes) > 1) { + $txt->addText(' ' + . strtolower($this->getAnd()) + . ' ' + . $this->translateArgument($keys[sizeof($attributes) - 1]) . ' ' + . $attributes[$keys[sizeof($attributes) - 1]]); + } + } + + private function getAnd() { + return $this->getString('diff-and'); + } + + private function getWith() { + return $this->getString('diff-with'); + } + + protected function translateArgument($name) { + if (0 == strcasecmp($name,'src')) + return strtolower($this->getSource()); + if (0 == strcasecmp($name,'width')) + return strtolower($this->getWidth()); + if (0 == strcasecmp($name,'height')) + return strtolower($this->getHeight()); + return $name; + } + + private function getHeight() { + return $this->getString('diff-height'); + } + + private function getWidth() { + return $this->getString('diff-width'); + } + + protected function getSource() { + return $this->getString('diff-source'); + } + + protected function getArticle() { + return $this->getString('diff-' . $this->node->getQName() . '-article'); + } + + public static $bundle = array( + 'diff-movedto' => 'Moved to', + 'diff-styleadded' => 'Style added', + 'diff-added' => 'Added', + 'diff-changedto' => 'Changed to', + 'diff-movedoutof' => 'Moved out of', + 'diff-styleremoved' => 'Style removed', + 'diff-removed' => 'Removed', + 'diff-changedfrom' => 'Changed from', + 'diff-source' => 'Source', + 'diff-withdestination' => 'With destination', + 'diff-and' => 'And', + 'diff-with' => 'With', + 'diff-width' => 'Width', + 'diff-height' => 'Height', + 'diff-html-article' => 'A', + 'diff-html' => 'Html page', + 'diff-body-article' => 'A', + 'diff-body' => 'Html document', + 'diff-p-article' => 'A', + 'diff-p' => 'Paragraph', + 'diff-blockquote-article' => 'A', + 'diff-blockquote' => 'Quote', + 'diff-h1-article' => 'A', + 'diff-h1' => 'Heading (level 1)', + 'diff-h2-article' => 'A', + 'diff-h2' => 'Heading (level 2)', + 'diff-h3-article' => 'A', + 'diff-h3' => 'Heading (level 3)', + 'diff-h4-article' => 'A', + 'diff-h4' => 'Heading (level 4)', + 'diff-h5-article' => 'A', + 'diff-h5' => 'Heading (level 5)', + 'diff-pre-article' => 'A', + 'diff-pre' => 'Preformatted block', + 'diff-div-article' => 'A', + 'diff-div' => 'Division', + 'diff-ul-article' => 'An', + 'diff-ul' => 'Unordered list', + 'diff-ol-article' => 'An', + 'diff-ol' => 'Ordered list', + 'diff-li-article' => 'A', + 'diff-li' => 'List item', + 'diff-table-article' => 'A', + 'diff-table' => 'Table', + 'diff-tbody-article' => 'A', + 'diff-tbody' => "Table's content", + 'diff-tr-article' => 'A', + 'diff-tr' => 'Row', + 'diff-td-article' => 'A', + 'diff-td' => 'Cell', + 'diff-th-article' => 'A', + 'diff-th' => 'Header', + 'diff-br-article' => 'A', + 'diff-br' => 'Break', + 'diff-hr-article' => 'A', + 'diff-hr' => 'Horizontal rule', + 'diff-code-article' => 'A', + 'diff-code' => 'Computer code block', + 'diff-dl-article' => 'A', + 'diff-dl' => 'Definition list', + 'diff-dt-article' => 'A', + 'diff-dt' => 'Definition term', + 'diff-dd-article' => 'A', + 'diff-dd' => 'Definition', + 'diff-input-article' => 'An', + 'diff-input' => 'Input', + 'diff-form-article' => 'A', + 'diff-form' => 'Form', + 'diff-img-article' => 'An', + 'diff-img' => 'Image', + 'diff-span-article' => 'A', + 'diff-span' => 'Span', + 'diff-a-article' => 'A', + 'diff-a' => 'Link', + 'diff-i' => 'Italics', + 'diff-b' => 'Bold', + 'diff-strong' => 'Strong', + 'diff-em' => 'Emphasis', + 'diff-font' => 'Font', + 'diff-big' => 'Big', + 'diff-del' => 'Deleted', + 'diff-tt' => 'Fixed width', + 'diff-sub' => 'Subscript', + 'diff-sup' => 'Superscript', + 'diff-strike' => 'Strikethrough' + ); + + public function getString($key) { + return self::$bundle[$key]; + } +} + +class NoContentTagToString extends TagToString { + + function __construct(TagNode $node, $sem) { + parent::__construct($node, $sem); + } + + public function getAddedDescription(ChangeText $txt) { + $txt.addText($this->getChangedTo() . ' ' + strtolower($this->getArticle()) . ' '); + $txt.addHtml(''); + $txt.addText(strtolower($this->getDescription())); + $txt.addHtml(''); + + $this->addAttributes($txt, $this->node->getAttributes()); + $txt.addText('.'); + } + + private function getChangedTo() { + return $this->getString('diff-changedto'); + } + + public function getRemovedDescription(ChangeText $txt) { + $txt.addText($this->getChangedFrom() . ' ' + strtolower($this->getArticle()) . ' '); + $txt.addHtml(''); + $txt.addText(strtolower($this->getDescription())); + $txt.addHtml(''); + + $this->addAttributes($txt, $this->node->getAttributes()); + $txt.addText('.'); + } + + private function getChangedFrom() { + return $this->getString('diff-changedfrom'); + } +} + +class AnchorToString extends TagToString { + + function __construct(TagNode $node, $sem) { + parent::__construct($node, $sem); + } + + protected function addAttributes(ChangeText $txt, array $attributes) { + if (array_key_exists('href',$attributes)) { + $txt->addText(' ' . strtolower($this->getWithDestination()) . ' ' . $attributes['href']); + unset($attributes['href']); + } + parent::addAttributes($txt, $attributes); + } + + private function getWithDestination() { + return $this->getString('diff-withdestination'); + } + +} + +/** + * Takes a branch root and creates an HTML file for it. + */ +class HTMLOutput{ + + private $prefix; + private $handler; + + function __construct($prefix, $handler) { + $this->prefix = $prefix; + $this->handler = $handler; + } + + public function parse(TagNode $node) { + + if (0 != strcasecmp($node->getQName(),'img') && 0 != strcasecmp($node->getQName(),'body')) { + $this->handler->startElement($node->getQName(), $node->getAttributes()); + } + + $newStarted = false; + $remStarted = false; + $changeStarted = false; + $changeTXT = ''; + + foreach ($node->children as $child) { + if ($child instanceof TagNode) { + if ($newStarted) { + $this->handler->endElement('span'); + $newStarted = false; + } else if ($changeStarted) { + $this->handler->endElement('span'); + $changeStarted = false; + } else if ($remStarted) { + $this->handler->endElement('span'); + $remStarted = false; + } + $this->parse($child); + } else if ($child instanceof TextNode) { + $mod = $child->getModification(); + + if ($newStarted && ($mod->getType() != Modification::ADDED || $mod->isFirstOfID())) { + $this->handler->endElement('span'); + $newStarted = false; + } else if ($changeStarted && ($mod->getType() != Modification::CHANGED || $mod->getChanges() != $changeTXT || $mod->isFirstOfID())) { + $this->handler->endElement('span'); + $changeStarted = false; + } else if ($remStarted && ($mod->getType() != Modification::REMOVED || $mod ->isFirstOfID())) { + $this->handler->endElement('span'); + $remStarted = false; + } + + // no else because a removed part can just be closed and a new + // part can start + if (!$newStarted && $mod->getType() == Modification::ADDED) { + $attrs = array('class'=>'diff-html-added'); + if ($mod->isFirstOfID()) { + $attrs['id'] = 'added-' . $this->prefix . '-' . $mod->getID(); + } + $this->addAttributes($mod, $attrs); + $attrs['onclick'] = 'return tipA(constructToolTipA(this));'; + $this->handler->startElement('span', $attrs); + $newStarted = true; + } else if (!$changeStarted && $mod->getType() == Modification::CHANGED) { + $attrs = array('class'=>'diff-html-changed'); + if ($mod->isFirstOfID()) { + $attrs['id'] = 'changed-' . $this->prefix . '-' . $mod->getID(); + } + $this->addAttributes($mod, $attrs); + $attrs['onclick'] = 'return tipC(constructToolTipC(this));'; + $this->handler->startElement('span', $attrs); + + //tooltip + $this->handler->startElement('span', array('class'=>'tip')); + $this->handler->characters($mod->getChanges()); + $this->handler->endElement('span'); + + $changeStarted = true; + $changeTXT = $mod->getChanges(); + } else if (!$remStarted && $mod->getType() == Modification::REMOVED) { + $attrs = array('class'=>'diff-html-removed'); + if ($mod->isFirstOfID()) { + $attrs['id'] = 'removed-' . $this->prefix . '-' . $mod->getID(); + } + $this->addAttributes($mod, $attrs); + $attrs['onclick'] = 'return tipR(constructToolTipR(this));'; + $this->handler->startElement('span', $attrs); + $remStarted = true; + } + + $chars = $child->getText(); + + if ($child instanceof ImageNode) { + $this->writeImage($child); + } else { + $this->handler->characters($chars); + } + + } + } + + if ($newStarted) { + $this->handler->endElement('span'); + $newStarted = false; + } else if ($changeStarted) { + $this->handler->endElement('span'); + $changeStarted = false; + } else if ($remStarted) { + $this->handler->endElement('span'); + $remStarted = false; + } + + if (0 != strcasecmp($node->getQName(),'img') + && 0 != strcasecmp($node->getQName(),'body')) + $this->handler->endElement($node->getQName()); + } + + private function writeImage(ImageNode $imgNode){ + $attrs = $imgNode->getAttributes(); + if ($imgNode->getModification()->getType() == Modification::REMOVED) + $attrs['changeType']='diff-removed-image'; + else if ($imgNode->getModification()->getType() == Modification::ADDED) + $attrs['changeType'] = 'diff-added-image'; + $attrs['onload'] = 'updateOverlays()'; + $attrs['onError'] = 'updateOverlays()'; + $attrs['onAbort'] = 'updateOverlays()'; + $this->handler->startElement('img', $attrs); + $this->handler->endElement('img'); + } + + private function addAttributes(Modification $mod, /*array*/ &$attrs) { + if (is_null($mod->getPrevious())) { + $previous = 'first-' . $this->prefix; + } else { + $previous = Modification::typeToString($mod->getPrevious()->getType()) . '-' . $this->prefix . '-' + . $mod->getPrevious()->getID(); + } + $attrs['previous'] = $previous; + + $changeId = Modification::typeToString($mod->getType()) . '-' + $this->prefix . '-' . $mod->getID(); + $attrs['changeId'] = $changeId; + + if (is_null($mod->getNext())) { + $next = 'last-' . $this->prefix; + } else { + $next = Modification::typeToString($mod->getNext()->getType()) . '-' . $this->prefix . '-' + . $mod->getNext()->getID(); + } + $attrs['next'] = $next; + } +} + +class EchoingContentHandler{ + + function startElement($qname, /*array*/ $arguments){ + echo '<'.$qname; + foreach($arguments as $key => $value){ + echo ' '.$key.'="'.Sanitizer::encodeAttribute($value).'"'; + } + echo '>'; + } + + function endElement($qname){ + echo ''; + } + + function characters($chars){ + echo $chars; + } + +} + +class DelegatingContentHandler{ + + private $delegate; + + function __construct($delegate){ + $this->delegate=$delegate; + } + + function startElement($qname, /*array*/ $arguments){ + $this->delegate->addHtml('<'.$qname) ; + foreach($arguments as $key => $value){ + $this->delegate->addHtml(' '.$key.'="'. Sanitizer::encodeAttribute($value) .'"'); + } + $this->delegate->addHtml('>'); + } + + function endElement($qname){ + $this->delegate->addHtml(''); + } + + function characters($chars){ + $this->delegate->addHtml( $chars ); + } + +} diff --git a/skins/common/diff.css b/skins/common/diff.css index b262222a1b..d2075ba89b 100644 --- a/skins/common/diff.css +++ b/skins/common/diff.css @@ -74,3 +74,66 @@ table.diff td div { table: */ /* overflow: visible; */ } + +/* + * Styles for the HTML Diff + */ +span.diff-html-added { + font-size: 100%; + background-color: #20ff20 +} + +span.diff-html-removed { + font-size: 100%; + text-decoration: line-through; + background-color: #ff2020 +} + +span.diff-html-changed { + background: url(images/diffunderline.gif) bottom repeat-x; + *background-color: #c6c6fd; /* light blue */ +} + +span.diff-html-added img{ + border: 5px solid #ccffcc; +} + +span.diff-html-removed img{ + border: 5px solid #fdc6c6; +} + +span.diff-html-changed img{ + border: 5px dotted #000099; + +} + +span.diff-html-changed { + position: relative; /* this is key */ + cursor: help; +} + +span.diff-html-changed span.tip { + display: none; /* so is this */ +} + +/* tooltip will display on :hover event */ + +span.diff-html-changed:hover span.tip { + display: block; + z-index: 95; + position: absolute; + top: 2.5em; + left: 0; + width: auto; + line-height: 1.2em; + padding: 3px 7px 4px 6px; + border: 1px solid #336; + background-color: #f7f7ee; + font-family: arial, helvetica, sans-serif; + font-size: 10px; + font-weight: normal; + color: #000; + text-align: left; +} + + diff --git a/skins/common/diff.js b/skins/common/diff.js index e80a895c0b..3362f48108 100644 --- a/skins/common/diff.js +++ b/skins/common/diff.js @@ -17,4 +17,16 @@ if (navigator && navigator.product == "Gecko" && navigator.productSub < "2002113 lastSheet.insertRule( "table.diff td div { overflow: visible; }", lastSheet.cssRules.length); -} \ No newline at end of file +} + +function tipA(content){ + return false; +} + +function tipR(content){ + return false; +} + +function tipC(content){ + return false; +} diff --git a/skins/common/images/diffunderline.gif b/skins/common/images/diffunderline.gif new file mode 100644 index 0000000000000000000000000000000000000000..e062c560de82f9be73b048ee9aacf2afdb15fdd9 GIT binary patch literal 52 zcmZ?wbhEHbWMN=tn8?8J|NsBn|Nk>E%w%9-Q2fclBEs;WK?lfY0Le2jv9_oxrn53w F0|1fU43Gc- literal 0 HcmV?d00001 -- 2.20.1