*/
/**
- * Implementation of the Diff algorithm.
- *
- * DO NOT USE ON PRODUCTION SYSTEMS
- *
- * The algorithm is based on the O(NP) LCS algorithm as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm"
- * and extended with my own ideas.
+ * This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which
+ * in turn is based on Myers' "An O(ND) difference algorithm and its variations"
+ * (http://citeseer.ist.psu.edu/myers86ond.html) with range compression (see Wu et al.'s
+ * "An O(NP) Sequence Comparison Algorithm").
*
- * @return array($from_removed, $to_added)
- * TRUE if the token was removed or added.
+ * This implementation supports an upper bound on the excution time.
+ *
+ * Complexity: O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M + N) space
*
* @author Guy Van den Broeck
+ *
*/
-function wikidiff3_diff( /*array*/ $from, /*array*/ $to, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
- wfProfileIn( __METHOD__ );
-
- $m = sizeof($from);
- $n = sizeof($to);
-
- $result_from = array_fill(0,$m,TRUE);
- $result_to = array_fill(0,$n,TRUE);
-
- //reduce the complexity for the next step (intentionally done twice)
- //remove common tokens at the start
- $i=0;
- while($i<$m && $i<$n && $from[$i]===$to[$i]){
- $result_from[$i] = $result_to[$i] = FALSE;
- unset($from[$i],$to[$i]);
- ++$i;
- }
+class WikiDiff3{
- //remove common tokens at the end
- $j=1;
- while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
- $result_from[$m-$j] = $result_to[$n-$j] = FALSE;
- unset($from[$m-$j],$to[$n-$j]);
- ++$j;
+ //Input variables
+ private $from;
+ private $to;
+ private $m;
+ private $n;
+
+ private $tooLong;
+ private $powLimit;
+
+ //State variables
+ private $maxDifferences;
+
+ //Output variables
+ public $length;
+ public $added;
+ public $removed;
+ public $heuristicUsed;
+
+ function __construct($tooLong = 2000000, $powLimit = 1.45){
+ $this->tooLong = $tooLong;
+ $this->powLimit = $powLimit;
}
- $newFrom = $newFromIndex = $newTo = $newToIndex = array();
+ public function diff(/*array*/ $from, /*array*/ $to){
+ $m = sizeof($from);
+ $n = sizeof($to);
+
+ $this->heuristicUsed = FALSE;
+
+ $result_from = $m>0?array_fill(0,$m,true):array();
+ $result_to = $n>0?array_fill(0,$n,true):array();
+
+ //reduce the complexity for the next step (intentionally done twice)
+ //remove common tokens at the start
+ $i=0;
+ while($i<$m && $i<$n && $from[$i]===$to[$i]){
+ $result_from[$i] = $result_to[$i] = false;
+ unset($from[$i],$to[$i]);
+ ++$i;
+ }
- //remove tokens not in both sequences
- $shared = array_fill_keys($from,FALSE);
- foreach($to as $index => $el){
- if(array_key_exists($el,$shared)){
- //keep it
- $shared[$el] = TRUE;
- $newTo[] = $el;
- $newToIndex[] = $index;
+ //remove common tokens at the end
+ $j=1;
+ while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
+ $result_from[$m-$j] = $result_to[$n-$j] = false;
+ unset($from[$m-$j],$to[$n-$j]);
+ ++$j;
}
- }
- foreach($from as $index => $el){
- if($shared[$el]){
- //keep it
- $newFrom[] = $el;
- $newFromIndex[] = $index;
+
+ $newFrom = $newFromIndex = $newTo = $newToIndex = array();
+
+ //remove tokens not in both sequences
+ $shared = array_fill_keys($from,false);
+ foreach($to as $index => $el){
+ if(array_key_exists($el,$shared)){
+ //keep it
+ $this->to[] = $el;
+ $shared[$el] = true;
+ $newToIndex[] = $index;
+ }
+ }
+ foreach($from as $index => $el){
+ if($shared[$el]){
+ //keep it
+ $this->from[] = $el;
+ $newFromIndex[] = $index;
+ }
}
- }
- unset($from, $to, $shared);
+ unset($shared);
- $m2 = sizeof($newFrom);
- $n2 = sizeof($newTo);
- $offsetx = $offsety = 0;
- $from_inLcs = new InLcs($m2);
- $to_inLcs = new InLcs($n2);
+ $this->m = sizeof($this->from);
+ $this->n = sizeof($this->to);
+ $this->removed = $this->m>0?array_fill(0,$this->m,true):array();
+ $this->added = $this->n>0?array_fill(0,$this->n,true):array();
- wikidiff3_diffPart($newFrom, $newTo, $from_inLcs, $to_inLcs, $m2, $n2, $offsetx, $offsety, $m2, $boundRunningTime, $max_NP_before_bound);
+ $this->longestCommonSubsequence();
- foreach($from_inLcs->inLcs as $key => $in){
- if($in){
- $result_from[$newFromIndex[$key]] = FALSE;
+ $this->m = $m;
+ $this->n = $n;
+ $this->length += $i+$j-1;
+
+ foreach($this->removed as $key => $removed){
+ if(!$removed){
+ $result_from[$newFromIndex[$key]] = false;
+ }
}
- }
- foreach($to_inLcs->inLcs as $key => $in){
- if($in){
- $result_to[$newToIndex[$key]] = FALSE;
+ foreach($this->added as $key => $added){
+ if(!$added){
+ $result_to[$newToIndex[$key]] = false;
+ }
}
+ unset($this->added, $this->removed);
+ return array($result_from, $result_to);
}
-
- wfProfileOut( __METHOD__ );
- return array($result_from, $result_to);
-}
-function wikidiff3_diffPart( /*array*/ $a, /*array*/ $b, InLcs $a_inLcs, InLcs $b_inLcs, $m, $n, $offsetx, $offsety, $bestKnownLcs, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
- if($bestKnownLcs==0 || $m==0 || $n==0){
- return;
- }
-
- if($m>$n){
- return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
- }
+ public function longestCommonSubsequence() {
+ if ($this->m == 0 || $this->n == 0) {
+ $this->length = 0;
+ return;
+ }
- $a_inLcs_sym = &$a_inLcs->inLcs;
- $b_inLcs_sym = &$b_inLcs->inLcs;
-
- //remove common tokens at the start
- while($m>0 && $n>0 && $a[0]===$b[0]){
- $a_inLcs_sym[$offsetx] = $b_inLcs_sym[$offsety] = TRUE;
- ++$offsetx; ++$offsety;
- --$m; --$n;
- --$bestKnownLcs;
- array_shift($a);
- array_shift($b);
- }
+ $this->maxDifferences = ceil(($this->m + $this->n) / 2.0);
+ if ($this->m * $this->n > $this->tooLong) {
+ // limit complexity to D^POW_LIMIT for long sequences
+ $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0));
+ wfDebug("Limiting max number of differences to $this->maxDifferences");
+ }
- //remove common tokens at the end
- while($m>0 && $n>0 && $a[$m-1]===$b[$n-1]){
- $a_inLcs_sym[$offsetx+$m-1] = $b_inLcs_sym[$offsety+$n-1] = TRUE;
- --$m; --$n;
- --$bestKnownLcs;
- array_pop($a);
- array_pop($b);
- }
+ /*
+ * The common prefixes and suffixes are always part of some LCS, include
+ * them now to reduce our search space
+ */
+ $max = min($this->m, $this->n);
+ for ($forwardBound = 0; $forwardBound < $max
+ && $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) {
+ $this->removed[$forwardBound] = $this->added[$forwardBound] = false;
+ }
- if($bestKnownLcs==0 || $m==0 || $n==0){
- return;
- }
- if($m>$n){
- return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
+ $backBoundL1 = $this->m - 1;
+ $backBoundL2 = $this->n - 1;
+
+ while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound
+ && $this->from[$backBoundL1] === $this->to[$backBoundL2]) {
+ $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
+ }
+
+ $temp = array_fill(0,$this->m + $this->n + 1, 0);
+ $V = array($temp,$temp);
+ $snake = array(0,0,0);
+
+ $this->length = $forwardBound + $this->m - $backBoundL1 - 1
+ + $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2,
+ $V, $snake);
}
- $delta = $n-$m;
- $delta_plus_1 = $delta+1;
- $delta_min_1 = $delta-1;
-
- $fpForw = $snakeBeginForw = $snakeEndForw = array_fill( 0, $delta_plus_1, -1);
- $lcsSizeForw = $snakekForw = $lcsSizeBack = $snakekBack = array_fill( 0, $delta_plus_1, 0);
- $fpBack = $snakeBeginBack = $snakeEndBack = array_fill( 0, $delta_plus_1, $n);
- $overlap = $delta>1 ? array_fill( 1, $delta_min_1, FALSE) : array();
- $bestLcsLength = $bestLcsLengthTop = $bestLcsLengthBottom = -1;
-
- if($boundRunningTime){
- $maxp_before_bound = max((-$delta+sqrt($delta*$delta+($max_NP_before_bound<<2)))>>1,2);
- if($maxp_before_bound>=$m){
- $boundRunningTime = false;
- unset($maxp_before_bound);
+ private function lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) {
+ // check that both sequences are non-empty
+ if ($bottoml1 > $topl1 || $bottoml2 > $topl2) {
+ return 0;
}
+
+ $d = $this->find_middle_snake($bottoml1, $topl1, $bottoml2, $topl2, $V, $snake);
+
+ // need to store these so we don't lose them when they're overwritten by
+ // the recursion
+ $len = $snake[2];
+ $startx = $snake[0];
+ $starty = $snake[1];
+
+ // the middle snake is part of the LCS, store it
+ for ($i = 0; $i < $len; ++$i) {
+ $this->removed[$startx + $i] = $this->added[$starty + $i] = false;
+ }
+
+ if ($d > 1) {
+ return $len
+ + $this->lcs_rec($bottoml1, $startx - 1, $bottoml2, $starty - 1, $V, $snake)
+ + $this->lcs_rec($startx + $len, $topl1, $starty + $len, $topl2, $V, $snake);
+ } else if ($d == 1) {
+ /*
+ * In this case the sequences differ by exactly 1 line. We have
+ * already saved all the lines after the difference in the for loop
+ * above, now we need to save all the lines before the difference.
+ */
+ $max = min($startx - $bottoml1, $starty - $bottoml2);
+ for ($i = 0; $i < $max; ++$i) {
+ $this->removed[$bottoml1 + $i] = $this->added[$bottoml2 + $i] = false;
+ }
+ return $max + $len;
+ }
+ return $len;
}
- $p=-1;
- $m_min_1 = $m-1;
- $maxp=$m_min_1-$bestLcsLength;
-
- while($p<$maxp){
-
- if($boundRunningTime && $p>$maxp_before_bound){
- // bound the running time by stopping early
- if($bestLcsLength>=0){
- // accept the best LCS thusfar
- break;
- }else{
- $bestLcsProgressForw = $bestkForw = 0;
- foreach($lcsSizeForw as $k => $localLcsProgress){
- if($localLcsProgress>$bestLcsProgressForw || ($localLcsProgress==$bestLcsProgressForw && $bestkForw > $k)){
- $bestLcsProgressForw = $localLcsProgress;
- $bestkForw = $k;
+ private function find_middle_snake($bottoml1, $topl1, $bottoml2,$topl2, &$V, &$snake) {
+ $from = &$this->from;
+ $to = &$this->to;
+ $V0 = &$V[0];
+ $V1 = &$V[1];
+ $snake0 = &$snake[0];
+ $snake1 = &$snake[1];
+ $snake2 = &$snake[2];
+ $bottoml1_min_1 = $bottoml1-1;
+ $bottoml2_min_1 = $bottoml2-1;
+ $N = $topl1 - $bottoml1_min_1;
+ $M = $topl2 - $bottoml2_min_1;
+ $delta = $N - $M;
+ $maxabsx = $N+$bottoml1;
+ $maxabsy = $M+$bottoml2;
+ $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2)
+
+ //value_to_add_forward: a 0 or 1 that we add to the start
+ // offset
+ // to make it odd/even
+ if (($M & 1) == 1) {
+ $value_to_add_forward = 1;
+ } else {
+ $value_to_add_forward = 0;
+ }
+
+ if (($N & 1) == 1) {
+ $value_to_add_backward = 1;
+ } else {
+ $value_to_add_backward = 0;
+ }
+
+ $start_forward = -$M;
+ $end_forward = $N;
+ $start_backward = -$N;
+ $end_backward = $M;
+
+ $limit_min_1 = $limit-1;
+ $limit_plus_1 = $limit+1;
+
+ $V0[$limit_plus_1] = 0;
+ $V1[$limit_min_1] = $N;
+ $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2)
+
+ if (($delta & 1) == 1) {
+ for ($d = 0; $d <= $limit; ++$d) {
+ $start_diag = max($value_to_add_forward + $start_forward, -$d);
+ $end_diag = min($end_forward, $d);
+ $value_to_add_forward = 1 - $value_to_add_forward;
+
+ // compute forward furthest reaching paths
+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+ if ($k == -$d
+ || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
+ $x = $V0[$limit_plus_1 + $k];
+ } else {
+ $x = $V0[$limit_min_1 + $k] + 1;
}
- }
- $bestLcsProgressBack = $bestkBack = 0;
- foreach($lcsSizeBack as $k => $localLcsProgress){
- if($localLcsProgress>$bestLcsProgressBack || ($localLcsProgress==$bestLcsProgressBack && $bestkBack < $k)){
- $bestLcsProgressBack = $localLcsProgress;
- $bestkBack = $k;
+
+ $absx = $snake0 = $x + $bottoml1;
+ $absy = $snake1 = $x - $k + $bottoml2;
+
+ while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
+ ++$absx;
+ ++$absy;
}
- }
- if($fpForw[$bestkForw]-$bestkForw>$fpBack[$bestkBack]-$bestkBack){
- // This is hard, maybe try some more? Can this even happen? A solution will be found soon anyway.
- }else{
- // lets do this
- $topSnakeStart = $snakeBeginForw[$bestkForw];
- $topSnakeEnd = $snakeEndForw[$bestkForw];
- $topSnakek = $snakekForw[$bestkForw];
- $bestLcsLengthTop = $lcsSizeForw[$bestkBack] + $topSnakeStart - $topSnakeEnd;
-
- $bottomSnakeStart = $snakeEndBack[$bestkBack]+1;
- $bottomSnakeEnd = $snakeBeginBack[$bestkBack]+1;
- $bottomSnakek = $snakekBack[$bestkBack];
- $bestLcsLengthBottom = $lcsSizeBack[$bestkBack] + $bottomSnakeStart - $bottomSnakeEnd;
-
- if($bottomSnakeStart-$topSnakeEnd>$n*0.6 && ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek)>$m*0.6){
- // cut the sequence from both sides (there isn't enough progress, 60% of the sequence is not covered)
- // also diff the middle now
- if($bottomSnakeEnd>($fpForw[$bestkForw]>>1)){
- // do the middle diff between the snakes
- wfDebug(" Limiting diff run time from both sides, middle between snakes\n");
- $m_t = ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek);
- $n_t = $bottomSnakeStart-$topSnakeEnd;
- $a_t = array_slice($a, $topSnakeEnd-$topSnakek, $m_t);
- $b_t = array_slice($b, $topSnakeEnd, $n_t);
- $offsetx_t = $offsetx+($topSnakeEnd-$topSnakek);
- $offsety_t = $offsety+$topSnakeEnd;
- }else{
- // the snake is too early in the sequence, do the middle diff between endpoints of progress
- wfDebug(" Limiting diff run time from both sides, middle between endpoints\n");
- $m_t = ($fpBack[$bestkBack]+1-$bestkBack)-($fpForw[$bestkForw]-$bestkForw);
- $n_t = $fpBack[$bestkBack]+1-$fpForw[$bestkForw];
- $a_t = array_slice($a, $fpForw[$bestkForw]-$bestkForw, $m_t);
- $b_t = array_slice($b, $fpForw[$bestkForw], $n_t);
- $offsetx_t = $offsetx+($fpForw[$bestkForw]-$bestkForw);
- $offsety_t = $offsety+$fpForw[$bestkForw];
- }
- wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $n_t, $offsetx_t, $offsety_t, $m_t, $boundRunningTime, $max_NP_before_bound);
- }else if(min($n-$bottomSnakeStart, $m-($bottomSnakeStart-$bottomSnakek))>min($topSnakeEnd, $topSnakeEnd-$topSnakek)){
- // the bottom snake is more interesting
- wfDebug("Limiting diff run time from bottom side\n");
- $topSnakeStart = $bottomSnakeStart;
- $topSnakeEnd = $bottomSnakeEnd;
- $topSnakek = $bottomSnakek;
- $bestLcsLengthTop = $topSnakeEnd-$topSnakek;
- $bottomSnakeStart = $bottomSnakeEnd;
- }else{
- // the top snake is more interesting
- wfDebug(" Limiting diff run time from top side\n");
- $bottomSnakeStart = $topSnakeEnd;
- $bottomSnakeEnd = $topSnakeEnd;
- $bottomSnakek = $topSnakek;
- $bestLcsLengthBottom = $m-($bottomSnakeEnd-$bottomSnakek);
+ $x = $absx-$bottoml1;
+
+ $snake2 = $absx -$snake0;
+ $V0[$limit + $k] = $x;
+ if ($k >= $delta - $d + 1 && $k <= $delta + $d - 1
+ && $x >= $V1[$limit + $k - $delta]) {
+ return 2 * $d - 1;
+ }
+
+ // check to see if we can cut down the diagonal range
+ if ($x >= $N && $end_forward > $k - 1) {
+ $end_forward = $k - 1;
+ } else if ($absy-$bottoml2 >= $M) {
+ $start_forward = $k + 1;
+ $value_to_add_forward = 0;
}
- break;
- }
- }
- }
- ++$p;
- $overlap[-$p] = $overlap[$delta+$p] = FALSE;
-
- $min_p_min_1 = -$p-1;
- $delta_plus_1_plus_p = $delta_plus_1+$p;
-
- // forward
- $fpForw[$min_p_min_1] = $snakeEndForw[$min_p_min_1] = $snakekForw[$min_p_min_1] = $snakeBeginForw[$min_p_min_1] = $fpForw[$delta_plus_1_plus_p] =
- $snakeBeginForw[$delta_plus_1_plus_p] = $snakeEndForw[$delta_plus_1_plus_p] = $snakekForw[$delta_plus_1_plus_p] = -1;
- $lcsSizeForw[$min_p_min_1] = $lcsSizeForw[$delta_plus_1_plus_p] = 0;
-
- $k=-$p;
- do {
- $k_plus_1 = $k+1;
- $k_min_1 = $k-1;
-
- $fpBelow = $fpForw[$k_min_1]+1;
- $fpAbove = $fpForw[$k_plus_1];
- $y = &$fpForw[$k];
- if($fpBelow>$fpAbove){
- $oldy = $y = $fpBelow;
- $lcsSizeForw[$k] = $lcsSizeForw[$k_min_1];
- $snakeBeginForw[$k] = $snakeBeginForw[$k_min_1];
- $snakeEndForw[$k] = $snakeEndForw[$k_min_1];
- $snakekForw[$k] = $snakekForw[$k_min_1];
- }else{
- $oldy = $y = $fpAbove;
- $lcsSizeForw[$k] = $lcsSizeForw[$k_plus_1];
- $snakeBeginForw[$k] = $snakeBeginForw[$k_plus_1];
- $snakeEndForw[$k] = $snakeEndForw[$k_plus_1];
- $snakekForw[$k] = $snakekForw[$k_plus_1];
- }
- $x = $y-$k;
- while($x < $m && $y < $n && $a[$x]===$b[$y]){
- ++$x;
- ++$y;
- }
- if($y-$oldy>0){
- $lcsSizeForw[$k] += $y-$oldy;
- $snakeBeginForw[$k] = $oldy;
- $snakeEndForw[$k]= $y;
- $snakekForw[$k] = $k;
- }
- if($y>=$fpBack[$k] && !$overlap[$k]){
- // there is overlap
- $overlap[$k]=TRUE;
- $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
- if($y>$fpBack[$k]+1){
- $snakeoverlap = $y-$fpBack[$k]-1;
- $lcsLength -= $snakeoverlap;
}
- if($lcsLength>$bestLcsLength){
- // a better sequence found!
- $bestLcsLength = $lcsLength;
-
- $topSnakeStart = $snakeBeginForw[$k];
- $topSnakeEnd = $snakeEndForw[$k];
- $topSnakek = $snakekForw[$k];
-
- // aligned snakes bite (\ )
- // ( \ \)
- $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
- $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
- $bottomSnakek = $snakekBack[$k];
-
- if($bottomSnakeEnd<$y){
- $bottomSnakeStart = $y;
- $bottomSnakeEnd = $y;
- $bottomSnakek = $k;
+
+ $start_diag = max($value_to_add_backward + $start_backward, -$d);
+ $end_diag = min($end_backward, $d);
+ $value_to_add_backward = 1 - $value_to_add_backward;
+
+ // compute backward furthest reaching paths
+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+ if ($k == $d
+ || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
+ $x = $V1[$limit_min_1 + $k];
+ } else {
+ $x = $V1[$limit_plus_1 + $k] - 1;
}
- $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
- $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
- if($bestKnownLcs==$lcsLength){
- $fpForw[$k]=$y;
- break 2;
+ $y = $x - $k - $delta;
+
+ $snake2 = 0;
+ while ($x > 0 && $y > 0
+ && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
+ --$x;
+ --$y;
+ ++$snake2;
+ }
+ $V1[$limit + $k] = $x;
+
+ // check to see if we can cut down our diagonal range
+ if ($x <= 0) {
+ $start_backward = $k + 1;
+ $value_to_add_backward = 0;
+ } else if ($y <= 0 && $end_backward > $k - 1) {
+ $end_backward = $k - 1;
}
- $maxp=$m_min_1-$bestLcsLength;
}
}
- if($k<$delta_min_1){
- ++$k;
- }else if($k>$delta){
- --$k;
- }else if($k==$delta_min_1){
- $k = $delta+$p;
- }else{
- break;
- }
- } while(TRUE);
-
- // backward
- $fpBack[$min_p_min_1] = $snakeBeginBack[$min_p_min_1] = $snakeEndBack[$min_p_min_1] = $snakekBack[$min_p_min_1] =
- $fpBack[$delta_plus_1_plus_p] = $snakeBeginBack[$delta_plus_1_plus_p] = $snakeEndBack[$delta_plus_1_plus_p] = $snakekBack[$delta_plus_1_plus_p] = $n;
- $lcsSizeBack[$min_p_min_1] = $lcsSizeBack[$delta_plus_1_plus_p] = 0;
-
- $k=$delta+$p;
- do {
- $k_plus_1 = $k+1;
- $k_min_1 = $k-1;
-
- $fpBelow = $fpBack[$k_min_1];
- $fpAbove = $fpBack[$k_plus_1]-1;
- $y = &$fpBack[$k];
- if($fpBelow<$fpAbove){
- $oldy = $y = $fpBelow;
- $lcsSizeBack[$k] = $lcsSizeBack[$k_min_1];
- $snakeBeginBack[$k] = $snakeBeginBack[$k_min_1];
- $snakeEndBack[$k] = $snakeEndBack[$k_min_1];
- $snakekBack[$k] = $snakekBack[$k_min_1];
- }else{
- $oldy = $y = $fpAbove;
- $lcsSizeBack[$k] = $lcsSizeBack[$k_plus_1];
- $snakeBeginBack[$k] = $snakeBeginBack[$k_plus_1];
- $snakeEndBack[$k] = $snakeEndBack[$k_plus_1];
- $snakekBack[$k] = $snakekBack[$k_plus_1];
- }
- $x = $y-$k;
- while($x > -1 && $y > -1 && $a[$x]===$b[$y]){
- --$x;
- --$y;
- }
- if($oldy-$y>0){
- $lcsSizeBack[$k] += $oldy-$y;
- $snakeBeginBack[$k] = $oldy;
- $snakeEndBack[$k] = $y;
- $snakekBack[$k] = $k;
- }
- if($fpForw[$k]>=$y && !$overlap[$k]){
- // there is overlap
- $overlap[$k]=TRUE;
- $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
- if($fpForw[$k]>$y+1){
- $snakeoverlap = $fpForw[$k]-$y-1;
- $lcsLength -= $snakeoverlap;
+ } else {
+ for ($d = 0; $d <= $limit; ++$d) {
+ $start_diag = max($value_to_add_forward + $start_forward, -$d);
+ $end_diag = min($end_forward, $d);
+ $value_to_add_forward = 1 - $value_to_add_forward;
+
+ // compute forward furthest reaching paths
+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+ if ($k == -$d
+ || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
+ $x = $V0[$limit_plus_1 + $k];
+ } else {
+ $x = $V0[$limit_min_1 + $k] + 1;
+ }
+
+ $absx = $snake0 = $x + $bottoml1;
+ $absy = $snake1 = $x - $k + $bottoml2;
+
+ while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
+ ++$absx;
+ ++$absy;
+ }
+ $x = $absx-$bottoml1;
+ $snake2 = $absx -$snake0;
+ $V0[$limit + $k] = $x;
+
+ // check to see if we can cut down the diagonal range
+ if ($x >= $N && $end_forward > $k - 1) {
+ $end_forward = $k - 1;
+ } else if ($absy-$bottoml2 >= $M) {
+ $start_forward = $k + 1;
+ $value_to_add_forward = 0;
+ }
}
- if($lcsLength>$bestLcsLength){
- // a better sequence found!
- $bestLcsLength = $lcsLength;
-
- $topSnakeStart = $snakeBeginForw[$k];
- $topSnakeEnd = $snakeEndForw[$k];
- $topSnakek = $snakekForw[$k];
-
- // aligned snakes bite (\ )
- // ( \ \)
- $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
- $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
- $bottomSnakek = $snakekBack[$k];
-
- if($bottomSnakeEnd<$fpForw[$k]){
- $bottomSnakeStart = $fpForw[$k];
- $bottomSnakeEnd = $fpForw[$k];
- $bottomSnakek = $k;
+
+ $start_diag = max($value_to_add_backward + $start_backward, -$d);
+ $end_diag = min($end_backward, $d);
+ $value_to_add_backward = 1 - $value_to_add_backward;
+
+ // compute backward furthest reaching paths
+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+ if ($k == $d
+ || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
+ $x = $V1[$limit_min_1 + $k];
+ } else {
+ $x = $V1[$limit_plus_1 + $k] - 1;
+ }
+
+ $y = $x - $k - $delta;
+
+ $snake2 = 0;
+ while ($x > 0 && $y > 0
+ && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
+ --$x;
+ --$y;
+ ++$snake2;
}
+ $V1[$limit + $k] = $x;
- $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
- $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
- if($bestKnownLcs==$lcsLength){
- $fpBack[$k] = $y;
- break 2;
+ if ($k >= -$delta - $d && $k <= $d - $delta
+ && $x <= $V0[$limit + $k + $delta]) {
+ $snake0 = $bottoml1 + $x;
+ $snake1 = $bottoml2 + $y;
+ return 2 * $d;
+ }
+
+ // check to see if we can cut down our diagonal range
+ if ($x <= 0) {
+ $start_backward = $k + 1;
+ $value_to_add_backward = 0;
+ } else if ($y <= 0 && $end_backward > $k - 1) {
+ $end_backward = $k - 1;
}
- $maxp=$m_min_1-$bestLcsLength;
}
}
- if($k>1){
- --$k;
- }else if($k<0){
- ++$k;
- }else if($k==1){
- $k = -$p;
- }else{
- break;
- }
- } while(TRUE);
+ }
+ /*
+ * computing the true LCS is too expensive, instead find the diagonal
+ * with the most progress and pretend a midle snake of length 0 occurs
+ * there.
+ */
+
+ $most_progress = self::findMostProgress($M, $N, $limit, $V);
+
+ $snake0 = $bottoml1 + $most_progress[0];
+ $snake1 = $bottoml2 + $most_progress[1];
+ $snake2 = 0;
+ wfDebug('Computing the LCS is too expensive. Using a heuristic.');
+ $this->heuristicUsed = true;
+ return 5; /*
+ * HACK: since we didn't really finish the LCS computation
+ * we don't really know the length of the SES. We don't do
+ * anything with the result anyway, unless it's <=1. We know
+ * for a fact SES > 1 so 5 is as good a number as any to
+ * return here
+ */
}
- unset($fpForw, $fpBack, $lcsSizeForw, $lcsSizeBack);
- unset($snakeBeginForw, $snakeBeginBack, $snakeEndForw, $snakeEndBack, $snakekForw, $snakekBack);
- unset($overlap);
+ private static function findMostProgress($M, $N, $limit, $V) {
+ $delta = $N - $M;
- // Mark snakes as in LCS
- $maxi = $offsetx+$topSnakeEnd-$topSnakek;
- for($i=$offsetx+$topSnakeStart-$topSnakek;$i<$maxi;++$i){
- $a_inLcs_sym[$i] = TRUE;
- }
- $maxi = $offsety+$topSnakeEnd;
- for($i=$offsety+$topSnakeStart;$i<$maxi;++$i){
- $b_inLcs_sym[$i] = TRUE;
- }
- $maxi = $offsetx+$bottomSnakeEnd-$bottomSnakek;
- for($i=$offsetx+$bottomSnakeStart-$bottomSnakek;$i<$maxi;++$i){
- $a_inLcs_sym[$i] = TRUE;
- }
- $maxi = $offsety+$bottomSnakeEnd;
- for($i=$offsety+$bottomSnakeStart;$i<$maxi;++$i){
- $b_inLcs_sym[$i] = TRUE;
- }
+ if (($M & 1) == ($limit & 1)) {
+ $forward_start_diag = max(-$M, -$limit);
+ } else {
+ $forward_start_diag = max(1 - $M, -$limit);
+ }
- $m_t = $topSnakeStart-$topSnakek;
- $a_t = array_slice($a, 0, $m_t);
- $b_t = array_slice($b, 0, $topSnakeStart);
+ $forward_end_diag = min($N, $limit);
- $m_b = $m-($bottomSnakeEnd-$bottomSnakek);
- $n_b = $n-$bottomSnakeEnd;
- $a_b = array_slice($a, $bottomSnakeEnd-$bottomSnakek, $m_b);
- $b_b = array_slice($b, $bottomSnakeEnd, $n_b);
+ if (($N & 1) == ($limit & 1)) {
+ $backward_start_diag = max(-$N, -$limit);
+ } else {
+ $backward_start_diag = max(1 - $N, -$limit);
+ }
- wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $topSnakeStart, $offsetx, $offsety, $bestLcsLengthTop, $boundRunningTime, $max_NP_before_bound);
+ $backward_end_diag = -min($M, $limit);
- wikidiff3_diffPart($a_b, $b_b, $a_inLcs, $b_inLcs, $m_b, $n_b, $offsetx+($bottomSnakeEnd-$bottomSnakek), $offsety+$bottomSnakeEnd, $bestLcsLengthBottom, $boundRunningTime, $max_NP_before_bound);
-}
+ $temp = array(0,0,0);
-class InLcs {
- public $inLcs;
+ $max_progress = array_fill(0,ceil(max($forward_end_diag
+ - $forward_start_diag, $backward_end_diag - $backward_start_diag) / 2), $temp);
+ $num_progress = 0; // the 1st entry is current, it is initialized
+ // with 0s
- function __construct($length){
- $this->inLcs = $length>0 ? array_fill(0,$length,FALSE): array();
- }
+ // first search the forward diagonals
+ for ($k = $forward_start_diag; $k <= $forward_end_diag; $k += 2) {
+ $x = $V[0][$limit + $k];
+ $y = $x - $k;
+ if ($x > $N || $y > $M) {
+ continue;
+ }
+
+ $progress = $x + $y;
+ if ($progress > $max_progress[0][2]) {
+ $num_progress = 0;
+ $max_progress[0][0] = $x;
+ $max_progress[0][1] = $y;
+ $max_progress[0][2] = $progress;
+ } else if ($progress == $max_progress[0][2]) {
+ ++$num_progress;
+ $max_progress[$num_progress][0] = $x;
+ $max_progress[$num_progress][1] = $y;
+ $max_progress[$num_progress][2] = $progress;
+ }
+ }
+
+ $max_progress_forward = true; // initially the maximum
+ // progress is in the forward
+ // direction
- /**
- * Get the length of the Longest Common Subsequence (computed)
- */
- public function getLcsLength(){
- return array_sum($this->inLcs);
+ // now search the backward diagonals
+ for ($k = $backward_start_diag; $k <= $backward_end_diag; $k += 2) {
+ $x = $V[1][$limit + $k];
+ $y = $x - $k - $delta;
+ if ($x < 0 || $y < 0) {
+ continue;
+ }
+
+ $progress = $N - $x + $M - $y;
+ if ($progress > $max_progress[0][2]) {
+ $num_progress = 0;
+ $max_progress_forward = false;
+ $max_progress[0][0] = $x;
+ $max_progress[0][1] = $y;
+ $max_progress[0][2] = $progress;
+ } else if ($progress == $max_progress[0][2] && !$max_progress_forward) {
+ ++$num_progress;
+ $max_progress[$num_progress][0] = $x;
+ $max_progress[$num_progress][1] = $y;
+ $max_progress[$num_progress][2] = $progress;
+ }
+ }
+
+ // return the middle diagonal with maximal progress.
+ return $max_progress[floor($num_progress / 2)];
}
}
+
--- /dev/null
+<?php
+/* Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * or see http://www.gnu.org/
+ */
+
+
+/**
+ * Any element in the DOM tree of an HTML document.
+ */
+abstract class Node{
+
+ protected $parent;
+
+
+ function __construct($parent){
+ $this->parent = $parent;
+ if(!is_null($parent)){
+ $parent->addChild($this);
+ }
+ }
+
+ public function getParent(){
+ return $this->parent;
+ }
+
+ public function getParentTree(){
+ if(!is_null($this->parent)){
+ $parentTree = $this->parent->getParentTree();
+ $parentTree[] = $this->parent;
+ return $parentTree;
+ }else{
+ return array();
+ }
+ }
+
+ public abstract function getMinimalDeletedSet($id);
+
+ public function detectIgnorableWhiteSpace(){
+ // no op
+ }
+
+ public function getLastCommonParent(Node $other){
+ if(is_null($other))
+ throw new Exception('The given node is NULL');
+
+ $result = new LastCommonParentResult();
+
+ $myParents = $this->getParentTree();
+ $otherParents = $other->getParentTree();
+
+ $i = 1;
+ $isSame = true;
+ while ($isSame && $i < sizeof($myParents) && $i < sizeof($otherParents)) {
+ if (!$myParents[$i]->isSameTag($otherParents[$i])) {
+ $isSame = false;
+ } else {
+ // After the while, the index i-1 must be the last common parent
+ $i++;
+ }
+ }
+
+ $result->setLastCommonParentDepth($i - 1);
+ $result->setLastCommonParent($myParents[$i - 1]);
+
+ if (!$isSame) {
+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
+ $result->setSplittingNeeded();
+ } else if (sizeof($myParents) < sizeof($otherParents)) {
+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
+ } else if (sizeof($myParents) > sizeof($otherParents)) {
+ // All tags matched but there are tags left in this tree
+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
+ $result->setSplittingNeeded();
+ } else {
+ // All tags matched untill the very last one in both trees
+ // or there were no tags besides the BODY
+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
+ }
+ return $result;
+ }
+
+ public function setParent($parent) {
+ $this->parent = $parent;
+ }
+
+ public abstract function copyTree();
+
+ public function inPre() {
+ $tree = $this->getParentTree();
+ foreach ($tree as $ancestor) {
+ if ($ancestor->isPre()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private $whiteBefore = false;
+
+ private $whiteAfter = false;
+
+ public function isWhiteBefore() {
+ return $this->whiteBefore;
+ }
+
+ public function setWhiteBefore($whiteBefore) {
+ $this->whiteBefore = $whiteBefore;
+ }
+
+ public function isWhiteAfter() {
+ return $this->whiteAfter;
+ }
+
+ public function setWhiteAfter($whiteAfter) {
+ $this->whiteAfter = $whiteAfter;
+ }
+
+ public abstract function getLeftMostChild();
+
+ public abstract function getRightMostChild();
+}
+
+/**
+ * Node that can contain other nodes. Represents an HTML tag.
+ */
+class TagNode extends Node{
+
+ public $children = array();
+
+ protected $qName;
+
+ protected $attributes = array();
+
+ function __construct($parent, $qName, /*array*/ $attributes) {
+ parent::__construct($parent);
+ $this->qName = strtolower($qName);
+ foreach($attributes as $key => $value){
+ $this->attributes[strtolower($key)] = $value;
+ }
+ }
+
+ public function addChild(Node $node, $index=-1) {
+ if ($node->getParent() !== $this)
+ throw new Exception(
+ 'The new child must have this node as a parent.');
+ if($index == -1){
+ $this->children[] = $node;
+ }else{
+ array_splice($this->children,$index,0,array($node));
+ }
+ }
+
+ public function getIndexOf(Node $child) {
+ // don't trust array_search with objects
+ foreach($this->children as $key=>$value){
+ if($value === $child){
+ return $key;
+ }
+ }
+ return NULL;
+ }
+
+ public function getChild($i) {
+ return $this->children[$i];
+ }
+
+ public function getNbChildren() {
+ return count($this->children);
+ }
+
+ public function getQName() {
+ return $this->qName;
+ }
+
+ public function getAttributes() {
+ return $this->attributes;
+ }
+
+ public function isSameTag(TagNode $other) {
+ if (is_null($other))
+ return false;
+ return $this->getOpeningTag() === $other->getOpeningTag();
+ }
+
+ public function getOpeningTag() {
+ $s = '<'.$this->getQName();
+ foreach ($this->attributes as $attribute => $value) {
+ $s .= ' ' . $attribute . '="' . $value . '"';
+ }
+ return $s .= '>';
+ }
+
+ public function getEndTag() {
+ return '</' . $this->getQName() + '>"';
+ }
+
+ public function getMinimalDeletedSet($id) {
+ $nodes = array();
+
+ if ($this->getNbChildren() == 0)
+ return $nodes;
+
+ $hasNotDeletedDescendant = false;
+
+ foreach ($this->children as $child) {
+ $childrenChildren = $child->getMinimalDeletedSet($id);
+ $nodes = array_merge($nodes, $childrenChildren);
+ if (!$hasNotDeletedDescendant
+ && !(count($childrenChildren) == 1 && $childrenChildren[0]===$child)) {
+ // This child is not entirely deleted
+ $hasNotDeletedDescendant = true;
+ }
+ }
+ if (!$hasNotDeletedDescendant) {
+ $nodes = array($this);
+ }
+ return $nodes;
+ }
+
+ public function toString() {
+ return $this->getOpeningTag();
+ }
+
+ public function splitUntill(TagNode $parent, Node $split, $includeLeft) {
+ $splitOccured = false;
+ if ($parent !== $this) {
+ $part1 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+ $part2 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+ $part1->setParent($this->getParent());
+ $part2->setParent($this->getParent());
+
+ $i = 0;
+ $nbChildren = $this->getNbChildren();
+ while ($i < $nbChildren && $this->children[$i] !== $split) {
+ $this->children[$i]->setParent($part1);
+ $part1->addChild($this->children[$i]);
+ ++$i;
+ }
+ if ($i < $nbChildren) {
+ if ($includeLeft) {
+ $this->children[$i]->setParent($part1);
+ $part1->addChild($this->children[$i]);
+ } else {
+ $this->children[$i]->setParent($part2);
+ $part2->addChild($this->children[$i]);
+ }
+ ++$i;
+ }
+ while ($i < $nbChildren) {
+ $this->children[$i]->setParent($part2);
+ $part2->addChild($this->children[$i]);
+ ++$i;
+ }
+ $myindexinparent = $this->parent->getIndexOf($this);
+ if ($part1->getNbChildren() > 0)
+ $this->parent->addChild($part1,$myindexinparent);
+
+ if ($part2->getNbChildren() > 0)
+ $this->parent->addChild($part2,$myindexinparent);
+
+ if ($part1->getNbChildren() > 0 && $part2->getNbChildren() > 0) {
+ $splitOccured = true;
+ }
+
+ $this->parent->removeChild($myindexinparent);
+
+ if ($includeLeft)
+ $this->parent->splitUntill($parent, $part1, $includeLeft);
+ else
+ $this->parent->splitUntill($parent, $part2, $includeLeft);
+ }
+ return $splitOccured;
+
+ }
+
+ private function removeChild($index) {
+ unset($this->children[$index]);
+ $this->children = array_values($this->children);
+ }
+
+ public static $blocks = array('html'=>TRUE,'body'=>TRUE,'p'=>TRUE,'blockquote'=>TRUE,
+ 'h1'=>TRUE,'h2'=>TRUE,'h3'=>TRUE,'h4'=>TRUE,'h5'=>TRUE,'pre'=>TRUE,'div'=>TRUE,'ul'=>TRUE,'ol'=>TRUE,'li'=>TRUE,
+ 'table'=>TRUE,'tbody'=>TRUE,'tr'=>TRUE,'td'=>TRUE,'th'=>TRUE,'br'=>TRUE);
+
+ public static function isBlockLevelName($qName) {
+ return array_key_exists(strtolower($qName),self::$blocks);
+ }
+
+ public static function isBlockLevelNode(Node $node) {
+ if(! $node instanceof TagNode)
+ return false;
+ return self::isBlockLevelName($node->getQName());
+ }
+
+ public function isBlockLevel() {
+ return  self::isBlockLevelNode($this);
+ }
+
+ public static function isInlineName($qName) {
+ return !self::isBlockLevelName($qName);
+ }
+
+ public static function isInlineNode(Node $node) {
+ return !self::isBlockLevelNode($node);
+ }
+
+ public function isInline() {
+ return self::isInlineNode($this);
+ }
+
+ public function copyTree() {
+ $newThis = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+ $newThis->setWhiteBefore($this->isWhiteBefore());
+ $newThis->setWhiteAfter($this->isWhiteAfter());
+ foreach($this->children as $child) {
+ $newChild = $child->copyTree();
+ $newChild->setParent($newThis);
+ $newThis->addChild($newChild);
+ }
+ return $newThis;
+ }
+
+ public function getMatchRatio(TagNode $other) {
+ $txtComp = new TextOnlyComparator($other);
+ return $txtComp->getMatchRatio(new TextOnlyComparator($this));
+ }
+
+ public function expandWhiteSpace() {
+ $shift = 0;
+ $spaceAdded = false;
+
+ $nbOriginalChildren = $this->getNbChildren();
+ for ($i = 0; $i < $nbOriginalChildren; ++$i) {
+ $child = $this->getChild($i + $shift);
+
+ if($child instanceof TagNode){
+ if (!$child->isPre()) {
+ $child->expandWhiteSpace();
+ }
+ }
+ if (!$spaceAdded && $child->isWhiteBefore()) {
+ $ws = new WhiteSpaceNode(NULL, ' ', $child->getLeftMostChild());
+ $ws->setParent($this);
+ $this->addChild($ws,$i + ($shift++));
+ }
+ if ($child->isWhiteAfter()) {
+ $ws = new WhiteSpaceNode(NULL, ' ', $child->getRightMostChild());
+ $ws->setParent($this);
+ $this->addChild($ws,$i + 1 + ($shift++));
+ $spaceAdded = true;
+ } else {
+ $spaceAdded = false;
+ }
+
+ }
+ }
+
+ public function getLeftMostChild() {
+ if ($this->getNbChildren() < 1)
+ return $this;
+ return $this->getChild(0)->getLeftMostChild();
+
+ }
+
+ public function getRightMostChild() {
+ if ($this->getNbChildren() < 1)
+ return $this;
+ return $this->getChild($this->getNbChildren() - 1)->getRightMostChild();
+ }
+
+ public function isPre() {
+ return 0 == strcasecmp($this->getQName(),'pre');
+ }
+
+ public static function toDiffLine(TagNode $node){
+ return $node->getOpeningTag();
+ }
+}
+
+/**
+ * Represents a piece of text in the HTML file.
+ */
+class TextNode extends Node{
+
+ private $s;
+
+ private $modification;
+
+ function __construct($parent, $s) {
+ parent::__construct($parent);
+ $this->modification = new Modification(Modification::NONE);
+ $this->s = $s;
+ }
+
+ public function copyTree() {
+ $clone = clone $this;
+ $clone->setParent(NULL);
+ return $clone;
+ }
+
+ public function getLeftMostChild() {
+ return $this;
+ }
+
+ public function getRightMostChild() {
+ return $this;
+ }
+
+ public function getMinimalDeletedSet($id) {
+ if ($this->getModification()->getType() == Modification::REMOVED
+ && $this->getModification()->getID() == $id){
+ return array($this);
+ }
+ return array();
+ }
+
+ public function getModification() {
+ return $this->modification;
+ }
+
+ public function getText() {
+ return $this->s;
+ }
+
+ public function isSameText($other) {
+ if (is_null($other) || ! $other instanceof TextNode){
+ return false;
+ }
+ return str_replace('\n', ' ',$this->getText()) === str_replace('\n', ' ',$other->getText());
+ }
+
+ public function setModification(Modification $m) {
+ //wfDebug("Registered modification for node '$this->s' as ".Modification::typeToString($m->getType()));
+ $this->modification = $m;
+ }
+
+ public function toString() {
+ return $this->getText();
+ }
+
+ public static function toDiffLine(TextNode $node){
+ return str_replace('\n', ' ',$node->getText());
+ }
+}
+
+class WhiteSpaceNode extends TextNode {
+
+ function __construct($parent, $s, Node $like = NULL) {
+ parent::__construct($parent, $s);
+ if(!is_null($like) && $like instanceof TextNode){
+ $newModification = clone $like->getModification();
+ $newModification->setFirstOfID(false);
+ $this->setModification($newModification);
+ }
+ }
+
+ public static function isWhiteSpace($c) {
+ switch ($c) {
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\n':
+ return true;
+ default:
+ return false;
+ }
+ }
+}
+
+/**
+ * Represents the root of a HTML document.
+ */
+class BodyNode extends TagNode {
+
+ function __construct() {
+ parent::__construct(NULL, 'body', array());
+ }
+
+ public function copyTree() {
+ $newThis = new BodyNode();
+ foreach ($this->children as $child) {
+ $newChild = $child->copyTree();
+ $newChild->setParent($newThis);
+ $newThis->addChild($newChild);
+ }
+ return $newThis;
+ }
+
+ public function getMinimalDeletedSet($id) {
+ $nodes = array();
+ foreach ($this->children as $child) {
+ $childrenChildren = $child->getMinimalDeletedSet($id);
+ $nodes = array_merge($nodes, $childrenChildren);
+ }
+ return $nodes;
+ }
+
+}
+
+/**
+ * Represents an image in HTML. Even though images do not contain any text they
+ * are independent visible objects on the page. They are logically a TextNode.
+ */
+class ImageNode extends TextNode {
+
+ private $attributes;
+
+ function __construct(TagNode $parent, /*array*/ $attrs) {
+ if(!array_key_exists('src',$attrs)){
+ //wfDebug('Image without a source:');
+ foreach($attrs as $key => $value){
+ //wfDebug("$key = $value");
+ }
+ parent::__construct($parent, '<img></img>');
+ }else{
+ parent::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
+ }
+ $this->attributes = $attrs;
+ }
+
+ public function isSameText($other) {
+ if (is_null($other) || ! $other instanceof ImageNode)
+ return false;
+ return $this->getText() === $other->getText();
+ }
+
+ public function getAttributes() {
+ return $this->attributes;
+ }
+
+}
+
+/**
+ * When detecting the last common parent of two nodes, all results are stored as
+ * a LastCommonParentResult.
+ */
+class LastCommonParentResult {
+
+ // Parent
+ private $parent;
+
+ public function getLastCommonParent() {
+ return $this->parent;
+ }
+
+ public function setLastCommonParent(TagNode $parent) {
+ $this->parent = $parent;
+ }
+
+ // Splitting
+ private $splittingNeeded = false;
+
+ public function isSplittingNeeded() {
+ return $this->splittingNeeded;
+ }
+
+ public function setSplittingNeeded() {
+ $this->splittingNeeded = true;
+ }
+
+ // Depth
+ private $lastCommonParentDepth = -1;
+
+ public function getLastCommonParentDepth() {
+ return $this->lastCommonParentDepth;
+ }
+
+ public function setLastCommonParentDepth($depth) {
+ $this->lastCommonParentDepth = $depth;
+ }
+
+ // Index
+ private $indexInLastCommonParent = -1;
+
+ public function getIndexInLastCommonParent() {
+ return $this->indexInLastCommonParent;
+ }
+
+ public function setIndexInLastCommonParent($index) {
+ $this->indexInLastCommonParent = $index;
+ }
+}
+
+class Modification{
+
+ const NONE = 1;
+ const REMOVED = 2;
+ const ADDED = 4;
+ const CHANGED = 8;
+
+ private $type;
+
+ private $id = -1;
+
+ private $prevMod;
+
+ private $nextMod;
+
+ private $firstOfID = false;
+
+ function __construct($type) {
+ $this->type = $type;
+ }
+
+ public function copy() {
+ $newM = new Modification($this->getType());
+ $newM->setID($this->getID());
+ $newM->setChanges($this->getChanges());
+ $newM->setFirstOfID($this->isFirstOfID());
+ $newM->setNext($this->getNext());
+ $newM->setPrevious($this->getPrevious());
+ return $newM;
+ }
+
+ public function getType() {
+ return $this->type;
+ }
+
+ public function setID($id) {
+ $this->id = $id;
+ }
+
+ public function getID() {
+ return $this->id;
+ }
+
+ public function setPrevious($m) {
+ $this->prevMod = $m;
+ }
+
+ public function getPrevious() {
+ return $this->prevMod;
+ }
+
+ public function setNext($m) {
+ $this->nextMod = $m;
+ }
+
+ public function getNext() {
+ return $this->nextMod;
+ }
+
+ private $changes;
+
+ public function setChanges($changes) {
+ $this->changes = $changes;
+ }
+
+ public function getChanges() {
+ return $this->changes;
+ }
+
+ public function isFirstOfID() {
+ return $this->firstOfID;
+ }
+
+ public function setFirstOfID($firstOfID) {
+ $this->firstOfID = $firstOfID;
+ }
+
+ public static function typeToString($type){
+ switch($type){
+ case self::NONE: return 'none';
+ case self::REMOVED: return 'removed';
+ case self::ADDED: return 'added';
+ case self::CHANGED: return 'changed';
+ }
+ }
+}
+
+class DomTreeBuilder {
+
+ private $textNodes = array();
+
+ private $bodyNode;
+
+ private $currentParent;
+
+ private $newWord = "";
+
+ protected $bodyStarted = false;
+
+ protected $bodyEnded = false;
+
+ private $whiteSpaceBeforeThis = false;
+
+ private $lastSibling;
+
+ function __construct(){
+ $this->bodyNode = $this->currentParent = new BodyNode();
+ }
+
+ public function getBodyNode() {
+ return $this->bodyNode;
+ }
+
+ public function getTextNodes() {
+ return $this->textNodes;
+ }
+
+ /**
+ * Must be called manually
+ */
+ public function endDocument() {
+ $this->endWord();
+ //wfDebug(sizeof($this->textNodes) . ' text nodes in document.');
+ }
+
+ public function startElement($parser, $name, /*array*/ $attributes) {
+ if(!strcasecmp($name, 'body')==0){
+ //wfDebug("Starting $name node.");
+ $this->endWord();
+
+ $newTagNode = new TagNode($this->currentParent, $name, $attributes);
+ $this->currentParent = $newTagNode;
+ $this->lastSibling = NULL;
+ if ($this->whiteSpaceBeforeThis && $newTagNode->isInline()) {
+ $newTagNode->setWhiteBefore(true);
+ }
+ $this->whiteSpaceBeforeThis = false;
+ }
+ }
+
+ public function endElement($parser, $name) {
+ if(!strcasecmp($name, 'body')==0){
+ //wfDebug("Ending $name node.");
+ if (0 == strcasecmp($name,'img')) {
+ // Insert a dummy leaf for the image
+ $img = new ImageNode($this->currentParent, $this->currentParent->getAttributes());
+ $img->setWhiteBefore($this->whiteSpaceBeforeThis);
+ $this->lastSibling = $img;
+ $this->textNodes[] = $img;
+ }
+ $this->endWord();
+ if ($this->currentParent->isInline()) {
+ $this->lastSibling = $this->currentParent;
+ } else {
+ $this->lastSibling = NULL;
+ }
+ $this->currentParent = $this->currentParent->getParent();
+ $this->whiteSpaceBeforeThis = false;
+ }else{
+ $this->endDocument();
+ }
+ }
+
+ public function characters($parser, $data){
+ //wfDebug('Parsing '. strlen($data).' characters.');
+ $array = str_split($data);
+ foreach($array as $c) {
+ if (self::isDelimiter($c)) {
+ $this->endWord();
+ if (WhiteSpaceNode::isWhiteSpace($c) && !$this->currentParent->isPre()
+ && !$this->currentParent->inPre()) {
+ if (!is_null($this->lastSibling)){
+ $this->lastSibling->setWhiteAfter(true);
+ }
+ $this->whiteSpaceBeforeThis = true;
+ } else {
+ $textNode = new TextNode($this->currentParent, $c);
+ $textNode->setWhiteBefore($this->whiteSpaceBeforeThis);
+ $this->whiteSpaceBeforeThis = false;
+ $this->lastSibling = $textNode;
+ $this->textNodes[] = $textNode;
+ }
+ } else {
+ $this->newWord .= $c;
+ }
+
+ }
+ }
+
+ private function endWord() {
+ if (strlen($this->newWord) > 0) {
+ $node = new TextNode($this->currentParent, $this->newWord);
+ $node->setWhiteBefore($this->whiteSpaceBeforeThis);
+ $this->whiteSpaceBeforeThis = false;
+ $this->lastSibling = $node;
+ $this->textNodes[] = $node;
+ $this->newWord = "";
+ }
+ }
+
+ public static function isDelimiter($c) {
+ switch ($c) {
+ // Basic Delimiters
+ case '/':
+ case '.':
+ case '!':
+ case ',':
+ case ';':
+ case '?':
+ case '=':
+ case '\'':
+ case '"':
+ // Extra Delimiters
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '(':
+ case ')':
+ case '&':
+ case '|':
+ case '\\':
+ case '-':
+ case '_':
+ case '+':
+ case '*':
+ case ':':
+ return true;
+ default:
+ return WhiteSpaceNode::isWhiteSpace($c);
+ }
+ }
+
+ public function getDiffLines(){
+ return array_map(array('TextNode','toDiffLine'), $this->textNodes);
+ }
+}
+
+class TextNodeDiffer{
+
+ private $textNodes;
+ private $bodyNode;
+
+ private $oldTextNodes;
+ private $oldBodyNode;
+
+ private $lastModified = array();
+
+ function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
+ $this->textNodes = $tree->getTextNodes();
+ $this->bodyNode = $tree->getBodyNode();
+ $this->oldTextNodes = $oldTree->getTextNodes();
+ $this->oldBodyNode = $oldTree->getBodyNode();
+ }
+
+ public function getBodyNode() {
+ return $this->bodyNode;
+ }
+
+ private $newID = 0;
+
+ public function markAsNew($start, $end) {
+ if ($end <= $start)
+ return;
+
+ if ($this->whiteAfterLastChangedPart)
+ $this->textNodes[$start]->setWhiteBefore(false);
+
+ $nextLastModified = array();
+
+ for ($i = $start; $i < $end; ++$i) {
+ $mod = new Modification(Modification::ADDED);
+ $mod->setID($this->newID);
+ if (sizeof($this->lastModified) > 0) {
+ $mod->setPrevious($this->lastModified[0]);
+ if (is_null($this->lastModified[0]->getNext())) {
+ foreach ($this->lastModified as $lastMod) {
+ $lastMod->setNext($mod);
+ }
+ }
+ }
+ $nextLastModified[] = $mod;
+ $this->textNodes[$i]->setModification($mod);
+ }
+ if ($start < $end) {
+ $this->textNodes[$start]->getModification()->setFirstOfID(true);
+ }
+ ++$this->newID;
+ $this->lastModified = $nextLastModified;
+ }
+
+ private $changedID = 0;
+
+ private $changedIDUsed = false;
+
+ public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
+ $i = $rightstart;
+ $j = $leftstart;
+
+ if ($this->changedIDUsed) {
+ ++$this->changedID;
+ $this->changedIDUsed = false;
+ }
+
+ $nextLastModified = array();
+
+ $changes;
+ while ($i < $rightend) {
+ $acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
+ $acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
+ $result = $acthis->getResult($acother);
+ unset($acthis, $acother);
+
+ $nbLastModified = sizeof($this->lastModified);
+ if ($result->isChanged()) {
+ $mod = new Modification(Modification::CHANGED);
+
+ if (!$this->changedIDUsed) {
+ $mod->setFirstOfID(true);
+ if (sizeof($nextLastModified) > 0) {
+ $this->lastModified = $nextLastModified;
+ $nextLastModified = array();
+ }
+ } else if (!is_null($result->getChanges()) && $result->getChanges() != $this->changes) {
+ ++$this->changedID;
+ $mod->setFirstOfID(true);
+ if (sizeof($nextLastModified) > 0) {
+ $this->lastModified = $nextLastModified;
+ $nextLastModified = array();
+ }
+ }
+
+ if ($nbLastModified > 0) {
+ $mod->setPrevious($this->lastModified[0]);
+ if (is_null($this->lastModified[0]->getNext())) {
+ foreach ($this->lastModified as $lastMod) {
+ $lastMod->setNext($mod);
+ }
+ }
+ }
+ $nextLastModified[] = $mod;
+
+ $mod->setChanges($result->getChanges());
+ $mod->setID($this->changedID);
+
+ $this->textNodes[$i]->setModification($mod);
+ $this->changes = $result->getChanges();
+ $this->changedIDUsed = true;
+ } else if ($this->changedIDUsed) {
+ ++$this->changedID;
+ $this->changedIDUsed = false;
+ }
+ ++$i;
+ ++$j;
+ }
+ if (sizeof($nextLastModified) > 0){
+ $this->lastModified = $nextLastModified;
+ }
+ }
+
+ // used to remove the whitespace between a red and green block
+ private $whiteAfterLastChangedPart = false;
+
+ private $deletedID = 0;
+
+ public function markAsDeleted($start, $end, $before) {
+
+ if ($end <= $start)
+ return;
+
+ if ($before > 0 && $this->textNodes[$before - 1]->isWhiteAfter()) {
+ $this->whiteAfterLastChangedPart = true;
+ } else {
+ $this->whiteAfterLastChangedPart = false;
+ }
+
+ $nextLastModified = array();
+
+ for ($i = $start; $i < $end; ++$i) {
+ $mod = new Modification(Modification::REMOVED);
+ $mod->setID($this->deletedID);
+ if (sizeof($this->lastModified) > 0) {
+ $mod->setPrevious($this->lastModified[0]);
+ if (is_null($this->lastModified[0]->getNext())) {
+ foreach ($this->lastModified as $lastMod) {
+ $lastMod->setNext($mod);
+ }
+ }
+ }
+ $nextLastModified[] = $mod;
+
+ // oldTextNodes is used here because we're going to move its deleted
+ // elements
+ // to this tree!
+ $this->oldTextNodes[$i]->setModification($mod);
+ }
+ $this->oldTextNodes[$start]->getModification()->setFirstOfID(true);
+
+ $deletedNodes = $this->oldBodyNode->getMinimalDeletedSet($this->deletedID);
+
+ //wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes));
+
+ // Set prevLeaf to the leaf after which the old HTML needs to be
+ // inserted
+ if ($before > 0){
+ $prevLeaf = $this->textNodes[$before - 1];
+ }
+ // Set nextLeaf to the leaf before which the old HTML needs to be
+ // inserted
+ if ($before < sizeof($this->textNodes)){
+ $nextLeaf = $this->textNodes[$before];
+ }
+
+ while (sizeof($deletedNodes) > 0) {
+ if (isset($prevLeaf)) {
+ $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]);
+ } else {
+ $prevResult = new LastCommonParentResult();
+ $prevResult->setLastCommonParent($this->getBodyNode());
+ $prevResult->setIndexInLastCommonParent(0);
+ }
+ if (isset($nextleaf)) {
+ $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[sizeof($deletedNodes) - 1]);
+ } else {
+ $nextResult = new LastCommonParentResult();
+ $nextResult->setLastCommonParent($this->getBodyNode());
+ $nextResult->setIndexInLastCommonParent($this->getBodyNode()->getNbChildren());
+ }
+
+ if ($prevResult->getLastCommonParentDepth() == $nextResult->getLastCommonParentDepth()) {
+ // We need some metric to choose which way to add-...
+ if ($deletedNodes[0]->getParent() === $deletedNodes[sizeof($deletedNodes) - 1]->getParent()
+ && $prevResult->getLastCommonParent() === $nextResult->getLastCommonParent()) {
+ // The difference is not in the parent
+ $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
+ } else {
+ // The difference is in the parent, so compare them
+ // now THIS is tricky
+ $distancePrev = $deletedNodes[0]->getParent()->getMatchRatio($prevResult->getLastCommonParent());
+ $distanceNext = $deletedNodes[sizeof($deletedNodes) - 1]->getParent()->getMatchRatio($nextResult->getLastCommonParent());
+
+ if ($distancePrev <= $distanceNext) {
+ $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
+ } else {
+ $nextResult->setLastCommonParentDepth($nextResult->getLastCommonParentDepth() + 1);
+ }
+ }
+
+ }
+
+ if ($prevResult->getLastCommonParentDepth() > $nextResult->getLastCommonParentDepth()) {
+ // Inserting at the front
+ if ($prevResult->isSplittingNeeded()) {
+ $prevLeaf->getParent()->splitUntill($prevResult->getLastCommonParent(), $prevLeaf, true);
+ }
+ $prevLeaf = $deletedNodes[0]->copyTree();
+ unset($deletedNodes[0]);
+ $deletedNodes = array_values($deletedNodes);
+ $prevLeaf->setParent($prevResult->getLastCommonParent());
+ $prevResult->getLastCommonParent()->addChild($prevLeaf,$prevResult->getIndexInLastCommonParent() + 1);
+ } else if ($prevResult->getLastCommonParentDepth() < $nextResult->getLastCommonParentDepth()) {
+ // Inserting at the back
+ if ($nextResult->isSplittingNeeded()) {
+ $splitOccured = $nextLeaf->getParent()->splitUntill($nextResult->getLastCommonParent(), $nextLeaf, false);
+ if ($splitOccured) {
+ // The place where to insert is shifted one place to the
+ // right
+ $nextResult->setIndexInLastCommonParent($nextResult->getIndexInLastCommonParent() + 1);
+ }
+ }
+ $nextLeaf = $deletedNodes[sizeof(deletedNodes) - 1]->copyTree();
+ unset($deletedNodes[sizeof(deletedNodes) - 1]);
+ $deletedNodes = array_values($deletedNodes);
+ $nextLeaf->setParent($nextResult->getLastCommonParent());
+ $nextResult->getLastCommonParent()->addChild($nextLeaf,$nextResult->getIndexInLastCommonParent());
+ } else
+ throw new Exception("Uh?");
+ }
+ $this->lastModified = $nextLastModified;
+ ++$this->deletedID;
+ }
+
+ public function expandWhiteSpace() {
+ $this->getBodyNode()->expandWhiteSpace();
+ }
+
+ public function lengthNew(){
+ return sizeof($this->textNodes);
+ }
+
+ public function lengthOld(){
+ return sizeof($this->oldTextNodes);
+ }
+}
+
+class HTMLDiffer{
+
+ private $output;
+
+ function __construct($output){
+ $this->output = $output;
+ }
+
+ function htmlDiff($from, $to){
+ // Create an XML parser
+ $xml_parser = xml_parser_create('');
+
+ $domfrom = new DomTreeBuilder();
+
+ // Set the functions to handle opening and closing tags
+ xml_set_element_handler($xml_parser, array($domfrom,"startElement"), array($domfrom,"endElement"));
+
+ // Set the function to handle blocks of character data
+ xml_set_character_data_handler($xml_parser, array($domfrom,"characters"));
+
+ ;
+ //wfDebug('Parsing '.strlen($from)." characters worth of HTML");
+ if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
+ || !xml_parse($xml_parser, $from, FALSE)
+ || !xml_parse($xml_parser, '</body>', TRUE)){
+ wfDebug(sprintf("XML error: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
+ }
+ xml_parser_free($xml_parser);
+ unset($from);
+
+ $xml_parser = xml_parser_create('');
+
+ $domto = new DomTreeBuilder();
+
+ // Set the functions to handle opening and closing tags
+ xml_set_element_handler($xml_parser, array($domto,"startElement"), array($domto,"endElement"));
+
+ // Set the function to handle blocks of character data
+ xml_set_character_data_handler($xml_parser, array($domto,"characters"));
+
+ //wfDebug('Parsing '.strlen($to)." characters worth of HTML");
+ if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
+ || !xml_parse($xml_parser, $to, FALSE)
+ || !xml_parse($xml_parser, '</body>', TRUE)){
+ wfDebug(sprintf("XML error in HTML diff: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
+ }
+ xml_parser_free($xml_parser);
+ unset($to);
+
+ $diffengine = new _DiffEngine();
+ $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
+ unset($xml_parser,$diffengine);
+
+ $domdiffer = new TextNodeDiffer($domto, $domfrom);
+
+ $currentIndexLeft = 0;
+ $currentIndexRight = 0;
+ foreach ($differences as $d) {
+ if ($d->leftstart > $currentIndexLeft) {
+ $domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart,
+ $currentIndexRight, $d->rightstart);
+ }
+ if ($d->leftlength > 0) {
+ $domdiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart);
+ }
+ $domdiffer->markAsNew($d->rightstart, $d->rightend);
+
+ $currentIndexLeft = $d->leftend;
+ $currentIndexRight = $d->rightend;
+ }
+ if ($currentIndexLeft < $domdiffer->lengthOld()) {
+ $domdiffer->handlePossibleChangedPart($currentIndexLeft,$domdiffer->lengthOld(), $currentIndexRight,$domdiffer->lengthNew());
+ }
+
+ $domdiffer->expandWhiteSpace();
+ $output = new HTMLOutput('htmldiff', $this->output);
+ $output->parse($domdiffer->getBodyNode());
+ }
+
+ private function preProcess(/*array*/ $differences){
+ $newRanges = array();
+
+ $nbDifferences = sizeof($differences);
+ for ($i = 0; $i < $nbDifferences; ++$i) {
+ $leftStart = $differences[$i]->leftstart;
+ $leftEnd = $differences[$i]->leftend;
+ $rightStart = $differences[$i]->rightstart;
+ $rightEnd = $differences[$i]->rightend;
+
+ $leftLength = $leftEnd - $leftStart;
+ $rightLength = $rightEnd - $rightStart;
+
+ while ($i + 1 < $nbDifferences && self::score($leftLength, $differences[$i + 1]->leftlength,
+ $rightLength, $differences[$i + 1]->rightlength) > ($differences[$i + 1]->leftstart - $leftEnd)) {
+ $leftEnd = $differences[$i + 1]->leftend;
+ $rightEnd = $differences[$i + 1]->rightend;
+ $leftLength = $leftEnd - $leftStart;
+ $rightLength = $rightEnd - $rightStart;
+ ++$i;
+ }
+ $newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
+ }
+ return $newRanges;
+ }
+
+ /**
+ * Heuristic to merge differences for readability.
+ */
+ public static function score($ll, $nll, $rl, $nrl) {
+ if (($ll == 0 && $nll == 0)
+ || ($rl == 0 && $nrl == 0)){
+ return 0;
+ }
+ $numbers = array($ll, $nll, $rl, $nrl);
+ $d = 0;
+ foreach ($numbers as $number) {
+ while ($number > 3) {
+ $d += 3;
+ $number -= 3;
+ $number *= 0.5;
+ }
+ $d += $number;
+
+ }
+ return $d / (1.5 * sizeof($numbers));
+ }
+
+}
+
+class TextOnlyComparator{
+
+ public $leafs = array();
+
+ function _construct(TagNode $tree) {
+ $this->addRecursive($tree);
+ $this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs);
+ }
+
+ private function addRecursive(TagNode $tree) {
+ foreach ($tree->children as $child) {
+ if ($child instanceof TagNode) {
+ $this->addRecursive($child);
+ } else if ($child instanceof TextNode) {
+ $this->leafs[] = $node;
+ }
+ }
+ }
+
+ public function getMatchRatio(TextOnlyComparator $other) {
+ $nbOthers = sizeof($other->leafs);
+ $nbThis = sizeof($this->leafs);
+ if($nbOthers == 0 || $nbThis == 0){
+ return -log(0);
+ }
+
+ $diffengine = new _DiffEngine();
+ $diffengine->diff_local($this->leafs, $other->leafs);
+
+ $distanceThis = array_sum($diffengine->xchanged);
+ $distanceOther = array_sum($diffengine->ychanged);
+
+ return ((0.0 + $distanceOther) / $nbOthers + (0.0 + $distanceThis)
+ / $nbThis) / 2.0;
+ }
+}
+
+class AncestorComparatorResult {
+
+ private $changed = false;
+
+ private $changes = "";
+
+ public function isChanged() {
+ return $this->changed;
+ }
+
+ public function setChanged($changed) {
+ $this->changed = $changed;
+ }
+
+ public function getChanges() {
+ return $this->changes;
+ }
+
+ public function setChanges($changes) {
+ $this->changes = $changes;
+ }
+}
+
+/**
+ * A comparator used when calculating the difference in ancestry of two Nodes.
+ */
+class AncestorComparator{
+
+ public $ancestors;
+ public $ancestorsText;
+
+ function __construct(/*array*/ $ancestors) {
+ $this->ancestors = $ancestors;
+ $this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors);
+ }
+
+ private $compareTxt = "";
+
+ public function getCompareTxt() {
+ return $this->compareTxt;
+ }
+
+ public function getResult(AncestorComparator $other) {
+ $result = new AncestorComparatorResult();
+
+ $diffengine = new _DiffEngine();
+ $differences = $diffengine->diff_range($this->ancestorsText, $other->ancestorsText);
+
+ if (sizeof($differences) == 0){
+ return $result;
+ }
+ $changeTxt = new ChangeTextGenerator($this, $other);
+
+ $result->setChanged(true);
+ $result->setChanges($changeTxt->getChanged($differences)->toString());
+
+ return $result;
+ }
+}
+
+class ChangeTextGenerator {
+
+ private $new;
+ private $old;
+
+ private $factory;
+
+ function __construct(AncestorComparator $old, AncestorComparator $new) {
+ $this->new = $new;
+ $this->old = $old;
+ $this->factory = new TagToStringFactory();
+ }
+
+ public function getChanged(/*array*/ $differences) {
+ $txt = new ChangeText;
+
+ $rootlistopened = false;
+
+ if (sizeof($differences) > 1) {
+ $txt->addHtml('<ul class="changelist">');
+ $rootlistopened = true;
+ }
+
+ $nbDifferences = sizeof($differences);
+ for ($j = 0; $j < $nbDifferences; ++$j) {
+ $d = $differences[$j];
+
+ $lvl1listopened = false;
+
+ if ($rootlistopened) {
+ $txt->addHtml('<li>');
+ }
+
+ if ($d->leftlength + $d->rightlength > 1) {
+ $txt->addHtml('<ul class="changelist">');
+ $lvl1listopened = true;
+ }
+
+ // left are the old ones
+ for ($i = $d->leftstart; $i < $d->leftend; ++$i) {
+ if ($lvl1listopened){
+ $txt->addHtml('<li>');
+ }
+ // add a bullet for a old tag
+ $this->addTagOld($txt, $this->old->ancestors[$i]);
+
+ if ($lvl1listopened){
+ $txt->addHtml('</li>');
+ }
+ }
+
+ // right are the new ones
+ for ($i = $d->rightstart; $i < $d->rightend; ++$i) {
+ if ($lvl1listopened){
+ $txt->addHtml('<li>');
+ }
+
+ // add a bullet for a new tag
+ $this->addTagNew($txt, $this->new->ancestors[$i]);
+
+ if ($lvl1listopened){
+ $txt->addHtml('</li>');
+ }
+
+ }
+
+ if ($lvl1listopened) {
+ $txt->addHtml('</ul>');
+ }
+
+ if ($rootlistopened) {
+ $txt->addHtml('</li>');
+ }
+ }
+
+ if ($rootlistopened) {
+ $txt->addHtml('</ul>');
+ }
+
+ return $txt;
+
+ }
+
+ private function addTagOld(ChangeText $txt, TagNode $ancestor) {
+ $this->factory->create($ancestor)->getRemovedDescription($txt);
+ }
+
+ private function addTagNew(ChangeText $txt, TagNode $ancestor) {
+ $this->factory->create($ancestor)->getAddedDescription($txt);
+ }
+}
+
+class ChangeText {
+
+ private $txt = "";
+
+ const newLine = "<br/>";
+
+ public function addText($s) {
+ $s = $this->clean($s);
+ $this->txt .= $s;
+ }
+
+ public function addHtml($s) {
+ $this->txt .= $s;
+ }
+
+ public function addNewLine() {
+ $this->addHtml(self::newLine);
+ }
+
+ public function toString() {
+ return $this->txt;
+ }
+
+ private function clean($s) {
+ return htmlspecialchars($s);
+ }
+}
+
+class TagToStringFactory {
+
+ private static $containerTags = array(
+ 'html' => TRUE,
+ 'body' => TRUE,
+ 'p' => TRUE,
+ 'blockquote' => TRUE,
+ 'h1' => TRUE,
+ 'h2' => TRUE,
+ 'h3' => TRUE,
+ 'h4' => TRUE,
+ 'h5' => TRUE,
+ 'pre' => TRUE,
+ 'div' => TRUE,
+ 'ul' => TRUE,
+ 'ol' => TRUE,
+ 'li' => TRUE,
+ 'table' => TRUE,
+ 'tbody' => TRUE,
+ 'tr' => TRUE,
+ 'td' => TRUE,
+ 'th' => TRUE,
+ 'br' => TRUE,
+ 'hr' => TRUE,
+ 'code' => TRUE,
+ 'dl' => TRUE,
+ 'dt' => TRUE,
+ 'dd' => TRUE,
+ 'input' => TRUE,
+ 'form' => TRUE,
+ 'img' => TRUE,
+ // in-line tags that can be considered containers not styles
+ 'span' => TRUE,
+ 'a' => TRUE
+ );
+
+ private static $styleTags = array(
+ 'i' => TRUE,
+ 'b' => TRUE,
+ 'strong' => TRUE,
+ 'em' => TRUE,
+ 'font' => TRUE,
+ 'big' => TRUE,
+ 'del' => TRUE,
+ 'tt' => TRUE,
+ 'sub' => TRUE,
+ 'sup' => TRUE,
+ 'strike' => TRUE
+ );
+
+ const MOVED = 1;
+ const STYLE = 2;
+ const UNKNOWN = 4;
+
+ public function create(TagNode $node) {
+ $sem = $this->getChangeSemantic($node->getQName());
+ if (0 == strcasecmp($node->getQName(),'a')){
+ return new AnchorToString($node, $sem);
+ }
+ if (0 == strcasecmp($node->getQName(),'img')){
+ return new NoContentTagToString($node, $sem);
+ }
+ return new TagToString($node, $sem);
+ }
+
+ protected function getChangeSemantic($qname) {
+ if (array_key_exists(strtolower($qname),self::$containerTags)){
+ return self::MOVED;
+ }
+ if (array_key_exists(strtolower($qname),self::$styleTags)){
+ return self::STYLE;
+ }
+ return self::UNKNOWN;
+ }
+}
+
+class TagToString {
+
+ protected $node;
+
+ protected $sem;
+
+ function __construct(TagNode $node, $sem) {
+ $this->node = $node;
+ $this->sem = $sem;
+ }
+
+ public function getDescription() {
+ return $this->getString('diff-' . $this->node->getQName());
+ }
+
+ public function getRemovedDescription(ChangeText $txt) {
+
+ if ($this->sem == TagToStringFactory::MOVED) {
+ $txt->addText($this->getMovedOutOf() . ' ' . strtolower($this->getArticle()) . ' ');
+ $txt->addHtml('<b>');
+ $txt->addText(strtolower($this->getDescription()));
+ $txt->addHtml('</b>');
+ } else if ($this->sem == TagToStringFactory::STYLE) {
+ $txt->addHtml('<b>');
+ $txt->addText($this->getDescription());
+ $txt->addHtml('</b>');
+ $txt->addText(' ' . strtolower($this->getStyleRemoved()));
+ } else {
+ $txt->addHtml('<b>');
+ $txt->addText($this->getDescription());
+ $txt->addHtml('</b>');
+ $txt->addText(' ' . strtolower($this->getRemoved()));
+ }
+ $this->addAttributes($txt, $this->node->getAttributes());
+ $txt->addText('.');
+ }
+
+ public function getAddedDescription(ChangeText $txt) {
+
+ if ($this->sem == TagToStringFactory::MOVED) {
+ $txt->addText($this->getMovedTo() . ' ' . strtolower($this->getArticle()) . ' ');
+ $txt->addHtml('<b>');
+ $txt->addText(strtolower($this->getDescription()));
+ $txt->addHtml('</b>');
+ } else if ($this->sem == TagToStringFactory::STYLE) {
+ $txt->addHtml('<b>');
+ $txt->addText($this->getDescription());
+ $txt->addHtml('</b>');
+ $txt->addText(' ' . strtolower($this->getStyleAdded()));
+ } else {
+ $txt->addHtml('<b>');
+ $txt->addText($this->getDescription());
+ $txt->addHtml('</b>');
+ $txt->addText(' ' . strtolower($this->getAdded()));
+ }
+ $this->addAttributes($txt, $this->node->getAttributes());
+ $txt->addText('.');
+ }
+
+ protected function getMovedTo() {
+ return $this->getString('diff-movedto');
+ }
+
+ protected function getStyleAdded() {
+ return $this->getString('diff-styleadded');
+ }
+
+ protected function getAdded() {
+ return $this->getString('diff-added');
+ }
+
+ protected function getMovedOutOf() {
+ return $this->getString('diff-movedoutof');
+ }
+
+ protected function getStyleRemoved() {
+ return $this->getString('diff-styleremoved');
+ }
+
+ protected function getRemoved() {
+ return $this->getString('diff-removed');
+ }
+
+ protected function addAttributes(ChangeText $txt, array $attributes) {
+ if (sizeof($attributes) < 1)
+ return;
+
+ $keys = array_keys($attributes);
+
+ $txt->addText(' ' . strtolower($this->getWith()) . ' '
+ . $this->translateArgument($keys[0]) . ' '
+ . $attributes[$keys[0]]);
+ for ($i = 1; $i < sizeof($attributes) - 1; $i++) {
+ $txt->addText(', ' . $this->translateArgument($keys[$i]) . ' '
+ . $attributes[$keys[$i]]);
+ }
+ if (sizeof($attributes) > 1) {
+ $txt->addText(' '
+ . strtolower($this->getAnd())
+ . ' '
+ . $this->translateArgument($keys[sizeof($attributes) - 1]) . ' '
+ . $attributes[$keys[sizeof($attributes) - 1]]);
+ }
+ }
+
+ private function getAnd() {
+ return $this->getString('diff-and');
+ }
+
+ private function getWith() {
+ return $this->getString('diff-with');
+ }
+
+ protected function translateArgument($name) {
+ if (0 == strcasecmp($name,'src'))
+ return strtolower($this->getSource());
+ if (0 == strcasecmp($name,'width'))
+ return strtolower($this->getWidth());
+ if (0 == strcasecmp($name,'height'))
+ return strtolower($this->getHeight());
+ return $name;
+ }
+
+ private function getHeight() {
+ return $this->getString('diff-height');
+ }
+
+ private function getWidth() {
+ return $this->getString('diff-width');
+ }
+
+ protected function getSource() {
+ return $this->getString('diff-source');
+ }
+
+ protected function getArticle() {
+ return $this->getString('diff-' . $this->node->getQName() . '-article');
+ }
+
+ public static $bundle = array(
+ 'diff-movedto' => 'Moved to',
+ 'diff-styleadded' => 'Style added',
+ 'diff-added' => 'Added',
+ 'diff-changedto' => 'Changed to',
+ 'diff-movedoutof' => 'Moved out of',
+ 'diff-styleremoved' => 'Style removed',
+ 'diff-removed' => 'Removed',
+ 'diff-changedfrom' => 'Changed from',
+ 'diff-source' => 'Source',
+ 'diff-withdestination' => 'With destination',
+ 'diff-and' => 'And',
+ 'diff-with' => 'With',
+ 'diff-width' => 'Width',
+ 'diff-height' => 'Height',
+ 'diff-html-article' => 'A',
+ 'diff-html' => 'Html page',
+ 'diff-body-article' => 'A',
+ 'diff-body' => 'Html document',
+ 'diff-p-article' => 'A',
+ 'diff-p' => 'Paragraph',
+ 'diff-blockquote-article' => 'A',
+ 'diff-blockquote' => 'Quote',
+ 'diff-h1-article' => 'A',
+ 'diff-h1' => 'Heading (level 1)',
+ 'diff-h2-article' => 'A',
+ 'diff-h2' => 'Heading (level 2)',
+ 'diff-h3-article' => 'A',
+ 'diff-h3' => 'Heading (level 3)',
+ 'diff-h4-article' => 'A',
+ 'diff-h4' => 'Heading (level 4)',
+ 'diff-h5-article' => 'A',
+ 'diff-h5' => 'Heading (level 5)',
+ 'diff-pre-article' => 'A',
+ 'diff-pre' => 'Preformatted block',
+ 'diff-div-article' => 'A',
+ 'diff-div' => 'Division',
+ 'diff-ul-article' => 'An',
+ 'diff-ul' => 'Unordered list',
+ 'diff-ol-article' => 'An',
+ 'diff-ol' => 'Ordered list',
+ 'diff-li-article' => 'A',
+ 'diff-li' => 'List item',
+ 'diff-table-article' => 'A',
+ 'diff-table' => 'Table',
+ 'diff-tbody-article' => 'A',
+ 'diff-tbody' => "Table's content",
+ 'diff-tr-article' => 'A',
+ 'diff-tr' => 'Row',
+ 'diff-td-article' => 'A',
+ 'diff-td' => 'Cell',
+ 'diff-th-article' => 'A',
+ 'diff-th' => 'Header',
+ 'diff-br-article' => 'A',
+ 'diff-br' => 'Break',
+ 'diff-hr-article' => 'A',
+ 'diff-hr' => 'Horizontal rule',
+ 'diff-code-article' => 'A',
+ 'diff-code' => 'Computer code block',
+ 'diff-dl-article' => 'A',
+ 'diff-dl' => 'Definition list',
+ 'diff-dt-article' => 'A',
+ 'diff-dt' => 'Definition term',
+ 'diff-dd-article' => 'A',
+ 'diff-dd' => 'Definition',
+ 'diff-input-article' => 'An',
+ 'diff-input' => 'Input',
+ 'diff-form-article' => 'A',
+ 'diff-form' => 'Form',
+ 'diff-img-article' => 'An',
+ 'diff-img' => 'Image',
+ 'diff-span-article' => 'A',
+ 'diff-span' => 'Span',
+ 'diff-a-article' => 'A',
+ 'diff-a' => 'Link',
+ 'diff-i' => 'Italics',
+ 'diff-b' => 'Bold',
+ 'diff-strong' => 'Strong',
+ 'diff-em' => 'Emphasis',
+ 'diff-font' => 'Font',
+ 'diff-big' => 'Big',
+ 'diff-del' => 'Deleted',
+ 'diff-tt' => 'Fixed width',
+ 'diff-sub' => 'Subscript',
+ 'diff-sup' => 'Superscript',
+ 'diff-strike' => 'Strikethrough'
+ );
+
+ public function getString($key) {
+ return self::$bundle[$key];
+ }
+}
+
+class NoContentTagToString extends TagToString {
+
+ function __construct(TagNode $node, $sem) {
+ parent::__construct($node, $sem);
+ }
+
+ public function getAddedDescription(ChangeText $txt) {
+ $txt.addText($this->getChangedTo() . ' ' + strtolower($this->getArticle()) . ' ');
+ $txt.addHtml('<b>');
+ $txt.addText(strtolower($this->getDescription()));
+ $txt.addHtml('</b>');
+
+ $this->addAttributes($txt, $this->node->getAttributes());
+ $txt.addText('.');
+ }
+
+ private function getChangedTo() {
+ return $this->getString('diff-changedto');
+ }
+
+ public function getRemovedDescription(ChangeText $txt) {
+ $txt.addText($this->getChangedFrom() . ' ' + strtolower($this->getArticle()) . ' ');
+ $txt.addHtml('<b>');
+ $txt.addText(strtolower($this->getDescription()));
+ $txt.addHtml('</b>');
+
+ $this->addAttributes($txt, $this->node->getAttributes());
+ $txt.addText('.');
+ }
+
+ private function getChangedFrom() {
+ return $this->getString('diff-changedfrom');
+ }
+}
+
+class AnchorToString extends TagToString {
+
+ function __construct(TagNode $node, $sem) {
+ parent::__construct($node, $sem);
+ }
+
+ protected function addAttributes(ChangeText $txt, array $attributes) {
+ if (array_key_exists('href',$attributes)) {
+ $txt->addText(' ' . strtolower($this->getWithDestination()) . ' ' . $attributes['href']);
+ unset($attributes['href']);
+ }
+ parent::addAttributes($txt, $attributes);
+ }
+
+ private function getWithDestination() {
+ return $this->getString('diff-withdestination');
+ }
+
+}
+
+/**
+ * Takes a branch root and creates an HTML file for it.
+ */
+class HTMLOutput{
+
+ private $prefix;
+ private $handler;
+
+ function __construct($prefix, $handler) {
+ $this->prefix = $prefix;
+ $this->handler = $handler;
+ }
+
+ public function parse(TagNode $node) {
+
+ if (0 != strcasecmp($node->getQName(),'img') && 0 != strcasecmp($node->getQName(),'body')) {
+ $this->handler->startElement($node->getQName(), $node->getAttributes());
+ }
+
+ $newStarted = false;
+ $remStarted = false;
+ $changeStarted = false;
+ $changeTXT = '';
+
+ foreach ($node->children as $child) {
+ if ($child instanceof TagNode) {
+ if ($newStarted) {
+ $this->handler->endElement('span');
+ $newStarted = false;
+ } else if ($changeStarted) {
+ $this->handler->endElement('span');
+ $changeStarted = false;
+ } else if ($remStarted) {
+ $this->handler->endElement('span');
+ $remStarted = false;
+ }
+ $this->parse($child);
+ } else if ($child instanceof TextNode) {
+ $mod = $child->getModification();
+
+ if ($newStarted && ($mod->getType() != Modification::ADDED || $mod->isFirstOfID())) {
+ $this->handler->endElement('span');
+ $newStarted = false;
+ } else if ($changeStarted && ($mod->getType() != Modification::CHANGED || $mod->getChanges() != $changeTXT || $mod->isFirstOfID())) {
+ $this->handler->endElement('span');
+ $changeStarted = false;
+ } else if ($remStarted && ($mod->getType() != Modification::REMOVED || $mod ->isFirstOfID())) {
+ $this->handler->endElement('span');
+ $remStarted = false;
+ }
+
+ // no else because a removed part can just be closed and a new
+ // part can start
+ if (!$newStarted && $mod->getType() == Modification::ADDED) {
+ $attrs = array('class'=>'diff-html-added');
+ if ($mod->isFirstOfID()) {
+ $attrs['id'] = 'added-' . $this->prefix . '-' . $mod->getID();
+ }
+ $this->addAttributes($mod, $attrs);
+ $attrs['onclick'] = 'return tipA(constructToolTipA(this));';
+ $this->handler->startElement('span', $attrs);
+ $newStarted = true;
+ } else if (!$changeStarted && $mod->getType() == Modification::CHANGED) {
+ $attrs = array('class'=>'diff-html-changed');
+ if ($mod->isFirstOfID()) {
+ $attrs['id'] = 'changed-' . $this->prefix . '-' . $mod->getID();
+ }
+ $this->addAttributes($mod, $attrs);
+ $attrs['onclick'] = 'return tipC(constructToolTipC(this));';
+ $this->handler->startElement('span', $attrs);
+
+ //tooltip
+ $this->handler->startElement('span', array('class'=>'tip'));
+ $this->handler->characters($mod->getChanges());
+ $this->handler->endElement('span');
+
+ $changeStarted = true;
+ $changeTXT = $mod->getChanges();
+ } else if (!$remStarted && $mod->getType() == Modification::REMOVED) {
+ $attrs = array('class'=>'diff-html-removed');
+ if ($mod->isFirstOfID()) {
+ $attrs['id'] = 'removed-' . $this->prefix . '-' . $mod->getID();
+ }
+ $this->addAttributes($mod, $attrs);
+ $attrs['onclick'] = 'return tipR(constructToolTipR(this));';
+ $this->handler->startElement('span', $attrs);
+ $remStarted = true;
+ }
+
+ $chars = $child->getText();
+
+ if ($child instanceof ImageNode) {
+ $this->writeImage($child);
+ } else {
+ $this->handler->characters($chars);
+ }
+
+ }
+ }
+
+ if ($newStarted) {
+ $this->handler->endElement('span');
+ $newStarted = false;
+ } else if ($changeStarted) {
+ $this->handler->endElement('span');
+ $changeStarted = false;
+ } else if ($remStarted) {
+ $this->handler->endElement('span');
+ $remStarted = false;
+ }
+
+ if (0 != strcasecmp($node->getQName(),'img')
+ && 0 != strcasecmp($node->getQName(),'body'))
+ $this->handler->endElement($node->getQName());
+ }
+
+ private function writeImage(ImageNode $imgNode){
+ $attrs = $imgNode->getAttributes();
+ if ($imgNode->getModification()->getType() == Modification::REMOVED)
+ $attrs['changeType']='diff-removed-image';
+ else if ($imgNode->getModification()->getType() == Modification::ADDED)
+ $attrs['changeType'] = 'diff-added-image';
+ $attrs['onload'] = 'updateOverlays()';
+ $attrs['onError'] = 'updateOverlays()';
+ $attrs['onAbort'] = 'updateOverlays()';
+ $this->handler->startElement('img', $attrs);
+ $this->handler->endElement('img');
+ }
+
+ private function addAttributes(Modification $mod, /*array*/ &$attrs) {
+ if (is_null($mod->getPrevious())) {
+ $previous = 'first-' . $this->prefix;
+ } else {
+ $previous = Modification::typeToString($mod->getPrevious()->getType()) . '-' . $this->prefix . '-'
+ . $mod->getPrevious()->getID();
+ }
+ $attrs['previous'] = $previous;
+
+ $changeId = Modification::typeToString($mod->getType()) . '-' + $this->prefix . '-' . $mod->getID();
+ $attrs['changeId'] = $changeId;
+
+ if (is_null($mod->getNext())) {
+ $next = 'last-' . $this->prefix;
+ } else {
+ $next = Modification::typeToString($mod->getNext()->getType()) . '-' . $this->prefix . '-'
+ . $mod->getNext()->getID();
+ }
+ $attrs['next'] = $next;
+ }
+}
+
+class EchoingContentHandler{
+
+ function startElement($qname, /*array*/ $arguments){
+ echo '<'.$qname;
+ foreach($arguments as $key => $value){
+ echo ' '.$key.'="'.Sanitizer::encodeAttribute($value).'"';
+ }
+ echo '>';
+ }
+
+ function endElement($qname){
+ echo '</'.$qname.'>';
+ }
+
+ function characters($chars){
+ echo $chars;
+ }
+
+}
+
+class DelegatingContentHandler{
+
+ private $delegate;
+
+ function __construct($delegate){
+ $this->delegate=$delegate;
+ }
+
+ function startElement($qname, /*array*/ $arguments){
+ $this->delegate->addHtml('<'.$qname) ;
+ foreach($arguments as $key => $value){
+ $this->delegate->addHtml(' '.$key.'="'. Sanitizer::encodeAttribute($value) .'"');
+ }
+ $this->delegate->addHtml('>');
+ }
+
+ function endElement($qname){
+ $this->delegate->addHtml('</'.$qname.'>');
+ }
+
+ function characters($chars){
+ $this->delegate->addHtml( $chars );
+ }
+
+}