3 /** Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * or see http://www.gnu.org/
20 * @ingroup DifferenceEngine
24 * Any element in the DOM tree of an HTML document.
30 protected $parentTree;
32 public $whiteBefore = false;
34 public $whiteAfter = false;
36 function __construct($parent) {
37 $this->parent
= $parent;
40 public function getParentTree() {
41 if (!isset($this->parentTree
)) {
42 if (!is_null($this->parent
)) {
43 $this->parentTree
= $this->parent
->getParentTree();
44 $this->parentTree
[] = $this->parent
;
46 $this->parentTree
= array();
49 return $this->parentTree
;
52 public function getLastCommonParent(Node
$other) {
53 $result = new LastCommonParentResult();
55 $myParents = $this->getParentTree();
56 $otherParents = $other->getParentTree();
60 $nbMyParents = count($myParents);
61 $nbOtherParents = count($otherParents);
62 while ($isSame && $i < $nbMyParents && $i < $nbOtherParents) {
63 if (!$myParents[$i]->openingTag
=== $otherParents[$i]->openingTag
) {
66 // After a while, the index i-1 must be the last common parent
71 $result->lastCommonParentDepth
= $i - 1;
72 $result->parent
= $myParents[$i - 1];
74 if (!$isSame ||
$nbMyParents > $nbOtherParents) {
75 // Not all tags matched, or all tags matched but
76 // there are tags left in this tree
77 $result->indexInLastCommonParent
= $myParents[$i - 1]->getIndexOf($myParents[$i]);
78 $result->splittingNeeded
= true;
79 } else if ($nbMyParents <= $nbOtherParents) {
80 $result->indexInLastCommonParent
= $myParents[$i - 1]->getIndexOf($this);
85 public function setParent($parent) {
86 $this->parent
= $parent;
87 unset($this->parentTree
);
90 public function inPre() {
91 $tree = $this->getParentTree();
92 foreach ($tree as &$ancestor) {
93 if ($ancestor->isPre()) {
102 * Node that can contain other nodes. Represents an HTML tag.
104 class TagNode
extends Node
{
106 public $children = array();
110 public $attributes = array();
114 function __construct($parent, $qName, /*array*/ $attributes) {
115 parent
::__construct($parent);
116 $this->qName
= strtolower($qName);
117 foreach($attributes as $key => &$value){
118 $this->attributes
[strtolower($key)] = $value;
120 return $this->openingTag
= Xml
::openElement($this->qName
, $this->attributes
);
123 public function addChildAbsolute(Node
$node, $index) {
124 array_splice($this->children
, $index, 0, array($node));
127 public function getIndexOf(Node
$child) {
128 // don't trust array_search with objects
129 foreach ($this->children
as $key => &$value){
130 if ($value === $child) {
137 public function getNbChildren() {
138 return count($this->children
);
141 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
145 $somethingDeleted = false;
146 $hasNonDeletedDescendant = false;
148 if (empty($this->children
)) {
152 foreach ($this->children
as &$child) {
153 $allDeleted_local = false;
154 $somethingDeleted_local = false;
155 $childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local);
156 if ($somethingDeleted_local) {
157 $nodes = array_merge($nodes, $childrenChildren);
158 $somethingDeleted = true;
160 if (!$allDeleted_local) {
161 $hasNonDeletedDescendant = true;
164 if (!$hasNonDeletedDescendant) {
165 $nodes = array($this);
171 public function splitUntil(TagNode
$parent, Node
$split, $includeLeft) {
172 $splitOccured = false;
173 if ($parent !== $this) {
174 $part1 = new TagNode(null, $this->qName
, $this->attributes
);
175 $part2 = new TagNode(null, $this->qName
, $this->attributes
);
176 $part1->setParent($this->parent
);
177 $part2->setParent($this->parent
);
181 foreach ($this->children
as &$child)
183 if ($child === $split) {
186 if(!$pastSplit ||
($onSplit && $includeLeft)) {
187 $child->setParent($part1);
188 $part1->children
[] = $child;
190 $child->setParent($part2);
191 $part2->children
[] = $child;
198 $myindexinparent = $this->parent
->getIndexOf($this);
199 if (!empty($part1->children
)) {
200 $this->parent
->addChildAbsolute($part1, $myindexinparent);
202 if (!empty($part2->children
)) {
203 $this->parent
->addChildAbsolute($part2, $myindexinparent);
205 if (!empty($part1->children
) && !empty($part2->children
)) {
206 $splitOccured = true;
209 $this->parent
->removeChild($myindexinparent);
212 $this->parent
->splitUntil($parent, $part1, $includeLeft);
214 $this->parent
->splitUntil($parent, $part2, $includeLeft);
217 return $splitOccured;
221 private function removeChild($index) {
222 unset($this->children
[$index]);
223 $this->children
= array_values($this->children
);
226 public static $blocks = array('html', 'body','p','blockquote', 'h1',
227 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li', 'table',
228 'tbody', 'tr', 'td', 'th', 'br');
230 public function copyTree() {
231 $newThis = new TagNode(null, $this->qName
, $this->attributes
);
232 $newThis->whiteBefore
= $this->whiteBefore
;
233 $newThis->whiteAfter
= $this->whiteAfter
;
234 foreach ($this->children
as &$child) {
235 $newChild = $child->copyTree();
236 $newChild->setParent($newThis);
237 $newThis->children
[] = $newChild;
242 public function getMatchRatio(TagNode
$other) {
243 $txtComp = new TextOnlyComparator($other);
244 return $txtComp->getMatchRatio(new TextOnlyComparator($this));
247 public function expandWhiteSpace() {
251 $nbOriginalChildren = $this->getNbChildren();
252 for ($i = 0; $i < $nbOriginalChildren; ++
$i) {
253 $child = $this->children
[$i +
$shift];
255 if ($child instanceof TagNode
) {
256 if (!$child->isPre()) {
257 $child->expandWhiteSpace();
260 if (!$spaceAdded && $child->whiteBefore
) {
261 $ws = new WhiteSpaceNode(null, ' ', $child->getLeftMostChild());
262 $ws->setParent($this);
263 $this->addChildAbsolute($ws,$i +
($shift++
));
265 if ($child->whiteAfter
) {
266 $ws = new WhiteSpaceNode(null, ' ', $child->getRightMostChild());
267 $ws->setParent($this);
268 $this->addChildAbsolute($ws,$i +
1 +
($shift++
));
277 public function getLeftMostChild() {
278 if (empty($this->children
)) {
281 return $this->children
[0]->getLeftMostChild();
284 public function getRightMostChild() {
285 if (empty($this->children
)) {
288 return $this->children
[$this->getNbChildren() - 1]->getRightMostChild();
291 public function isPre() {
292 return 0 == strcasecmp($this->qName
,'pre');
295 public static function toDiffLine(TagNode
$node) {
296 return $node->openingTag
;
301 * Represents a piece of text in the HTML file.
303 class TextNode
extends Node
{
307 public $modification;
309 function __construct($parent, $text) {
310 parent
::__construct($parent);
311 $this->modification
= new Modification(Modification
::NONE
);
315 public function copyTree() {
316 $clone = clone $this;
317 $clone->setParent(null);
321 public function getLeftMostChild() {
325 public function getRightMostChild() {
329 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
330 if ($this->modification
->type
== Modification
::REMOVED
331 && $this->modification
->id
== $id){
332 $somethingDeleted = true;
339 public function isSameText($other) {
340 if (is_null($other) ||
! $other instanceof TextNode
) {
343 return str_replace('\n', ' ',$this->text
) === str_replace('\n', ' ',$other->text
);
346 public static function toDiffLine(TextNode
$node) {
347 return str_replace('\n', ' ',$node->text
);
351 class WhiteSpaceNode
extends TextNode
{
353 function __construct($parent, $s, Node
$like = null) {
354 parent
::__construct($parent, $s);
355 if(!is_null($like) && $like instanceof TextNode
) {
356 $newModification = clone $like->modification
;
357 $newModification->firstOfID
= false;
358 $this->modification
= $newModification;
364 * Represents the root of a HTML document.
366 class BodyNode
extends TagNode
{
368 function __construct() {
369 parent
::__construct(null, 'body', array());
372 public function copyTree() {
373 $newThis = new BodyNode();
374 foreach ($this->children
as &$child) {
375 $newChild = $child->copyTree();
376 $newChild->setParent($newThis);
377 $newThis->children
[] = $newChild;
382 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
384 foreach ($this->children
as &$child) {
385 $childrenChildren = $child->getMinimalDeletedSet($id,
386 $allDeleted, $somethingDeleted);
387 $nodes = array_merge($nodes, $childrenChildren);
395 * Represents an image in HTML. Even though images do not contain any text they
396 * are independent visible objects on the page. They are logically a TextNode.
398 class ImageNode
extends TextNode
{
402 function __construct(TagNode
$parent, /*array*/ $attrs) {
403 if(!array_key_exists('src', $attrs)) {
404 HTMLDiffer
::diffDebug( "Image without a source\n" );
405 parent
::__construct($parent, '<img></img>');
407 parent
::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
409 $this->attributes
= $attrs;
412 public function isSameText($other) {
413 if (is_null($other) ||
! $other instanceof ImageNode
) {
416 return $this->text
=== $other->text
;
424 class DummyNode
extends Node
{
426 function __construct() {