* Add all HTMLDiff-related classes to the autoloader.
[lhc/web/wiklou.git] / includes / diff / Nodes.php
1 <?php
2
3 /** Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * or see http://www.gnu.org/
19 *
20 * @ingroup DifferenceEngine
21 */
22
23 /**
24 * Any element in the DOM tree of an HTML document.
25 */
26 class Node {
27
28 public $parent;
29
30 protected $parentTree;
31
32 public $whiteBefore = false;
33
34 public $whiteAfter = false;
35
36 function __construct($parent) {
37 $this->parent = $parent;
38 }
39
40 public function getParentTree() {
41 if (!isset($this->parentTree)) {
42 if (!is_null($this->parent)) {
43 $this->parentTree = $this->parent->getParentTree();
44 $this->parentTree[] = $this->parent;
45 } else {
46 $this->parentTree = array();
47 }
48 }
49 return $this->parentTree;
50 }
51
52 public function getLastCommonParent(Node $other) {
53 $result = new LastCommonParentResult();
54
55 $myParents = $this->getParentTree();
56 $otherParents = $other->getParentTree();
57
58 $i = 1;
59 $isSame = true;
60 $nbMyParents = count($myParents);
61 $nbOtherParents = count($otherParents);
62 while ($isSame && $i < $nbMyParents && $i < $nbOtherParents) {
63 if (!$myParents[$i]->openingTag === $otherParents[$i]->openingTag) {
64 $isSame = false;
65 } else {
66 // After a while, the index i-1 must be the last common parent
67 $i++;
68 }
69 }
70
71 $result->lastCommonParentDepth = $i - 1;
72 $result->parent = $myParents[$i - 1];
73
74 if (!$isSame || $nbMyParents > $nbOtherParents) {
75 // Not all tags matched, or all tags matched but
76 // there are tags left in this tree
77 $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($myParents[$i]);
78 $result->splittingNeeded = true;
79 } else if ($nbMyParents <= $nbOtherParents) {
80 $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($this);
81 }
82 return $result;
83 }
84
85 public function setParent($parent) {
86 $this->parent = $parent;
87 unset($this->parentTree);
88 }
89
90 public function inPre() {
91 $tree = $this->getParentTree();
92 foreach ($tree as &$ancestor) {
93 if ($ancestor->isPre()) {
94 return true;
95 }
96 }
97 return false;
98 }
99 }
100
101 /**
102 * Node that can contain other nodes. Represents an HTML tag.
103 */
104 class TagNode extends Node {
105
106 public $children = array();
107
108 public $qName;
109
110 public $attributes = array();
111
112 public $openingTag;
113
114 function __construct($parent, $qName, /*array*/ $attributes) {
115 parent::__construct($parent);
116 $this->qName = strtolower($qName);
117 foreach($attributes as $key => &$value){
118 $this->attributes[strtolower($key)] = $value;
119 }
120 return $this->openingTag = Xml::openElement($this->qName, $this->attributes);
121 }
122
123 public function addChildAbsolute(Node $node, $index) {
124 array_splice($this->children, $index, 0, array($node));
125 }
126
127 public function getIndexOf(Node $child) {
128 // don't trust array_search with objects
129 foreach ($this->children as $key => &$value){
130 if ($value === $child) {
131 return $key;
132 }
133 }
134 return null;
135 }
136
137 public function getNbChildren() {
138 return count($this->children);
139 }
140
141 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
142 $nodes = array();
143
144 $allDeleted = false;
145 $somethingDeleted = false;
146 $hasNonDeletedDescendant = false;
147
148 if (empty($this->children)) {
149 return $nodes;
150 }
151
152 foreach ($this->children as &$child) {
153 $allDeleted_local = false;
154 $somethingDeleted_local = false;
155 $childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local);
156 if ($somethingDeleted_local) {
157 $nodes = array_merge($nodes, $childrenChildren);
158 $somethingDeleted = true;
159 }
160 if (!$allDeleted_local) {
161 $hasNonDeletedDescendant = true;
162 }
163 }
164 if (!$hasNonDeletedDescendant) {
165 $nodes = array($this);
166 $allDeleted = true;
167 }
168 return $nodes;
169 }
170
171 public function splitUntil(TagNode $parent, Node $split, $includeLeft) {
172 $splitOccured = false;
173 if ($parent !== $this) {
174 $part1 = new TagNode(null, $this->qName, $this->attributes);
175 $part2 = new TagNode(null, $this->qName, $this->attributes);
176 $part1->setParent($this->parent);
177 $part2->setParent($this->parent);
178
179 $onSplit = false;
180 $pastSplit = false;
181 foreach ($this->children as &$child)
182 {
183 if ($child === $split) {
184 $onSplit = true;
185 }
186 if(!$pastSplit || ($onSplit && $includeLeft)) {
187 $child->setParent($part1);
188 $part1->children[] = $child;
189 } else {
190 $child->setParent($part2);
191 $part2->children[] = $child;
192 }
193 if ($onSplit) {
194 $onSplit = false;
195 $pastSplit = true;
196 }
197 }
198 $myindexinparent = $this->parent->getIndexOf($this);
199 if (!empty($part1->children)) {
200 $this->parent->addChildAbsolute($part1, $myindexinparent);
201 }
202 if (!empty($part2->children)) {
203 $this->parent->addChildAbsolute($part2, $myindexinparent);
204 }
205 if (!empty($part1->children) && !empty($part2->children)) {
206 $splitOccured = true;
207 }
208
209 $this->parent->removeChild($myindexinparent);
210
211 if ($includeLeft) {
212 $this->parent->splitUntil($parent, $part1, $includeLeft);
213 } else {
214 $this->parent->splitUntil($parent, $part2, $includeLeft);
215 }
216 }
217 return $splitOccured;
218
219 }
220
221 private function removeChild($index) {
222 unset($this->children[$index]);
223 $this->children = array_values($this->children);
224 }
225
226 public static $blocks = array('html', 'body','p','blockquote', 'h1',
227 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li', 'table',
228 'tbody', 'tr', 'td', 'th', 'br');
229
230 public function copyTree() {
231 $newThis = new TagNode(null, $this->qName, $this->attributes);
232 $newThis->whiteBefore = $this->whiteBefore;
233 $newThis->whiteAfter = $this->whiteAfter;
234 foreach ($this->children as &$child) {
235 $newChild = $child->copyTree();
236 $newChild->setParent($newThis);
237 $newThis->children[] = $newChild;
238 }
239 return $newThis;
240 }
241
242 public function getMatchRatio(TagNode $other) {
243 $txtComp = new TextOnlyComparator($other);
244 return $txtComp->getMatchRatio(new TextOnlyComparator($this));
245 }
246
247 public function expandWhiteSpace() {
248 $shift = 0;
249 $spaceAdded = false;
250
251 $nbOriginalChildren = $this->getNbChildren();
252 for ($i = 0; $i < $nbOriginalChildren; ++$i) {
253 $child = $this->children[$i + $shift];
254
255 if ($child instanceof TagNode) {
256 if (!$child->isPre()) {
257 $child->expandWhiteSpace();
258 }
259 }
260 if (!$spaceAdded && $child->whiteBefore) {
261 $ws = new WhiteSpaceNode(null, ' ', $child->getLeftMostChild());
262 $ws->setParent($this);
263 $this->addChildAbsolute($ws,$i + ($shift++));
264 }
265 if ($child->whiteAfter) {
266 $ws = new WhiteSpaceNode(null, ' ', $child->getRightMostChild());
267 $ws->setParent($this);
268 $this->addChildAbsolute($ws,$i + 1 + ($shift++));
269 $spaceAdded = true;
270 } else {
271 $spaceAdded = false;
272 }
273
274 }
275 }
276
277 public function getLeftMostChild() {
278 if (empty($this->children)) {
279 return $this;
280 }
281 return $this->children[0]->getLeftMostChild();
282 }
283
284 public function getRightMostChild() {
285 if (empty($this->children)) {
286 return $this;
287 }
288 return $this->children[$this->getNbChildren() - 1]->getRightMostChild();
289 }
290
291 public function isPre() {
292 return 0 == strcasecmp($this->qName,'pre');
293 }
294
295 public static function toDiffLine(TagNode $node) {
296 return $node->openingTag;
297 }
298 }
299
300 /**
301 * Represents a piece of text in the HTML file.
302 */
303 class TextNode extends Node {
304
305 public $text;
306
307 public $modification;
308
309 function __construct($parent, $text) {
310 parent::__construct($parent);
311 $this->modification = new Modification(Modification::NONE);
312 $this->text = $text;
313 }
314
315 public function copyTree() {
316 $clone = clone $this;
317 $clone->setParent(null);
318 return $clone;
319 }
320
321 public function getLeftMostChild() {
322 return $this;
323 }
324
325 public function getRightMostChild() {
326 return $this;
327 }
328
329 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
330 if ($this->modification->type == Modification::REMOVED
331 && $this->modification->id == $id){
332 $somethingDeleted = true;
333 $allDeleted = true;
334 return array($this);
335 }
336 return array();
337 }
338
339 public function isSameText($other) {
340 if (is_null($other) || ! $other instanceof TextNode) {
341 return false;
342 }
343 return str_replace('\n', ' ',$this->text) === str_replace('\n', ' ',$other->text);
344 }
345
346 public static function toDiffLine(TextNode $node) {
347 return str_replace('\n', ' ',$node->text);
348 }
349 }
350
351 class WhiteSpaceNode extends TextNode {
352
353 function __construct($parent, $s, Node $like = null) {
354 parent::__construct($parent, $s);
355 if(!is_null($like) && $like instanceof TextNode) {
356 $newModification = clone $like->modification;
357 $newModification->firstOfID = false;
358 $this->modification = $newModification;
359 }
360 }
361 }
362
363 /**
364 * Represents the root of a HTML document.
365 */
366 class BodyNode extends TagNode {
367
368 function __construct() {
369 parent::__construct(null, 'body', array());
370 }
371
372 public function copyTree() {
373 $newThis = new BodyNode();
374 foreach ($this->children as &$child) {
375 $newChild = $child->copyTree();
376 $newChild->setParent($newThis);
377 $newThis->children[] = $newChild;
378 }
379 return $newThis;
380 }
381
382 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
383 $nodes = array();
384 foreach ($this->children as &$child) {
385 $childrenChildren = $child->getMinimalDeletedSet($id,
386 $allDeleted, $somethingDeleted);
387 $nodes = array_merge($nodes, $childrenChildren);
388 }
389 return $nodes;
390 }
391
392 }
393
394 /**
395 * Represents an image in HTML. Even though images do not contain any text they
396 * are independent visible objects on the page. They are logically a TextNode.
397 */
398 class ImageNode extends TextNode {
399
400 public $attributes;
401
402 function __construct(TagNode $parent, /*array*/ $attrs) {
403 if(!array_key_exists('src', $attrs)) {
404 HTMLDiffer::diffDebug( "Image without a source\n" );
405 parent::__construct($parent, '<img></img>');
406 }else{
407 parent::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
408 }
409 $this->attributes = $attrs;
410 }
411
412 public function isSameText($other) {
413 if (is_null($other) || ! $other instanceof ImageNode) {
414 return false;
415 }
416 return $this->text === $other->text;
417 }
418
419 }
420
421 /**
422 * No-op node
423 */
424 class DummyNode extends Node {
425
426 function __construct() {
427 // no op
428 }
429
430 }