HTML5 Balancer
[lhc/web/wiklou.git] / includes / tidy / Balancer.php
1 <?php
2 /**
3 * An implementation of the tree building portion of the HTML5 parsing
4 * spec.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup Parser
23 * @since 1.27
24 * @author C. Scott Ananian, 2016
25 */
26 namespace MediaWiki\Tidy;
27
28 use Wikimedia\Assert\Assert;
29 use Wikimedia\Assert\ParameterAssertionException;
30 use \ExplodeIterator;
31 use \IteratorAggregate;
32 use \ReverseArrayIterator;
33 use \Sanitizer;
34
35 # A note for future librarization[1] -- this file is a good candidate
36 # for splitting into an independent library, except that it is currently
37 # highly optimized for MediaWiki use. It only implements the portions
38 # of the HTML5 tree builder used by tags supported by MediaWiki, and
39 # does not contain a true tokenizer pass, instead relying on
40 # comment stripping, attribute normalization, and escaping done by
41 # the MediaWiki Sanitizer. It also deliberately avoids building
42 # a true DOM in memory, instead serializing elements to an output string
43 # as soon as possible (usually as soon as the tag is closed) to reduce
44 # its memory footprint.
45
46 # On the other hand, I've been pretty careful to note with comments in the
47 # code the places where this implementation omits features of the spec or
48 # depends on the MediaWiki Sanitizer. Perhaps in the future we'll want to
49 # implement the missing pieces and make this a standalone PHP HTML5 parser.
50 # In order to do so, some sort of MediaWiki-specific API will need
51 # to be added to (a) allow the Balancer to bypass the tokenizer,
52 # and (b) support on-the-fly flattening instead of DOM node creation.
53
54 # [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
55
/**
 * Constant tables consulted by the HTML5 tree building algorithm.
 *
 * Every "set" is a two-level associative array: the outer key is a
 * namespace URI and the inner key is a lower-cased tag name. Only the
 * keys matter (membership is tested with isset()); every value is true.
 *
 * @ingroup Parser
 * @since 1.27
 */
class BalanceSets {
	const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
	const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
	const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';

	/** @var array HTML tags listed as unsupported. */
	public static $unsupportedSet = [
		self::HTML_NAMESPACE => [
			'html' => true, 'head' => true, 'body' => true,
			'frameset' => true, 'form' => true, 'frame' => true,
			'plaintext' => true, 'isindex' => true, 'textarea' => true,
			'xmp' => true, 'iframe' => true, 'noembed' => true,
			'noscript' => true, 'select' => true, 'script' => true,
			'title' => true,
		],
	];

	/**
	 * @var array Elements which BalanceElement::__toString() serializes
	 *  as a bare start tag, with no children and no end tag.
	 */
	public static $emptyElementSet = [
		self::HTML_NAMESPACE => [
			'area' => true, 'base' => true, 'basefont' => true,
			'bgsound' => true, 'br' => true, 'col' => true,
			'command' => true, 'embed' => true, 'frame' => true,
			'hr' => true, 'img' => true, 'input' => true,
			'keygen' => true, 'link' => true, 'meta' => true,
			'param' => true, 'source' => true, 'track' => true,
			'wbr' => true,
		],
	];

	/** @var array The HTML heading elements h1 through h6. */
	public static $headingSet = [
		self::HTML_NAMESPACE => [
			'h1' => true, 'h2' => true, 'h3' => true,
			'h4' => true, 'h5' => true, 'h6' => true,
		],
	];

	/**
	 * @var array The "special" category; BalanceStack::adoptionAgency()
	 *  uses it to locate the furthest block.
	 */
	public static $specialSet = [
		self::HTML_NAMESPACE => [
			'address' => true, 'applet' => true, 'area' => true,
			'article' => true, 'aside' => true, 'base' => true,
			'basefont' => true, 'bgsound' => true, 'blockquote' => true,
			'body' => true, 'br' => true, 'button' => true,
			'caption' => true, 'center' => true, 'col' => true,
			'colgroup' => true, 'dd' => true, 'details' => true,
			'dir' => true, 'div' => true, 'dl' => true,
			'dt' => true, 'embed' => true, 'fieldset' => true,
			'figcaption' => true, 'figure' => true, 'footer' => true,
			'form' => true, 'frame' => true, 'frameset' => true,
			'h1' => true, 'h2' => true, 'h3' => true,
			'h4' => true, 'h5' => true, 'h6' => true,
			'head' => true, 'header' => true, 'hgroup' => true,
			'hr' => true, 'html' => true, 'iframe' => true,
			'img' => true, 'input' => true, 'isindex' => true,
			'li' => true, 'link' => true, 'listing' => true,
			'main' => true, 'marquee' => true, 'menu' => true,
			'menuitem' => true, 'meta' => true, 'nav' => true,
			'noembed' => true, 'noframes' => true, 'noscript' => true,
			'object' => true, 'ol' => true, 'p' => true,
			'param' => true, 'plaintext' => true, 'pre' => true,
			'script' => true, 'section' => true, 'select' => true,
			'source' => true, 'style' => true, 'summary' => true,
			'table' => true, 'tbody' => true, 'td' => true,
			'template' => true, 'textarea' => true, 'tfoot' => true,
			'th' => true, 'thead' => true, 'title' => true,
			'tr' => true, 'track' => true, 'ul' => true,
			'wbr' => true, 'xmp' => true,
		],
		self::SVG_NAMESPACE => [
			'foreignobject' => true, 'desc' => true, 'title' => true,
		],
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
			'mtext' => true, 'annotation-xml' => true,
		],
	];

	/** @var array The HTML elements address, div and p. */
	public static $addressDivPSet = [
		self::HTML_NAMESPACE => [
			'address' => true, 'div' => true, 'p' => true,
		],
	];

	/**
	 * @var array Table, table-section and row elements. When foster
	 *  parent mode is on and the current node is in this set,
	 *  BalanceStack foster-parents new content.
	 */
	public static $tableSectionRowSet = [
		self::HTML_NAMESPACE => [
			'table' => true, 'thead' => true, 'tbody' => true,
			'tfoot' => true, 'tr' => true,
		],
	];

	/**
	 * @var array Elements closed by
	 *  BalanceStack::generateImpliedEndTags() in its default mode.
	 */
	public static $impliedEndTagsSet = [
		self::HTML_NAMESPACE => [
			'dd' => true, 'dt' => true, 'li' => true,
			'optgroup' => true, 'option' => true, 'p' => true,
			'rb' => true, 'rp' => true, 'rt' => true, 'rtc' => true,
		],
	];

	/**
	 * @var array Elements closed by
	 *  BalanceStack::generateImpliedEndTags() in "thorough" mode.
	 */
	public static $thoroughImpliedEndTagsSet = [
		self::HTML_NAMESPACE => [
			'caption' => true, 'colgroup' => true, 'dd' => true,
			'dt' => true, 'li' => true, 'optgroup' => true,
			'option' => true, 'p' => true, 'rb' => true,
			'rp' => true, 'rt' => true, 'rtc' => true,
			'tbody' => true, 'td' => true, 'tfoot' => true,
			'th' => true, 'thead' => true, 'tr' => true,
		],
	];

	/** @var array The table cell elements td and th. */
	public static $tableCellSet = [
		self::HTML_NAMESPACE => [
			'td' => true, 'th' => true,
		],
	];

	/** @var array The elements table, template and html. */
	public static $tableContextSet = [
		self::HTML_NAMESPACE => [
			'table' => true, 'template' => true, 'html' => true,
		],
	];

	/** @var array The elements tbody, tfoot, thead, template and html. */
	public static $tableBodyContextSet = [
		self::HTML_NAMESPACE => [
			'tbody' => true, 'tfoot' => true, 'thead' => true,
			'template' => true, 'html' => true,
		],
	];

	/** @var array The elements tr, template and html. */
	public static $tableRowContextSet = [
		self::HTML_NAMESPACE => [
			'tr' => true, 'template' => true, 'html' => true,
		],
	];

	# OMITTED: formAssociatedSet, since we don't allow <form>

	/**
	 * @var array Scope boundaries used by BalanceStack::inScope(); also
	 *  the base from which the list-item and button scope sets are built.
	 */
	public static $inScopeSet = [
		self::HTML_NAMESPACE => [
			'applet' => true, 'caption' => true, 'html' => true,
			'marquee' => true, 'object' => true, 'table' => true,
			'td' => true, 'template' => true, 'th' => true,
		],
		self::SVG_NAMESPACE => [
			'foreignobject' => true, 'desc' => true, 'title' => true,
		],
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
			'mtext' => true, 'annotation-xml' => true,
		],
	];

	/** @var array|null Lazily built cache for inListItemScopeSet(). */
	private static $inListItemScopeSet = null;

	/**
	 * Return $inScopeSet extended with the HTML elements ol and ul.
	 * The result is computed on first use and cached.
	 * @return array
	 */
	public static function inListItemScopeSet() {
		if ( self::$inListItemScopeSet !== null ) {
			return self::$inListItemScopeSet;
		}
		$set = self::$inScopeSet;
		$set[self::HTML_NAMESPACE] += [ 'ol' => true, 'ul' => true ];
		self::$inListItemScopeSet = $set;
		return self::$inListItemScopeSet;
	}

	/** @var array|null Lazily built cache for inButtonScopeSet(). */
	private static $inButtonScopeSet = null;

	/**
	 * Return $inScopeSet extended with the HTML element button.
	 * The result is computed on first use and cached.
	 * @return array
	 */
	public static function inButtonScopeSet() {
		if ( self::$inButtonScopeSet !== null ) {
			return self::$inButtonScopeSet;
		}
		$set = self::$inScopeSet;
		$set[self::HTML_NAMESPACE] += [ 'button' => true ];
		self::$inButtonScopeSet = $set;
		return self::$inButtonScopeSet;
	}

	/** @var array Scope boundaries used by BalanceStack::inTableScope(). */
	public static $inTableScopeSet = [
		self::HTML_NAMESPACE => [
			'html' => true, 'table' => true, 'template' => true,
		],
	];

	/**
	 * @var array Elements tested by
	 *  BalanceElement::isMathmlTextIntegrationPoint().
	 */
	public static $mathmlTextIntegrationPointSet = [
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
			'mtext' => true,
		],
	];

	/**
	 * @var array SVG elements which BalanceElement::isHtmlIntegrationPoint()
	 *  always treats as HTML integration points.
	 */
	public static $htmlIntegrationPointSet = [
		self::SVG_NAMESPACE => [
			'foreignobject' => true,
			'desc' => true,
			'title' => true,
		],
	];
}
246
/**
 * A BalanceElement is a simplified version of a DOM Node. The main
 * difference is that we only keep BalanceElements around for nodes
 * currently on the BalanceStack of open elements. As soon as an
 * element is closed, with some minor exceptions relating to the
 * tree builder "adoption agency algorithm", the element and all its
 * children are serialized to a string using the flatten() method.
 * This keeps our memory usage low.
 *
 * @ingroup Parser
 * @since 1.27
 */
class BalanceElement {
	/**
	 * The namespace of the element.
	 * @var string $namespaceURI
	 */
	public $namespaceURI;
	/**
	 * The lower-cased name of the element.
	 * @var string $localName
	 */
	public $localName;
	/**
	 * Attributes for the element, as normalized by
	 * Sanitizer::safeEncodeTagAttributes. Attributes are space-separated
	 * and attribute values are double-quoted. Elements with no
	 * attributes should have a zero-length string here.
	 * @var string $attribs
	 */
	public $attribs;

	/**
	 * Parent of this element: a BalanceElement while attached, null
	 * while detached, or the string "flat" once this element has
	 * been flattened into its parent.
	 * @var BalanceElement|string|null $parent
	 */
	public $parent;

	/**
	 * An array of children of this element. Typically only the last
	 * child will be an actual BalanceElement object; the rest will
	 * be strings, representing either text nodes or flattened
	 * BalanceElement objects.
	 * @var array $children
	 */
	public $children;

	/**
	 * Set to the empty string by the constructor and not read by any
	 * method of this class. Declared explicitly so that the
	 * constructor's assignment does not create a dynamic property.
	 * NOTE(review): looks unused -- confirm no external reader before
	 * removing.
	 * @var string $contents
	 */
	public $contents;

	/**
	 * Make a new BalanceElement corresponding to the HTML DOM Element
	 * with the given localname, namespace, and attributes.
	 *
	 * @param string $namespaceURI The namespace of the element.
	 * @param string $localName The lowercased name of the tag.
	 * @param string $attribs Attributes of the element, as normalized
	 *  by Sanitizer::safeEncodeTagAttributes.
	 */
	public function __construct( $namespaceURI, $localName, $attribs ) {
		Assert::parameterType( 'string', $namespaceURI, '$namespaceURI' );
		Assert::parameterType( 'string', $localName, '$localName' );
		Assert::parameterType( 'string', $attribs, '$attribs' );

		$this->localName = $localName;
		$this->namespaceURI = $namespaceURI;
		$this->attribs = $attribs;
		$this->contents = '';
		$this->parent = null;
		$this->children = [];
	}

	/**
	 * Remove the given child from this element and clear its parent link.
	 * @param BalanceElement $elt
	 */
	private function removeChild( $elt ) {
		// The message is deliberately a constant string: interpolating
		// $this here would serialize the whole subtree on every call,
		// even when the precondition holds.
		Assert::precondition(
			$this->parent !== 'flat', "Can't removeChild after flattening."
		);
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $elt, '$elt' );
		Assert::parameter(
			$elt->parent === $this, 'elt', 'must have $this as a parent'
		);
		$idx = array_search( $elt, $this->children, true );
		Assert::parameter( $idx !== false, '$elt', 'must be a child of $this' );
		$elt->parent = null;
		array_splice( $this->children, $idx, 1 );
	}

	/**
	 * Find $a in the list of children and insert $b before it.
	 * If $b is an element which currently has a parent, it is detached
	 * from that parent first.
	 * @param BalanceElement $a
	 * @param BalanceElement|string $b
	 */
	public function insertBefore( $a, $b ) {
		Assert::precondition(
			$this->parent !== 'flat', "Can't insertBefore after flattening."
		);
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $a, '$a' );
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement|string', $b, '$b' );
		$idx = array_search( $a, $this->children, true );
		Assert::parameter( $idx !== false, '$a', 'must be a child of $this' );
		if ( !is_string( $b ) ) {
			Assert::parameter( $b->parent !== 'flat', '$b', "Can't be flat" );
			if ( $b->parent !== null ) {
				$wasSibling = ( $b->parent === $this );
				$b->parent->removeChild( $b );
				if ( $wasSibling && $b !== $a ) {
					// Detaching an earlier sibling shifts $a to the left,
					// so the index computed above may be stale.
					$idx = array_search( $a, $this->children, true );
				}
			}
			$b->parent = $this;
		}
		array_splice( $this->children, $idx, 0, [ $b ] );
	}

	/**
	 * Append $elt to the end of the list of children.
	 * If $elt is an element which currently has a parent, it is detached
	 * from that parent first.
	 * @param BalanceElement|string $elt
	 */
	public function appendChild( $elt ) {
		Assert::precondition(
			$this->parent !== 'flat', "Can't appendChild after flattening."
		);
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement|string', $elt, '$elt' );
		if ( is_string( $elt ) ) {
			array_push( $this->children, $elt );
			return;
		}
		// Remove $elt from parent, if it had one.
		if ( $elt->parent !== null ) {
			$elt->parent->removeChild( $elt );
		}
		array_push( $this->children, $elt );
		$elt->parent = $this;
	}

	/**
	 * Transfer all of the children of $elt to $this, leaving $elt with
	 * no children.
	 * @param BalanceElement $elt
	 */
	public function adoptChildren( $elt ) {
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $elt, '$elt' );
		Assert::precondition(
			$elt->parent !== 'flat', "Can't adoptChildren after flattening."
		);
		foreach ( $elt->children as $child ) {
			if ( !is_string( $child ) ) {
				// This is an optimization which avoids an O(n^2) set of
				// array_splice operations: with the parent link cleared,
				// appendChild() skips removeChild(), and $elt's child
				// list is reset wholesale below.
				$child->parent = null;
			}
			$this->appendChild( $child );
		}
		$elt->children = [];
	}

	/**
	 * Flatten this node and all of its children into a string, as specified
	 * by the HTML serialization specification, and replace this node
	 * in its parent by that string. Afterwards $this->parent is the
	 * string "flat".
	 *
	 * @return string The serialization which replaced this node.
	 * @see __toString()
	 */
	public function flatten() {
		Assert::parameter( $this->parent !== null, '$this', 'must be a child' );
		Assert::parameter( $this->parent !== 'flat', '$this', 'already flat' );
		$idx = array_search( $this, $this->parent->children, true );
		Assert::parameter(
			$idx !== false, '$this', 'must be a child of its parent'
		);
		$flat = "{$this}";
		$this->parent->children[$idx] = $flat;
		$this->parent = 'flat'; # for assertion checking
		return $flat;
	}

	/**
	 * Serialize this node and all of its children to a string, as specified
	 * by the HTML serialization specification. Elements in
	 * BalanceSets::$emptyElementSet are emitted as a start tag only.
	 *
	 * @return string The serialization of the BalanceElement
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#serialising-html-fragments
	 */
	public function __toString() {
		$out = "<{$this->localName}{$this->attribs}>";
		if ( !$this->isA( BalanceSets::$emptyElementSet ) ) {
			// flatten children
			foreach ( $this->children as $elt ) {
				$out .= "{$elt}";
			}
			$out .= "</{$this->localName}>";
		} else {
			Assert::invariant(
				count( $this->children ) === 0,
				"Empty elements shouldn't have children."
			);
		}
		return $out;
	}

	# Utility functions on BalanceElements.

	/**
	 * Determine if $this represents a specific HTML tag, is a member of
	 * a tag set, or is equal to another BalanceElement.
	 *
	 * @param BalanceElement|array|string $set The target BalanceElement,
	 *   set (from the BalanceSets class), or string (HTML tag name).
	 * @return bool
	 */
	public function isA( $set ) {
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement|array|string', $set, '$set' );
		if ( $set instanceof BalanceElement ) {
			return $this === $set;
		} elseif ( is_array( $set ) ) {
			return isset( $set[$this->namespaceURI] ) &&
				isset( $set[$this->namespaceURI][$this->localName] );
		} else {
			# assume this is an HTML element name.
			return $this->isHtml() && $this->localName === $set;
		}
	}

	/**
	 * Determine if $this represents an element in the HTML namespace.
	 *
	 * @return bool
	 */
	public function isHtml() {
		return $this->namespaceURI === BalanceSets::HTML_NAMESPACE;
	}

	/**
	 * Determine if $this represents a MathML text integration point,
	 * as defined in the HTML5 specification.
	 *
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#mathml-text-integration-point
	 */
	public function isMathmlTextIntegrationPoint() {
		return $this->isA( BalanceSets::$mathmlTextIntegrationPointSet );
	}

	/**
	 * Determine if $this represents an HTML integration point,
	 * as defined in the HTML5 specification.
	 *
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#html-integration-point
	 */
	public function isHtmlIntegrationPoint() {
		if ( $this->isA( BalanceSets::$htmlIntegrationPointSet ) ) {
			return true;
		}
		if (
			$this->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
			$this->localName === 'annotation-xml' &&
			// We rely on Sanitizer::fixTagAttributes having run on $attribs
			// to normalize the form of the tag parameters.
			// The "+" must be escaped: unescaped it is a quantifier and
			// the pattern would never match "application/xhtml+xml".
			preg_match( ':(^| )encoding="(text/html|application/xhtml\+xml)":i', $this->attribs )
		) {
			return true;
		}
		return false;
	}
}
511
512 /**
513 * The "stack of open elements" as defined in the HTML5 tree builder
514 * spec. This contains methods to ensure that content (start tags, text)
515 * are inserted at the correct place in the output string, and to
516 * flatten BalanceElements as they are closed to avoid holding onto
517 * a complete DOM tree for the document in memory.
518 *
519 * The stack defines a PHP iterator to traverse it in "reverse order",
520 * that is, the most-recently-added element is visited first in a
521 * foreach loop.
522 *
523 * @ingroup Parser
524 * @since 1.27
525 * @see https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
526 */
527 class BalanceStack implements IteratorAggregate {
528 /**
529 * Backing storage for the stack.
530 * @var array $elements
531 */
532 private $elements = [];
533 /**
534 * Foster parent mode determines how nodes are inserted into the
535 * stack.
536 * @var bool $fosterParentMode
537 * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
538 */
539 public $fosterParentMode = false;
540
/**
 * Set up the stack so that it holds exactly one entry: a fresh
 * BalanceElement standing in for the root &lt;html&gt; node.
 */
public function __construct() {
	// The root <html> element always sits at position 0.
	$root = new BalanceElement( BalanceSets::HTML_NAMESPACE, 'html', '' );
	$this->elements[] = $root;
}
552
/**
 * Produce the tree builder's output: the concatenated serialization
 * of every child of the root &lt;html&gt; node. Children which are
 * still BalanceElement objects are flattened in the process.
 * @return string
 */
public function getOutput() {
	// The enclosing '<html>....</html>' is intentionally left out.
	$pieces = [];
	foreach ( $this->elements[0]->children as $child ) {
		$pieces[] = is_string( $child ) ? $child : $child->flatten();
	}
	return implode( '', $pieces );
}
566
/**
 * Insert text at the appropriate place for inserting a node: the text
 * is foster-parented when foster parent mode is on and the current
 * node is in BalanceSets::$tableSectionRowSet; otherwise it is
 * appended to the current node.
 * @param string $value
 * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
 */
public function insertText( $value ) {
	Assert::parameterType( 'string', $value, '$value' );
	$current = $this->currentNode();
	$mustFoster = $this->fosterParentMode &&
		$current->isA( BalanceSets::$tableSectionRowSet );
	if ( !$mustFoster ) {
		$current->appendChild( $value );
		return;
	}
	$this->fosterParent( $value );
}
583
/**
 * Create a BalanceElement in the given namespace, insert it at the
 * appropriate place, and push it on to the open elements stack.
 * @param string $namespaceURI The element namespace
 * @param string $tag The tag name
 * @param string $attribs Normalized attributes, as a string.
 * @return BalanceElement The newly inserted element.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-a-foreign-element
 */
public function insertForeignElement( $namespaceURI, $tag, $attribs ) {
	$created = new BalanceElement( $namespaceURI, $tag, $attribs );
	return $this->insertElement( $created );
}
598
/**
 * Shorthand for insertForeignElement() with the HTML namespace:
 * insert an HTML element at the appropriate place and push it on to
 * the open elements stack.
 * @param string $tag The tag name
 * @param string $attribs Normalized attributes, as a string.
 * @return BalanceElement The newly inserted element.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-an-html-element
 */
public function insertHTMLElement( $tag, $attribs ) {
	$htmlNs = BalanceSets::HTML_NAMESPACE;
	return $this->insertForeignElement( $htmlNs, $tag, $attribs );
}
612
/**
 * Insert an element at the appropriate place and push it on to the
 * open elements stack. The element is foster-parented when foster
 * parent mode is on and the current node is in
 * BalanceSets::$tableSectionRowSet; otherwise it is appended to the
 * current node.
 * @param BalanceElement $elt
 * @return BalanceElement The inserted element.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
 */
public function insertElement( $elt ) {
	Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $elt, '$elt' );
	if (
		$this->fosterParentMode &&
		$this->currentNode()->isA( BalanceSets::$tableSectionRowSet )
	) {
		$elt = $this->fosterParent( $elt );
	} else {
		$this->currentNode()->appendChild( $elt );
	}
	// The messages are single-quoted on purpose: interpolating $elt
	// would serialize the element (and its subtree) on every insert,
	// whether or not the invariant holds.
	Assert::invariant( $elt->parent !== null, '$elt must be in tree' );
	Assert::invariant(
		$elt->parent !== 'flat',
		'$elt must not have been previously flattened'
	);
	array_push( $this->elements, $elt );
	return $elt;
}
635
/**
 * Determine if the stack has $tag in scope, using
 * BalanceSets::$inScopeSet as the scope boundary.
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-scope
 */
public function inScope( $tag ) {
	$boundary = BalanceSets::$inScopeSet;
	return $this->inSpecificScope( $tag, $boundary );
}
645
/**
 * Determine if the stack has $tag in button scope, using
 * BalanceSets::inButtonScopeSet() as the scope boundary.
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-button-scope
 */
public function inButtonScope( $tag ) {
	$boundary = BalanceSets::inButtonScopeSet();
	return $this->inSpecificScope( $tag, $boundary );
}
655
/**
 * Determine if the stack has $tag in list item scope, using
 * BalanceSets::inListItemScopeSet() as the scope boundary.
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-list-item-scope
 */
public function inListItemScope( $tag ) {
	$boundary = BalanceSets::inListItemScopeSet();
	return $this->inSpecificScope( $tag, $boundary );
}
665
/**
 * Determine if the stack has $tag in table scope, using
 * BalanceSets::$inTableScopeSet as the scope boundary.
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-table-scope
 */
public function inTableScope( $tag ) {
	$boundary = BalanceSets::$inTableScopeSet;
	return $this->inSpecificScope( $tag, $boundary );
}
675
/**
 * Determine if the stack has $tag in a specific scope, $set. The
 * stack is walked from the current node towards the root; the search
 * succeeds at the first element matching $tag and gives up at the
 * first element matching $set.
 * @param BalanceElement|array|string $tag
 * @param BalanceElement|array|string $set
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-the-specific-scope
 */
public function inSpecificScope( $tag, $set ) {
	foreach ( $this as $node ) {
		if ( $node->isA( $tag ) ) {
			return true;
		}
		if ( $node->isA( $set ) ) {
			// Hit a scope boundary before finding the target.
			break;
		}
	}
	return false;
}
694
/**
 * Generate implied end tags: keep popping the current node for as
 * long as it belongs to the implied-end-tag set and does not match
 * $butnot.
 * @param BalanceElement|array|string|null $butnot Element, set or tag
 *  name at which to stop, or null for no such restriction.
 * @param bool $thorough True if we should generate end tags thoroughly
 *  (BalanceSets::$thoroughImpliedEndTagsSet is used instead of
 *  BalanceSets::$impliedEndTagsSet).
 * @see https://html.spec.whatwg.org/multipage/syntax.html#generate-implied-end-tags
 */
public function generateImpliedEndTags( $butnot = null, $thorough = false ) {
	$closable = BalanceSets::$impliedEndTagsSet;
	if ( $thorough ) {
		$closable = BalanceSets::$thoroughImpliedEndTagsSet;
	}
	while ( $this->length() > 0 ) {
		$node = $this->currentNode();
		$excluded = $butnot !== null && $node->isA( $butnot );
		if ( $excluded || !$node->isA( $closable ) ) {
			return;
		}
		$this->pop();
	}
}
715
/**
 * Return the current node, i.e. the element stored at the highest
 * index of the stack.
 * @return BalanceElement
 * @see https://html.spec.whatwg.org/multipage/syntax.html#current-node
 */
public function currentNode() {
	$top = count( $this->elements ) - 1;
	return $this->node( $top );
}
725
/**
 * Return the adjusted current node: $fragmentContext when it is
 * truthy and the stack holds exactly one element, otherwise the
 * current node.
 * @param mixed $fragmentContext The fragment context, or a falsy
 *  value when there is none. (Presumably a BalanceElement; the
 *  callers are outside this method -- confirm.)
 * @return mixed Either $fragmentContext or the current BalanceElement.
 */
public function adjustedCurrentNode( $fragmentContext ) {
	if ( $fragmentContext && $this->length() === 1 ) {
		return $fragmentContext;
	}
	return $this->currentNode();
}
733
/**
 * IteratorAggregate hook: hand back an iterator over this stack which
 * visits the current node first, and the root node last.
 * @return Iterator
 */
public function getIterator() {
	$topDown = new ReverseArrayIterator( $this->elements );
	return $topDown;
}
742
/**
 * Look up the BalanceElement stored at position $idx of the stack.
 * Position 0 is the root element.
 * @param int $idx
 * @return BalanceElement
 */
public function node( $idx ) {
	$found = $this->elements[ $idx ];
	return $found;
}
752
/**
 * Overwrite the entry at position $idx of the BalanceStack with $elt.
 * Neither the element being replaced nor $elt may already have been
 * flattened.
 * @param int $idx
 * @param BalanceElement $elt
 */
public function replaceAt( $idx, $elt ) {
	Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $elt, '$elt' );
	$outgoing = $this->elements[$idx];
	Assert::precondition(
		$outgoing->parent !== 'flat',
		'Replaced element should not have already been flattened.'
	);
	$incomingIsLive = ( $elt->parent !== 'flat' );
	Assert::precondition(
		$incomingIsLive,
		'New element should not have already been flattened.'
	);
	$this->elements[$idx] = $elt;
}
770
/**
 * Find the position in the BalanceStack of the given BalanceElement,
 * set, or HTML tag name string. The search starts at the current node
 * and works towards the root, so the highest matching position wins.
 * @param BalanceElement|array|string $tag
 * @return int The position found, or -1 if nothing matches.
 */
public function indexOf( $tag ) {
	$pos = count( $this->elements );
	while ( $pos-- > 0 ) {
		if ( $this->elements[$pos]->isA( $tag ) ) {
			return $pos;
		}
	}
	return -1;
}
785
/**
 * Report how many elements are currently on the BalanceStack.
 * @return int
 */
public function length() {
	$depth = count( $this->elements );
	return $depth;
}
793
/**
 * Take the current node off the BalanceStack and flatten it into
 * its parent.
 */
public function pop() {
	$closed = array_pop( $this->elements );
	$closed->flatten();
}
802
/**
 * Pop (and thereby flatten) nodes until the stack length equals $idx,
 * i.e. remove the node at position $idx together with everything
 * above it.
 * @param int $idx
 */
public function popTo( $idx ) {
	while ( $idx < $this->length() ) {
		$this->pop();
	}
}
813
/**
 * Pop elements off the stack up to and including the first
 * element with the specified HTML tagname (or matching the given
 * set or element). If nothing matches, the loop runs until the stack
 * is exhausted.
 * @param BalanceElement|array|string $tag
 */
public function popTag( $tag ) {
	while ( $this->length() > 0 ) {
		// Test before popping: the match itself must be removed too.
		$isTarget = $this->currentNode()->isA( $tag );
		$this->pop();
		if ( $isTarget ) {
			return;
		}
	}
}
829
/**
 * Pop elements off the stack until the current node is in the
 * specified set; that node itself is *not* popped.
 * @param BalanceElement|array|string $set
 */
public function clearToContext( $set ) {
	// The bound is 1, not 0: the root <html> element is never popped.
	while ( $this->length() > 1 && !$this->currentNode()->isA( $set ) ) {
		$this->pop();
	}
}
844
/**
 * Take the given $elt out of the BalanceStack, wherever it sits,
 * and optionally flatten it into its parent.
 * @param BalanceElement $elt The element to remove.
 * @param bool $flatten Whether to flatten the removed element.
 */
public function removeElement( $elt, $flatten = true ) {
	Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $elt, '$elt' );
	Assert::parameter(
		$elt->parent !== 'flat',
		'$elt',
		'$elt should not already have been flattened.'
	);
	$owner = $elt->parent;
	Assert::parameter(
		$owner->parent !== 'flat',
		'$elt',
		'The parent of $elt should not already have been flattened.'
	);
	$pos = array_search( $elt, $this->elements, true );
	Assert::parameter( $pos !== false, '$elt', 'must be in stack' );
	array_splice( $this->elements, $pos, 1 );
	if ( $flatten ) {
		// Serialize $elt into its parent right away. Without this it
		// would still be serialized eventually, along with its parent;
		// its tree of objects would merely stay in memory a little
		// longer.
		$elt->flatten();
	}
	Assert::postcondition(
		!in_array( $elt, $this->elements, true ),
		'$elt should no longer be in open elements stack'
	);
}
878
/**
 * Find $a in the BalanceStack and insert $b directly after it
 * (one position closer to the current node).
 * @param BalanceElement $a Element which must already be in the stack.
 * @param BalanceElement $b Element to insert.
 */
public function insertAfter( $a, $b ) {
	Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $a, '$a' );
	Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $b, '$b' );
	$idx = $this->indexOf( $a );
	// indexOf() signals "not found" with -1, never with false, so the
	// check has to be on the sign; otherwise a missing $a would go
	// unnoticed and $b would be spliced in at position 0.
	Assert::parameter( $idx >= 0, '$a', 'must be in stack' );
	array_splice( $this->elements, $idx + 1, 0, [ $b ] );
}
891
892 # Fostering and adoption.
893
/**
 * Foster parent the given $elt in the stack of open elements.
 *
 * The target is chosen from the last 'table' and last 'template' on
 * the stack: if a template is present and sits above any table, $elt
 * is appended to the template; otherwise, if a table is present, $elt
 * is inserted into the table's parent just before the table; failing
 * both, $elt is appended to the root element.
 * @param BalanceElement|string $elt
 * @return BalanceElement|string $elt, unchanged.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
 */
private function fosterParent( $elt ) {
	Assert::parameterType( 'MediaWiki\Tidy\BalanceElement|string', $elt, '$elt' );
	$tableIdx = $this->indexOf( 'table' );
	$templateIdx = $this->indexOf( 'template' );

	if ( $templateIdx >= 0 && ( $tableIdx < 0 || $templateIdx > $tableIdx ) ) {
		$this->elements[$templateIdx]->appendChild( $elt );
	} elseif ( $tableIdx >= 0 ) {
		$table = $this->elements[$tableIdx];
		$tableParent = $table->parent;
		# Assume all tables have parents, since we're not running scripts!
		Assert::invariant(
			$tableParent !== null, "All tables should have parents"
		);
		$tableParent->insertBefore( $table, $elt );
	} else {
		// No table or template on the stack: use the `html` element.
		$this->elements[0]->appendChild( $elt );
	}
	return $elt;
}
925
926 /**
927 * Run the "adoption agency algorithm" (AAA) for the given subject
928 * tag name.
929 * @param string $tag The subject tag name.
930 * @param BalanceActiveFormattingElements $afe The current
931 * active formatting elements list.
932 * @return true if the adoption agency algorithm "did something", false
933 * if more processing is required by the caller.
934 * @see https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
935 */
936 public function adoptionAgency( $tag, $afe ) {
937 // If the current node is an HTML element whose tag name is subject,
938 // and the current node is not in the list of active formatting
939 // elements, then pop the current node off the stack of open
940 // elements and abort these steps.
941 if (
942 $this->currentNode()->isA( $tag ) &&
943 $afe->indexOf( $this->currentNode() ) < 0
944 ) {
945 $this->pop();
946 return true; // no more handling required
947 }
948
949 // Let outer loop counter be zero.
950 $outer = 0;
951
952 // Outer loop: If outer loop counter is greater than or
953 // equal to eight, then abort these steps.
954 while ( $outer < 8 ) {
955 // Increment outer loop counter by one.
956 $outer++;
957
958 // Let the formatting element be the last element in the list
959 // of active formatting elements that: is between the end of
960 // the list and the last scope marker in the list, if any, or
961 // the start of the list otherwise, and has the same tag name
962 // as the token.
963 $fmtelt = $afe->findElementByTag( $tag );
964
965 // If there is no such node, then abort these steps and instead
966 // act as described in the "any other end tag" entry below.
967 if ( !$fmtelt ) {
968 return false; // false means handle by the default case
969 }
970
971 // Otherwise, if there is such a node, but that node is not in
972 // the stack of open elements, then this is a parse error;
973 // remove the element from the list, and abort these steps.
974 $index = $this->indexOf( $fmtelt );
975 if ( $index < 0 ) {
976 $afe->remove( $fmtelt );
977 return true; // true means no more handling required
978 }
979
980 // Otherwise, if there is such a node, and that node is also in
981 // the stack of open elements, but the element is not in scope,
982 // then this is a parse error; ignore the token, and abort
983 // these steps.
984 if ( !$this->inScope( $fmtelt ) ) {
985 return true;
986 }
987
988 // Let the furthest block be the topmost node in the stack of
989 // open elements that is lower in the stack than the formatting
990 // element, and is an element in the special category. There
991 // might not be one.
992 $furthestblock = null;
993 $furthestblockindex = -1;
994 $stacklen = $this->length();
995 for ( $i = $index+1; $i < $stacklen; $i++ ) {
996 if ( $this->node( $i )->isA( BalanceSets::$specialSet ) ) {
997 $furthestblock = $this->node( $i );
998 $furthestblockindex = $i;
999 break;
1000 }
1001 }
1002
1003 // If there is no furthest block, then the UA must skip the
1004 // subsequent steps and instead just pop all the nodes from the
1005 // bottom of the stack of open elements, from the current node
1006 // up to and including the formatting element, and remove the
1007 // formatting element from the list of active formatting
1008 // elements.
1009 if ( !$furthestblock ) {
1010 $this->popTag( $fmtelt );
1011 $afe->remove( $fmtelt );
1012 return true;
1013 } else {
1014 // Let the common ancestor be the element immediately above
1015 // the formatting element in the stack of open elements.
1016 $ancestor = $this->node( $index-1 );
1017
1018 // Let a bookmark note the position of the formatting
1019 // element in the list of active formatting elements
1020 // relative to the elements on either side of it in the
1021 // list.
1022 $BOOKMARK = new BalanceElement( '[bookmark]', '[bookmark]', '' );
1023 $afe->insertAfter( $fmtelt, $BOOKMARK );
1024
1025 // Let node and last node be the furthest block.
1026 $node = $furthestblock;
1027 $lastnode = $furthestblock;
1028 $nodeindex = $furthestblockindex;
1029 $nodeafeindex = -1;
1030
1031 // Let inner loop counter be zero.
1032 $inner = 0;
1033
1034 while ( true ) {
1035
1036 // Increment inner loop counter by one.
1037 $inner++;
1038
1039 // Let node be the element immediately above node in
1040 // the stack of open elements, or if node is no longer
1041 // in the stack of open elements (e.g. because it got
1042 // removed by this algorithm), the element that was
1043 // immediately above node in the stack of open elements
1044 // before node was removed.
1045 $node = $this->node( --$nodeindex );
1046
1047 // If node is the formatting element, then go
1048 // to the next step in the overall algorithm.
1049 if ( $node === $fmtelt ) break;
1050
1051 // If the inner loop counter is greater than three and node
1052 // is in the list of active formatting elements, then remove
1053 // node from the list of active formatting elements.
1054 $nodeafeindex = $afe->indexOf( $node );
1055 if ( $inner > 3 && $nodeafeindex !== -1 ) {
1056 $afe->remove( $node );
1057 $nodeafeindex = -1;
1058 }
1059
1060 // If node is not in the list of active formatting
1061 // elements, then remove node from the stack of open
1062 // elements and then go back to the step labeled inner
1063 // loop.
1064 if ( $nodeafeindex === -1 ) {
1065 // Don't flatten here, since we're about to relocate
1066 // parts of this $node.
1067 $this->removeElement( $node, false );
1068 continue;
1069 }
1070
1071 // Create an element for the token for which the
1072 // element node was created with common ancestor as
1073 // the intended parent, replace the entry for node
1074 // in the list of active formatting elements with an
1075 // entry for the new element, replace the entry for
1076 // node in the stack of open elements with an entry for
1077 // the new element, and let node be the new element.
1078 $newelt = $afe->cloneAt( $nodeafeindex ); // XXX
1079 $afe->replace( $node, $newelt );
1080 $this->replaceAt( $nodeindex, $newelt );
1081 $node = $newelt;
1082
1083 // If last node is the furthest block, then move the
1084 // aforementioned bookmark to be immediately after the
1085 // new node in the list of active formatting elements.
1086 if ( $lastnode === $furthestblock ) {
1087 $afe->remove( $BOOKMARK );
1088 $afe->insertAfter( $newelt, $BOOKMARK );
1089 }
1090
1091 // Insert last node into node, first removing it from
1092 // its previous parent node if any.
1093 $node->appendChild( $lastnode );
1094
1095 // Let last node be node.
1096 $lastnode = $node;
1097 }
1098
1099 // If the common ancestor node is a table, tbody, tfoot,
1100 // thead, or tr element, then, foster parent whatever last
1101 // node ended up being in the previous step, first removing
1102 // it from its previous parent node if any.
1103 if (
1104 $this->fosterParentMode &&
1105 $ancestor->isA( BalanceSets::$tableSectionRowSet )
1106 ) {
1107 $this->fosterParent( $lastnode );
1108 } else {
1109 // Otherwise, append whatever last node ended up being in
1110 // the previous step to the common ancestor node, first
1111 // removing it from its previous parent node if any.
1112 $ancestor->appendChild( $lastnode );
1113 }
1114
1115 // Create an element for the token for which the
1116 // formatting element was created, with furthest block
1117 // as the intended parent.
1118 $newelt2 = $afe->cloneAt( $afe->indexOf( $fmtelt ) );
1119
1120 // Take all of the child nodes of the furthest block and
1121 // append them to the element created in the last step.
1122 $newelt2->adoptChildren( $furthestblock );
1123
1124 // Append that new element to the furthest block.
1125 $furthestblock->appendChild( $newelt2 );
1126
1127 // Remove the formatting element from the list of active
1128 // formatting elements, and insert the new element into the
1129 // list of active formatting elements at the position of
1130 // the aforementioned bookmark.
1131 $afe->remove( $fmtelt );
1132 $afe->replace( $BOOKMARK, $newelt2 );
1133
1134 // Remove the formatting element from the stack of open
1135 // elements, and insert the new element into the stack of
1136 // open elements immediately below the position of the
1137 // furthest block in that stack.
1138 $this->removeElement( $fmtelt );
1139 $this->insertAfter( $furthestblock, $newelt2 );
1140 }
1141 }
1142
1143 return true;
1144 }
1145
/**
 * Return the contents of the open elements stack as a string for
 * debugging.
 * @return string The local names of the stack entries, in stack order,
 *  separated by single spaces.
 */
public function __toString() {
	$names = [];
	foreach ( $this->elements as $elt ) {
		$names[] = $elt->localName;
	}
	// Separator first: the (array, separator) argument order is a legacy
	// form that newer PHP versions no longer accept.
	return implode( ' ', $names );
}
1158 }
1159
/**
 * The list of active formatting elements, which is used to handle
 * mis-nested formatting element tags in the HTML5 tree builder
 * specification.
 *
 * Entries are kept in two parallel arrays: $elemList holds the element
 * (or the marker sentinel) and $attribList holds the attribute string
 * recorded for the entry at the same index. Every mutation below must
 * keep the two arrays aligned.
 *
 * @ingroup Parser
 * @since 1.27
 * @see https://html.spec.whatwg.org/multipage/syntax.html#list-of-active-formatting-elements
 */
class BalanceActiveFormattingElements {
	/** Elements (or self::$MARKER) in insertion order. */
	private $elemList = [];
	/** Attribute string recorded for the entry at the same index. */
	private $attribList = [];
	/** Sentinel stored in both lists to represent a scope marker. */
	private static $MARKER = '|';

	/**
	 * Append a scope marker to the list.
	 */
	public function insertMarker() {
		$this->elemList[] = self::$MARKER;
		$this->attribList[] = self::$MARKER;
	}

	/**
	 * Append $elt to the list, applying the "Noah's Ark clause".
	 *
	 * @param object $elt Element to add; its localName and attribs
	 *  properties are read.
	 * @param string $attribs Not used by this method: the attribute
	 *  string is taken from $elt->attribs.
	 */
	public function push( $elt, $attribs ) {
		// "Noah's Ark clause" -- if there are already three copies of
		// this element before we encounter a marker, then drop the last
		// one. (The scan runs from the end of the list, so the third
		// match found is the one furthest from the end.)
		$count = 0;
		for ( $i = count( $this->elemList ) - 1; $i >= 0; $i-- ) {
			if ( $this->elemList[$i] === self::$MARKER ) {
				break;
			}
			// Note that we rely on Sanitizer::fixTagAttributes having run
			// previously with the $sorted option true.  The attributes are
			// thus canonicalized, which allows us to compare $attribs with
			// a simple string compare.
			if (
				$this->elemList[$i]->localName === $elt->localName &&
				$this->attribList[$i] === $elt->attribs
			) {
				$count++;
				if ( $count === 3 ) {
					array_splice( $this->elemList, $i, 1 );
					array_splice( $this->attribList, $i, 1 );
					break;
				}
			}
		}
		// Now push the new element onto the list.
		$this->elemList[] = $elt;
		// Spec says we have to clone the attribs, in case the element's
		// attributes are later modified.
		$this->attribList[] = $elt->attribs;
	}

	/**
	 * Remove entries from the end of the list up to and including the
	 * last marker; if there is no marker the whole list is cleared.
	 */
	public function clearToMarker() {
		# This is deliberately >0 not >=0, since it doesn't matter if element
		# 0 is the marker, we clear the whole list in that case regardless.
		for ( $i = count( $this->elemList ) - 1; $i > 0; $i-- ) {
			if ( $this->elemList[$i] === self::$MARKER ) {
				break;
			}
		}
		array_splice( $this->elemList, $i );
		array_splice( $this->attribList, $i );
	}

	/**
	 * Find and return the last element with the specified tag between the
	 * end of the list and the last marker on the list.
	 * Used when parsing &lt;a&gt; "in body mode".
	 *
	 * @param string $tag Local name to look for.
	 * @return object|null The matching element, or null if none was found
	 *  before a marker or the start of the list.
	 */
	public function findElementByTag( $tag ) {
		for ( $i = count( $this->elemList ) - 1; $i >= 0; $i-- ) {
			$elt = $this->elemList[$i];
			if ( $elt === self::$MARKER ) {
				break;
			}
			if ( $elt->localName === $tag ) {
				return $elt;
			}
		}
		return null;
	}

	/**
	 * Return the index of $elt in the list (searching from the end and
	 * comparing by identity), or -1 if it is not present.
	 *
	 * @param mixed $elt
	 * @return int
	 */
	public function indexOf( $elt ) {
		for ( $i = count( $this->elemList ) - 1; $i >= 0; $i-- ) {
			if ( $this->elemList[$i] === $elt ) {
				return $i;
			}
		}
		return -1;
	}

	/**
	 * Find the element $elt in the list and remove it.
	 * Used when parsing &lt;a&gt; in body mode.
	 *
	 * The presence of $elt is checked with Assert::parameter().
	 *
	 * @param mixed $elt
	 */
	public function remove( $elt ) {
		$idx = $this->indexOf( $elt );
		Assert::parameter( $idx >= 0, '$elt', 'should be present in afe list' );
		array_splice( $this->elemList, $idx, 1 );
		array_splice( $this->attribList, $idx, 1 );
	}

	/**
	 * Find element $a in the list and replace it with element $b,
	 * optionally replacing the stored attributes as well with $attribs.
	 * Does nothing if $a is not in the list.
	 *
	 * @param mixed $a Entry to look for.
	 * @param mixed $b Entry to store in its place.
	 * @param string|null $attribs New stored attribute string; when null
	 *  the attribute string already stored at that position is kept.
	 */
	public function replace( $a, $b, $attribs=null ) {
		$idx = $this->indexOf( $a );
		if ( $idx >= 0 ) {
			$this->elemList[ $idx ] = $b;
			if ( $attribs !== null ) {
				$this->attribList[ $idx ] = $attribs;
			}
		}
	}

	/**
	 * Find $a in the list and insert $b after it.
	 * This is only used for inserting a bookmark object, so the
	 * $this->attribList contents don't really matter (an empty string
	 * is stored for the new entry).
	 * Does nothing if $a is not in the list.
	 *
	 * @param mixed $a Existing entry.
	 * @param mixed $b Entry to insert immediately after $a.
	 */
	public function insertAfter( $a, $b ) {
		$idx = $this->indexOf( $a );
		if ( $idx >= 0 ) {
			// array_splice() inserts *at* the given offset, pushing the
			// entry currently there towards the end; to land after $a
			// the offset has to be one past $a's position.
			array_splice( $this->elemList, $idx + 1, 0, [ $b ] );
			array_splice( $this->attribList, $idx + 1, 0, [ '' ] );
		}
	}

	/**
	 * Make a copy of element $idx on the list of active formatting
	 * elements, using its original attributes not current attributes.
	 * (In full HTML spec, current attributes could have been modified
	 * by a script.)
	 *
	 * @param int $idx Position in the list.
	 * @return BalanceElement A new element with the namespace and local
	 *  name of the entry at $idx and the attribute string stored for it.
	 */
	public function cloneAt( $idx ) {
		$node = $this->elemList[$idx];
		$attribs = $this->attribList[$idx];
		return new BalanceElement(
			$node->namespaceURI, $node->localName, $attribs
		);
	}

	// @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
	/**
	 * Reconstruct the active formatting elements.
	 * @param BalanceStack $stack The open elements stack
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#reconstruct-the-active-formatting-elements
	 */
	// @codingStandardsIgnoreEnd
	public function reconstruct( $stack ) {
		if ( empty( $this->elemList ) ) {
			return;
		}
		$len = count( $this->elemList );
		$entry = $this->elemList[$len - 1];
		// If the last is a marker, do nothing.
		if ( $entry === self::$MARKER ) {
			return;
		}
		// Or if it is an open element, do nothing.
		if ( $stack->indexOf( $entry ) >= 0 ) {
			return;
		}

		// Loop backward through the list until we find a marker or an
		// open element, and then move forward one from there.
		// (If neither is found, $i ends at -1 and the forward loop
		// below starts from index 0.)
		for ( $i = $len - 2; $i >= 0; $i-- ) {
			$entry = $this->elemList[$i];
			if ( $entry === self::$MARKER ) {
				break;
			}
			if ( $stack->indexOf( $entry ) >= 0 ) {
				break;
			}
		}

		// Now loop forward, starting from the element after the current
		// one, recreating formatting elements and pushing them back onto
		// the list of open elements
		for ( $i++; $i < $len; $i++ ) {
			$this->elemList[$i] = $stack->insertHTMLElement(
				$this->elemList[$i]->localName,
				$this->attribList[$i]
			);
		}
	}
}
1347
1348 /**
1349 * An implementation of the tree building portion of the HTML5 parsing
1350 * spec.
1351 *
1352 * This is used to balance and tidy output so that the result can
1353 * always be cleanly serialized/deserialized by an HTML5 parser. It
1354 * does *not* guarantee "conforming" output -- the HTML5 spec contains
1355 * a number of constraints which are not enforced by the HTML5 parsing
1356 * process. But the result will be free of gross errors: misnested or
1357 * unclosed tags, for example, and will be unchanged by spec-compliant
1358 * parsing followed by serialization.
1359 *
1360 * The tree building stage is structured as a state machine.
1361 * When comparing the implementation to
1362 * https://www.w3.org/TR/html5/syntax.html#tree-construction
1363 * note that each state is implemented as a function with a
1364 * name ending in `Mode` (because the HTML spec refers to them
1365 * as insertion modes). The current insertion mode is held by
1366 * the $parseMode property.
1367 *
1368 * The following simplifications have been made:
1369 * - We handle body content only (ie, we start `in body`.)
1370 * - The document is never in "quirks mode".
1371 * - All occurrences of < and > have been entity escaped, so we
1372 * can parse tags by simply splitting on those two characters.
1373 * Similarly, all attributes have been "cleaned" and are double-quoted
1374 * and escaped.
1375 * - All comments and null characters are assumed to have been removed.
1376 * - We don't alter linefeeds after <pre>/<listing>.
1377 * - The following elements are disallowed: <html>, <head>, <body>, <frameset>,
1378 * <form>, <frame>, <plaintext>, <isindex>, <textarea>, <xmp>, <iframe>,
1379 * <noembed>, <noscript>, <select>, <script>, <title>. As a result,
1380 * further simplifications can be made:
1381 * - `frameset-ok` is not tracked.
1382 * - `form element pointer` is not tracked.
1383 * - `head element pointer` is not tracked (but presumed non-null)
1384 * - Tokenizer has only a single mode.
1385 *
1386 * We generally mark places where we omit cases from the spec due to
1387 * disallowed elements with a comment: `# OMITTED: <element-name>`.
1388 *
1389 * The HTML spec keeps a flag during the parsing process to track
1390 * whether or not a "parse error" has been encountered. We don't
1391 * bother to track that flag, we just implement the error-handling
1392 * process as specified.
1393 *
1394 * @ingroup Parser
1395 * @since 1.27
1396 * @see https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
1397 */
1398 class Balancer {
1399 private $parseMode;
1400 private $bitsIterator;
1401 private $allowedHtmlElements;
1402 private $afe;
1403 private $stack;
1404 private $strict;
1405
1406 private $textIntegrationMode = false;
1407 private $pendingTableText;
1408 private $originalInsertionMode;
1409 private $fragmentContext;
1410
/**
 * Create a new Balancer.
 * @param array $config Balancer configuration.  Includes:
 *     'strict' : boolean, defaults to false.
 *         When true, enforces syntactic constraints on input:
 *         all non-tag '<' must be escaped, all attributes must be
 *         separated by a single space and double-quoted.  This is
 *         consistent with the output of the Sanitizer.
 *     'allowedHtmlElements' : array, defaults to null.
 *         When present, the keys of this associative array give
 *         the acceptable HTML tag names.  When not present, no
 *         tag sanitization is done.
 * @throws ParameterAssertionException If 'allowedHtmlElements' contains
 *   a key that is also a key of the unsupported-element set for the
 *   HTML namespace.
 */
public function __construct( array $config ) {
	$config = $config + [
		'strict' => false,
		'allowedHtmlElements' => null,
	];
	$this->allowedHtmlElements = $config['allowedHtmlElements'];
	$this->strict = $config['strict'];
	if ( $this->allowedHtmlElements !== null ) {
		# Sanity check!
		// Only the keys (tag names) matter here; the values are ignored.
		$bad = array_intersect_key(
			$this->allowedHtmlElements,
			BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE]
		);
		if ( count( $bad ) > 0 ) {
			// Separator first: the (array, separator) argument order is
			// a legacy form that newer PHP versions no longer accept.
			$badstr = implode( ',', array_keys( $bad ) );
			throw new ParameterAssertionException(
				'$config',
				'Balance attempted with sanitization including ' .
				"unsupported elements: {$badstr}"
			);
		}
	}
}
1452
/**
 * Return a balanced HTML string for the HTML fragment given by $text,
 * subject to the caveats listed in the class description.  The result
 * will typically be idempotent -- that is, rebalancing the output
 * would result in no change.
 *
 * All per-parse state (token iterator, active formatting elements,
 * open element stack, template insertion modes, fragment context) is
 * set up afresh at the start of each call.
 *
 * @param string $text The markup to be balanced
 * @param callable $processingCallback Callback to do any variable or
 *   parameter replacements in HTML attributes values
 * @param array|bool $processingArgs Arguments for the processing callback
 * @return string The balanced markup
 */
public function balance( $text, $processingCallback = null, $processingArgs = [] ) {
	$this->parseMode = 'inBodyMode';
	$this->bitsIterator = new ExplodeIterator( '<', $text );
	$this->afe = new BalanceActiveFormattingElements();
	$this->stack = new BalanceStack();
	$this->processingCallback = $processingCallback;
	$this->processingArgs = $processingArgs;
	// Start every parse with an empty stack of template insertion modes,
	// so that nothing left over from an earlier call (for example one
	// that was aborted by a failed assertion) can influence this one.
	$this->templateInsertionModes = [];

	# The stack is constructed with an <html> element already on it.
	# Set this up as a fragment parsed with <body> as the context.
	$this->fragmentContext =
		new BalanceElement( BalanceSets::HTML_NAMESPACE, 'body', '' );
	$this->resetInsertionMode();

	// First element is text not tag
	$x = $this->bitsIterator->current();
	$this->bitsIterator->next();
	$this->insertToken( 'text', str_replace( '>', '&gt;', $x ) );
	// Now process each tag.
	while ( $this->bitsIterator->valid() ) {
		$this->advance();
	}
	$this->insertToken( 'eof', null );
	$result = $this->stack->getOutput();
	// Free memory before returning.
	$this->bitsIterator = null;
	$this->afe = null;
	$this->stack = null;
	$this->fragmentContext = null;
	return $result;
}
1496
/**
 * Pass a token to the tree builder.  The $token will be one of the
 * strings "tag", "endtag", "text" or "eof".
 *
 * Unsupported tags are rejected (returning false) and empty text is
 * accepted without doing anything (returning true). Every other token
 * is routed either to insertForeignToken() or to the handler method
 * named by the current parse mode, and that handler's result is
 * returned.
 *
 * @param string $token Token type.
 * @param string|null $value Tag name, or the text for a text token.
 * @param string|null $attribs Attribute string for tag tokens.
 * @param bool $selfclose Whether a start tag was self-closing.
 * @return mixed
 */
private function insertToken( $token, $value, $attribs = null, $selfclose = false ) {
	$isStartTag = ( $token === 'tag' );
	$isText = ( $token === 'text' );

	// validate tags against $unsupportedSet
	if ( $isStartTag || $token === 'endtag' ) {
		if ( isset( BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE][$value] ) ) {
			# As described in "simplifications" above, these tags are
			# not supported in the balancer.
			Assert::invariant(
				!$this->strict,
				"Unsupported $token <$value> found."
			);
			return false;
		}
	} elseif ( $isText && $value === '' ) {
		# Don't actually inject the empty string as a text token.
		return true;
	}

	// Decide whether the token is handled by the HTML insertion mode
	// rules or by the foreign content rules.
	$adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );

	if (
		$this->stack->length() === 0 ||
		$adjusted->isHtml() ||
		$token === 'eof'
	) {
		$useHtmlRules = true;
	} elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
		// Text, and any start tag other than mglyph/malignmark.
		$useHtmlRules = $isText ||
			( $isStartTag && $value !== 'mglyph' && $value !== 'malignmark' );
	} elseif (
		$adjusted->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
		$adjusted->localName === 'annotation-xml' &&
		$isStartTag && $value === 'svg'
	) {
		$useHtmlRules = true;
	} else {
		$useHtmlRules = $adjusted->isHtmlIntegrationPoint() &&
			( $isStartTag || $isText );
	}

	if ( !$useHtmlRules ) {
		return $this->insertForeignToken( $token, $value, $attribs, $selfclose );
	}
	$handler = $this->parseMode;
	return $this->$handler( $token, $value, $attribs, $selfclose );
}
1555
/**
 * Handle a token according to the rules for foreign content.
 *
 * @param string $token Token type ("text", "tag" or "endtag" are handled).
 * @param string|null $value Tag name, or the text for a text token.
 * @param string|null $attribs Attribute string for tag tokens.
 * @param bool $selfclose Whether a start tag was self-closing.
 * @return mixed True when the token was consumed here, otherwise the
 *  result of the handler the token was forwarded to. Nothing is
 *  returned for token types not listed above.
 */
private function insertForeignToken( $token, $value, $attribs = null, $selfclose = false ) {
	// Start tags which break out of foreign content.
	static $breakoutTags = [
		'b' => true, 'big' => true, 'blockquote' => true, 'body' => true,
		'br' => true, 'center' => true, 'code' => true, 'dd' => true,
		'div' => true, 'dl' => true, 'dt' => true, 'em' => true,
		'embed' => true, 'h1' => true, 'h2' => true, 'h3' => true,
		'h4' => true, 'h5' => true, 'h6' => true, 'head' => true,
		'hr' => true, 'i' => true, 'img' => true, 'li' => true,
		'listing' => true, 'menu' => true, 'meta' => true, 'nobr' => true,
		'ol' => true, 'p' => true, 'pre' => true, 'ruby' => true,
		's' => true, 'small' => true, 'span' => true, 'strong' => true,
		'strike' => true, 'sub' => true, 'sup' => true, 'table' => true,
		'tt' => true, 'u' => true, 'ul' => true, 'var' => true,
	];

	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
		return true;
	}

	if ( $token === 'tag' ) {
		// <font> only breaks out when it carries a color, face or size
		// attribute.
		// We rely on Sanitizer::fixTagAttributes having run on $attribs
		// to normalize the form of the tag parameters.
		$breaksOut = isset( $breakoutTags[$value] ) || (
			$value === 'font' &&
			preg_match( '/(^| )(color|face|size)="/i', $attribs )
		);

		if ( $breaksOut && !$this->fragmentContext ) {
			// Pop until the current node is an integration point or an
			// HTML element, then hand the token back to insertToken().
			do {
				$this->stack->pop();
				$node = $this->stack->currentNode();
				$reachedBoundary =
					$node->isMathmlTextIntegrationPoint() ||
					$node->isHtmlIntegrationPoint() ||
					$node->isHtml();
			} while ( !$reachedBoundary );
			return $this->insertToken( $token, $value, $attribs, $selfclose );
		}

		// "Any other start tag"
		if ( $this->fragmentContext && $this->stack->length()===1 ) {
			$adjusted = $this->fragmentContext;
		} else {
			$adjusted = $this->stack->currentNode();
		}
		$this->stack->insertForeignElement(
			$adjusted->namespaceURI, $value, $attribs
		);
		if ( $selfclose ) {
			$this->stack->pop();
		}
		return true;
	}

	if ( $token === 'endtag' ) {
		$isCurrentNode = true;
		foreach ( $this->stack as $i => $node ) {
			if ( !$isCurrentNode && $node->isHtml() ) {
				// process the end tag as HTML
				$handler = $this->parseMode;
				return $this->$handler( $token, $value, $attribs, $selfclose );
			}
			if ( $i === 0 ) {
				return true;
			}
			if ( $node->localName === $value ) {
				$this->stack->popTag( $node );
				return true;
			}
			$isCurrentNode = false;
		}
	}
}
1656
/**
 * Grab the next "token" from $bitsIterator.  This is either a open/close
 * tag or text, depending on whether the Sanitizer approves.
 *
 * The chunk is fed to insertToken() as a tag token followed by a text
 * token for whatever trails the tag. If the chunk is not an acceptable
 * tag (or insertToken() rejects it), the whole chunk is emitted as text
 * with its leading '<' escaped.
 */
private function advance() {
	$chunk = $this->bitsIterator->current();
	$this->bitsIterator->next();

	# $slash: Does the current element start with a '/'?
	# $t: Current element name
	# $attribs: String between element name and >
	# $brace: Ending '>' or '/>'
	# $rest: Everything until the next element from the $bitsIterator
	$parts = [];
	if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $chunk, $parts ) ) {
		list( /* whole match */, $slash, $t, $attribs, $brace, $rest ) = $parts;
		$t = strtolower( $t );
		if ( $this->strict ) {
			/* Verify that attributes are all properly double-quoted */
			$attribsOk = preg_match(
				'/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribs
			);
			Assert::invariant( $attribsOk, "Bad attribute string found" );
		}
	} else {
		Assert::invariant(
			!$this->strict, "< found which does not start a valid tag"
		);
		$slash = $t = $attribs = $brace = $rest = null;
	}

	$sanitize = $this->allowedHtmlElements !== null;
	$accepted = $sanitize
		? ( $t && isset( $this->allowedHtmlElements[$t] ) )
		: $t;

	if ( $accepted ) {
		if ( is_callable( $this->processingCallback ) ) {
			call_user_func_array( $this->processingCallback, [ &$attribs, $this->processingArgs ] );
		}
		if ( $sanitize ) {
			$accepted = Sanitizer::validateTag( $attribs, $t );
		}
	}

	if ( $accepted ) {
		if ( $sanitize ) {
			$newattribs = Sanitizer::fixTagAttributes( $attribs, $t, true );
		} else {
			// Sort by attribute name so the encoded string is canonical.
			$decoded = Sanitizer::decodeTagAttributes( $attribs );
			ksort( $decoded );
			$newattribs = Sanitizer::safeEncodeTagAttributes( $decoded );
		}
		$tokenType = $slash ? 'endtag' : 'tag';
		$accepted = $this->insertToken(
			$tokenType, $t, $newattribs, $brace === '/>'
		);
	}

	if ( !$accepted ) {
		# bad tag; serialize entire thing as text.
		$this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $chunk ) );
		return;
	}
	// Escaping once is enough: the replacement text contains no '>'.
	$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
}
1721
/**
 * Make $mode the current parse mode.
 *
 * @param string $mode Name of the handler method; checked with
 *  Assert::parameter() to end in "Mode".
 * @return string The parse mode that was in effect before the switch.
 */
private function switchMode( $mode ) {
	$hasModeSuffix = ( substr( $mode, -4 )==='Mode' );
	Assert::parameter( $hasModeSuffix, '$mode', 'should end in Mode' );

	$previous = $this->parseMode;
	$this->parseMode = $mode;
	return $previous;
}
1730
/**
 * Switch to parse mode $mode, then feed the given token through
 * insertToken() again and return its result.
 */
private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfclose ) {
	$this->switchMode( $mode );
	$result = $this->insertToken( $token, $value, $attribs, $selfclose );
	return $result;
}
1735
/**
 * Choose the parse mode from the contents of the open element stack.
 *
 * The stack is iterated until an HTML element that determines the mode
 * is found. When the entry with index 0 is reached, the fragment
 * context (if any) stands in for it, and the mode falls back to
 * 'inBodyMode' if nothing more specific applies.
 */
private function resetInsertionMode() {
	// Elements which map directly onto a parse mode.
	# OMITTED: <select>
	# OMITTED: <frameset>
	# OMITTED: <html>
	# OMITTED: <head>
	static $modeForTag = [
		'tr' => 'inRowMode',
		'tbody' => 'inTableBodyMode',
		'tfoot' => 'inTableBodyMode',
		'thead' => 'inTableBodyMode',
		'caption' => 'inCaptionMode',
		'colgroup' => 'inColumnGroupMode',
		'table' => 'inTableMode',
		'body' => 'inBodyMode',
	];

	foreach ( $this->stack as $i => $node ) {
		$last = ( $i === 0 );
		if ( $last && $this->fragmentContext ) {
			$node = $this->fragmentContext;
		}

		if ( $node->isHtml() ) {
			$name = $node->localName;
			if ( isset( $modeForTag[$name] ) ) {
				$this->switchMode( $modeForTag[$name] );
				return;
			}
			if ( $name === 'template' ) {
				// Most recently pushed template insertion mode.
				$this->switchMode(
					array_slice( $this->templateInsertionModes, -1 )[0]
				);
				return;
			}
			# OMITTED: <head>
			if ( !$last && $node->isA( BalanceSets::$tableCellSet ) ) {
				$this->switchMode( 'inCellMode' );
				return;
			}
		}

		if ( $last ) {
			$this->switchMode( 'inBodyMode' );
			return;
		}
	}
}
1808
/**
 * Finish parsing by popping the open element stack until a single
 * entry is left on it.
 */
private function stopParsing() {
	# Most of the spec methods are inapplicable, other than step 2:
	# "pop all the nodes off the stack of open elements".
	# We're going to keep the top-most <html> element on the stack, though.
	$stack = $this->stack;
	while ( $stack->length() > 1 ) {
		$stack->pop();
	}
}
1817
/**
 * Insert an HTML element for a raw-text tag and switch to
 * 'inTextMode', remembering the mode to return to afterwards.
 *
 * @param string $value Tag name.
 * @param string|null $attribs Attribute string.
 * @return bool Always true.
 */
private function parseRawText( $value, $attribs = null ) {
	$this->stack->insertHTMLElement( $value, $attribs );
	// XXX switch tokenizer to rawtext state?
	$previousMode = $this->switchMode( 'inTextMode' );
	$this->originalInsertionMode = $previousMode;
	return true;
}
1824
/**
 * Handler for the 'inTextMode' parse mode.
 *
 * Text is inserted as-is. An end tag or eof pops the current node and
 * restores the original insertion mode; eof is then reprocessed in that
 * mode. Any other token is accepted without action.
 */
private function inTextMode( $token, $value, $attribs = null, $selfclose = false ) {
	switch ( $token ) {
	case 'text':
		$this->stack->insertText( $value );
		break;
	case 'eof':
		$this->stack->pop();
		return $this->switchModeAndReprocess(
			$this->originalInsertionMode, $token, $value, $attribs, $selfclose
		);
	case 'endtag':
		$this->stack->pop();
		$this->switchMode( $this->originalInsertionMode );
		break;
	}
	return true;
}
1841
/**
 * Handler for the "in head" insertion mode.
 *
 * Leading whitespace of a text token is inserted directly. Start tags
 * for void head elements are inserted and immediately popped; noframes
 * and style are parsed as raw text; template start/end tags maintain
 * the template insertion mode stack. Tokens not consumed here are
 * passed back to insertToken() after a synthetic </head>.
 *
 * @param string $token Token type.
 * @param string|null $value Tag name, or the text for a text token.
 * @param string|null $attribs Attribute string for tag tokens.
 * @param bool $selfclose Whether a start tag was self-closing.
 * @return mixed True when the token was consumed here, otherwise the
 *  result of reprocessing it through insertToken().
 */
private function inHeadMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'text' ) {
		if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
			$this->stack->insertText( $matches[0] );
			$value = substr( $value, strlen( $matches[0] ) );
		}
		if ( strlen( $value ) === 0 ) {
			return true; // All text handled.
		}
		// Fall through to handle non-whitespace below.
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
		case 'meta':
			# inBodyMode delegates <meta> to this handler, so it has to
			# be consumed here: otherwise the token would be bounced
			# back to insertToken() below and delegated here again,
			# without end.
			# OMITTED: in a full HTML parser, this might change the encoding.
			/* falls through */
		# OMITTED: <html>
		case 'base':
		case 'basefont':
		case 'bgsound':
		case 'link':
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;
		# OMITTED: <title>
		# OMITTED: <noscript>
		case 'noframes':
		case 'style':
			return $this->parseRawText( $value, $attribs );
		# OMITTED: <script>
		case 'template':
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->afe->insertMarker();
			# OMITTED: frameset_ok
			$this->switchMode( 'inTemplateMode' );
			$this->templateInsertionModes[] = $this->parseMode;
			return true;
		# OMITTED: <head>
		}
	} elseif ( $token === 'endtag' ) {
		switch ( $value ) {
		# OMITTED: <head>
		# OMITTED: <body>
		# OMITTED: <html>
		case 'br':
			break; // handle at the bottom of the function
		case 'template':
			if ( $this->stack->indexOf( $value ) < 0 ) {
				return true; // Ignore the token.
			}
			$this->stack->generateImpliedEndTags( null, true /* thorough */ );
			$this->stack->popTag( $value );
			$this->afe->clearToMarker();
			array_pop( $this->templateInsertionModes );
			$this->resetInsertionMode();
			return true;
		default:
			// ignore any other end tag
			return true;
		}
	}

	// If not handled above
	$this->inHeadMode( 'endtag', 'head' ); // synthetic </head>
	// Then redo this one
	return $this->insertToken( $token, $value, $attribs, $selfclose );
}
1905
/**
 * Handle one token according to the "in body" insertion mode.
 *
 * Text is inserted after reconstructing the active formatting elements;
 * start and end tags are dispatched on the tag name. Tags marked
 * "OMITTED" below are deliberately not implemented here.
 *
 * @param string $token Token type: 'text', 'eof', 'tag' or 'endtag'
 * @param string $value Text content for 'text' tokens, or the tag name
 *   for 'tag' and 'endtag' tokens
 * @param string|null $attribs Attribute string of a start tag; synthetic
 *   tokens generated in this method pass '' or omit it
 * @param bool $selfclose Whether the start tag was written self-closed
 * @return bool True when the token was handled (or ignored) here;
 *   otherwise the result of the handler the token was delegated to
 */
private function inBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'text' ) {
		$this->afe->reconstruct( $this->stack );
		$this->stack->insertText( $value );
		return true;
	} elseif ( $token === 'eof' ) {
		// While a template is still open, EOF is handled by the
		// template rules instead.
		if ( !empty( $this->templateInsertionModes ) ) {
			return $this->inTemplateMode( $token, $value, $attribs, $selfclose );
		}
		$this->stopParsing();
		return true;
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
		# OMITTED: <html>
		case 'base':
		case 'basefont':
		case 'bgsound':
		case 'link':
		case 'meta':
		case 'noframes':
		# OMITTED: <script>
		case 'style':
		case 'template':
		# OMITTED: <title>
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );
		# OMITTED: <body>
		# OMITTED: <frameset>

		case 'address':
		case 'article':
		case 'aside':
		case 'blockquote':
		case 'center':
		case 'details':
		case 'dialog':
		case 'dir':
		case 'div':
		case 'dl':
		case 'fieldset':
		case 'figcaption':
		case 'figure':
		case 'footer':
		case 'header':
		case 'hgroup':
		case 'main':
		case 'menu':
		case 'nav':
		case 'ol':
		case 'p':
		case 'section':
		case 'summary':
		case 'ul':
			// Block-level elements implicitly close an open <p>.
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			// Headings do not nest: drop a heading that is the current node.
			if ( $this->stack->currentNode()->isA( BalanceSets::$headingSet ) ) {
				$this->stack->pop();
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'pre':
		case 'listing':
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			# As described in "simplifications" above:
			# 1. We don't touch the next token, even if it's a linefeed.
			# 2. OMITTED: frameset_ok
			return true;

		# OMITTED: <form>

		case 'li':
			# OMITTED: frameset_ok
			// Walk the stack: close an open <li>, but stop at the first
			// special element other than address/div/p.
			foreach ( $this->stack as $node ) {
				if ( $node->isA( 'li' ) ) {
					$this->inBodyMode( 'endtag', 'li' );
					break;
				}
				if (
					$node->isA( BalanceSets::$specialSet ) &&
					!$node->isA( BalanceSets::$addressDivPSet )
				) {
					break;
				}
			}
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'dd':
		case 'dt':
			# OMITTED: frameset_ok
			// Same walk as for <li>, closing an open <dd> or <dt>.
			foreach ( $this->stack as $node ) {
				if ( $node->isA( 'dd' ) ) {
					$this->inBodyMode( 'endtag', 'dd' );
					break;
				}
				if ( $node->isA( 'dt' ) ) {
					$this->inBodyMode( 'endtag', 'dt' );
					break;
				}
				if (
					$node->isA( BalanceSets::$specialSet ) &&
					!$node->isA( BalanceSets::$addressDivPSet )
				) {
					break;
				}
			}
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		# OMITTED: <plaintext>

		case 'button':
			if ( $this->stack->inScope( 'button' ) ) {
				// Close the open button, then reprocess this start tag.
				$this->inBodyMode( 'endtag', 'button' );
				return $this->insertToken( $token, $value, $attribs, $selfclose );
			}
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'a':
			$activeElement = $this->afe->findElementByTag( 'a' );
			if ( $activeElement ) {
				$this->inBodyMode( 'endtag', 'a' );
				if ( $this->afe->indexOf( $activeElement ) >= 0 ) {
					$this->afe->remove( $activeElement );
					// Don't flatten here, since when we fall
					// through below we might foster parent
					// the new <a> tag inside this one.
					$this->stack->removeElement( $activeElement, false );
				}
			}
			/* Falls through */
		case 'b':
		case 'big':
		case 'code':
		case 'em':
		case 'font':
		case 'i':
		case 's':
		case 'small':
		case 'strike':
		case 'strong':
		case 'tt':
		case 'u':
			// Formatting elements are also recorded in the list of
			// active formatting elements.
			$this->afe->reconstruct( $this->stack );
			$this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
			return true;

		case 'nobr':
			$this->afe->reconstruct( $this->stack );
			if ( $this->stack->inScope( 'nobr' ) ) {
				$this->inBodyMode( 'endtag', 'nobr' );
				$this->afe->reconstruct( $this->stack );
			}
			$this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
			return true;

		case 'applet':
		case 'marquee':
		case 'object':
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->afe->insertMarker();
			# OMITTED: frameset_ok
			return true;

		case 'table':
			# The document is never in "quirks mode"; see simplifications
			# above.
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			# OMITTED: frameset_ok
			$this->switchMode( 'inTableMode' );
			return true;

		case 'area':
		case 'br':
		case 'embed':
		case 'img':
		case 'keygen':
		case 'wbr':
			// These elements are inserted and immediately popped.
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			# OMITTED: frameset_ok
			return true;

		case 'input':
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			# OMITTED: frameset_ok
			# (hence we don't need to examine the tag's "type" attribute)
			return true;

		case 'menuitem':
		case 'param':
		case 'source':
		case 'track':
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;

		case 'hr':
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;

		case 'image':
			# warts!
			// <image> is processed as if it were <img>.
			return $this->inBodyMode( $token, 'img', $attribs, $selfclose );

		# OMITTED: <isindex>
		# OMITTED: <textarea>
		# OMITTED: <xmp>
		# OMITTED: <iframe>
		# OMITTED: <noembed>
		# OMITTED: <noscript>

		# OMITTED: <select>
		/*
		case 'select':
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			switch ( $this->parseMode ) {
			case 'inTableMode':
			case 'inCaptionMode':
			case 'inTableBodyMode':
			case 'inRowMode':
			case 'inCellMode':
				$this->switchMode( 'inSelectInTableMode' );
				return true;
			default:
				$this->switchMode( 'inSelectMode' );
				return true;
			}
		*/

		case 'optgroup':
		case 'option':
			if ( $this->stack->currentNode()->isA( 'option' ) ) {
				$this->inBodyMode( 'endtag', 'option' );
			}
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'rb':
		case 'rtc':
			if ( $this->stack->inScope( 'ruby' ) ) {
				$this->stack->generateImpliedEndTags();
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'rp':
		case 'rt':
			if ( $this->stack->inScope( 'ruby' ) ) {
				$this->stack->generateImpliedEndTags( 'rtc' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'math':
			$this->afe->reconstruct( $this->stack );
			# We skip the spec's "adjust MathML attributes" and
			# "adjust foreign attributes" steps, since the browser will
			# do this later when it parses the output and it doesn't affect
			# balancing.
			$this->stack->insertForeignElement(
				BalanceSets::MATHML_NAMESPACE, $value, $attribs
			);
			if ( $selfclose ) {
				# emit explicit </math> tag.
				$this->stack->pop();
			}
			return true;

		case 'svg':
			$this->afe->reconstruct( $this->stack );
			# We skip the spec's "adjust SVG attributes" and
			# "adjust foreign attributes" steps, since the browser will
			# do this later when it parses the output and it doesn't affect
			# balancing.
			$this->stack->insertForeignElement(
				BalanceSets::SVG_NAMESPACE, $value, $attribs
			);
			if ( $selfclose ) {
				# emit explicit </svg> tag.
				$this->stack->pop();
			}
			return true;

		case 'caption':
		case 'col':
		case 'colgroup':
		# OMITTED: <frame>
		case 'head':
		case 'tbody':
		case 'td':
		case 'tfoot':
		case 'th':
		case 'thead':
		case 'tr':
			// Ignore table tags if we're not inTableMode
			return true;
		}

		// Handle any other start tag here
		$this->afe->reconstruct( $this->stack );
		$this->stack->insertHTMLElement( $value, $attribs );
		return true;
	} elseif ( $token === 'endtag' ) {
		switch ( $value ) {
		# </body>,</html> are unsupported.

		case 'template':
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );

		case 'address':
		case 'article':
		case 'aside':
		case 'blockquote':
		case 'button':
		case 'center':
		case 'details':
		case 'dialog':
		case 'dir':
		case 'div':
		case 'dl':
		case 'fieldset':
		case 'figcaption':
		case 'figure':
		case 'footer':
		case 'header':
		case 'hgroup':
		case 'listing':
		case 'main':
		case 'menu':
		case 'nav':
		case 'ol':
		case 'pre':
		case 'section':
		case 'summary':
		case 'ul':
			// Ignore if there is not a matching open tag
			if ( !$this->stack->inScope( $value ) ) {
				return true;
			}
			$this->stack->generateImpliedEndTags();
			$this->stack->popTag( $value );
			return true;

		# OMITTED: <form>

		case 'p':
			if ( !$this->stack->inButtonScope( 'p' ) ) {
				// A stray </p> first opens an empty <p>, then this end
				// tag is reprocessed to close it.
				$this->inBodyMode( 'tag', 'p', '' );
				return $this->insertToken( $token, $value, $attribs, $selfclose );
			}
			$this->stack->generateImpliedEndTags( $value );
			$this->stack->popTag( $value );
			return true;

		case 'li':
			if ( !$this->stack->inListItemScope( $value ) ) {
				return true; # ignore
			}
			$this->stack->generateImpliedEndTags( $value );
			$this->stack->popTag( $value );
			return true;

		case 'dd':
		case 'dt':
			if ( !$this->stack->inScope( $value ) ) {
				return true; # ignore
			}
			$this->stack->generateImpliedEndTags( $value );
			$this->stack->popTag( $value );
			return true;

		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
			if ( !$this->stack->inScope( BalanceSets::$headingSet ) ) {
				// Ignore the token. Return true like every other
				// "ignore" path, so callers never receive null.
				return true;
			}
			$this->stack->generateImpliedEndTags();
			// Any open heading level is closed, not only a matching one.
			$this->stack->popTag( BalanceSets::$headingSet );
			return true;

		case 'sarcasm':
			# Take a deep breath, then:
			break;

		case 'a':
		case 'b':
		case 'big':
		case 'code':
		case 'em':
		case 'font':
		case 'i':
		case 'nobr':
		case 's':
		case 'small':
		case 'strike':
		case 'strong':
		case 'tt':
		case 'u':
			if ( $this->stack->adoptionAgency( $value, $this->afe ) ) {
				return true; # If we did something, we're done.
			}
			break; # Go to the "any other end tag" case.

		case 'applet':
		case 'marquee':
		case 'object':
			if ( !$this->stack->inScope( $value ) ) {
				return true; # ignore
			}
			$this->stack->generateImpliedEndTags();
			$this->stack->popTag( $value );
			$this->afe->clearToMarker();
			return true;

		case 'br':
			# Turn </br> into <br>
			return $this->inBodyMode( 'tag', $value, '' );
		}

		// Any other end tag goes here
		foreach ( $this->stack as $i => $node ) {
			if ( $node->isA( $value ) ) {
				$this->stack->generateImpliedEndTags( $value );
				$this->stack->popTo( $i ); # including $i
				break;
			} elseif ( $node->isA( BalanceSets::$specialSet ) ) {
				return true; // ignore this close token.
			}
		}
		return true;
	} else {
		Assert::invariant( false, "Bad token type: $token" );
	}
}
2382
/**
 * Handle one token according to the "in table" insertion mode.
 *
 * Table-structure tags are handled here. Any token not consumed by one
 * of the explicit cases is passed to inBodyMode() with foster parenting
 * switched on for the duration of that call.
 *
 * @param string $token Token type: 'text', 'eof', 'tag' or 'endtag'
 * @param string $value Text content or tag name
 * @param string|null $attribs Attribute string of a start tag
 * @param bool $selfclose Whether the start tag was written self-closed
 * @return bool True when handled here, otherwise the delegate's result
 */
private function inTableMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'eof' ) {
		$this->stopParsing();
		return true;
	}

	if ( $token === 'text' ) {
		if ( $this->textIntegrationMode ) {
			return $this->inBodyMode( $token, $value, $attribs, $selfclose );
		}
		if ( $this->stack->currentNode()->isA( BalanceSets::$tableSectionRowSet ) ) {
			// Buffer the text in inTableTextMode, remembering which
			// mode to come back to afterwards.
			$this->pendingTableText = '';
			$this->originalInsertionMode = $this->parseMode;
			return $this->switchModeAndReprocess(
				'inTableTextMode', $token, $value, $attribs, $selfclose
			);
		}
		// Otherwise: "anything else" below.
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
		case 'style':
		# OMITTED: <script>
		case 'template':
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );

		case 'caption':
			$this->afe->insertMarker();
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inCaptionMode' );
			return true;

		case 'colgroup':
		case 'tbody':
		case 'tfoot':
		case 'thead':
			$this->stack->clearToContext( BalanceSets::$tableContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode(
				$value === 'colgroup' ? 'inColumnGroupMode' : 'inTableBodyMode'
			);
			return true;

		case 'col':
		case 'td':
		case 'th':
		case 'tr':
			// Synthesize the missing wrapper (<colgroup> for <col>,
			// <tbody> for rows and cells), then reprocess the token.
			$this->inTableMode( 'tag', $value === 'col' ? 'colgroup' : 'tbody', '' );
			return $this->insertToken( $token, $value, $attribs, $selfclose );

		case 'table':
			if ( $this->stack->inTableScope( $value ) ) {
				// Close the open table, then reprocess this start tag.
				$this->inTableMode( 'endtag', $value );
				return $this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true; // Ignore this tag.

		case 'input':
			// We rely on Sanitizer::fixTagAttributes having run on $attribs
			// to normalize the form of the tag parameters.
			if ( preg_match( '/(^| )type="hidden"/i', $attribs ) ) {
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->pop();
				return true;
			}
			break; // Not hidden: handle as "anything else".

		# OMITTED: <form>
		}
	} elseif ( $token === 'endtag' ) {
		switch ( $value ) {
		case 'template':
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );

		case 'table':
			if ( $this->stack->inTableScope( $value ) ) {
				$this->stack->popTag( $value );
				$this->resetInsertionMode();
			}
			return true;

		# OMITTED: <body>
		# OMITTED: <html>
		case 'caption':
		case 'col':
		case 'colgroup':
		case 'tbody':
		case 'td':
		case 'tfoot':
		case 'th':
		case 'thead':
		case 'tr':
			return true; // Ignore the token.
		}
	}

	// "Anything else": process with the in-body rules while foster
	// parenting is enabled.
	$this->stack->fosterParentMode = true;
	$this->inBodyMode( $token, $value, $attribs, $selfclose );
	$this->stack->fosterParentMode = false;
	return true;
}
2480
/**
 * Handle one token according to the "in table text" insertion mode.
 *
 * Text tokens are accumulated in $this->pendingTableText. The first
 * non-text token flushes the buffer, restores the saved insertion mode
 * and is then reprocessed in that mode.
 *
 * @param string $token Token type
 * @param string $value Text content or tag name
 * @param string|null $attribs Attribute string of a start tag
 * @param bool $selfclose Whether the start tag was written self-closed
 * @return bool True for buffered text, otherwise the reprocessing result
 */
private function inTableTextMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'text' ) {
		$this->pendingTableText .= $value;
		return true;
	}

	// Non-text token: flush whatever was buffered.
	$buffered = $this->pendingTableText;
	$this->pendingTableText = '';
	$whitespaceOnly = !preg_match( '/[^\x09\x0A\x0C\x0D\x20]/', $buffered );
	if ( $whitespaceOnly ) {
		$this->stack->insertText( $buffered );
	} else {
		// This should match the "anything else" case inTableMode
		$this->stack->fosterParentMode = true;
		$this->inBodyMode( 'text', $buffered );
		$this->stack->fosterParentMode = false;
	}

	return $this->switchModeAndReprocess(
		$this->originalInsertionMode, $token, $value, $attribs, $selfclose
	);
}
2502
/**
 * Helper for inCaptionMode: close the open <caption>, if there is one
 * in table scope, and switch back to inTableMode.
 *
 * @return bool True if a caption was closed, false if none was in scope
 */
private function endCaption() {
	if ( $this->stack->inTableScope( 'caption' ) ) {
		$this->stack->generateImpliedEndTags();
		$this->stack->popTag( 'caption' );
		$this->afe->clearToMarker();
		$this->switchMode( 'inTableMode' );
		return true;
	}
	return false;
}
2514
/**
 * Handle one token according to the "in caption" insertion mode.
 *
 * Table-structure start tags and </table> first close the caption via
 * endCaption(); if that succeeds the token is reprocessed. Everything
 * not handled here goes to inBodyMode().
 *
 * @param string $token Token type
 * @param string $value Text content or tag name
 * @param string|null $attribs Attribute string of a start tag
 * @param bool $selfclose Whether the start tag was written self-closed
 * @return bool True when handled here, otherwise inBodyMode()'s result
 */
private function inCaptionMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'tag' ) {
		$closesCaption = in_array( $value, [
			'caption', 'col', 'colgroup', 'tbody', 'td',
			'tfoot', 'th', 'thead', 'tr',
		], true );
		if ( $closesCaption ) {
			if ( $this->endCaption() ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'caption' ) {
			$this->endCaption();
			return true;
		}
		if ( $value === 'table' ) {
			if ( $this->endCaption() ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}
		# OMITTED: <html>
		$ignored = in_array( $value, [
			'body', 'col', 'colgroup', 'tbody', 'td',
			'tfoot', 'th', 'thead', 'tr',
		], true );
		if ( $ignored ) {
			// Ignore the token
			return true;
		}
	}

	// The Anything Else case
	return $this->inBodyMode( $token, $value, $attribs, $selfclose );
}
2561
/**
 * Handle one token according to the "in column group" insertion mode.
 *
 * Leading whitespace of a text token is inserted directly. Any token
 * not handled by an explicit rule closes the current <colgroup> (when
 * it is the current node) and is then reprocessed.
 *
 * @param string $token Token type
 * @param string $value Text content or tag name
 * @param string|null $attribs Attribute string of a start tag
 * @param bool $selfclose Whether the start tag was written self-closed
 * @return bool True when handled here, otherwise the delegate's result
 */
private function inColumnGroupMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'eof' ) {
		return $this->inBodyMode( $token, $value, $attribs, $selfclose );
	}

	if ( $token === 'text' ) {
		if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
			$leading = $matches[0];
			$this->stack->insertText( $leading );
			$value = substr( $value, strlen( $leading ) );
		}
		if ( strlen( $value ) === 0 ) {
			return true; // All text handled.
		}
		// Non-whitespace remains: handled as "anything else" below.
	} elseif ( $token === 'tag' ) {
		# OMITTED: <html>
		if ( $value === 'col' ) {
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;
		}
		if ( $value === 'template' ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'colgroup' ) {
			if ( $this->stack->currentNode()->isA( 'colgroup' ) ) {
				$this->stack->pop();
				$this->switchMode( 'inTableMode' );
			}
			// Otherwise the token is ignored.
			return true;
		}
		if ( $value === 'col' ) {
			return true; // Ignore the token.
		}
		if ( $value === 'template' ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );
		}
	}

	// Anything else
	if ( $this->stack->currentNode()->isA( 'colgroup' ) ) {
		$this->inColumnGroupMode( 'endtag', 'colgroup' );
		return $this->insertToken( $token, $value, $attribs, $selfclose );
	}
	return true; // Ignore the token.
}
2609
/**
 * Helper for inTableBodyMode: close the open table section (tbody,
 * thead or tfoot) and switch back to inTableMode.
 *
 * @return bool True if a section was closed, false if none of the
 *   three section elements is in table scope
 */
private function endSection() {
	$sectionOpen = false;
	foreach ( [ 'tbody', 'thead', 'tfoot' ] as $section ) {
		if ( $this->stack->inTableScope( $section ) ) {
			$sectionOpen = true;
			break;
		}
	}
	if ( !$sectionOpen ) {
		return false;
	}

	$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
	$this->stack->pop();
	$this->switchMode( 'inTableMode' );
	return true;
}
/**
 * Handle one token according to the "in table body" insertion mode.
 *
 * @param string $token Token type
 * @param string $value Text content or tag name
 * @param string|null $attribs Attribute string of a start tag
 * @param bool $selfclose Whether the start tag was written self-closed
 * @return bool True when handled here, otherwise inTableMode()'s result
 */
private function inTableBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
	$sections = [ 'tbody', 'tfoot', 'thead' ];

	if ( $token === 'tag' ) {
		if ( $value === 'tr' ) {
			$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inRowMode' );
			return true;
		}
		if ( $value === 'th' || $value === 'td' ) {
			// A cell outside a row: synthesize the <tr>, then reprocess.
			$this->inTableBodyMode( 'tag', 'tr', '' );
			$this->insertToken( $token, $value, $attribs, $selfclose );
			return true;
		}
		$endsSection = in_array( $value, [ 'caption', 'col', 'colgroup' ], true )
			|| in_array( $value, $sections, true );
		if ( $endsSection ) {
			if ( $this->endSection() ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'table' ) {
			if ( $this->endSection() ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}
		if ( in_array( $value, $sections, true ) ) {
			if ( $this->stack->inTableScope( $value ) ) {
				$this->endSection();
			}
			return true;
		}
		# OMITTED: <body>
		# OMITTED: <html>
		if ( in_array( $value, [ 'caption', 'col', 'colgroup', 'td', 'th', 'tr' ], true ) ) {
			return true; // Ignore the token.
		}
	}

	// Anything else:
	return $this->inTableMode( $token, $value, $attribs, $selfclose );
}
2676
/**
 * Helper for inRowMode: close the open <tr>, if one is in table scope,
 * and switch back to inTableBodyMode.
 *
 * @return bool True if a row was closed, false if no <tr> was in scope
 */
private function endRow() {
	if ( $this->stack->inTableScope( 'tr' ) ) {
		$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
		$this->stack->pop();
		$this->switchMode( 'inTableBodyMode' );
		return true;
	}
	return false;
}
/**
 * Handle one token according to the "in row" insertion mode.
 *
 * @param string $token Token type
 * @param string $value Text content or tag name
 * @param string|null $attribs Attribute string of a start tag
 * @param bool $selfclose Whether the start tag was written self-closed
 * @return bool True when handled here, otherwise inTableMode()'s result
 */
private function inRowMode( $token, $value, $attribs = null, $selfclose = false ) {
	$sections = [ 'tbody', 'tfoot', 'thead' ];
	$columnish = [ 'caption', 'col', 'colgroup' ];

	if ( $token === 'tag' ) {
		if ( $value === 'th' || $value === 'td' ) {
			$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inCellMode' );
			$this->afe->insertMarker();
			return true;
		}
		$endsRow = $value === 'tr'
			|| in_array( $value, $columnish, true )
			|| in_array( $value, $sections, true );
		if ( $endsRow ) {
			// Close the row first; reprocess only if one was open.
			if ( $this->endRow() ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'tr' ) {
			$this->endRow();
			return true;
		}
		if ( $value === 'table' ) {
			if ( $this->endRow() ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}
		if ( in_array( $value, $sections, true ) ) {
			if ( $this->stack->inTableScope( $value ) && $this->endRow() ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}
		# OMITTED: <body>
		# OMITTED: <html>
		if ( $value === 'td' || $value === 'th' || in_array( $value, $columnish, true ) ) {
			return true; // Ignore the token.
		}
	}

	// Anything else:
	return $this->inTableMode( $token, $value, $attribs, $selfclose );
}
2742
/**
 * Helper for inCellMode: close the open table cell by feeding a
 * synthetic end tag to inCellMode(). <td> is checked before <th>.
 *
 * @return bool True if a cell was in table scope, false otherwise
 */
private function endCell() {
	foreach ( [ 'td', 'th' ] as $cellTag ) {
		if ( $this->stack->inTableScope( $cellTag ) ) {
			$this->inCellMode( 'endtag', $cellTag );
			return true;
		}
	}
	return false;
}
/**
 * Handle one token according to the "in cell" insertion mode.
 *
 * @param string $token Token type
 * @param string $value Text content or tag name
 * @param string|null $attribs Attribute string of a start tag
 * @param bool $selfclose Whether the start tag was written self-closed
 * @return bool True when handled here, otherwise inBodyMode()'s result
 */
private function inCellMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'tag' ) {
		$closesCell = in_array( $value, [
			'caption', 'col', 'colgroup', 'tbody', 'td',
			'tfoot', 'th', 'thead', 'tr',
		], true );
		if ( $closesCell ) {
			// Close the current cell first; reprocess only if one was open.
			if ( $this->endCell() ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'td' || $value === 'th' ) {
			if ( $this->stack->inTableScope( $value ) ) {
				$this->stack->generateImpliedEndTags();
				$this->stack->popTag( $value );
				$this->afe->clearToMarker();
				$this->switchMode( 'inRowMode' );
			}
			return true;
		}
		# OMITTED: <body>
		# OMITTED: <html>
		if ( in_array( $value, [ 'caption', 'col', 'colgroup' ], true ) ) {
			return true;
		}
		if ( in_array( $value, [ 'table', 'tbody', 'tfoot', 'thead', 'tr' ], true ) ) {
			if ( $this->stack->inTableScope( $value ) ) {
				// Close whichever cell is open, then reprocess the end tag.
				$this->stack->generateImpliedEndTags();
				$this->stack->popTag( BalanceSets::$tableCellSet );
				$this->afe->clearToMarker();
				$this->switchMode( 'inRowMode' );
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}
	}

	// Anything else:
	return $this->inBodyMode( $token, $value, $attribs, $selfclose );
}
2808
2809 # OMITTED: <select>
2810 /*
2811 private function inSelectMode( $token, $value, $attribs = null, $selfclose = false ) {
2812 Assert::invariant( false, 'Unimplemented' );
2813 }
2814
2815 private function inSelectInTableMode( $token, $value, $attribs = null, $selfclose = false ) {
2816 Assert::invariant( false, 'Unimplemented' );
2817 }
2818 */
2819
/**
 * Handle one token according to the "in template" insertion mode.
 *
 * Start tags select the insertion mode suited to the tag and are
 * reprocessed there; the only end tag acted upon is </template>.
 *
 * @param string $token Token type: 'text', 'eof', 'tag' or 'endtag'
 * @param string $value Text content or tag name
 * @param string|null $attribs Attribute string of a start tag
 * @param bool $selfclose Whether the start tag was written self-closed
 * @return bool|null True or the delegate's result; an unknown token
 *   type is reported through Assert::invariant()
 */
private function inTemplateMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'text' ) {
		return $this->inBodyMode( $token, $value, $attribs, $selfclose );
	}

	if ( $token === 'eof' ) {
		if ( $this->stack->indexOf( 'template' ) < 0 ) {
			$this->stopParsing();
			return true;
		}
		// Close the unterminated template and reprocess the EOF.
		$this->stack->popTag( 'template' );
		$this->afe->clearToMarker();
		array_pop( $this->templateInsertionModes );
		$this->resetInsertionMode();
		$this->insertToken( $token, $value, $attribs, $selfclose );
		return true;
	}

	if ( $token === 'tag' ) {
		# OMITTED: <script>
		# OMITTED: <title>
		$headTags = [
			'base', 'basefont', 'bgsound', 'link', 'meta',
			'noframes', 'style', 'template',
		];
		if ( in_array( $value, $headTags, true ) ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );
		}

		// Insertion mode to switch to for each table-related start tag;
		// every other tag is reprocessed in inBodyMode.
		$modeForTag = [
			'caption' => 'inTableMode',
			'colgroup' => 'inTableMode',
			'tbody' => 'inTableMode',
			'tfoot' => 'inTableMode',
			'thead' => 'inTableMode',
			'col' => 'inColumnGroupMode',
			'tr' => 'inTableBodyMode',
			'td' => 'inRowMode',
			'th' => 'inRowMode',
		];
		$nextMode = isset( $modeForTag[$value] ) ? $modeForTag[$value] : 'inBodyMode';
		return $this->switchModeAndReprocess(
			$nextMode, $token, $value, $attribs, $selfclose
		);
	}

	if ( $token === 'endtag' ) {
		if ( $value === 'template' ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );
		}
		return true;
	}

	Assert::invariant( false, "Bad token type: $token" );
}
2886 }