Fix function name case
[lhc/web/wiklou.git] / includes / tidy / Balancer.php
index 9e96b14..069b460 100644 (file)
@@ -32,26 +32,31 @@ use \IteratorAggregate;
 use \ReverseArrayIterator;
 use \Sanitizer;
 
-# A note for future librarization[1] -- this file is a good candidate
-# for splitting into an independent library, except that it is currently
-# highly optimized for MediaWiki use.  It only implements the portions
-# of the HTML5 tree builder used by tags supported by MediaWiki, and
-# does not contain a true tokenizer pass, instead relying on
-# comment stripping, attribute normalization, and escaping done by
-# the MediaWiki Sanitizer.  It also deliberately avoids building
-# a true DOM in memory, instead serializing elements to an output string
-# as soon as possible (usually as soon as the tag is closed) to reduce
-# its memory footprint.
-
-# On the other hand, I've been pretty careful to note with comments in the
-# code the places where this implementation omits features of the spec or
-# depends on the MediaWiki Sanitizer.  Perhaps in the future we'll want to
-# implement the missing pieces and make this a standalone PHP HTML5 parser.
-# In order to do so, some sort of MediaWiki-specific API will need
-# to be added to (a) allow the Balancer to bypass the tokenizer,
-# and (b) support on-the-fly flattening instead of DOM node creation.
-
-# [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
+// A note for future librarization[1] -- this file is a good candidate
+// for splitting into an independent library, except that it is currently
+// highly optimized for MediaWiki use.  It only implements the portions
+// of the HTML5 tree builder used by tags supported by MediaWiki, and
+// does not contain a true tokenizer pass, instead relying on
+// comment stripping, attribute normalization, and escaping done by
+// the MediaWiki Sanitizer.  It also deliberately avoids building
+// a true DOM in memory, instead serializing elements to an output string
+// as soon as possible (usually as soon as the tag is closed) to reduce
+// its memory footprint.
+
+// We've been gradually lifting some of these restrictions to handle
+// non-sanitized output generated by extensions, but we shortcut the tokenizer
+// for speed (primarily by splitting on `<`) and so rely on syntactic
+// well-formedness.
+
+// On the other hand, I've been pretty careful to note with comments in the
+// code the places where this implementation omits features of the spec or
+// depends on the MediaWiki Sanitizer.  Perhaps in the future we'll want to
+// implement the missing pieces and make this a standalone PHP HTML5 parser.
+// In order to do so, some sort of MediaWiki-specific API will need
+// to be added to (a) allow the Balancer to bypass the tokenizer,
+// and (b) support on-the-fly flattening instead of DOM node creation.
+
+// [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
 
 /**
  * Utility constants and sets for the HTML5 tree building algorithm.
@@ -69,10 +74,10 @@ class BalanceSets {
        public static $unsupportedSet = [
                self::HTML_NAMESPACE => [
                        'html' => true, 'head' => true, 'body' => true, 'frameset' => true,
-                       'form' => true, 'frame' => true,
-                       'plaintext' => true, 'isindex' => true, 'textarea' => true,
+                       'frame' => true,
+                       'plaintext' => true, 'isindex' => true,
                        'xmp' => true, 'iframe' => true, 'noembed' => true,
-                       'noscript' => true, 'select' => true, 'script' => true,
+                       'noscript' => true, 'script' => true,
                        'title' => true
                ]
        ];
@@ -87,6 +92,12 @@ class BalanceSets {
                ]
        ];
 
+       public static $extraLinefeedSet = [
+               self::HTML_NAMESPACE => [
+                       'pre' => true, 'textarea' => true, 'listing' => true,
+               ]
+       ];
+
        public static $headingSet = [
                self::HTML_NAMESPACE => [
                        'h1' => true, 'h2' => true, 'h3' => true,
@@ -185,7 +196,14 @@ class BalanceSets {
                ]
        ];
 
-       # OMITTED: formAssociatedSet, since we don't allow <form>
+       // See https://html.spec.whatwg.org/multipage/forms.html#form-associated-element
+       public static $formAssociatedSet = [
+               self::HTML_NAMESPACE => [
+                       'button' => true, 'fieldset' => true, 'input' => true,
+                       'keygen' => true, 'object' => true, 'output' => true,
+                       'select' => true, 'textarea' => true, 'img' => true
+               ]
+       ];
 
        public static $inScopeSet = [
                self::HTML_NAMESPACE => [
@@ -228,6 +246,12 @@ class BalanceSets {
                ]
        ];
 
+       public static $inInvertedSelectScopeSet = [
+               self::HTML_NAMESPACE => [
+                       'option' => true, 'optgroup' => true
+               ]
+       ];
+
        public static $mathmlTextIntegrationPointSet = [
                self::MATHML_NAMESPACE => [
                        'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
@@ -440,20 +464,23 @@ class BalanceElement {
         * by the HTML serialization specification, and replace this node
         * in its parent by that string.
         *
+        * @param array $config Balancer configuration; see Balancer::__construct().
+        *
         * @see __toString()
         */
-       public function flatten( $tidyCompat = false ) {
+       public function flatten( array $config ) {
                Assert::parameter( $this->parent !== null, '$this', 'must be a child' );
                Assert::parameter( $this->parent !== 'flat', '$this', 'already flat' );
                $idx = array_search( $this, $this->parent->children, true );
                Assert::parameter(
                        $idx !== false, '$this', 'must be a child of its parent'
                );
+               $tidyCompat = $config['tidyCompat'];
                if ( $tidyCompat ) {
                        $blank = true;
                        foreach ( $this->children as $elt ) {
                                if ( !is_string( $elt ) ) {
-                                       $elt = $elt->flatten( $tidyCompat );
+                                       $elt = $elt->flatten( $config );
                                }
                                if ( $blank && preg_match( '/[^\t\n\f\r ]/', $elt ) ) {
                                        $blank = false;
@@ -476,7 +503,7 @@ class BalanceElement {
                        $flat = "{$this}";
                }
                $this->parent->children[$idx] = $flat;
-               $this->parent = 'flat'; # for assertion checking
+               $this->parent = 'flat'; // for assertion checking
                return $flat;
        }
 
@@ -495,11 +522,21 @@ class BalanceElement {
                }
                if ( !$this->isA( BalanceSets::$emptyElementSet ) ) {
                        $out = "<{$this->localName}{$encAttribs}>";
+                       $len = strlen( $out );
                        // flatten children
                        foreach ( $this->children as $elt ) {
                                $out .= "{$elt}";
                        }
                        $out .= "</{$this->localName}>";
+                       if (
+                               $this->isA( BalanceSets::$extraLinefeedSet ) &&
+                               $out[$len] === "\n"
+                       ) {
+                               // Double the linefeed after pre/listing/textarea
+                               // according to the HTML5 fragment serialization algorithm.
+                               $out = substr( $out, 0, $len + 1 ) .
+                                       substr( $out, $len );
+                       }
                } else {
                        $out = "<{$this->localName}{$encAttribs} />";
                        Assert::invariant(
@@ -510,7 +547,7 @@ class BalanceElement {
                return $out;
        }
 
-       # Utility functions on BalanceElements.
+       // Utility functions on BalanceElements.
 
        /**
         * Determine if $this represents a specific HTML tag, is a member of
@@ -527,7 +564,7 @@ class BalanceElement {
                        return isset( $set[$this->namespaceURI] ) &&
                                isset( $set[$this->namespaceURI][$this->localName] );
                } else {
-                       # assume this is an HTML element name.
+                       // assume this is an HTML element name.
                        return $this->isHtml() && $this->localName === $set;
                }
        }
@@ -627,9 +664,11 @@ class BalanceStack implements IteratorAggregate {
         */
        public $fosterParentMode = false;
        /**
-        * Tidy compatibility mode, determines behavior of body/blockquote
+        * Configuration options governing flattening.
+        * @var array $config
+        * @see Balancer::__construct()
         */
-       public $tidyCompat = false;
+       private $config;
        /**
         * Reference to the current element
         */
@@ -638,14 +677,16 @@ class BalanceStack implements IteratorAggregate {
        /**
         * Create a new BalanceStack with a single BalanceElement on it,
         * representing the root &lt;html&gt; node.
+        * @param array $config Balancer configuration; see Balancer::__construct().
         */
-       public function __construct() {
-               # always a root <html> element on the stack
+       public function __construct( array $config ) {
+               // always a root <html> element on the stack
                array_push(
                        $this->elements,
                        new BalanceElement( BalanceSets::HTML_NAMESPACE, 'html', [] )
                );
                $this->currentNode = $this->elements[0];
+               $this->config = $config;
        }
 
        /**
@@ -658,27 +699,37 @@ class BalanceStack implements IteratorAggregate {
                $out = '';
                foreach ( $this->elements[0]->children as $elt ) {
                        $out .= is_string( $elt ) ? $elt :
-                               $elt->flatten( $this->tidyCompat );
+                               $elt->flatten( $this->config );
                }
                return $out;
        }
 
+       /**
+        * Insert a comment at the appropriate place for inserting a node.
+        * @param string $value Content of the comment.
+        * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-a-comment
+        */
+       public function insertComment( $value ) {
+               // Just another type of text node, except for tidy p-wrapping.
+               return $this->insertText( '<!--' . $value . '-->', true );
+       }
+
        /**
         * Insert text at the appropriate place for inserting a node.
         * @param string $value
         * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
         */
-       public function insertText( $value ) {
+       public function insertText( $value, $isComment = false ) {
                if (
                        $this->fosterParentMode &&
                        $this->currentNode->isA( BalanceSets::$tableSectionRowSet )
                ) {
                        $this->fosterParent( $value );
                } elseif (
-                       $this->tidyCompat &&
+                       $this->config['tidyCompat'] && !$isComment &&
                        $this->currentNode->isA( BalanceSets::$tidyPWrapSet )
                ) {
-                       $this->insertHTMLELement( 'mw:p-wrap', [] );
+                       $this->insertHTMLElement( 'mw:p-wrap', [] );
                        return $this->insertText( $value );
                } else {
                        $this->currentNode->appendChild( $value );
@@ -784,6 +835,26 @@ class BalanceStack implements IteratorAggregate {
                return $this->inSpecificScope( $tag, BalanceSets::$inTableScopeSet );
        }
 
+       /**
+        * Determine if the stack has $tag in select scope.
+        * @param BalanceElement|array|string $tag
+        * @return bool
+        * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-select-scope
+        */
+       public function inSelectScope( $tag ) {
+               // Can't use inSpecificScope to implement this, since it involves
+               // *inverting* a set of tags.  Implement manually.
+               foreach ( $this as $elt ) {
+                       if ( $elt->isA( $tag ) ) {
+                               return true;
+                       }
+                       if ( !$elt->isA( BalanceSets::$inInvertedSelectScopeSet ) ) {
+                               return false;
+                       }
+               }
+               return false;
+       }
+
        /**
         * Determine if the stack has $tag in a specific scope, $set.
         * @param BalanceElement|array|string $tag
@@ -906,7 +977,7 @@ class BalanceStack implements IteratorAggregate {
                        $this->currentNode = null;
                }
                if ( !$elt->isHtmlNamed( 'mw:p-wrap' ) ) {
-                       $elt->flatten( $this->tidyCompat );
+                       $elt->flatten( $this->config );
                }
        }
 
@@ -916,7 +987,6 @@ class BalanceStack implements IteratorAggregate {
         * @param int $idx
         */
        public function popTo( $idx ) {
-               $length = count( $this->elements );
                for ( $length = count( $this->elements ); $length > $idx; $length-- ) {
                        $this->pop();
                }
@@ -981,7 +1051,7 @@ class BalanceStack implements IteratorAggregate {
                        // otherwise, it will eventually serialize when the parent
                        // is serialized, we just hold onto the memory for its
                        // tree of objects a little longer.
-                       $elt->flatten( $this->tidyCompat );
+                       $elt->flatten( $this->config );
                }
                Assert::postcondition(
                        array_search( $elt, $this->elements, true ) === false,
@@ -1005,7 +1075,7 @@ class BalanceStack implements IteratorAggregate {
                }
        }
 
-       # Fostering and adoption.
+       // Fostering and adoption.
 
        /**
         * Foster parent the given $elt in the stack of open elements.
@@ -1022,7 +1092,7 @@ class BalanceStack implements IteratorAggregate {
                        $parent = $this->elements[$lastTemplate];
                } elseif ( $lastTable >= 0 ) {
                        $parent = $this->elements[$lastTable]->parent;
-                       # Assume all tables have parents, since we're not running scripts!
+                       // Assume all tables have parents, since we're not running scripts!
                        Assert::invariant(
                                $parent !== null, "All tables should have parents"
                        );
@@ -1031,7 +1101,7 @@ class BalanceStack implements IteratorAggregate {
                        $parent = $this->elements[0]; // the `html` element.
                }
 
-               if ( $this->tidyCompat ) {
+               if ( $this->config['tidyCompat'] ) {
                        if ( is_string( $elt ) ) {
                                // We're fostering text: do we need a p-wrapper?
                                if ( $parent->isA( BalanceSets::$tidyPWrapSet ) ) {
@@ -1087,34 +1157,28 @@ class BalanceStack implements IteratorAggregate {
                        return true; // no more handling required
                }
 
-               // Let outer loop counter be zero.
-               $outer = 0;
-
                // Outer loop: If outer loop counter is greater than or
                // equal to eight, then abort these steps.
-               while ( $outer < 8 ) {
-                       // Increment outer loop counter by one.
-                       $outer++;
-
+               for ( $outer = 0; $outer < 8; $outer++ ) {
                        // Let the formatting element be the last element in the list
                        // of active formatting elements that: is between the end of
                        // the list and the last scope marker in the list, if any, or
                        // the start of the list otherwise, and has the same tag name
                        // as the token.
-                       $fmtelt = $afe->findElementByTag( $tag );
+                       $fmtElt = $afe->findElementByTag( $tag );
 
                        // If there is no such node, then abort these steps and instead
                        // act as described in the "any other end tag" entry below.
-                       if ( !$fmtelt ) {
+                       if ( !$fmtElt ) {
                                return false; // false means handle by the default case
                        }
 
                        // Otherwise, if there is such a node, but that node is not in
                        // the stack of open elements, then this is a parse error;
                        // remove the element from the list, and abort these steps.
-                       $index = $this->indexOf( $fmtelt );
+                       $index = $this->indexOf( $fmtElt );
                        if ( $index < 0 ) {
-                               $afe->remove( $fmtelt );
+                               $afe->remove( $fmtElt );
                                return true;   // true means no more handling required
                        }
 
@@ -1122,7 +1186,7 @@ class BalanceStack implements IteratorAggregate {
                        // the stack of open elements, but the element is not in scope,
                        // then this is a parse error; ignore the token, and abort
                        // these steps.
-                       if ( !$this->inScope( $fmtelt ) ) {
+                       if ( !$this->inScope( $fmtElt ) ) {
                                return true;
                        }
 
@@ -1130,13 +1194,13 @@ class BalanceStack implements IteratorAggregate {
                        // open elements that is lower in the stack than the formatting
                        // element, and is an element in the special category. There
                        // might not be one.
-                       $furthestblock = null;
-                       $furthestblockindex = -1;
-                       $stacklen = $this->length();
-                       for ( $i = $index+1; $i < $stacklen; $i++ ) {
+                       $furthestBlock = null;
+                       $furthestBlockIndex = -1;
+                       $stackLength = $this->length();
+                       for ( $i = $index+1; $i < $stackLength; $i++ ) {
                                if ( $this->node( $i )->isA( BalanceSets::$specialSet ) ) {
-                                       $furthestblock = $this->node( $i );
-                                       $furthestblockindex = $i;
+                                       $furthestBlock = $this->node( $i );
+                                       $furthestBlockIndex = $i;
                                        break;
                                }
                        }
@@ -1147,140 +1211,134 @@ class BalanceStack implements IteratorAggregate {
                        // up to and including the formatting element, and remove the
                        // formatting element from the list of active formatting
                        // elements.
-                       if ( !$furthestblock ) {
-                               $this->popTag( $fmtelt );
-                               $afe->remove( $fmtelt );
+                       if ( !$furthestBlock ) {
+                               $this->popTag( $fmtElt );
+                               $afe->remove( $fmtElt );
                                return true;
-                       } else {
-                               // Let the common ancestor be the element immediately above
-                               // the formatting element in the stack of open elements.
-                               $ancestor = $this->node( $index-1 );
-
-                               // Let a bookmark note the position of the formatting
-                               // element in the list of active formatting elements
-                               // relative to the elements on either side of it in the
-                               // list.
-                               $BOOKMARK = new BalanceElement( '[bookmark]', '[bookmark]', [] );
-                               $afe->insertAfter( $fmtelt, $BOOKMARK );
-
-                               // Let node and last node be the furthest block.
-                               $node = $furthestblock;
-                               $lastnode = $furthestblock;
-                               $nodeindex = $furthestblockindex;
-                               $isAFE = false;
-
-                               // Let inner loop counter be zero.
-                               $inner = 0;
-
-                               while ( true ) {
+                       }
 
-                                       // Increment inner loop counter by one.
-                                       $inner++;
-
-                                       // Let node be the element immediately above node in
-                                       // the stack of open elements, or if node is no longer
-                                       // in the stack of open elements (e.g. because it got
-                                       // removed by this algorithm), the element that was
-                                       // immediately above node in the stack of open elements
-                                       // before node was removed.
-                                       $node = $this->node( --$nodeindex );
-
-                                       // If node is the formatting element, then go
-                                       // to the next step in the overall algorithm.
-                                       if ( $node === $fmtelt ) break;
-
-                                       // If the inner loop counter is greater than three and node
-                                       // is in the list of active formatting elements, then remove
-                                       // node from the list of active formatting elements.
-                                       $isAFE = $afe->isInList( $node );
-                                       if ( $inner > 3 && $isAFE ) {
-                                               $afe->remove( $node );
-                                               $isAFE = false;
-                                       }
+                       // Let the common ancestor be the element immediately above
+                       // the formatting element in the stack of open elements.
+                       $ancestor = $this->node( $index-1 );
+
+                       // Let a bookmark note the position of the formatting
+                       // element in the list of active formatting elements
+                       // relative to the elements on either side of it in the
+                       // list.
+                       $BOOKMARK = new BalanceElement( '[bookmark]', '[bookmark]', [] );
+                       $afe->insertAfter( $fmtElt, $BOOKMARK );
+
+                       // Let node and last node be the furthest block.
+                       $node = $furthestBlock;
+                       $lastNode = $furthestBlock;
+                       $nodeIndex = $furthestBlockIndex;
+                       $isAFE = false;
+
+                       // Inner loop
+                       for ( $inner = 1; true; $inner++ ) {
+                               // Let node be the element immediately above node in
+                               // the stack of open elements, or if node is no longer
+                               // in the stack of open elements (e.g. because it got
+                               // removed by this algorithm), the element that was
+                               // immediately above node in the stack of open elements
+                               // before node was removed.
+                               $node = $this->node( --$nodeIndex );
+
+                               // If node is the formatting element, then go
+                               // to the next step in the overall algorithm.
+                               if ( $node === $fmtElt ) break;
+
+                               // If the inner loop counter is greater than three and node
+                               // is in the list of active formatting elements, then remove
+                               // node from the list of active formatting elements.
+                               $isAFE = $afe->isInList( $node );
+                               if ( $inner > 3 && $isAFE ) {
+                                       $afe->remove( $node );
+                                       $isAFE = false;
+                               }
 
-                                       // If node is not in the list of active formatting
-                                       // elements, then remove node from the stack of open
-                                       // elements and then go back to the step labeled inner
-                                       // loop.
-                                       if ( !$isAFE ) {
-                                               // Don't flatten here, since we're about to relocate
-                                               // parts of this $node.
-                                               $this->removeElement( $node, false );
-                                               continue;
-                                       }
+                               // If node is not in the list of active formatting
+                               // elements, then remove node from the stack of open
+                               // elements and then go back to the step labeled inner
+                               // loop.
+                               if ( !$isAFE ) {
+                                       // Don't flatten here, since we're about to relocate
+                                       // parts of this $node.
+                                       $this->removeElement( $node, false );
+                                       continue;
+                               }
 
-                                       // Create an element for the token for which the
-                                       // element node was created with common ancestor as
-                                       // the intended parent, replace the entry for node
-                                       // in the list of active formatting elements with an
-                                       // entry for the new element, replace the entry for
-                                       // node in the stack of open elements with an entry for
-                                       // the new element, and let node be the new element.
-                                       $newelt = new BalanceElement(
-                                               $node->namespaceURI, $node->localName, $node->attribs );
-                                       $afe->replace( $node, $newelt );
-                                       $this->replaceAt( $nodeindex, $newelt );
-                                       $node = $newelt;
-
-                                       // If last node is the furthest block, then move the
-                                       // aforementioned bookmark to be immediately after the
-                                       // new node in the list of active formatting elements.
-                                       if ( $lastnode === $furthestblock ) {
-                                               $afe->remove( $BOOKMARK );
-                                               $afe->insertAfter( $newelt, $BOOKMARK );
-                                       }
+                               // Create an element for the token for which the
+                               // element node was created with common ancestor as
+                               // the intended parent, replace the entry for node
+                               // in the list of active formatting elements with an
+                               // entry for the new element, replace the entry for
+                               // node in the stack of open elements with an entry for
+                               // the new element, and let node be the new element.
+                               $newElt = new BalanceElement(
+                                       $node->namespaceURI, $node->localName, $node->attribs );
+                               $afe->replace( $node, $newElt );
+                               $this->replaceAt( $nodeIndex, $newElt );
+                               $node = $newElt;
+
+                               // If last node is the furthest block, then move the
+                               // aforementioned bookmark to be immediately after the
+                               // new node in the list of active formatting elements.
+                               if ( $lastNode === $furthestBlock ) {
+                                       $afe->remove( $BOOKMARK );
+                                       $afe->insertAfter( $newElt, $BOOKMARK );
+                               }
 
-                                       // Insert last node into node, first removing it from
-                                       // its previous parent node if any.
-                                       $node->appendChild( $lastnode );
+                               // Insert last node into node, first removing it from
+                               // its previous parent node if any.
+                               $node->appendChild( $lastNode );
 
-                                       // Let last node be node.
-                                       $lastnode = $node;
-                               }
+                               // Let last node be node.
+                               $lastNode = $node;
+                       }
 
-                               // If the common ancestor node is a table, tbody, tfoot,
-                               // thead, or tr element, then, foster parent whatever last
-                               // node ended up being in the previous step, first removing
-                               // it from its previous parent node if any.
-                               if (
-                                       $this->fosterParentMode &&
-                                       $ancestor->isA( BalanceSets::$tableSectionRowSet )
-                               ) {
-                                       $this->fosterParent( $lastnode );
-                               } else {
-                                       // Otherwise, append whatever last node ended up being in
-                                       // the previous step to the common ancestor node, first
-                                       // removing it from its previous parent node if any.
-                                       $ancestor->appendChild( $lastnode );
-                               }
+                       // If the common ancestor node is a table, tbody, tfoot,
+                       // thead, or tr element, then, foster parent whatever last
+                       // node ended up being in the previous step, first removing
+                       // it from its previous parent node if any.
+                       if (
+                               $this->fosterParentMode &&
+                               $ancestor->isA( BalanceSets::$tableSectionRowSet )
+                       ) {
+                               $this->fosterParent( $lastNode );
+                       } else {
+                               // Otherwise, append whatever last node ended up being in
+                               // the previous step to the common ancestor node, first
+                               // removing it from its previous parent node if any.
+                               $ancestor->appendChild( $lastNode );
+                       }
 
-                               // Create an element for the token for which the
-                               // formatting element was created, with furthest block
-                               // as the intended parent.
-                               $newelt2 = new BalanceElement(
-                                       $fmtelt->namespaceURI, $fmtelt->localName, $fmtelt->attribs );
+                       // Create an element for the token for which the
+                       // formatting element was created, with furthest block
+                       // as the intended parent.
+                       $newElt2 = new BalanceElement(
+                               $fmtElt->namespaceURI, $fmtElt->localName, $fmtElt->attribs );
 
-                               // Take all of the child nodes of the furthest block and
-                               // append them to the element created in the last step.
-                               $newelt2->adoptChildren( $furthestblock );
+                       // Take all of the child nodes of the furthest block and
+                       // append them to the element created in the last step.
+                       $newElt2->adoptChildren( $furthestBlock );
 
-                               // Append that new element to the furthest block.
-                               $furthestblock->appendChild( $newelt2 );
+                       // Append that new element to the furthest block.
+                       $furthestBlock->appendChild( $newElt2 );
 
-                               // Remove the formatting element from the list of active
-                               // formatting elements, and insert the new element into the
-                               // list of active formatting elements at the position of
-                               // the aforementioned bookmark.
-                               $afe->remove( $fmtelt );
-                               $afe->replace( $BOOKMARK, $newelt2 );
+                       // Remove the formatting element from the list of active
+                       // formatting elements, and insert the new element into the
+                       // list of active formatting elements at the position of
+                       // the aforementioned bookmark.
+                       $afe->remove( $fmtElt );
+                       $afe->replace( $BOOKMARK, $newElt2 );
 
-                               // Remove the formatting element from the stack of open
-                               // elements, and insert the new element into the stack of
-                               // open elements immediately below the position of the
-                               // furthest block in that stack.
-                               $this->removeElement( $fmtelt );
-                               $this->insertAfter( $furthestblock, $newelt2 );
-                       }
+                       // Remove the formatting element from the stack of open
+                       // elements, and insert the new element into the stack of
+                       // open elements immediately below the position of the
+                       // furthest block in that stack.
+                       $this->removeElement( $fmtElt );
+                       $this->insertAfter( $furthestBlock, $newElt2 );
                }
 
                return true;
@@ -1613,9 +1671,11 @@ class BalanceActiveFormattingElements {
 
                // Loop backward through the list until we find a marker or an
                // open element
+               $foundIt = false;
                while ( $entry->prevAFE ) {
                        $entry = $entry->prevAFE;
                        if ( $entry instanceof BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
+                               $foundIt = true;
                                break;
                        }
                }
@@ -1624,7 +1684,7 @@ class BalanceActiveFormattingElements {
                // the first element if we didn't find a marker or open element),
                // recreating formatting elements and pushing them back onto the list
                // of open elements.
-               if ( $entry->prevAFE ) {
+               if ( $foundIt ) {
                        $entry = $entry->nextAFE;
                }
                do {
@@ -1690,21 +1750,22 @@ class BalanceActiveFormattingElements {
  * - The document is never in "quirks mode".
  * - All occurrences of < and > have been entity escaped, so we
  *   can parse tags by simply splitting on those two characters.
+ *   (This also simplifies the handling of < inside <textarea>.)
+ *   The character < must not appear inside comments.
  *   Similarly, all attributes have been "cleaned" and are double-quoted
  *   and escaped.
- * - All comments and null characters are assumed to have been removed.
- * - We don't alter linefeeds after <pre>/<listing>.
+ * - All null characters are assumed to have been removed.
  * - The following elements are disallowed: <html>, <head>, <body>, <frameset>,
- *   <form>, <frame>, <plaintext>, <isindex>, <textarea>, <xmp>, <iframe>,
- *   <noembed>, <noscript>, <select>, <script>, <title>.  As a result,
+ *   <frame>, <plaintext>, <isindex>, <xmp>, <iframe>,
+ *   <noembed>, <noscript>, <script>, <title>.  As a result,
  *   further simplifications can be made:
  *   - `frameset-ok` is not tracked.
- *   - `form element pointer` is not tracked.
  *   - `head element pointer` is not tracked (but presumed non-null)
- *   - Tokenizer has only a single mode.
+ *   - Tokenizer has only a single mode. (<textarea> wants RCDATA and
+ *     <style>/<noframes> want RAWTEXT modes which we only loosely emulate.)
  *
  *   We generally mark places where we omit cases from the spec due to
- *   disallowed elements with a comment: `# OMITTED: <element-name>`.
+ *   disallowed elements with a comment: `// OMITTED: <element-name>`.
  *
  *   The HTML spec keeps a flag during the parsing process to track
  *   whether or not a "parse error" has been encountered.  We don't
@@ -1722,12 +1783,48 @@ class Balancer {
        private $afe;
        private $stack;
        private $strict;
-       private $tidyCompat;
+       private $allowComments;
+       private $config;
 
-       private $textIntegrationMode = false;
+       private $textIntegrationMode;
        private $pendingTableText;
        private $originalInsertionMode;
        private $fragmentContext;
+       private $formElementPointer;
+       private $ignoreLinefeed;
+       private $inRCDATA;
+       private $inRAWTEXT;
+
+       /**
+        * Valid HTML5 comments.
+        * Regex borrowed from Tim Starling's "remex-html" project.
+        */
+       const VALID_COMMENT_REGEX = "~ !--
+               (                             # 1. Comment match detector
+                       > | -> | # Invalid short close
+                       (                         # 2. Comment contents
+                               (?:
+                                       (?! --> )
+                                       (?! --!> )
+                                       (?! --! \z )
+                                       (?! -- \z )
+                                       (?! - \z )
+                                       .
+                               )*+
+                       )
+                       (                         # 3. Comment close
+                               --> |   # Normal close
+                               --!> |  # Comment end bang
+                               (                     # 4. Indicate matches requiring EOF
+                                       --! |   # EOF in comment end bang state
+                                       -- |    # EOF in comment end state
+                                       -  |    # EOF in comment end dash state
+                                               # EOF in comment state
+                               )
+                       )
+               )
+               ([^<]*) \z                    # 5. Non-tag text after the comment
+               ~xs";
 
        /**
         * Create a new Balancer.
@@ -1747,18 +1844,24 @@ class Balancer {
         *         program: <p>-wrapping is done to the children of
         *         <body> and <blockquote> elements, and empty elements
         *         are removed.
+        *     'allowComments': boolean, defaults to true.
+        *         When true, allows HTML comments in the input.
+        *         The Sanitizer generally strips all comments, so if you
+        *         are running on sanitized output you can set this to
+        *         false to get a bit more performance.
         */
        public function __construct( array $config = [] ) {
-               $config = $config + [
+               $this->config = $config = $config + [
                        'strict' => false,
                        'allowedHtmlElements' => null,
                        'tidyCompat' => false,
+                       'allowComments' => true,
                ];
                $this->allowedHtmlElements = $config['allowedHtmlElements'];
                $this->strict = $config['strict'];
-               $this->tidyCompat = $config['tidyCompat'];
+               $this->allowComments = $config['allowComments'];
                if ( $this->allowedHtmlElements !== null ) {
-                       # Sanity check!
+                       // Sanity check!
                        $bad = array_uintersect_assoc(
                                $this->allowedHtmlElements,
                                BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE],
@@ -1795,16 +1898,27 @@ class Balancer {
                $this->parseMode = 'inBodyMode';
                $this->bitsIterator = new ExplodeIterator( '<', $text );
                $this->afe = new BalanceActiveFormattingElements();
-               $this->stack = new BalanceStack();
-               $this->stack->tidyCompat = $this->tidyCompat;
+               $this->stack = new BalanceStack( $this->config );
                $this->processingCallback = $processingCallback;
                $this->processingArgs = $processingArgs;
 
-               # The stack is constructed with an <html> element already on it.
-               # Set this up as a fragment parsed with <body> as the context.
+               $this->textIntegrationMode =
+                       $this->ignoreLinefeed =
+                       $this->inRCDATA =
+                       $this->inRAWTEXT = false;
+
+               // The stack is constructed with an <html> element already on it.
+               // Set this up as a fragment parsed with <body> as the context.
                $this->fragmentContext =
                        new BalanceElement( BalanceSets::HTML_NAMESPACE, 'body', [] );
                $this->resetInsertionMode();
+               $this->formElementPointer = null;
+               for ( $e = $this->fragmentContext; $e != null; $e = $e->parent ) {
+                       if ( $e->isHtmlNamed( 'form' ) ) {
+                               $this->formElementPointer = $e;
+                               break;
+                       }
+               }
 
                // First element is text not tag
                $x = $this->bitsIterator->current();
@@ -1821,6 +1935,7 @@ class Balancer {
                $this->afe = null;
                $this->stack = null;
                $this->fragmentContext = null;
+               $this->formElementPointer = null;
                return $result;
        }
 
@@ -1828,12 +1943,12 @@ class Balancer {
         * Pass a token to the tree builder.  The $token will be one of the
         * strings "tag", "endtag", "text", "comment", or "eof".
         */
-       private function insertToken( $token, $value, $attribs = null, $selfclose = false ) {
+       private function insertToken( $token, $value, $attribs = null, $selfClose = false ) {
                // validate tags against $unsupportedSet
                if ( $token === 'tag' || $token === 'endtag' ) {
                        if ( isset( BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE][$value] ) ) {
-                               # As described in "simplifications" above, these tags are
-                               # not supported in the balancer.
+                               // As described in "simplifications" above, these tags are
+                               // not supported in the balancer.
                                Assert::invariant(
                                        !$this->strict,
                                        "Unsupported $token <$value> found."
@@ -1841,9 +1956,22 @@ class Balancer {
                                return false;
                        }
                } elseif ( $token === 'text' && $value === '' ) {
-                       # Don't actually inject the empty string as a text token.
+                       // Don't actually inject the empty string as a text token.
                        return true;
                }
+               // Support pre/listing/textarea by suppressing initial linefeed
+               if ( $this->ignoreLinefeed ) {
+                       $this->ignoreLinefeed = false;
+                       if ( $token === 'text' ) {
+                               if ( $value[0] === "\n" ) {
+                                       if ( $value === "\n" ) {
+                                               // Nothing would be left, don't inject the empty string.
+                                               return true;
+                                       }
+                                       $value = substr( $value, 1 );
+                               }
+                       }
+               }
                // Some hoops we have to jump through
                $adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );
 
@@ -1876,14 +2004,14 @@ class Balancer {
                        $isForeign = false;
                }
                if ( $isForeign ) {
-                       return $this->insertForeignToken( $token, $value, $attribs, $selfclose );
+                       return $this->insertForeignToken( $token, $value, $attribs, $selfClose );
                } else {
                        $func = $this->parseMode;
-                       return $this->$func( $token, $value, $attribs, $selfclose );
+                       return $this->$func( $token, $value, $attribs, $selfClose );
                }
        }
 
-       private function insertForeignToken( $token, $value, $attribs = null, $selfclose = false ) {
+       private function insertForeignToken( $token, $value, $attribs = null, $selfClose = false ) {
                if ( $token === 'text' ) {
                        $this->stack->insertText( $value );
                        return true;
@@ -1896,7 +2024,7 @@ class Balancer {
                                ) {
                                        break;
                                }
-                               /* otherwise, fall through */
+                               // otherwise, fall through
                        case 'b':
                        case 'big':
                        case 'blockquote':
@@ -1955,7 +2083,7 @@ class Balancer {
                                                break;
                                        }
                                }
-                               return $this->insertToken( $token, $value, $attribs, $selfclose );
+                               return $this->insertToken( $token, $value, $attribs, $selfClose );
                        }
                        // "Any other start tag"
                        $adjusted = ( $this->fragmentContext && $this->stack->length()===1 ) ?
@@ -1963,7 +2091,7 @@ class Balancer {
                        $this->stack->insertForeignElement(
                                $adjusted->namespaceURI, $value, $attribs
                        );
-                       if ( $selfclose ) {
+                       if ( $selfClose ) {
                                $this->stack->pop();
                        }
                        return true;
@@ -1973,7 +2101,7 @@ class Balancer {
                                if ( $node->isHtml() && !$first ) {
                                        // process the end tag as HTML
                                        $func = $this->parseMode;
-                                       return $this->$func( $token, $value, $attribs, $selfclose );
+                                       return $this->$func( $token, $value, $attribs, $selfClose );
                                } elseif ( $i === 0 ) {
                                        return true;
                                } elseif ( $node->localName === $value ) {
@@ -1987,22 +2115,37 @@ class Balancer {
 
        /**
         * Grab the next "token" from $bitsIterator.  This is either an open/close
-        * tag or text, depending on whether the Sanitizer approves.
+        * tag or text or a comment, depending on whether the Sanitizer approves.
         */
        private function advance() {
                $x = $this->bitsIterator->current();
                $this->bitsIterator->next();
                $regs = [];
-               # $slash: Does the current element start with a '/'?
-               # $t: Current element name
-               # $attribStr: String between element name and >
-               # $brace: Ending '>' or '/>'
-               # $rest: Everything until the next element from the $bitsIterator
+               // Handle comments.  These won't be generated by MediaWiki (they
+               // are stripped in the Sanitizer) but may be generated by extensions.
+               if (
+                       $this->allowComments &&
+                       !( $this->inRCDATA || $this->inRAWTEXT ) &&
+                       preg_match( Balancer::VALID_COMMENT_REGEX, $x, $regs, PREG_OFFSET_CAPTURE ) &&
+                       // verify EOF condition where necessary
+                       ( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
+               ) {
+                       $contents = $regs[2][0];
+                       $rest = $regs[5][0];
+                       $this->insertToken( 'comment', $contents );
+                       $this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
+                       return;
+               }
+               // $slash: Does the current element start with a '/'?
+               // $t: Current element name
+               // $attribStr: String between element name and >
+               // $brace: Ending '>' or '/>'
+               // $rest: Everything until the next element from the $bitsIterator
                if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
                        list( /* $qbar */, $slash, $t, $attribStr, $brace, $rest ) = $regs;
                        $t = strtolower( $t );
                        if ( $this->strict ) {
-                               /* Verify that attributes are all properly double-quoted */
+                               // Verify that attributes are all properly double-quoted
                                Assert::invariant(
                                        preg_match(
                                                '/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
@@ -2016,35 +2159,53 @@ class Balancer {
                        );
                        $slash = $t = $attribStr = $brace = $rest = null;
                }
-               $goodtag = $t;
+               $goodTag = $t;
+               if ( $this->inRCDATA ) {
+                       if ( $slash && $t === $this->inRCDATA ) {
+                               $this->inRCDATA = false;
+                       } else {
+                               // No tags allowed; this emulates the "rcdata" tokenizer mode.
+                               $goodTag = false;
+                       }
+               }
+               if ( $this->inRAWTEXT ) {
+                       if ( $slash && $t === $this->inRAWTEXT ) {
+                               $this->inRAWTEXT = false;
+                       } else {
+                               // No tags allowed, no entity-escaping done.
+                               $goodTag = false;
+                       }
+               }
                $sanitize = $this->allowedHtmlElements !== null;
                if ( $sanitize ) {
-                       $goodtag = $t && isset( $this->allowedHtmlElements[$t] );
+                       $goodTag = $t && isset( $this->allowedHtmlElements[$t] );
                }
-               if ( $goodtag ) {
+               if ( $goodTag ) {
                        if ( is_callable( $this->processingCallback ) ) {
                                call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
                        }
                        if ( $sanitize ) {
-                               $goodtag = Sanitizer::validateTag( $attribStr, $t );
+                               $goodTag = Sanitizer::validateTag( $attribStr, $t );
                        }
                }
-               if ( $goodtag ) {
+               if ( $goodTag ) {
                        if ( $sanitize ) {
                                $attribs = Sanitizer::decodeTagAttributes( $attribStr );
                                $attribs = Sanitizer::validateTagAttributes( $attribs, $t );
                        } else {
                                $attribs = Sanitizer::decodeTagAttributes( $attribStr );
                        }
-                       $goodtag = $this->insertToken(
+                       $goodTag = $this->insertToken(
                                $slash ? 'endtag' : 'tag', $t, $attribs, $brace === '/>'
                        );
                }
-               if ( $goodtag ) {
+               if ( $goodTag ) {
                        $rest = str_replace( '>', '&gt;', $rest );
                        $this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
+               } elseif ( $this->inRAWTEXT ) {
+                       $this->insertToken( 'text', "<$x" );
                } else {
-                       # bad tag; serialize entire thing as text.
+                       // bad tag; serialize entire thing as text.
                        $this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $x ) );
                }
        }
@@ -2058,9 +2219,9 @@ class Balancer {
                return $oldMode;
        }
 
-       private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfclose ) {
+       private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfClose ) {
                $this->switchMode( $mode );
-               return $this->insertToken( $token, $value, $attribs, $selfclose );
+               return $this->insertToken( $token, $value, $attribs, $selfClose );
        }
 
        private function resetInsertionMode() {
@@ -2074,12 +2235,10 @@ class Balancer {
                        }
                        if ( $node->isHtml() ) {
                                switch ( $node->localName ) {
-                               # OMITTED: <select>
-                               /*
                                case 'select':
-                                       $stacklen = $this->stack->length();
-                                       for ( $j = $i + 1; $j < $stacklen-1; $j++ ) {
-                                               $ancestor = $this->stack->node( $stacklen-$j-1 );
+                                       $stackLength = $this->stack->length();
+                                       for ( $j = $i + 1; $j < $stackLength-1; $j++ ) {
+                                               $ancestor = $this->stack->node( $stackLength-$j-1 );
                                                if ( $ancestor->isHtmlNamed( 'template' ) ) {
                                                        break;
                                                }
@@ -2090,7 +2249,6 @@ class Balancer {
                                        }
                                        $this->switchMode( 'inSelectMode' );
                                        return;
-                               */
                                case 'tr':
                                        $this->switchMode( 'inRowMode' );
                                        return;
@@ -2116,12 +2274,12 @@ class Balancer {
                                case 'body':
                                        $this->switchMode( 'inBodyMode' );
                                        return;
-                               # OMITTED: <frameset>
-                               # OMITTED: <html>
-                               # OMITTED: <head>
+                               // OMITTED: <frameset>
+                               // OMITTED: <html>
+                               // OMITTED: <head>
                                default:
                                        if ( !$last ) {
-                                               # OMITTED: <head>
+                                               // OMITTED: <head>
                                                if ( $node->isA( BalanceSets::$tableCellSet ) ) {
                                                        $this->switchMode( 'inCellMode' );
                                                        return;
@@ -2137,33 +2295,33 @@ class Balancer {
        }
 
        private function stopParsing() {
-               # Most of the spec methods are inapplicable, other than step 2:
-               # "pop all the nodes off the stack of open elements".
-               # We're going to keep the top-most <html> element on the stack, though.
-
-               # Clear the AFE list first, otherwise the element objects will stay live
-               # during serialization, potentially using O(N^2) memory. Note that
-               # popping the stack will never result in reconstructing the active
-               # formatting elements.
+               // Most of the spec methods are inapplicable, other than step 2:
+               // "pop all the nodes off the stack of open elements".
+               // We're going to keep the top-most <html> element on the stack, though.
+
+               // Clear the AFE list first, otherwise the element objects will stay live
+               // during serialization, potentially using O(N^2) memory. Note that
+               // popping the stack will never result in reconstructing the active
+               // formatting elements.
                $this->afe = null;
                $this->stack->popTo( 1 );
        }
 
        private function parseRawText( $value, $attribs = null ) {
                $this->stack->insertHTMLElement( $value, $attribs );
-               // XXX switch tokenizer to rawtext state?
+               $this->inRAWTEXT = $value;
                $this->originalInsertionMode = $this->switchMode( 'inTextMode' );
                return true;
        }
 
-       private function inTextMode( $token, $value, $attribs = null, $selfclose = false ) {
+       private function inTextMode( $token, $value, $attribs = null, $selfClose = false ) {
                if ( $token === 'text' ) {
                        $this->stack->insertText( $value );
                        return true;
                } elseif ( $token === 'eof' ) {
                        $this->stack->pop();
                        return $this->switchModeAndReprocess(
-                               $this->originalInsertionMode, $token, $value, $attribs, $selfclose
+                               $this->originalInsertionMode, $token, $value, $attribs, $selfClose
                        );
                } elseif ( $token === 'endtag' ) {
                        $this->stack->pop();
@@ -2173,7 +2331,7 @@ class Balancer {
                return true;
        }
 
-       private function inHeadMode( $token, $value, $attribs = null, $selfclose = false ) {
+       private function inHeadMode( $token, $value, $attribs = null, $selfClose = false ) {
                if ( $token === 'text' ) {
                        if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
                                $this->stack->insertText( $matches[0] );
@@ -2186,9 +2344,9 @@ class Balancer {
                } elseif ( $token === 'tag' ) {
                        switch ( $value ) {
                        case 'meta':
-                               # OMITTED: in a full HTML parser, this might change the encoding.
-                               /* falls through */
-                       # OMITTED: <html>
+                               // OMITTED: in a full HTML parser, this might change the encoding.
+                               // falls through
+                       // OMITTED: <html>
                        case 'base':
                        case 'basefont':
                        case 'bgsound':
@@ -2196,26 +2354,26 @@ class Balancer {
                                $this->stack->insertHTMLElement( $value, $attribs );
                                $this->stack->pop();
                                return true;
-                       # OMITTED: <title>
-                       # OMITTED: <noscript>
+                       // OMITTED: <title>
+                       // OMITTED: <noscript>
                        case 'noframes':
                        case 'style':
                                return $this->parseRawText( $value, $attribs );
-                       # OMITTED: <script>
+                       // OMITTED: <script>
                        case 'template':
                                $this->stack->insertHTMLElement( $value, $attribs );
                                $this->afe->insertMarker();
-                               # OMITTED: frameset_ok
+                               // OMITTED: frameset_ok
                                $this->switchMode( 'inTemplateMode' );
                                $this->templateInsertionModes[] = $this->parseMode;
                                return true;
-                       # OMITTED: <head>
+                       // OMITTED: <head>
                        }
                } elseif ( $token === 'endtag' ) {
                        switch ( $value ) {
-                       # OMITTED: <head>
-                       # OMITTED: <body>
-                       # OMITTED: <html>
+                       // OMITTED: <head>
+                       // OMITTED: <body>
+                       // OMITTED: <html>
                        case 'br':
                                break; // handle at the bottom of the function
                        case 'template':
@@ -2232,41 +2390,44 @@ class Balancer {
                                // ignore any other end tag
                                return true;
                        }
+               } elseif ( $token === 'comment' ) {
+                       $this->stack->insertComment( $value );
+                       return true;
                }
 
                // If not handled above
                $this->inHeadMode( 'endtag', 'head' ); // synthetic </head>
                // Then redo this one
-               return $this->insertToken( $token, $value, $attribs, $selfclose );
+               return $this->insertToken( $token, $value, $attribs, $selfClose );
        }
 
-       private function inBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
+       private function inBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
                if ( $token === 'text' ) {
                        $this->afe->reconstruct( $this->stack );
                        $this->stack->insertText( $value );
                        return true;
                } elseif ( $token === 'eof' ) {
                        if ( !empty( $this->templateInsertionModes ) ) {
-                               return $this->inTemplateMode( $token, $value, $attribs, $selfclose );
+                               return $this->inTemplateMode( $token, $value, $attribs, $selfClose );
                        }
                        $this->stopParsing();
                        return true;
                } elseif ( $token === 'tag' ) {
                        switch ( $value ) {
-                       # OMITTED: <html>
+                       // OMITTED: <html>
                        case 'base':
                        case 'basefont':
                        case 'bgsound':
                        case 'link':
                        case 'meta':
                        case 'noframes':
-                       # OMITTED: <script>
+                       // OMITTED: <script>
                        case 'style':
                        case 'template':
-                       # OMITTED: <title>
-                               return $this->inHeadMode( $token, $value, $attribs, $selfclose );
-                       # OMITTED: <body>
-                       # OMITTED: <frameset>
+                       // OMITTED: <title>
+                               return $this->inHeadMode( $token, $value, $attribs, $selfClose );
+                       // OMITTED: <body>
+                       // OMITTED: <frameset>
 
                        case 'address':
                        case 'article':
@@ -2319,15 +2480,28 @@ class Balancer {
                                        $this->inBodyMode( 'endtag', 'p' );
                                }
                                $this->stack->insertHTMLElement( $value, $attribs );
-                               # As described in "simplifications" above:
-                               # 1. We don't touch the next token, even if it's a linefeed.
-                               # 2. OMITTED: frameset_ok
+                               $this->ignoreLinefeed = true;
+                               // OMITTED: frameset_ok
                                return true;
 
-                       # OMITTED: <form>
+                       case 'form':
+                               if (
+                                       $this->formElementPointer &&
+                                       $this->stack->indexOf( 'template' ) < 0
+                               ) {
+                                       return true; // in a form, not in a template.
+                               }
+                               if ( $this->stack->inButtonScope( "p" ) ) {
+                                       $this->inBodyMode( 'endtag', 'p' );
+                               }
+                               $elt = $this->stack->insertHTMLElement( $value, $attribs );
+                               if ( $this->stack->indexOf( 'template' ) < 0 ) {
+                                       $this->formElementPointer = $elt;
+                               }
+                               return true;
 
                        case 'li':
-                               # OMITTED: frameset_ok
+                               // OMITTED: frameset_ok
                                foreach ( $this->stack as $node ) {
                                        if ( $node->isHtmlNamed( 'li' ) ) {
                                                $this->inBodyMode( 'endtag', 'li' );
@@ -2348,7 +2522,7 @@ class Balancer {
 
                        case 'dd':
                        case 'dt':
-                               # OMITTED: frameset_ok
+                               // OMITTED: frameset_ok
                                foreach ( $this->stack as $node ) {
                                        if ( $node->isHtmlNamed( 'dd' ) ) {
                                                $this->inBodyMode( 'endtag', 'dd' );
@@ -2371,12 +2545,12 @@ class Balancer {
                                $this->stack->insertHTMLElement( $value, $attribs );
                                return true;
 
-                       # OMITTED: <plaintext>
+                       // OMITTED: <plaintext>
 
                        case 'button':
                                if ( $this->stack->inScope( 'button' ) ) {
                                        $this->inBodyMode( 'endtag', 'button' );
-                                       return $this->insertToken( $token, $value, $attribs, $selfclose );
+                                       return $this->insertToken( $token, $value, $attribs, $selfClose );
                                }
                                $this->afe->reconstruct( $this->stack );
                                $this->stack->insertHTMLElement( $value, $attribs );
@@ -2394,7 +2568,7 @@ class Balancer {
                                                $this->stack->removeElement( $activeElement, false );
                                        }
                                }
-                               /* Falls through */
+                               // Falls through
                        case 'b':
                        case 'big':
                        case 'code':
@@ -2426,17 +2600,17 @@ class Balancer {
                                $this->afe->reconstruct( $this->stack );
                                $this->stack->insertHTMLElement( $value, $attribs );
                                $this->afe->insertMarker();
-                               # OMITTED: frameset_ok
+                               // OMITTED: frameset_ok
                                return true;
 
                        case 'table':
-                               # The document is never in "quirks mode"; see simplifications
-                               # above.
+                               // The document is never in "quirks mode"; see simplifications
+                               // above.
                                if ( $this->stack->inButtonScope( 'p' ) ) {
                                        $this->inBodyMode( 'endtag', 'p' );
                                }
                                $this->stack->insertHTMLElement( $value, $attribs );
-                               # OMITTED: frameset_ok
+                               // OMITTED: frameset_ok
                                $this->switchMode( 'inTableMode' );
                                return true;
 
@@ -2449,15 +2623,15 @@ class Balancer {
                                $this->afe->reconstruct( $this->stack );
                                $this->stack->insertHTMLElement( $value, $attribs );
                                $this->stack->pop();
-                               # OMITTED: frameset_ok
+                               // OMITTED: frameset_ok
                                return true;
 
                        case 'input':
                                $this->afe->reconstruct( $this->stack );
                                $this->stack->insertHTMLElement( $value, $attribs );
                                $this->stack->pop();
-                               # OMITTED: frameset_ok
-                               # (hence we don't need to examine the tag's "type" attribute)
+                               // OMITTED: frameset_ok
+                               // (hence we don't need to examine the tag's "type" attribute)
                                return true;
 
                        case 'menuitem':
@@ -2477,18 +2651,23 @@ class Balancer {
                                return true;
 
                        case 'image':
-                               # warts!
-                               return $this->inBodyMode( $token, 'img', $attribs, $selfclose );
-
-                       # OMITTED: <isindex>
-                       # OMITTED: <textarea>
-                       # OMITTED: <xmp>
-                       # OMITTED: <iframe>
-                       # OMITTED: <noembed>
-                       # OMITTED: <noscript>
-
-                       # OMITTED: <select>
-                       /*
+                               // warts! <image> is reprocessed as if it were <img>.
+                               return $this->inBodyMode( $token, 'img', $attribs, $selfClose );
+
+                       // OMITTED: <isindex>
+
+                       case 'textarea':
+                               $this->stack->insertHTMLElement( $value, $attribs );
+                               $this->ignoreLinefeed = true;
+                               $this->inRCDATA = $value; // emulate rcdata tokenizer mode
+                               // OMITTED: frameset_ok
+                               return true;
+
+                       // OMITTED: <xmp>
+                       // OMITTED: <iframe>
+                       // OMITTED: <noembed>
+                       // OMITTED: <noscript>
+
                        case 'select':
                                $this->afe->reconstruct( $this->stack );
                                $this->stack->insertHTMLElement( $value, $attribs );
@@ -2504,7 +2683,6 @@ class Balancer {
                                        $this->switchMode( 'inSelectMode' );
                                        return true;
                                }
-                       */
 
                        case 'optgroup':
                        case 'option':
@@ -2533,30 +2711,30 @@ class Balancer {
 
                        case 'math':
                                $this->afe->reconstruct( $this->stack );
-                               # We skip the spec's "adjust MathML attributes" and
-                               # "adjust foreign attributes" steps, since the browser will
-                               # do this later when it parses the output and it doesn't affect
-                               # balancing.
+                               // We skip the spec's "adjust MathML attributes" and
+                               // "adjust foreign attributes" steps, since the browser will
+                               // do this later when it parses the output and it doesn't affect
+                               // balancing.
                                $this->stack->insertForeignElement(
                                        BalanceSets::MATHML_NAMESPACE, $value, $attribs
                                );
-                               if ( $selfclose ) {
-                                       # emit explicit </math> tag.
+                               if ( $selfClose ) {
+                                       // emit explicit </math> tag.
                                        $this->stack->pop();
                                }
                                return true;
 
                        case 'svg':
                                $this->afe->reconstruct( $this->stack );
-                               # We skip the spec's "adjust SVG attributes" and
-                               # "adjust foreign attributes" steps, since the browser will
-                               # do this later when it parses the output and it doesn't affect
-                               # balancing.
+                               // We skip the spec's "adjust SVG attributes" and
+                               // "adjust foreign attributes" steps, since the browser will
+                               // do this later when it parses the output and it doesn't affect
+                               // balancing.
                                $this->stack->insertForeignElement(
                                        BalanceSets::SVG_NAMESPACE, $value, $attribs
                                );
-                               if ( $selfclose ) {
-                                       # emit explicit </svg> tag.
+                               if ( $selfClose ) {
+                                       // emit explicit </svg> tag.
                                        $this->stack->pop();
                                }
                                return true;
@@ -2564,7 +2742,7 @@ class Balancer {
                        case 'caption':
                        case 'col':
                        case 'colgroup':
-                       # OMITTED: <frame>
+                       // OMITTED: <frame>
                        case 'head':
                        case 'tbody':
                        case 'td':
@@ -2582,10 +2760,10 @@ class Balancer {
                        return true;
                } elseif ( $token === 'endtag' ) {
                        switch ( $value ) {
-                       # </body>,</html> are unsupported.
+                       // </body>,</html> are unsupported.
 
                        case 'template':
-                               return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+                               return $this->inHeadMode( $token, $value, $attribs, $selfClose );
 
                        case 'address':
                        case 'article':
@@ -2621,12 +2799,31 @@ class Balancer {
                                $this->stack->popTag( $value );
                                return true;
 
-                       # OMITTED: <form>
+                       case 'form':
+                               if ( $this->stack->indexOf( 'template' ) < 0 ) {
+                                       $openform = $this->formElementPointer;
+                                       $this->formElementPointer = null;
+                                       if ( !$openform || !$this->stack->inScope( $openform ) ) {
+                                               return true;
+                                       }
+                                       $this->stack->generateImpliedEndTags();
+                                       // Don't flatten yet if we're removing a <form> element
+                                       // out-of-order. (e.g. `<form><div></form>`)
+                                       $flatten = ( $this->stack->currentNode === $openform );
+                                       $this->stack->removeElement( $openform, $flatten );
+                               } else {
+                                       if ( !$this->stack->inScope( 'form' ) ) {
+                                               return true;
+                                       }
+                                       $this->stack->generateImpliedEndTags();
+                                       $this->stack->popTag( 'form' );
+                               }
+                               return true;
 
                        case 'p':
                                if ( !$this->stack->inButtonScope( 'p' ) ) {
                                        $this->inBodyMode( 'tag', 'p', [] );
-                                       return $this->insertToken( $token, $value, $attribs, $selfclose );
+                                       return $this->insertToken( $token, $value, $attribs, $selfClose );
                                }
                                $this->stack->generateImpliedEndTags( $value );
                                $this->stack->popTag( $value );
@@ -2634,7 +2831,7 @@ class Balancer {
 
                        case 'li':
                                if ( !$this->stack->inListItemScope( $value ) ) {
-                                       return true; # ignore
+                                       return true; // ignore
                                }
                                $this->stack->generateImpliedEndTags( $value );
                                $this->stack->popTag( $value );
@@ -2643,7 +2840,7 @@ class Balancer {
                        case 'dd':
                        case 'dt':
                                if ( !$this->stack->inScope( $value ) ) {
-                                       return true; # ignore
+                                       return true; // ignore
                                }
                                $this->stack->generateImpliedEndTags( $value );
                                $this->stack->popTag( $value );
@@ -2656,14 +2853,14 @@ class Balancer {
                        case 'h5':
                        case 'h6':
                                if ( !$this->stack->inScope( BalanceSets::$headingSet ) ) {
-                                       return;
+                                       return true; // ignore
                                }
                                $this->stack->generateImpliedEndTags();
                                $this->stack->popTag( BalanceSets::$headingSet );
                                return true;
 
                        case 'sarcasm':
-                               # Take a deep breath, then:
+                               // Take a deep breath, then:
                                break;
 
                        case 'a':
@@ -2681,15 +2878,15 @@ class Balancer {
                        case 'tt':
                        case 'u':
                                if ( $this->stack->adoptionAgency( $value, $this->afe ) ) {
-                                       return true; # If we did something, we're done.
+                                       return true; // If we did something, we're done.
                                }
-                               break; # Go to the "any other end tag" case.
+                               break; // Go to the "any other end tag" case.
 
                        case 'applet':
                        case 'marquee':
                        case 'object':
                                if ( !$this->stack->inScope( $value ) ) {
-                                       return true; # ignore
+                                       return true; // ignore
                                }
                                $this->stack->generateImpliedEndTags();
                                $this->stack->popTag( $value );
@@ -2697,7 +2894,7 @@ class Balancer {
                                return true;
 
                        case 'br':
-                               # Turn </br> into <br>
+                               // Turn </br> into <br>
                                return $this->inBodyMode( 'tag', $value, [] );
                        }
 
@@ -2705,26 +2902,30 @@ class Balancer {
                        foreach ( $this->stack as $i => $node ) {
                                if ( $node->isHtmlNamed( $value ) ) {
                                        $this->stack->generateImpliedEndTags( $value );
-                                       $this->stack->popTo( $i ); # including $i
+                                       $this->stack->popTo( $i ); // including $i
                                        break;
                                } elseif ( $node->isA( BalanceSets::$specialSet ) ) {
                                        return true; // ignore this close token.
                                }
                        }
                        return true;
+               } elseif ( $token === 'comment' ) {
+                       $this->stack->insertComment( $value );
+                       return true;
                } else {
                        Assert::invariant( false, "Bad token type: $token" );
                }
        }
 
-       private function inTableMode( $token, $value, $attribs = null, $selfclose = false ) {
+       private function inTableMode( $token, $value, $attribs = null, $selfClose = false ) {
                if ( $token === 'text' ) {
                        if ( $this->textIntegrationMode ) {
-                               return $this->inBodyMode( $token, $value, $attribs, $selfclose );
+                               return $this->inBodyMode( $token, $value, $attribs, $selfClose );
                        } elseif ( $this->stack->currentNode->isA( BalanceSets::$tableSectionRowSet ) ) {
                                $this->pendingTableText = '';
                                $this->originalInsertionMode = $this->parseMode;
-                               return $this->switchModeAndReprocess( 'inTableTextMode', $token, $value, $attribs, $selfclose );
+                               return $this->switchModeAndReprocess( 'inTableTextMode',
+                                       $token, $value, $attribs, $selfClose );
                        }
                        // fall through to default case.
                } elseif ( $token === 'eof' ) {
@@ -2744,7 +2945,7 @@ class Balancer {
                                return true;
                        case 'col':
                                $this->inTableMode( 'tag', 'colgroup', [] );
-                               return $this->insertToken( $token, $value, $attribs, $selfclose );
+                               return $this->insertToken( $token, $value, $attribs, $selfClose );
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
@@ -2756,18 +2957,18 @@ class Balancer {
                        case 'th':
                        case 'tr':
                                $this->inTableMode( 'tag', 'tbody', [] );
-                               return $this->insertToken( $token, $value, $attribs, $selfclose );
+                               return $this->insertToken( $token, $value, $attribs, $selfClose );
                        case 'table':
                                if ( !$this->stack->inTableScope( $value ) ) {
                                        return true; // Ignore this tag.
                                }
                                $this->inTableMode( 'endtag', $value );
-                               return $this->insertToken( $token, $value, $attribs, $selfclose );
+                               return $this->insertToken( $token, $value, $attribs, $selfClose );
 
                        case 'style':
-                       # OMITTED: <script>
+                       // OMITTED: <script>
                        case 'template':
-                               return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+                               return $this->inHeadMode( $token, $value, $attribs, $selfClose );
 
                        case 'input':
                                if ( !isset( $attribs['type'] ) || strcasecmp( $attribs['type'], 'hidden' ) !== 0 ) {
@@ -2777,7 +2978,17 @@ class Balancer {
                                $this->stack->pop();
                                return true;
 
-                       # OMITTED: <form>
+                       case 'form':
+                               if (
+                                       $this->formElementPointer ||
+                                       $this->stack->indexOf( 'template' ) >= 0
+                               ) {
+                                       return true; // ignore this token
+                               }
+                               $this->formElementPointer =
+                                       $this->stack->insertHTMLElement( $value, $attribs );
+                               $this->stack->popTag( $this->formElementPointer );
+                               return true;
                        }
                        // Fall through for "anything else" clause.
                } elseif ( $token === 'endtag' ) {
@@ -2789,11 +3000,11 @@ class Balancer {
                                $this->stack->popTag( $value );
                                $this->resetInsertionMode();
                                return true;
-                       # OMITTED: <body>
+                       // OMITTED: <body>
                        case 'caption':
                        case 'col':
                        case 'colgroup':
-                       # OMITTED: <html>
+                       // OMITTED: <html>
                        case 'tbody':
                        case 'td':
                        case 'tfoot':
@@ -2802,18 +3013,21 @@ class Balancer {
                        case 'tr':
                                return true; // Ignore the token.
                        case 'template':
-                               return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+                               return $this->inHeadMode( $token, $value, $attribs, $selfClose );
                        }
                        // Fall through for "anything else" clause.
+               } elseif ( $token === 'comment' ) {
+                       $this->stack->insertComment( $value );
+                       return true;
                }
                // This is the "anything else" case:
                $this->stack->fosterParentMode = true;
-               $this->inBodyMode( $token, $value, $attribs, $selfclose );
+               $this->inBodyMode( $token, $value, $attribs, $selfClose );
                $this->stack->fosterParentMode = false;
                return true;
        }
 
-       private function inTableTextMode( $token, $value, $attribs = null, $selfclose = false ) {
+       private function inTableTextMode( $token, $value, $attribs = null, $selfClose = false ) {
                if ( $token === 'text' ) {
                        $this->pendingTableText .= $value;
                        return true;
@@ -2831,7 +3045,7 @@ class Balancer {
                        $this->stack->insertText( $text );
                }
                return $this->switchModeAndReprocess(
-                       $this->originalInsertionMode, $token, $value, $attribs, $selfclose
+                       $this->originalInsertionMode, $token, $value, $attribs, $selfClose
                );
        }
 
@@ -2847,7 +3061,7 @@ class Balancer {
                return true;
        }
 
-       private function inCaptionMode( $token, $value, $attribs = null, $selfclose = false ) {
+       private function inCaptionMode( $token, $value, $attribs = null, $selfClose = false ) {
                if ( $token === 'tag' ) {
                        switch ( $value ) {
                        case 'caption':
@@ -2860,7 +3074,7 @@ class Balancer {
                        case 'thead':
                        case 'tr':
                                if ( $this->endCaption() ) {
-                                       $this->insertToken( $token, $value, $attribs, $selfclose );
+                                       $this->insertToken( $token, $value, $attribs, $selfClose );
                                }
                                return true;
                        }
@@ -2872,13 +3086,13 @@ class Balancer {
                                return true;
                        case 'table':
                                if ( $this->endCaption() ) {
-                                       $this->insertToken( $token, $value, $attribs, $selfclose );
+                                       $this->insertToken( $token, $value, $attribs, $selfClose );
                                }
                                return true;
                        case 'body':
                        case 'col':
                        case 'colgroup':
-                       # OMITTED: <html>
+                       // OMITTED: <html>
                        case 'tbody':
                        case 'td':
                        case 'tfoot':
@@ -2891,10 +3105,10 @@ class Balancer {
                        // Fall through to "anything else" case.
                }
                // The Anything Else case
-               return $this->inBodyMode( $token, $value, $attribs, $selfclose );
+               return $this->inBodyMode( $token, $value, $attribs, $selfClose );
        }
 
-       private function inColumnGroupMode( $token, $value, $attribs = null, $selfclose = false ) {
+       private function inColumnGroupMode( $token, $value, $attribs = null, $selfClose = false ) {
                if ( $token === 'text' ) {
                        if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
                                $this->stack->insertText( $matches[0] );
@@ -2906,13 +3120,13 @@ class Balancer {
                        // Fall through to handle non-whitespace below.
                } elseif ( $token === 'tag' ) {
                        switch ( $value ) {
-                       # OMITTED: <html>
+                       // OMITTED: <html>
                        case 'col':
                                $this->stack->insertHTMLElement( $value, $attribs );
                                $this->stack->pop();
                                return true;
                        case 'template':
-                               return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+                               return $this->inHeadMode( $token, $value, $attribs, $selfClose );
                        }
                        // Fall through for "anything else".
                } elseif ( $token === 'endtag' ) {
@@ -2927,11 +3141,14 @@ class Balancer {
                        case 'col':
                                return true; // Ignore the token.
                        case 'template':
-                               return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+                               return $this->inHeadMode( $token, $value, $attribs, $selfClose );
                        }
                        // Fall through for "anything else".
                } elseif ( $token === 'eof' ) {
-                       return $this->inBodyMode( $token, $value, $attribs, $selfclose );
+                       return $this->inBodyMode( $token, $value, $attribs, $selfClose );
+               } elseif ( $token === 'comment' ) {
+                       $this->stack->insertComment( $value );
+                       return true;
                }
 
                // Anything else
@@ -2939,7 +3156,7 @@ class Balancer {
                        return true; // Ignore the token.
                }
                $this->inColumnGroupMode( 'endtag', 'colgroup' );
-               return $this->insertToken( $token, $value, $attribs, $selfclose );
+               return $this->insertToken( $token, $value, $attribs, $selfClose );
        }
 
        // Helper function for inTableBodyMode
@@ -2956,7 +3173,7 @@ class Balancer {
                $this->switchMode( 'inTableMode' );
                return true;
        }
-       private function inTableBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
+       private function inTableBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
                if ( $token === 'tag' ) {
                        switch ( $value ) {
                        case 'tr':
@@ -2967,7 +3184,7 @@ class Balancer {
                        case 'th':
                        case 'td':
                                $this->inTableBodyMode( 'tag', 'tr', [] );
-                               $this->insertToken( $token, $value, $attribs, $selfclose );
+                               $this->insertToken( $token, $value, $attribs, $selfClose );
                                return true;
                        case 'caption':
                        case 'col':
@@ -2976,7 +3193,7 @@ class Balancer {
                        case 'tfoot':
                        case 'thead':
                                if ( $this->endSection() ) {
-                                       $this->insertToken( $token, $value, $attribs, $selfclose );
+                                       $this->insertToken( $token, $value, $attribs, $selfClose );
                                }
                                return true;
                        }
@@ -2984,7 +3201,7 @@ class Balancer {
                        switch ( $value ) {
                        case 'table':
                                if ( $this->endSection() ) {
-                                       $this->insertToken( $token, $value, $attribs, $selfclose );
+                                       $this->insertToken( $token, $value, $attribs, $selfClose );
                                }
                                return true;
                        case 'tbody':
@@ -2994,11 +3211,11 @@ class Balancer {
                                        $this->endSection();
                                }
                                return true;
-                       # OMITTED: <body>
+                       // OMITTED: <body>
                        case 'caption':
                        case 'col':
                        case 'colgroup':
-                       # OMITTED: <html>
+                       // OMITTED: <html>
                        case 'td':
                        case 'th':
                        case 'tr':
@@ -3006,7 +3223,7 @@ class Balancer {
                        }
                }
                // Anything else:
-               return $this->inTableMode( $token, $value, $attribs, $selfclose );
+               return $this->inTableMode( $token, $value, $attribs, $selfClose );
        }
 
        // Helper function for inRowMode
@@ -3019,7 +3236,7 @@ class Balancer {
                $this->switchMode( 'inTableBodyMode' );
                return true;
        }
-       private function inRowMode( $token, $value, $attribs = null, $selfclose = false ) {
+       private function inRowMode( $token, $value, $attribs = null, $selfClose = false ) {
                if ( $token === 'tag' ) {
                        switch ( $value ) {
                        case 'th':
@@ -3037,7 +3254,7 @@ class Balancer {
                        case 'thead':
                        case 'tr':
                                if ( $this->endRow() ) {
-                                       $this->insertToken( $token, $value, $attribs, $selfclose );
+                                       $this->insertToken( $token, $value, $attribs, $selfClose );
                                }
                                return true;
                        }
@@ -3048,7 +3265,7 @@ class Balancer {
                                return true;
                        case 'table':
                                if ( $this->endRow() ) {
-                                       $this->insertToken( $token, $value, $attribs, $selfclose );
+                                       $this->insertToken( $token, $value, $attribs, $selfClose );
                                }
                                return true;
                        case 'tbody':
@@ -3058,21 +3275,21 @@ class Balancer {
                                        $this->stack->inTableScope( $value ) &&
                                        $this->endRow()
                                ) {
-                                       $this->insertToken( $token, $value, $attribs, $selfclose );
+                                       $this->insertToken( $token, $value, $attribs, $selfClose );
                                }
                                return true;
-                       # OMITTED: <body>
+                       // OMITTED: <body>
                        case 'caption':
                        case 'col':
                        case 'colgroup':
-                       # OMITTED: <html>
+                       // OMITTED: <html>
                        case 'td':
                        case 'th':
                                return true; // Ignore the token.
                        }
                }
                // Anything else:
-               return $this->inTableMode( $token, $value, $attribs, $selfclose );
+               return $this->inTableMode( $token, $value, $attribs, $selfClose );
        }
 
        // Helper for inCellMode
@@ -3087,7 +3304,7 @@ class Balancer {
                        return false;
                }
        }
-       private function inCellMode( $token, $value, $attribs = null, $selfclose = false ) {
+       private function inCellMode( $token, $value, $attribs = null, $selfClose = false ) {
                if ( $token === 'tag' ) {
                        switch ( $value ) {
                        case 'caption':
@@ -3100,7 +3317,7 @@ class Balancer {
                        case 'thead':
                        case 'tr':
                                if ( $this->endCell() ) {
-                                       $this->insertToken( $token, $value, $attribs, $selfclose );
+                                       $this->insertToken( $token, $value, $attribs, $selfClose );
                                }
                                return true;
                        }
@@ -3115,11 +3332,11 @@ class Balancer {
                                        $this->switchMode( 'inRowMode' );
                                }
                                return true;
-                       # OMITTED: <body>
+                       // OMITTED: <body>
                        case 'caption':
                        case 'col':
                        case 'colgroup':
-                       # OMITTED: <html>
+                       // OMITTED: <html>
                                return true;
 
                        case 'table':
@@ -3132,29 +3349,119 @@ class Balancer {
                                        $this->stack->popTag( BalanceSets::$tableCellSet );
                                        $this->afe->clearToMarker();
                                        $this->switchMode( 'inRowMode' );
-                                       $this->insertToken( $token, $value, $attribs, $selfclose );
+                                       $this->insertToken( $token, $value, $attribs, $selfClose );
                                }
                                return true;
                        }
                }
                // Anything else:
-               return $this->inBodyMode( $token, $value, $attribs, $selfclose );
+               return $this->inBodyMode( $token, $value, $attribs, $selfClose );
        }
 
-       # OMITTED: <select>
-       /*
-       private function inSelectMode( $token, $value, $attribs = null, $selfclose = false ) {
-               Assert::invariant( false, 'Unimplemented' );
+       // Token handler for the inSelectMode insertion mode (presumably the
+       // HTML5 tree builder's "in select" mode -- confirm against the spec).
+       // Text and comments are inserted via the stack, EOF is handed to
+       // inBodyMode, a few start/end tags get special treatment, and every
+       // other token is ignored.  Returns true, except where the result of
+       // a delegated handler (inBodyMode, inHeadMode, insertToken) is
+       // returned instead.
+       private function inSelectMode( $token, $value, $attribs = null, $selfClose = false ) {
+               if ( $token === 'comment' ) {
+                       $this->stack->insertComment( $value );
+                       return true;
+               }
+               if ( $token === 'text' ) {
+                       $this->stack->insertText( $value );
+                       return true;
+               }
+               if ( $token === 'eof' ) {
+                       return $this->inBodyMode( $token, $value, $attribs, $selfClose );
+               }
+
+               if ( $token === 'tag' ) {
+                       switch ( $value ) {
+                       // OMITTED: <html>
+                       case 'option':
+                               // An <option> that is still the current node is popped first.
+                               if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
+                                       $this->stack->pop();
+                               }
+                               $this->stack->insertHTMLElement( $value, $attribs );
+                               return true;
+                       case 'optgroup':
+                               // Pop a current <option>, then a current <optgroup>, before
+                               // inserting the new element.  currentNode is re-read after
+                               // each pop because popping changes it.
+                               if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
+                                       $this->stack->pop();
+                               }
+                               if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
+                                       $this->stack->pop();
+                               }
+                               $this->stack->insertHTMLElement( $value, $attribs );
+                               return true;
+                       case 'select':
+                               // A nested <select> start tag is handled as if it were </select>.
+                               $this->inSelectMode( 'endtag', $value );
+                               return true;
+                       case 'input':
+                       case 'keygen':
+                       case 'textarea':
+                               if ( $this->stack->inSelectScope( 'select' ) ) {
+                                       // Process a synthetic </select>, then hand the original
+                                       // token to insertToken.
+                                       $this->inSelectMode( 'endtag', 'select' );
+                                       return $this->insertToken( $token, $value, $attribs, $selfClose );
+                               }
+                               return true; // ignore token (fragment case)
+                       case 'script':
+                       case 'template':
+                               return $this->inHeadMode( $token, $value, $attribs, $selfClose );
+                       }
+               }
+
+               if ( $token === 'endtag' ) {
+                       switch ( $value ) {
+                       case 'optgroup':
+                               // Pop a current <option> only when the entry directly below
+                               // it on the stack is an <optgroup>.
+                               if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
+                                       $depth = $this->stack->length();
+                                       if (
+                                               $depth >= 2 &&
+                                               $this->stack->node( $depth - 2 )->isHtmlNamed( 'optgroup' )
+                                       ) {
+                                               $this->stack->pop();
+                                       }
+                               }
+                               if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
+                                       $this->stack->pop();
+                               }
+                               return true;
+                       case 'option':
+                               if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
+                                       $this->stack->pop();
+                               }
+                               return true;
+                       case 'select':
+                               // When no <select> is in select scope (fragment case) the
+                               // end tag is simply dropped.
+                               if ( $this->stack->inSelectScope( $value ) ) {
+                                       $this->stack->popTag( $value );
+                                       $this->resetInsertionMode();
+                               }
+                               return true;
+                       case 'template':
+                               return $this->inHeadMode( $token, $value, $attribs, $selfClose );
+                       }
+               }
+
+               // anything else: just ignore the token
+               return true;
+       }
 
-       private function inSelectInTableMode( $token, $value, $attribs = null, $selfclose = false ) {
-               Assert::invariant( false, 'Unimplemented' );
+       // Token handler for the inSelectInTableMode insertion mode.  Start and
+       // end tags for caption, table, tbody, tfoot, thead, tr, td and th first
+       // feed this handler a synthetic 'select' end tag and then pass the
+       // original token to insertToken; such an end tag is dropped instead
+       // when its element is not in table scope.  Every other token is
+       // delegated to inSelectMode.
+       private function inSelectInTableMode( $token, $value, $attribs = null, $selfClose = false ) {
+               $isStartTag = ( $token === 'tag' );
+               $isEndTag = ( $token === 'endtag' );
+               $tableTags = [
+                       'caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'
+               ];
+
+               // in_array() is deliberately non-strict: it compares with ==, the
+               // same comparison the case labels of a switch statement use.
+               if ( ( $isStartTag || $isEndTag ) && in_array( $value, $tableTags ) ) {
+                       if ( $isEndTag && !$this->stack->inTableScope( $value ) ) {
+                               return true;
+                       }
+                       // The synthetic </select> is not a table tag, so this call
+                       // ends up in inSelectMode.
+                       $this->inSelectInTableMode( 'endtag', 'select' );
+                       return $this->insertToken( $token, $value, $attribs, $selfClose );
+               }
+
+               // anything else
+               return $this->inSelectMode( $token, $value, $attribs, $selfClose );
+       }
-       */
 
-       private function inTemplateMode( $token, $value, $attribs = null, $selfclose = false ) {
-               if ( $token === 'text' ) {
-                       return $this->inBodyMode( $token, $value, $attribs, $selfclose );
+       private function inTemplateMode( $token, $value, $attribs = null, $selfClose = false ) {
+               if ( $token === 'text' || $token === 'comment' ) {
+                       return $this->inBodyMode( $token, $value, $attribs, $selfClose );
                } elseif ( $token === 'eof' ) {
                        if ( $this->stack->indexOf( 'template' ) < 0 ) {
                                $this->stopParsing();
@@ -3163,7 +3470,7 @@ class Balancer {
                                $this->afe->clearToMarker();
                                array_pop( $this->templateInsertionModes );
                                $this->resetInsertionMode();
-                               $this->insertToken( $token, $value, $attribs, $selfclose );
+                               $this->insertToken( $token, $value, $attribs, $selfClose );
                        }
                        return true;
                } elseif ( $token === 'tag' ) {
@@ -3174,11 +3481,11 @@ class Balancer {
                        case 'link':
                        case 'meta':
                        case 'noframes':
-                       # OMITTED: <script>
+                       // OMITTED: <script>
                        case 'style':
                        case 'template':
-                       # OMITTED: <title>
-                               return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+                       // OMITTED: <title>
+                               return $this->inHeadMode( $token, $value, $attribs, $selfClose );
 
                        case 'caption':
                        case 'colgroup':
@@ -3186,32 +3493,32 @@ class Balancer {
                        case 'tfoot':
                        case 'thead':
                                return $this->switchModeAndReprocess(
-                                       'inTableMode', $token, $value, $attribs, $selfclose
+                                       'inTableMode', $token, $value, $attribs, $selfClose
                                );
 
                        case 'col':
                                return $this->switchModeAndReprocess(
-                                       'inColumnGroupMode', $token, $value, $attribs, $selfclose
+                                       'inColumnGroupMode', $token, $value, $attribs, $selfClose
                                );
 
                        case 'tr':
                                return $this->switchModeAndReprocess(
-                                       'inTableBodyMode', $token, $value, $attribs, $selfclose
+                                       'inTableBodyMode', $token, $value, $attribs, $selfClose
                                );
 
                        case 'td':
                        case 'th':
                                return $this->switchModeAndReprocess(
-                                       'inRowMode', $token, $value, $attribs, $selfclose
+                                       'inRowMode', $token, $value, $attribs, $selfClose
                                );
                        }
                        return $this->switchModeAndReprocess(
-                               'inBodyMode', $token, $value, $attribs, $selfclose
+                               'inBodyMode', $token, $value, $attribs, $selfClose
                        );
                } elseif ( $token === 'endtag' ) {
                        switch ( $value ) {
                        case 'template':
-                               return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+                               return $this->inHeadMode( $token, $value, $attribs, $selfClose );
                        }
                        return true;
                } else {