From: C. Scott Ananian Date: Thu, 14 Jul 2016 21:00:22 +0000 (-0400) Subject: Support <select> tags in Balancer. Change-Id: Ibc346624a9d035c98a29132a541e7ed6d82b364e --- diff --git a/includes/tidy/Balancer.php b/includes/tidy/Balancer.php index 4bcaf1a064..1ad8a4a533 100644 --- a/includes/tidy/Balancer.php +++ b/includes/tidy/Balancer.php @@ -72,7 +72,7 @@ class BalanceSets { 'form' => true, 'frame' => true, 'plaintext' => true, 'isindex' => true, 'textarea' => true, 'xmp' => true, 'iframe' => true, 'noembed' => true, - 'noscript' => true, 'select' => true, 'script' => true, + 'noscript' => true, 'script' => true, 'title' => true ] ]; @@ -228,6 +228,12 @@ class BalanceSets { ] ]; + public static $inInvertedSelectScopeSet = [ + self::HTML_NAMESPACE => [ + 'option' => true, 'optgroup' => true + ] + ]; + public static $mathmlTextIntegrationPointSet = [ self::MATHML_NAMESPACE => [ 'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true, @@ -784,6 +790,26 @@ class BalanceStack implements IteratorAggregate { return $this->inSpecificScope( $tag, BalanceSets::$inTableScopeSet ); } + /** + * Determine if the stack has $tag in select scope. + * @param BalanceElement|array|string $tag + * @return bool + * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-select-scope + */ + public function inSelectScope( $tag ) { + // Can't use inSpecificScope to implement this, since it involves + // *inverting* a set of tags. Implement manually. + foreach ( $this as $elt ) { + if ( $elt->isA( $tag ) ) { + return true; + } + if ( !$elt->isA( BalanceSets::$inInvertedSelectScopeSet ) ) { + return false; + } + } + return false; + } + /** * Determine if the stack has $tag in a specific scope, $set. * @param BalanceElement|array|string $tag @@ -1698,7 +1724,7 @@ class BalanceActiveFormattingElements { * - We don't alter linefeeds after
<pre>/<listing>.
  * - The following elements are disallowed: <html>, <head>, <body>, <frameset>,
  *   <form>,
<frame>, <plaintext>, <isindex>, <textarea>, <xmp>, <iframe>, - * <noembed>, <noscript>, <select>, <script>, <title>. As a result, + * <noembed>, <noscript>, <script>, <title>. As a result, * further simplifications can be made: * - `frameset-ok` is not tracked. * - `form element pointer` is not tracked. @@ -2076,8 +2102,6 @@ class Balancer { } if ( $node->isHtml() ) { switch ( $node->localName ) { - # OMITTED: <select> - /* case 'select': $stacklen = $this->stack->length(); for ( $j = $i + 1; $j < $stacklen-1; $j++ ) { @@ -2092,7 +2116,6 @@ class Balancer { } $this->switchMode( 'inSelectMode' ); return; - */ case 'tr': $this->switchMode( 'inRowMode' ); return; @@ -2489,8 +2512,6 @@ class Balancer { # OMITTED: <noembed> # OMITTED: <noscript> - # OMITTED: <select> - /* case 'select': $this->afe->reconstruct( $this->stack ); $this->stack->insertHTMLElement( $value, $attribs ); @@ -2506,7 +2527,6 @@ class Balancer { $this->switchMode( 'inSelectMode' ); return true; } - */ case 'optgroup': case 'option': @@ -3143,16 +3163,103 @@ class Balancer { return $this->inBodyMode( $token, $value, $attribs, $selfclose ); } - # OMITTED: <select> - /* private function inSelectMode( $token, $value, $attribs = null, $selfclose = false ) { - Assert::invariant( false, 'Unimplemented' ); + if ( $token === 'text' ) { + $this->stack->insertText( $value ); + return true; + } elseif ( $token === 'eof' ) { + return $this->inBodyMode( $token, $value, $attribs, $selfclose ); + } elseif ( $token === 'tag' ) { + switch ( $value ) { + # OMITTED: <html> + case 'option': + if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) { + $this->stack->pop(); + } + $this->stack->insertHTMLElement( $value, $attribs ); + return true; + case 'optgroup': + if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) { + $this->stack->pop(); + } + if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) { + $this->stack->pop(); + } + $this->stack->insertHTMLElement( $value, $attribs ); + return true; + case 'select': 
$this->inSelectMode( 'endtag', $value ); // treat it like endtag + return true; + case 'input': + case 'keygen': + case 'textarea': + if ( !$this->stack->inSelectScope( 'select' ) ) { + return true; // ignore token (fragment case) + } + $this->inSelectMode( 'endtag', 'select' ); + return $this->insertToken( $token, $value, $attribs, $selfclose ); + case 'script': + case 'template': + return $this->inHeadMode( $token, $value, $attribs, $selfclose ); + } + } elseif ( $token === 'endtag' ) { + switch ( $value ) { + case 'optgroup': + if ( + $this->stack->currentNode->isHtmlNamed( 'option' ) && + $this->stack->length() >= 2 && + $this->stack->node( $this->stack->length() - 2 )->isHtmlNamed( 'optgroup' ) + ) { + $this->stack->pop(); + } + if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) { + $this->stack->pop(); + } + return true; + case 'option': + if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) { + $this->stack->pop(); + } + return true; + case 'select': + if ( !$this->stack->inSelectScope( $value ) ) { + return true; // fragment case + } + $this->stack->popTag( $value ); + $this->resetInsertionMode(); + return true; + case 'template': + return $this->inHeadMode( $token, $value, $attribs, $selfclose ); + } + } + // anything else: just ignore the token + return true; } private function inSelectInTableMode( $token, $value, $attribs = null, $selfclose = false ) { - Assert::invariant( false, 'Unimplemented' ); + switch ( $value ) { + case 'caption': + case 'table': + case 'tbody': + case 'tfoot': + case 'thead': + case 'tr': + case 'td': + case 'th': + if ( $token === 'tag' ) { + $this->inSelectInTableMode( 'endtag', 'select' ); + return $this->insertToken( $token, $value, $attribs, $selfclose ); + } elseif ( $token === 'endtag' ) { + if ( $this->stack->inTableScope( $value ) ) { + $this->inSelectInTableMode( 'endtag', 'select' ); + return $this->insertToken( $token, $value, $attribs, $selfclose ); + } + return true; + } + } + // anything else + return 
$this->inSelectMode( $token, $value, $attribs, $selfclose ); } - */ private function inTemplateMode( $token, $value, $attribs = null, $selfclose = false ) { if ( $token === 'text' ) { diff --git a/tests/phpunit/includes/tidy/BalancerTest.php b/tests/phpunit/includes/tidy/BalancerTest.php index 7b94e40417..50931c7913 100644 --- a/tests/phpunit/includes/tidy/BalancerTest.php +++ b/tests/phpunit/includes/tidy/BalancerTest.php @@ -94,7 +94,6 @@ class BalancerTest extends MediaWikiTestCase { isset( $case['document']['props']['tags']['noembed'] ) || isset( $case['document']['props']['tags']['noscript'] ) || isset( $case['document']['props']['tags']['script'] ) || - isset( $case['document']['props']['tags']['select'] ) || isset( $case['document']['props']['tags']['svg script'] ) || isset( $case['document']['props']['tags']['svg title'] ) || isset( $case['document']['props']['tags']['textarea'] ) ||