self::HTML_NAMESPACE => [
'html' => true, 'head' => true, 'body' => true, 'frameset' => true,
'frame' => true,
- 'plaintext' => true, 'isindex' => true,
+ 'plaintext' => true,
'xmp' => true, 'iframe' => true, 'noembed' => true,
'noscript' => true, 'script' => true,
'title' => true
'h2' => true, 'h3' => true, 'h4' => true, 'h5' => true,
'h6' => true, 'head' => true, 'header' => true, 'hgroup' => true,
'hr' => true, 'html' => true, 'iframe' => true, 'img' => true,
- 'input' => true, 'isindex' => true, 'li' => true, 'link' => true,
+ 'input' => true, 'li' => true, 'link' => true,
'listing' => true, 'main' => true, 'marquee' => true,
- 'menu' => true, 'menuitem' => true, 'meta' => true, 'nav' => true,
+ 'menu' => true, 'meta' => true, 'nav' => true,
'noembed' => true, 'noframes' => true, 'noscript' => true,
'object' => true, 'ol' => true, 'p' => true, 'param' => true,
'plaintext' => true, 'pre' => true, 'script' => true,
public static $impliedEndTagsSet = [
self::HTML_NAMESPACE => [
- 'dd' => true, 'dt' => true, 'li' => true, 'optgroup' => true,
+ 'dd' => true, 'dt' => true, 'li' => true,
+ 'menuitem' => true, 'optgroup' => true,
'option' => true, 'p' => true, 'rb' => true, 'rp' => true,
'rt' => true, 'rtc' => true
]
/**
* Parent of this element, or the string "flat" if this element has
* already been flattened into its parent.
- * @var string|null $parent
+ * @var BalanceElement|string|null $parent
*/
public $parent;
* child will be an actual BalanceElement object; the rest will
* be strings, representing either text nodes or flattened
* BalanceElement objects.
- * @var array $children
+ * @var BalanceElement[]|string[] $children
*/
public $children;
* by the HTML serialization specification, and replace this node
* in its parent by that string.
*
+ * @param array $config Balancer configuration; see Balancer::__construct().
+ * @return string
+ *
* @see __toString()
*/
- public function flatten( $tidyCompat = false ) {
+ public function flatten( array $config ) {
Assert::parameter( $this->parent !== null, '$this', 'must be a child' );
Assert::parameter( $this->parent !== 'flat', '$this', 'already flat' );
$idx = array_search( $this, $this->parent->children, true );
Assert::parameter(
$idx !== false, '$this', 'must be a child of its parent'
);
+ $tidyCompat = $config['tidyCompat'];
if ( $tidyCompat ) {
$blank = true;
foreach ( $this->children as $elt ) {
if ( !is_string( $elt ) ) {
- $elt = $elt->flatten( $tidyCompat );
+ $elt = $elt->flatten( $config );
}
if ( $blank && preg_match( '/[^\t\n\f\r ]/', $elt ) ) {
$blank = false;
$this->attribs = [ 'class' => "mw-empty-elt" ];
}
$blank = false;
+ } elseif (
+ $this->isA( BalanceSets::$extraLinefeedSet ) &&
+ count( $this->children ) > 0 &&
+ substr( $this->children[0], 0, 1 ) == "\n"
+ ) {
+ // Double the linefeed after pre/listing/textarea
+ // according to the (old) HTML5 fragment serialization
+ // algorithm (see https://github.com/whatwg/html/issues/944)
+ // to ensure this will round-trip.
+ array_unshift( $this->children, "\n" );
}
$flat = $blank ? '' : "{$this}";
} else {
$out .= "{$elt}";
}
$out .= "</{$this->localName}>";
- if (
- $this->isA( BalanceSets::$extraLinefeedSet ) &&
- $out[$len] === "\n"
- ) {
- // Double the linefeed after pre/listing/textarea
- // according to the HTML5 fragment serialization algorithm.
- $out = substr( $out, 0, $len + 1 ) .
- substr( $out, $len );
- }
} else {
$out = "<{$this->localName}{$encAttribs} />";
Assert::invariant(
class BalanceStack implements IteratorAggregate {
/**
* Backing storage for the stack.
- * @var array $elements
+ * @var BalanceElement[] $elements
*/
private $elements = [];
/**
*/
public $fosterParentMode = false;
/**
- * Tidy compatibility mode, determines behavior of body/blockquote
+ * Configuration options governing flattening.
+ * @var array $config
+ * @see Balancer::__construct()
*/
- public $tidyCompat = false;
+ private $config;
/**
* Reference to the current element
*/
/**
* Create a new BalanceStack with a single BalanceElement on it,
* representing the root <html> node.
+ * @param array $config Balancer configuration; see Balancer::__construct().
*/
- public function __construct() {
+ public function __construct( array $config ) {
// always a root <html> element on the stack
array_push(
$this->elements,
new BalanceElement( BalanceSets::HTML_NAMESPACE, 'html', [] )
);
$this->currentNode = $this->elements[0];
+ $this->config = $config;
}
/**
$out = '';
foreach ( $this->elements[0]->children as $elt ) {
$out .= is_string( $elt ) ? $elt :
- $elt->flatten( $this->tidyCompat );
+ $elt->flatten( $this->config );
}
return $out;
}
/**
* Insert text at the appropriate place for inserting a node.
* @param string $value
+ * @param bool $isComment
* @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
*/
public function insertText( $value, $isComment = false ) {
) {
$this->fosterParent( $value );
} elseif (
- $this->tidyCompat && !$isComment &&
+ $this->config['tidyCompat'] && !$isComment &&
$this->currentNode->isA( BalanceSets::$tidyPWrapSet )
) {
- $this->insertHTMLELement( 'mw:p-wrap', [] );
+ $this->insertHTMLElement( 'mw:p-wrap', [] );
return $this->insertText( $value );
} else {
$this->currentNode->appendChild( $value );
/**
* Return an iterator over this stack which visits the current node
* first, and the root node last.
- * @return Iterator
+ * @return \Iterator
*/
public function getIterator() {
return new ReverseArrayIterator( $this->elements );
$this->currentNode = null;
}
if ( !$elt->isHtmlNamed( 'mw:p-wrap' ) ) {
- $elt->flatten( $this->tidyCompat );
+ $elt->flatten( $this->config );
}
}
* @param int $idx
*/
public function popTo( $idx ) {
- $length = count( $this->elements );
for ( $length = count( $this->elements ); $length > $idx; $length-- ) {
$this->pop();
}
// otherwise, it will eventually serialize when the parent
// is serialized, we just hold onto the memory for its
// tree of objects a little longer.
- $elt->flatten( $this->tidyCompat );
+ $elt->flatten( $this->config );
}
Assert::postcondition(
array_search( $elt, $this->elements, true ) === false,
/**
* Foster parent the given $elt in the stack of open elements.
* @param BalanceElement|string $elt
+ * @return BalanceElement|string
+ *
* @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
*/
private function fosterParent( $elt ) {
$parent = $this->elements[0]; // the `html` element.
}
- if ( $this->tidyCompat ) {
+ if ( $this->config['tidyCompat'] ) {
if ( is_string( $elt ) ) {
// We're fostering text: do we need a p-wrapper?
if ( $parent->isA( BalanceSets::$tidyPWrapSet ) ) {
return true; // no more handling required
}
- // Let outer loop counter be zero.
- $outer = 0;
-
// Outer loop: If outer loop counter is greater than or
// equal to eight, then abort these steps.
- while ( $outer < 8 ) {
- // Increment outer loop counter by one.
- $outer++;
-
+ for ( $outer = 0; $outer < 8; $outer++ ) {
// Let the formatting element be the last element in the list
// of active formatting elements that: is between the end of
// the list and the last scope marker in the list, if any, or
// the start of the list otherwise, and has the same tag name
// as the token.
- $fmtelt = $afe->findElementByTag( $tag );
+ $fmtElt = $afe->findElementByTag( $tag );
// If there is no such node, then abort these steps and instead
// act as described in the "any other end tag" entry below.
- if ( !$fmtelt ) {
+ if ( !$fmtElt ) {
return false; // false means handle by the default case
}
// Otherwise, if there is such a node, but that node is not in
// the stack of open elements, then this is a parse error;
// remove the element from the list, and abort these steps.
- $index = $this->indexOf( $fmtelt );
+ $index = $this->indexOf( $fmtElt );
if ( $index < 0 ) {
- $afe->remove( $fmtelt );
+ $afe->remove( $fmtElt );
return true; // true means no more handling required
}
// the stack of open elements, but the element is not in scope,
// then this is a parse error; ignore the token, and abort
// these steps.
- if ( !$this->inScope( $fmtelt ) ) {
+ if ( !$this->inScope( $fmtElt ) ) {
return true;
}
// open elements that is lower in the stack than the formatting
// element, and is an element in the special category. There
// might not be one.
- $furthestblock = null;
- $furthestblockindex = -1;
- $stacklen = $this->length();
- for ( $i = $index+1; $i < $stacklen; $i++ ) {
+ $furthestBlock = null;
+ $furthestBlockIndex = -1;
+ $stackLength = $this->length();
+ for ( $i = $index+1; $i < $stackLength; $i++ ) {
if ( $this->node( $i )->isA( BalanceSets::$specialSet ) ) {
- $furthestblock = $this->node( $i );
- $furthestblockindex = $i;
+ $furthestBlock = $this->node( $i );
+ $furthestBlockIndex = $i;
break;
}
}
// up to and including the formatting element, and remove the
// formatting element from the list of active formatting
// elements.
- if ( !$furthestblock ) {
- $this->popTag( $fmtelt );
- $afe->remove( $fmtelt );
- return true;
- } else {
- // Let the common ancestor be the element immediately above
- // the formatting element in the stack of open elements.
- $ancestor = $this->node( $index-1 );
-
- // Let a bookmark note the position of the formatting
- // element in the list of active formatting elements
- // relative to the elements on either side of it in the
- // list.
- $BOOKMARK = new BalanceElement( '[bookmark]', '[bookmark]', [] );
- $afe->insertAfter( $fmtelt, $BOOKMARK );
-
- // Let node and last node be the furthest block.
- $node = $furthestblock;
- $lastnode = $furthestblock;
- $nodeindex = $furthestblockindex;
- $isAFE = false;
-
- // Let inner loop counter be zero.
- $inner = 0;
-
- while ( true ) {
-
- // Increment inner loop counter by one.
- $inner++;
-
- // Let node be the element immediately above node in
- // the stack of open elements, or if node is no longer
- // in the stack of open elements (e.g. because it got
- // removed by this algorithm), the element that was
- // immediately above node in the stack of open elements
- // before node was removed.
- $node = $this->node( --$nodeindex );
-
- // If node is the formatting element, then go
- // to the next step in the overall algorithm.
- if ( $node === $fmtelt ) break;
-
- // If the inner loop counter is greater than three and node
- // is in the list of active formatting elements, then remove
- // node from the list of active formatting elements.
- $isAFE = $afe->isInList( $node );
- if ( $inner > 3 && $isAFE ) {
- $afe->remove( $node );
- $isAFE = false;
- }
-
- // If node is not in the list of active formatting
- // elements, then remove node from the stack of open
- // elements and then go back to the step labeled inner
- // loop.
- if ( !$isAFE ) {
- // Don't flatten here, since we're about to relocate
- // parts of this $node.
- $this->removeElement( $node, false );
- continue;
- }
-
- // Create an element for the token for which the
- // element node was created with common ancestor as
- // the intended parent, replace the entry for node
- // in the list of active formatting elements with an
- // entry for the new element, replace the entry for
- // node in the stack of open elements with an entry for
- // the new element, and let node be the new element.
- $newelt = new BalanceElement(
- $node->namespaceURI, $node->localName, $node->attribs );
- $afe->replace( $node, $newelt );
- $this->replaceAt( $nodeindex, $newelt );
- $node = $newelt;
-
- // If last node is the furthest block, then move the
- // aforementioned bookmark to be immediately after the
- // new node in the list of active formatting elements.
- if ( $lastnode === $furthestblock ) {
- $afe->remove( $BOOKMARK );
- $afe->insertAfter( $newelt, $BOOKMARK );
- }
-
- // Insert last node into node, first removing it from
- // its previous parent node if any.
- $node->appendChild( $lastnode );
-
- // Let last node be node.
- $lastnode = $node;
- }
-
- // If the common ancestor node is a table, tbody, tfoot,
- // thead, or tr element, then, foster parent whatever last
- // node ended up being in the previous step, first removing
- // it from its previous parent node if any.
- if (
- $this->fosterParentMode &&
- $ancestor->isA( BalanceSets::$tableSectionRowSet )
- ) {
- $this->fosterParent( $lastnode );
- } else {
- // Otherwise, append whatever last node ended up being in
- // the previous step to the common ancestor node, first
- // removing it from its previous parent node if any.
- $ancestor->appendChild( $lastnode );
+ if ( !$furthestBlock ) {
+ $this->popTag( $fmtElt );
+ $afe->remove( $fmtElt );
+ return true;
+ }
+
+ // Let the common ancestor be the element immediately above
+ // the formatting element in the stack of open elements.
+ $ancestor = $this->node( $index-1 );
+
+ // Let a bookmark note the position of the formatting
+ // element in the list of active formatting elements
+ // relative to the elements on either side of it in the
+ // list.
+ $BOOKMARK = new BalanceElement( '[bookmark]', '[bookmark]', [] );
+ $afe->insertAfter( $fmtElt, $BOOKMARK );
+
+ // Let node and last node be the furthest block.
+ $node = $furthestBlock;
+ $lastNode = $furthestBlock;
+ $nodeIndex = $furthestBlockIndex;
+ $isAFE = false;
+
+ // Inner loop
+ for ( $inner = 1; true; $inner++ ) {
+ // Let node be the element immediately above node in
+ // the stack of open elements, or if node is no longer
+ // in the stack of open elements (e.g. because it got
+ // removed by this algorithm), the element that was
+ // immediately above node in the stack of open elements
+ // before node was removed.
+ $node = $this->node( --$nodeIndex );
+
+ // If node is the formatting element, then go
+ // to the next step in the overall algorithm.
+ if ( $node === $fmtElt ) break;
+
+ // If the inner loop counter is greater than three and node
+ // is in the list of active formatting elements, then remove
+ // node from the list of active formatting elements.
+ $isAFE = $afe->isInList( $node );
+ if ( $inner > 3 && $isAFE ) {
+ $afe->remove( $node );
+ $isAFE = false;
+ }
+
+ // If node is not in the list of active formatting
+ // elements, then remove node from the stack of open
+ // elements and then go back to the step labeled inner
+ // loop.
+ if ( !$isAFE ) {
+ // Don't flatten here, since we're about to relocate
+ // parts of this $node.
+ $this->removeElement( $node, false );
+ continue;
}
// Create an element for the token for which the
- // formatting element was created, with furthest block
- // as the intended parent.
- $newelt2 = new BalanceElement(
- $fmtelt->namespaceURI, $fmtelt->localName, $fmtelt->attribs );
+ // element node was created with common ancestor as
+ // the intended parent, replace the entry for node
+ // in the list of active formatting elements with an
+ // entry for the new element, replace the entry for
+ // node in the stack of open elements with an entry for
+ // the new element, and let node be the new element.
+ $newElt = new BalanceElement(
+ $node->namespaceURI, $node->localName, $node->attribs );
+ $afe->replace( $node, $newElt );
+ $this->replaceAt( $nodeIndex, $newElt );
+ $node = $newElt;
+
+ // If last node is the furthest block, then move the
+ // aforementioned bookmark to be immediately after the
+ // new node in the list of active formatting elements.
+ if ( $lastNode === $furthestBlock ) {
+ $afe->remove( $BOOKMARK );
+ $afe->insertAfter( $newElt, $BOOKMARK );
+ }
+
+ // Insert last node into node, first removing it from
+ // its previous parent node if any.
+ $node->appendChild( $lastNode );
+
+ // Let last node be node.
+ $lastNode = $node;
+ }
+
+ // If the common ancestor node is a table, tbody, tfoot,
+ // thead, or tr element, then, foster parent whatever last
+ // node ended up being in the previous step, first removing
+ // it from its previous parent node if any.
+ if (
+ $this->fosterParentMode &&
+ $ancestor->isA( BalanceSets::$tableSectionRowSet )
+ ) {
+ $this->fosterParent( $lastNode );
+ } else {
+ // Otherwise, append whatever last node ended up being in
+ // the previous step to the common ancestor node, first
+ // removing it from its previous parent node if any.
+ $ancestor->appendChild( $lastNode );
+ }
- // Take all of the child nodes of the furthest block and
- // append them to the element created in the last step.
- $newelt2->adoptChildren( $furthestblock );
+ // Create an element for the token for which the
+ // formatting element was created, with furthest block
+ // as the intended parent.
+ $newElt2 = new BalanceElement(
+ $fmtElt->namespaceURI, $fmtElt->localName, $fmtElt->attribs );
- // Append that new element to the furthest block.
- $furthestblock->appendChild( $newelt2 );
+ // Take all of the child nodes of the furthest block and
+ // append them to the element created in the last step.
+ $newElt2->adoptChildren( $furthestBlock );
- // Remove the formatting element from the list of active
- // formatting elements, and insert the new element into the
- // list of active formatting elements at the position of
- // the aforementioned bookmark.
- $afe->remove( $fmtelt );
- $afe->replace( $BOOKMARK, $newelt2 );
+ // Append that new element to the furthest block.
+ $furthestBlock->appendChild( $newElt2 );
- // Remove the formatting element from the stack of open
- // elements, and insert the new element into the stack of
- // open elements immediately below the position of the
- // furthest block in that stack.
- $this->removeElement( $fmtelt );
- $this->insertAfter( $furthestblock, $newelt2 );
- }
+ // Remove the formatting element from the list of active
+ // formatting elements, and insert the new element into the
+ // list of active formatting elements at the position of
+ // the aforementioned bookmark.
+ $afe->remove( $fmtElt );
+ $afe->replace( $BOOKMARK, $newElt2 );
+
+ // Remove the formatting element from the stack of open
+ // elements, and insert the new element into the stack of
+ // open elements immediately below the position of the
+ // furthest block in that stack.
+ $this->removeElement( $fmtElt );
+ $this->insertAfter( $furthestBlock, $newElt2 );
}
return true;
private $noahTableStack = [ [] ];
public function __destruct() {
+ $next = null;
for ( $node = $this->head; $node; $node = $next ) {
$next = $node->nextAFE;
$node->prevAFE = $node->nextAFE = $node->nextNoah = null;
/**
* Determine whether an element is in the list of formatting elements.
+ * @param BalanceElement $elt
* @return boolean
*/
public function isInList( BalanceElement $elt ) {
/**
* Find the element $elt in the list and remove it.
* Used when parsing <a> in body mode.
+ *
+ * @param BalanceElement $elt
*/
public function remove( BalanceElement $elt ) {
if ( $this->head !== $elt && !$elt->prevAFE ) {
/**
* Find element $a in the list and replace it with element $b
+ *
+ * @param BalanceElement $a
+ * @param BalanceElement $b
*/
public function replace( BalanceElement $a, BalanceElement $b ) {
if ( $this->head !== $a && !$a->prevAFE ) {
/**
* Find $a in the list and insert $b after it.
+ *
+ * @param BalanceElement $a
+ * @param BalanceElement $b
*/
public function insertAfter( BalanceElement $a, BalanceElement $b ) {
if ( $this->head !== $a && !$a->prevAFE ) {
// Loop backward through the list until we find a marker or an
// open element
- $foundit = false;
+ $foundIt = false;
while ( $entry->prevAFE ) {
$entry = $entry->prevAFE;
if ( $entry instanceof BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
- $foundit = true;
+ $foundIt = true;
break;
}
}
// the first element if we didn't find a marker or open element),
// recreating formatting elements and pushing them back onto the list
// of open elements.
- if ( $foundit ) {
+ if ( $foundIt ) {
$entry = $entry->nextAFE;
}
do {
* and escaped.
* - All null characters are assumed to have been removed.
* - The following elements are disallowed: <html>, <head>, <body>, <frameset>,
- * <frame>, <plaintext>, <isindex>, <xmp>, <iframe>,
+ * <frame>, <plaintext>, <xmp>, <iframe>,
* <noembed>, <noscript>, <script>, <title>. As a result,
* further simplifications can be made:
* - `frameset-ok` is not tracked.
*/
class Balancer {
private $parseMode;
+ /** @var \Iterator */
private $bitsIterator;
private $allowedHtmlElements;
+ /** @var BalanceActiveFormattingElements */
private $afe;
+ /** @var BalanceStack */
private $stack;
private $strict;
- private $tidyCompat;
private $allowComments;
+ private $config;
private $textIntegrationMode;
private $pendingTableText;
private $inRCDATA;
private $inRAWTEXT;
+ /** @var callable|null */
+ private $processingCallback;
+ /** @var array */
+ private $processingArgs;
+
/**
* Valid HTML5 comments.
* Regex borrowed from Tim Starling's "remex-html" project.
*/
const VALID_COMMENT_REGEX = "~ !--
- ( # 1. Comment match detector
+ ( # 1. Comment match detector
> | -> | # Invalid short close
( # 2. Comment contents
(?:
( # 3. Comment close
--> | # Normal close
--!> | # Comment end bang
- ( # 4. Indicate matches requiring EOF
- --! | # EOF in comment end bang state
- -- | # EOF in comment end state
- - | # EOF in comment end dash state
- # EOF in comment state
+ ( # 4. Indicate matches requiring EOF
+ --! | # EOF in comment end bang state
+ -- | # EOF in comment end state
+ - | # EOF in comment end dash state
+ (?#nothing) # EOF in comment state
)
)
)
- ([^<]*) \z # 5. Non-tag text after the comment
+ ([^<]*) \z # 5. Non-tag text after the comment
~xs";
/**
* provide historical compatibility with the old "tidy"
* program: <p>-wrapping is done to the children of
* <body> and <blockquote> elements, and empty elements
- * are removed.
+ * are removed. The <pre>/<listing>/<textarea> serialization
+ * is also tweaked to allow lossless round trips.
+ * (See: https://github.com/whatwg/html/issues/944)
* 'allowComments': boolean, defaults to true.
* When true, allows HTML comments in the input.
* The Sanitizer generally strips all comments, so if you
* false to get a bit more performance.
*/
public function __construct( array $config = [] ) {
- $config = $config + [
+ $this->config = $config = $config + [
'strict' => false,
'allowedHtmlElements' => null,
'tidyCompat' => false,
];
$this->allowedHtmlElements = $config['allowedHtmlElements'];
$this->strict = $config['strict'];
- $this->tidyCompat = $config['tidyCompat'];
$this->allowComments = $config['allowComments'];
if ( $this->allowedHtmlElements !== null ) {
// Sanity check!
$this->parseMode = 'inBodyMode';
$this->bitsIterator = new ExplodeIterator( '<', $text );
$this->afe = new BalanceActiveFormattingElements();
- $this->stack = new BalanceStack();
- $this->stack->tidyCompat = $this->tidyCompat;
+ $this->stack = new BalanceStack( $this->config );
$this->processingCallback = $processingCallback;
$this->processingArgs = $processingArgs;
* Pass a token to the tree builder. The $token will be one of the
* strings "tag", "endtag", or "text".
*/
- private function insertToken( $token, $value, $attribs = null, $selfclose = false ) {
+ private function insertToken( $token, $value, $attribs = null, $selfClose = false ) {
// validate tags against $unsupportedSet
if ( $token === 'tag' || $token === 'endtag' ) {
if ( isset( BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE][$value] ) ) {
// Some hoops we have to jump through
$adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );
+ // The spec calls this the "tree construction dispatcher".
$isForeign = true;
if (
$this->stack->length() === 0 ||
$isForeign = false;
}
if ( $isForeign ) {
- return $this->insertForeignToken( $token, $value, $attribs, $selfclose );
+ return $this->insertForeignToken( $token, $value, $attribs, $selfClose );
} else {
$func = $this->parseMode;
- return $this->$func( $token, $value, $attribs, $selfclose );
+ return $this->$func( $token, $value, $attribs, $selfClose );
}
}
- private function insertForeignToken( $token, $value, $attribs = null, $selfclose = false ) {
+ private function insertForeignToken( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'text' ) {
$this->stack->insertText( $value );
return true;
+ } elseif ( $token === 'comment' ) {
+ $this->stack->insertComment( $value );
+ return true;
} elseif ( $token === 'tag' ) {
switch ( $value ) {
case 'font':
break;
}
}
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
}
// "Any other start tag"
$adjusted = ( $this->fragmentContext && $this->stack->length()===1 ) ?
$this->stack->insertForeignElement(
$adjusted->namespaceURI, $value, $attribs
);
- if ( $selfclose ) {
+ if ( $selfClose ) {
$this->stack->pop();
}
return true;
if ( $node->isHtml() && !$first ) {
// process the end tag as HTML
$func = $this->parseMode;
- return $this->$func( $token, $value, $attribs, $selfclose );
+ return $this->$func( $token, $value, $attribs, $selfClose );
} elseif ( $i === 0 ) {
return true;
} elseif ( $node->localName === $value ) {
);
$slash = $t = $attribStr = $brace = $rest = null;
}
- $goodtag = $t;
+ $goodTag = $t;
if ( $this->inRCDATA ) {
if ( $slash && $t === $this->inRCDATA ) {
$this->inRCDATA = false;
} else {
// No tags allowed; this emulates the "rcdata" tokenizer mode.
- $goodtag = false;
+ $goodTag = false;
}
}
if ( $this->inRAWTEXT ) {
$this->inRAWTEXT = false;
} else {
// No tags allowed, no entity-escaping done.
- $goodtag = false;
+ $goodTag = false;
}
}
$sanitize = $this->allowedHtmlElements !== null;
if ( $sanitize ) {
- $goodtag = $t && isset( $this->allowedHtmlElements[$t] );
+ $goodTag = $t && isset( $this->allowedHtmlElements[$t] );
}
- if ( $goodtag ) {
+ if ( $goodTag ) {
if ( is_callable( $this->processingCallback ) ) {
call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
}
if ( $sanitize ) {
- $goodtag = Sanitizer::validateTag( $attribStr, $t );
+ $goodTag = Sanitizer::validateTag( $attribStr, $t );
}
}
- if ( $goodtag ) {
+ if ( $goodTag ) {
if ( $sanitize ) {
$attribs = Sanitizer::decodeTagAttributes( $attribStr );
$attribs = Sanitizer::validateTagAttributes( $attribs, $t );
} else {
$attribs = Sanitizer::decodeTagAttributes( $attribStr );
}
- $goodtag = $this->insertToken(
+ $goodTag = $this->insertToken(
$slash ? 'endtag' : 'tag', $t, $attribs, $brace === '/>'
);
}
- if ( $goodtag ) {
+ if ( $goodTag ) {
$rest = str_replace( '>', '>', $rest );
$this->insertToken( 'text', str_replace( '>', '>', $rest ) );
} elseif ( $this->inRAWTEXT ) {
return $oldMode;
}
- private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfclose ) {
+ private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfClose ) {
$this->switchMode( $mode );
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
}
private function resetInsertionMode() {
if ( $node->isHtml() ) {
switch ( $node->localName ) {
case 'select':
- $stacklen = $this->stack->length();
- for ( $j = $i + 1; $j < $stacklen-1; $j++ ) {
- $ancestor = $this->stack->node( $stacklen-$j-1 );
+ $stackLength = $this->stack->length();
+ for ( $j = $i + 1; $j < $stackLength-1; $j++ ) {
+ $ancestor = $this->stack->node( $stackLength-$j-1 );
if ( $ancestor->isHtmlNamed( 'template' ) ) {
break;
}
return true;
}
- private function inTextMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inTextMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'text' ) {
$this->stack->insertText( $value );
return true;
} elseif ( $token === 'eof' ) {
$this->stack->pop();
return $this->switchModeAndReprocess(
- $this->originalInsertionMode, $token, $value, $attribs, $selfclose
+ $this->originalInsertionMode, $token, $value, $attribs, $selfClose
);
} elseif ( $token === 'endtag' ) {
$this->stack->pop();
return true;
}
- private function inHeadMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inHeadMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'text' ) {
if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
$this->stack->insertText( $matches[0] );
// If not handled above
$this->inHeadMode( 'endtag', 'head' ); // synthetic </head>
// Then redo this one
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
}
- private function inBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'text' ) {
$this->afe->reconstruct( $this->stack );
$this->stack->insertText( $value );
return true;
} elseif ( $token === 'eof' ) {
if ( !empty( $this->templateInsertionModes ) ) {
- return $this->inTemplateMode( $token, $value, $attribs, $selfclose );
+ return $this->inTemplateMode( $token, $value, $attribs, $selfClose );
}
$this->stopParsing();
return true;
case 'style':
case 'template':
// OMITTED: <title>
- return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+ return $this->inHeadMode( $token, $value, $attribs, $selfClose );
// OMITTED: <body>
// OMITTED: <frameset>
case 'header':
case 'hgroup':
case 'main':
- case 'menu':
case 'nav':
case 'ol':
case 'p':
$this->stack->insertHTMLElement( $value, $attribs );
return true;
+ case 'menu':
+ if ( $this->stack->inButtonScope( "p" ) ) {
+ $this->inBodyMode( 'endtag', 'p' );
+ }
+ if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
+ $this->stack->pop();
+ }
+ $this->stack->insertHTMLElement( $value, $attribs );
+ return true;
+
case 'h1':
case 'h2':
case 'h3':
case 'button':
if ( $this->stack->inScope( 'button' ) ) {
$this->inBodyMode( 'endtag', 'button' );
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
}
$this->afe->reconstruct( $this->stack );
$this->stack->insertHTMLElement( $value, $attribs );
case 'tt':
case 'u':
$this->afe->reconstruct( $this->stack );
- $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
+ $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ) );
return true;
case 'nobr':
$this->inBodyMode( 'endtag', 'nobr' );
$this->afe->reconstruct( $this->stack );
}
- $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
+ $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ) );
return true;
case 'applet':
// (hence we don't need to examine the tag's "type" attribute)
return true;
- case 'menuitem':
case 'param':
case 'source':
case 'track':
if ( $this->stack->inButtonScope( 'p' ) ) {
$this->inBodyMode( 'endtag', 'p' );
}
+ if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
+ $this->stack->pop();
+ }
$this->stack->insertHTMLElement( $value, $attribs );
$this->stack->pop();
return true;
case 'image':
// warts!
- return $this->inBodyMode( $token, 'img', $attribs, $selfclose );
-
- // OMITTED: <isindex>
+ return $this->inBodyMode( $token, 'img', $attribs, $selfClose );
case 'textarea':
$this->stack->insertHTMLElement( $value, $attribs );
$this->stack->insertHTMLElement( $value, $attribs );
return true;
+ case 'menuitem':
+ if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
+ $this->stack->pop();
+ }
+ $this->afe->reconstruct( $this->stack );
+ $this->stack->insertHTMLElement( $value, $attribs );
+ return true;
+
case 'rb':
case 'rtc':
if ( $this->stack->inScope( 'ruby' ) ) {
$this->stack->insertForeignElement(
BalanceSets::MATHML_NAMESPACE, $value, $attribs
);
- if ( $selfclose ) {
+ if ( $selfClose ) {
// emit explicit </math> tag.
$this->stack->pop();
}
$this->stack->insertForeignElement(
BalanceSets::SVG_NAMESPACE, $value, $attribs
);
- if ( $selfclose ) {
+ if ( $selfClose ) {
// emit explicit </svg> tag.
$this->stack->pop();
}
// </body>,</html> are unsupported.
case 'template':
- return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+ return $this->inHeadMode( $token, $value, $attribs, $selfClose );
case 'address':
case 'article':
case 'p':
if ( !$this->stack->inButtonScope( 'p' ) ) {
$this->inBodyMode( 'tag', 'p', [] );
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
}
$this->stack->generateImpliedEndTags( $value );
$this->stack->popTag( $value );
}
}
- private function inTableMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inTableMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'text' ) {
if ( $this->textIntegrationMode ) {
- return $this->inBodyMode( $token, $value, $attribs, $selfclose );
+ return $this->inBodyMode( $token, $value, $attribs, $selfClose );
} elseif ( $this->stack->currentNode->isA( BalanceSets::$tableSectionRowSet ) ) {
$this->pendingTableText = '';
$this->originalInsertionMode = $this->parseMode;
return $this->switchModeAndReprocess( 'inTableTextMode',
- $token, $value, $attribs, $selfclose );
+ $token, $value, $attribs, $selfClose );
}
// fall through to default case.
} elseif ( $token === 'eof' ) {
return true;
case 'col':
$this->inTableMode( 'tag', 'colgroup', [] );
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
case 'tbody':
case 'tfoot':
case 'thead':
case 'th':
case 'tr':
$this->inTableMode( 'tag', 'tbody', [] );
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
case 'table':
if ( !$this->stack->inTableScope( $value ) ) {
return true; // Ignore this tag.
}
$this->inTableMode( 'endtag', $value );
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
case 'style':
// OMITTED: <script>
case 'template':
- return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+ return $this->inHeadMode( $token, $value, $attribs, $selfClose );
case 'input':
if ( !isset( $attribs['type'] ) || strcasecmp( $attribs['type'], 'hidden' ) !== 0 ) {
case 'tr':
return true; // Ignore the token.
case 'template':
- return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+ return $this->inHeadMode( $token, $value, $attribs, $selfClose );
}
// Fall through for "anything else" clause.
} elseif ( $token === 'comment' ) {
}
// This is the "anything else" case:
$this->stack->fosterParentMode = true;
- $this->inBodyMode( $token, $value, $attribs, $selfclose );
+ $this->inBodyMode( $token, $value, $attribs, $selfClose );
$this->stack->fosterParentMode = false;
return true;
}
- private function inTableTextMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inTableTextMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'text' ) {
$this->pendingTableText .= $value;
return true;
$this->stack->insertText( $text );
}
return $this->switchModeAndReprocess(
- $this->originalInsertionMode, $token, $value, $attribs, $selfclose
+ $this->originalInsertionMode, $token, $value, $attribs, $selfClose
);
}
return true;
}
- private function inCaptionMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inCaptionMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'tag' ) {
switch ( $value ) {
case 'caption':
case 'thead':
case 'tr':
if ( $this->endCaption() ) {
- $this->insertToken( $token, $value, $attribs, $selfclose );
+ $this->insertToken( $token, $value, $attribs, $selfClose );
}
return true;
}
return true;
case 'table':
if ( $this->endCaption() ) {
- $this->insertToken( $token, $value, $attribs, $selfclose );
+ $this->insertToken( $token, $value, $attribs, $selfClose );
}
return true;
case 'body':
// Fall through to "anything else" case.
}
// The Anything Else case
- return $this->inBodyMode( $token, $value, $attribs, $selfclose );
+ return $this->inBodyMode( $token, $value, $attribs, $selfClose );
}
- private function inColumnGroupMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inColumnGroupMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'text' ) {
if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
$this->stack->insertText( $matches[0] );
$this->stack->pop();
return true;
case 'template':
- return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+ return $this->inHeadMode( $token, $value, $attribs, $selfClose );
}
// Fall through for "anything else".
} elseif ( $token === 'endtag' ) {
case 'col':
return true; // Ignore the token.
case 'template':
- return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+ return $this->inHeadMode( $token, $value, $attribs, $selfClose );
}
// Fall through for "anything else".
} elseif ( $token === 'eof' ) {
- return $this->inBodyMode( $token, $value, $attribs, $selfclose );
+ return $this->inBodyMode( $token, $value, $attribs, $selfClose );
} elseif ( $token === 'comment' ) {
$this->stack->insertComment( $value );
return true;
return true; // Ignore the token.
}
$this->inColumnGroupMode( 'endtag', 'colgroup' );
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
}
// Helper function for inTableBodyMode
$this->switchMode( 'inTableMode' );
return true;
}
- private function inTableBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inTableBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'tag' ) {
switch ( $value ) {
case 'tr':
case 'th':
case 'td':
$this->inTableBodyMode( 'tag', 'tr', [] );
- $this->insertToken( $token, $value, $attribs, $selfclose );
+ $this->insertToken( $token, $value, $attribs, $selfClose );
return true;
case 'caption':
case 'col':
case 'tfoot':
case 'thead':
if ( $this->endSection() ) {
- $this->insertToken( $token, $value, $attribs, $selfclose );
+ $this->insertToken( $token, $value, $attribs, $selfClose );
}
return true;
}
switch ( $value ) {
case 'table':
if ( $this->endSection() ) {
- $this->insertToken( $token, $value, $attribs, $selfclose );
+ $this->insertToken( $token, $value, $attribs, $selfClose );
}
return true;
case 'tbody':
}
}
// Anything else:
- return $this->inTableMode( $token, $value, $attribs, $selfclose );
+ return $this->inTableMode( $token, $value, $attribs, $selfClose );
}
// Helper function for inRowMode
$this->switchMode( 'inTableBodyMode' );
return true;
}
- private function inRowMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inRowMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'tag' ) {
switch ( $value ) {
case 'th':
case 'thead':
case 'tr':
if ( $this->endRow() ) {
- $this->insertToken( $token, $value, $attribs, $selfclose );
+ $this->insertToken( $token, $value, $attribs, $selfClose );
}
return true;
}
return true;
case 'table':
if ( $this->endRow() ) {
- $this->insertToken( $token, $value, $attribs, $selfclose );
+ $this->insertToken( $token, $value, $attribs, $selfClose );
}
return true;
case 'tbody':
$this->stack->inTableScope( $value ) &&
$this->endRow()
) {
- $this->insertToken( $token, $value, $attribs, $selfclose );
+ $this->insertToken( $token, $value, $attribs, $selfClose );
}
return true;
// OMITTED: <body>
}
}
// Anything else:
- return $this->inTableMode( $token, $value, $attribs, $selfclose );
+ return $this->inTableMode( $token, $value, $attribs, $selfClose );
}
// Helper for inCellMode
return false;
}
}
- private function inCellMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inCellMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'tag' ) {
switch ( $value ) {
case 'caption':
case 'thead':
case 'tr':
if ( $this->endCell() ) {
- $this->insertToken( $token, $value, $attribs, $selfclose );
+ $this->insertToken( $token, $value, $attribs, $selfClose );
}
return true;
}
$this->stack->popTag( BalanceSets::$tableCellSet );
$this->afe->clearToMarker();
$this->switchMode( 'inRowMode' );
- $this->insertToken( $token, $value, $attribs, $selfclose );
+ $this->insertToken( $token, $value, $attribs, $selfClose );
}
return true;
}
}
// Anything else:
- return $this->inBodyMode( $token, $value, $attribs, $selfclose );
+ return $this->inBodyMode( $token, $value, $attribs, $selfClose );
}
- private function inSelectMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inSelectMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'text' ) {
$this->stack->insertText( $value );
return true;
} elseif ( $token === 'eof' ) {
- return $this->inBodyMode( $token, $value, $attribs, $selfclose );
+ return $this->inBodyMode( $token, $value, $attribs, $selfClose );
} elseif ( $token === 'tag' ) {
switch ( $value ) {
// OMITTED: <html>
return true; // ignore token (fragment case)
}
$this->inSelectMode( 'endtag', 'select' );
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
case 'script':
case 'template':
- return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+ return $this->inHeadMode( $token, $value, $attribs, $selfClose );
}
} elseif ( $token === 'endtag' ) {
switch ( $value ) {
$this->resetInsertionMode();
return true;
case 'template':
- return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+ return $this->inHeadMode( $token, $value, $attribs, $selfClose );
}
} elseif ( $token === 'comment' ) {
$this->stack->insertComment( $value );
return true;
}
- private function inSelectInTableMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inSelectInTableMode( $token, $value, $attribs = null, $selfClose = false ) {
switch ( $value ) {
case 'caption':
case 'table':
case 'th':
if ( $token === 'tag' ) {
$this->inSelectInTableMode( 'endtag', 'select' );
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
} elseif ( $token === 'endtag' ) {
if ( $this->stack->inTableScope( $value ) ) {
$this->inSelectInTableMode( 'endtag', 'select' );
- return $this->insertToken( $token, $value, $attribs, $selfclose );
+ return $this->insertToken( $token, $value, $attribs, $selfClose );
}
return true;
}
}
// anything else
- return $this->inSelectMode( $token, $value, $attribs, $selfclose );
+ return $this->inSelectMode( $token, $value, $attribs, $selfClose );
}
- private function inTemplateMode( $token, $value, $attribs = null, $selfclose = false ) {
+ private function inTemplateMode( $token, $value, $attribs = null, $selfClose = false ) {
if ( $token === 'text' || $token === 'comment' ) {
- return $this->inBodyMode( $token, $value, $attribs, $selfclose );
+ return $this->inBodyMode( $token, $value, $attribs, $selfClose );
} elseif ( $token === 'eof' ) {
if ( $this->stack->indexOf( 'template' ) < 0 ) {
$this->stopParsing();
$this->afe->clearToMarker();
array_pop( $this->templateInsertionModes );
$this->resetInsertionMode();
- $this->insertToken( $token, $value, $attribs, $selfclose );
+ $this->insertToken( $token, $value, $attribs, $selfClose );
}
return true;
} elseif ( $token === 'tag' ) {
case 'style':
case 'template':
// OMITTED: <title>
- return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+ return $this->inHeadMode( $token, $value, $attribs, $selfClose );
case 'caption':
case 'colgroup':
case 'tfoot':
case 'thead':
return $this->switchModeAndReprocess(
- 'inTableMode', $token, $value, $attribs, $selfclose
+ 'inTableMode', $token, $value, $attribs, $selfClose
);
case 'col':
return $this->switchModeAndReprocess(
- 'inColumnGroupMode', $token, $value, $attribs, $selfclose
+ 'inColumnGroupMode', $token, $value, $attribs, $selfClose
);
case 'tr':
return $this->switchModeAndReprocess(
- 'inTableBodyMode', $token, $value, $attribs, $selfclose
+ 'inTableBodyMode', $token, $value, $attribs, $selfClose
);
case 'td':
case 'th':
return $this->switchModeAndReprocess(
- 'inRowMode', $token, $value, $attribs, $selfclose
+ 'inRowMode', $token, $value, $attribs, $selfClose
);
}
return $this->switchModeAndReprocess(
- 'inBodyMode', $token, $value, $attribs, $selfclose
+ 'inBodyMode', $token, $value, $attribs, $selfClose
);
} elseif ( $token === 'endtag' ) {
switch ( $value ) {
case 'template':
- return $this->inHeadMode( $token, $value, $attribs, $selfclose );
+ return $this->inHeadMode( $token, $value, $attribs, $selfClose );
}
return true;
} else {