--- /dev/null
+<?php
+
+namespace MediaWiki\Tidy;
+
+use RemexHtml\HTMLData;
+use RemexHtml\Serializer\Serializer;
+use RemexHtml\Serializer\SerializerNode;
+use RemexHtml\Tokenizer\Attributes;
+use RemexHtml\Tokenizer\PlainAttributes;
+use RemexHtml\TreeBuilder\TreeBuilder;
+use RemexHtml\TreeBuilder\TreeHandler;
+use RemexHtml\TreeBuilder\Element;
+
+/**
+ * @internal
+ */
+class RemexCompatMunger implements TreeHandler {
+ /**
+ * Set of element names that insertElement() treats as inline. Membership
+ * is tested with isset(), so only the keys are significant.
+ *
+ * @var bool[]
+ */
+ private static $onlyInlineElements = [
+ "a" => true,
+ "abbr" => true,
+ "acronym" => true,
+ "applet" => true,
+ "b" => true,
+ "basefont" => true,
+ "bdo" => true,
+ "big" => true,
+ "br" => true,
+ "button" => true,
+ "cite" => true,
+ "code" => true,
+ "dfn" => true,
+ "em" => true,
+ "font" => true,
+ "i" => true,
+ "iframe" => true,
+ "img" => true,
+ "input" => true,
+ "kbd" => true,
+ "label" => true,
+ "legend" => true,
+ "map" => true,
+ "object" => true,
+ "param" => true,
+ "q" => true,
+ "rb" => true,
+ "rbc" => true,
+ "rp" => true,
+ "rt" => true,
+ "rtc" => true,
+ "ruby" => true,
+ "s" => true,
+ "samp" => true,
+ "select" => true,
+ "small" => true,
+ "span" => true,
+ "strike" => true,
+ "strong" => true,
+ "sub" => true,
+ "sup" => true,
+ "textarea" => true,
+ "tt" => true,
+ "u" => true,
+ "var" => true,
+ ];
+
+ /**
+ * Set of element names that insertElement() flags as splittable when they
+ * are inserted under a p-wrapper or under another splittable element.
+ * Only the keys are significant.
+ *
+ * @var bool[]
+ */
+ private static $formattingElements = [
+ 'a' => true,
+ 'b' => true,
+ 'big' => true,
+ 'code' => true,
+ 'em' => true,
+ 'font' => true,
+ 'i' => true,
+ 'nobr' => true,
+ 's' => true,
+ 'small' => true,
+ 'strike' => true,
+ 'strong' => true,
+ 'tt' => true,
+ 'u' => true,
+ ];
+
+ /**
+ * The downstream serializer that every tree event is forwarded to.
+ * Declared explicitly so that it is not created as a dynamic property.
+ *
+ * @var Serializer
+ */
+ private $serializer;
+
+ /**
+ * Constructor
+ *
+ * @param Serializer $serializer The serializer to forward (possibly
+ *   rewritten) tree events to
+ */
+ public function __construct( Serializer $serializer ) {
+ 	$this->serializer = $serializer;
+ }
+
+ /**
+ * Start the document downstream, then attach fresh munger data to the
+ * serializer's root node with p-wrapping enabled.
+ *
+ * @param string|null $fragmentNamespace Passed through to the serializer
+ * @param string|null $fragmentName Passed through to the serializer
+ */
+ public function startDocument( $fragmentNamespace, $fragmentName ) {
+ 	$this->serializer->startDocument( $fragmentNamespace, $fragmentName );
+
+ 	$rootData = new RemexMungerData;
+ 	$rootData->needsPWrapping = true;
+ 	$this->serializer->getRootNode()->snData = $rootData;
+ }
+
+ /**
+ * Forward the end-of-document event to the serializer unchanged.
+ *
+ * @param int $pos Passed straight through to the serializer
+ */
+ public function endDocument( $pos ) {
+ $this->serializer->endDocument( $pos );
+ }
+
+ /**
+ * Work out where an insertion should really go, taking clone links and
+ * open p-wrappers into account.
+ *
+ * @param int $preposition One of TreeBuilder::ROOT, BEFORE or UNDER
+ * @param Element|null $refElement The reference element supplied by the
+ *   caller; not dereferenced for TreeBuilder::ROOT
+ * @return array A two-element list [ parent node, reference node ]. The
+ *   reference node is null for ROOT.
+ */
+ private function getParentForInsert( $preposition, $refElement ) {
+ 	if ( $preposition === TreeBuilder::ROOT ) {
+ 		return [ $this->serializer->getRootNode(), null ];
+ 	}
+
+ 	$target = $refElement->userData;
+ 	if ( $preposition === TreeBuilder::BEFORE ) {
+ 		return [ $this->serializer->getParentNode( $target ), $target ];
+ 	}
+
+ 	$requestedData = $target->snData;
+ 	if ( $requestedData->currentCloneElement ) {
+ 		// Walk to the last element in the chain of clone links
+ 		$tail = $requestedData->currentCloneElement;
+ 		while ( $tail->userData->snData->currentCloneElement ) {
+ 			$tail = $tail->userData->snData->currentCloneElement;
+ 		}
+ 		// Remember the end of the chain on the element that was asked for
+ 		$requestedData->currentCloneElement = $tail;
+ 		$target = $tail->userData;
+ 	} elseif ( $requestedData->childPElement ) {
+ 		// Divert the insertion into the currently open p-wrapper
+ 		$target = $requestedData->childPElement->userData;
+ 	}
+
+ 	return [ $target, $target ];
+ }
+
+ /**
+ * Create an mw:p-wrap element under the given node, mark it as a
+ * p-wrapper, and register it on the parent as the open child p-wrapper.
+ *
+ * @param SerializerNode $parent The node that becomes the wrap base
+ * @param int $sourceStart Source position passed to the serializer
+ * @return SerializerNode The serializer node of the new p-wrapper
+ */
+ private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
+ 	$wrapperElement = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
+ 	// userData is only read after the downstream insert
+ 	$this->serializer->insertElement( TreeBuilder::UNDER, $parent, $wrapperElement,
+ 		false, $sourceStart, 0 );
+ 	$wrapperNode = $wrapperElement->userData;
+
+ 	$wrapperData = new RemexMungerData;
+ 	$wrapperData->isPWrapper = true;
+ 	$wrapperData->wrapBaseNode = $parent;
+ 	$wrapperNode->snData = $wrapperData;
+
+ 	$parent->snData->childPElement = $wrapperElement;
+ 	return $wrapperNode;
+ }
+
+ /**
+ * Insert a text node, opening a p-wrapper or splitting the tag stack
+ * first when the text is inserted under a node that requires it.
+ *
+ * @param int $preposition One of the TreeBuilder preposition constants
+ * @param Element|null $refElement The reference element
+ * @param string $text Buffer containing the text
+ * @param int $start Offset of the text within $text
+ * @param int $length Length of the text within $text
+ * @param int $sourceStart Passed through to the serializer
+ * @param int $sourceLength Passed through to the serializer
+ */
+ public function characters( $preposition, $refElement, $text, $start, $length,
+ 	$sourceStart, $sourceLength
+ ) {
+ 	// Blank means the whole range consists of tab, LF, FF, CR or space
+ 	$isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;
+
+ 	list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
+ 	$parentData = $parent->snData;
+
+ 	if ( $preposition === TreeBuilder::UNDER ) {
+ 		$diverted = null;
+ 		if ( $parentData->needsPWrapping && !$isBlank ) {
+ 			// Bare non-blank text under body/blockquote gets a p-wrapper
+ 			$diverted = $this->insertPWrapper( $refNode, $sourceStart );
+ 		} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
+ 			// Splittable parent in block mode: split the tag stack
+ 			$diverted = $this->splitTagStack( $refNode, true, $sourceStart );
+ 		}
+ 		if ( $diverted !== null ) {
+ 			$refNode = $diverted;
+ 			$parentData = $diverted->snData;
+ 		}
+ 	}
+
+ 	if ( !$isBlank ) {
+ 		// Non-blank text makes its parent non-blank
+ 		$parentData->nonblankNodeCount++;
+ 	}
+ 	$this->serializer->characters( $preposition, $refNode, $text, $start,
+ 		$length, $sourceStart, $sourceLength );
+ }
+
+ /**
+ * Insert or reparent an element. Create p-wrappers or split the tag stack
+ * as necessary.
+ *
+ * Consider the following insertion locations. The parent may be:
+ *
+ * - A: A body or blockquote (!!needsPWrapping)
+ * - B: A p-wrapper (!!isPWrapper)
+ * - C: A descendant of a p-wrapper (!!ancestorPNode)
+ *   - CS: With splittable formatting elements in the stack region up to
+ *     the p-wrapper
+ *   - CU: With one or more unsplittable elements in the stack region up
+ *     to the p-wrapper
+ * - D: Not a descendant of a p-wrapper (!ancestorPNode)
+ *   - DS: With splittable formatting elements in the stack region up to
+ *     the body or blockquote
+ *   - DU: With one or more unsplittable elements in the stack region up
+ *     to the body or blockquote
+ *
+ * And consider that we may insert two types of element:
+ * - b: block
+ * - i: inline
+ *
+ * We handle the insertion as follows:
+ *
+ * - A/i: Create a p-wrapper, insert under it
+ * - A/b: Insert as normal
+ * - B/i: Insert as normal
+ * - B/b: Close the p-wrapper, insert under the body/blockquote (wrap
+ *   base) instead
+ * - C/i: Insert as normal
+ * - CS/b: Split the tag stack, insert the block under cloned formatting
+ *   elements which have the wrap base (the parent of the p-wrap) as
+ *   their ultimate parent.
+ * - CU/b: Disable the p-wrap, by reparenting the currently open child
+ *   of the p-wrap under the p-wrap's parent. Then insert the block as
+ *   normal.
+ * - D/b: Insert as normal
+ * - DS/i: Split the tag stack, creating a new p-wrapper as the ultimate
+ *   parent of the formatting elements thus cloned. The parent of the
+ *   p-wrapper is the body or blockquote.
+ * - DU/i: Insert as normal
+ *
+ * FIXME: fostering ($preposition == BEFORE) is mostly done by inserting as
+ * normal, the full algorithm is not followed.
+ *
+ * @param int $preposition
+ * @param Element|SerializerNode|null $refElement
+ * @param Element $element
+ * @param bool $void
+ * @param int $sourceStart
+ * @param int $sourceLength
+ */
+ public function insertElement( $preposition, $refElement, Element $element, $void,
+ 	$sourceStart, $sourceLength
+ ) {
+ 	list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
+ 	$parentData = $parent->snData;
+ 	$elementName = $element->htmlName;
+
+ 	$inline = isset( self::$onlyInlineElements[$elementName] );
+ 	$under = $preposition === TreeBuilder::UNDER;
+
+ 	if ( $under && $parentData->isPWrapper && !$inline ) {
+ 		// [B/b] The element is non-inline and the parent is a p-wrapper,
+ 		// close the parent and insert into its parent instead
+ 		$newParent = $this->serializer->getParentNode( $parent );
+ 		$parent = $newParent;
+ 		$parentData = $parent->snData;
+ 		$parentData->childPElement = null;
+ 		$newRef = $refElement->userData;
+ 		// FIXME cannot call endTag() since we don't have an Element
+ 	} elseif ( $under && $parentData->isSplittable
+ 		&& (bool)$parentData->ancestorPNode !== $inline
+ 	) {
+ 		// [CS/b, DS/i] The parent is splittable and the current element is
+ 		// inline in block context, or if the current element is a block
+ 		// under a p-wrapper, split the tag stack.
+ 		$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
+ 		$parent = $newRef;
+ 		$parentData = $parent->snData;
+ 	} elseif ( $under && $parentData->needsPWrapping && $inline ) {
+ 		// [A/i] If the element is inline and we are in body/blockquote,
+ 		// we need to create a p-wrapper
+ 		$newRef = $this->insertPWrapper( $newRef, $sourceStart );
+ 		$parent = $newRef;
+ 		$parentData = $parent->snData;
+ 	} elseif ( $parentData->ancestorPNode && !$inline ) {
+ 		// [CU/b] If the element is non-inline and (despite attempting to
+ 		// split above) there is still an ancestor p-wrap, disable that
+ 		// p-wrap
+ 		$this->disablePWrapper( $parent, $sourceStart );
+ 	}
+ 	// else [A/b, B/i, C/i, D/b, DU/i] insert as normal
+
+ 	// An element with element children is a non-blank element
+ 	$parentData->nonblankNodeCount++;
+
+ 	// Insert the element downstream and so initialise its userData
+ 	$this->serializer->insertElement( $preposition, $newRef,
+ 		$element, $void, $sourceStart, $sourceLength );
+
+ 	// Initialise snData, reusing any data already attached to the node
+ 	if ( !$element->userData->snData ) {
+ 		$elementData = $element->userData->snData = new RemexMungerData;
+ 	} else {
+ 		$elementData = $element->userData->snData;
+ 	}
+ 	// Formatting elements are splittable only while the chain up to the
+ 	// p-wrapper consists of splittable elements
+ 	if ( ( $parentData->isPWrapper || $parentData->isSplittable )
+ 		&& isset( self::$formattingElements[$elementName] )
+ 	) {
+ 		$elementData->isSplittable = true;
+ 	}
+ 	// Propagate the nearest p-wrapper down to the new element
+ 	if ( $parentData->isPWrapper ) {
+ 		$elementData->ancestorPNode = $parent;
+ 	} elseif ( $parentData->ancestorPNode ) {
+ 		$elementData->ancestorPNode = $parentData->ancestorPNode;
+ 	}
+ 	// Propagate the wrap base (the node that owns the p-wrappers)
+ 	if ( $parentData->wrapBaseNode ) {
+ 		$elementData->wrapBaseNode = $parentData->wrapBaseNode;
+ 	} elseif ( $parentData->needsPWrapping ) {
+ 		$elementData->wrapBaseNode = $parent;
+ 	}
+ 	if ( $elementName === 'body'
+ 		|| $elementName === 'blockquote'
+ 		|| $elementName === 'html'
+ 	) {
+ 		$elementData->needsPWrapping = true;
+ 	}
+ }
+
+ /**
+ * Clone nodes in a stack range and return the new parent
+ *
+ * The range runs from $parentNode up to (but not including) its ancestor
+ * p-wrapper if it has one, otherwise its wrap base.
+ *
+ * @param SerializerNode $parentNode
+ * @param bool $inline If true, the clones are placed under a newly created
+ *   p-wrapper; if false, directly under the wrap base
+ * @param int $pos The source position
+ * @return SerializerNode The innermost clone, or the new p-wrapper / wrap
+ *   base if the range was empty
+ * @throws \Exception If the root node is reached before the end of the
+ *   clone range
+ */
+ private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
+ 	$parentData = $parentNode->snData;
+ 	$wrapBase = $parentData->wrapBaseNode;
+ 	$pWrap = $parentData->ancestorPNode;
+ 	$cloneEnd = $pWrap ?: $wrapBase;
+
+ 	$node = $parentNode;
+ 	$root = $this->serializer->getRootNode();
+ 	$nodes = [];
+ 	$removableNodes = [];
+ 	while ( $node !== $cloneEnd ) {
+ 		$nextParent = $this->serializer->getParentNode( $node );
+ 		if ( $nextParent === $root ) {
+ 			throw new \Exception( 'Did not find end of clone range' );
+ 		}
+ 		$nodes[] = $node;
+ 		if ( $node->snData->nonblankNodeCount === 0 ) {
+ 			// Nothing non-blank was inserted under this node, so the
+ 			// original can be removed once it has been cloned
+ 			$removableNodes[] = $node;
+ 			$nextParent->snData->nonblankNodeCount--;
+ 		}
+ 		$node = $nextParent;
+ 	}
+
+ 	if ( $inline ) {
+ 		$pWrap = $this->insertPWrapper( $wrapBase, $pos );
+ 		$node = $pWrap;
+ 	} else {
+ 		if ( $pWrap ) {
+ 			// End the p-wrap which was open, cancel the diversion
+ 			$wrapBase->snData->childPElement = null;
+ 		}
+ 		$pWrap = null;
+ 		$node = $wrapBase;
+ 	}
+
+ 	// Re-create the collected nodes, outermost first, under the new parent
+ 	for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
+ 		$oldNode = $nodes[$i];
+ 		$oldData = $oldNode->snData;
+ 		$nodeParent = $node;
+ 		$element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
+ 		$this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
+ 			$element, false, $pos, 0 );
+ 		// Later insertions aimed at the old node are redirected to the clone
+ 		$oldData->currentCloneElement = $element;
+
+ 		$newNode = $element->userData;
+ 		$newData = $newNode->snData = new RemexMungerData;
+ 		if ( $pWrap ) {
+ 			$newData->ancestorPNode = $pWrap;
+ 		}
+ 		$newData->isSplittable = true;
+ 		$newData->wrapBaseNode = $wrapBase;
+ 		$newData->isPWrapper = $oldData->isPWrapper;
+
+ 		$nodeParent->snData->nonblankNodeCount++;
+
+ 		$node = $newNode;
+ 	}
+ 	foreach ( $removableNodes as $rNode ) {
+ 		// removeNode() takes an Element, so wrap the node in a fake one
+ 		$fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs );
+ 		$fakeElement->userData = $rNode;
+ 		$this->serializer->removeNode( $fakeElement, $pos );
+ 	}
+ 	return $node;
+ }
+
+ /**
+ * Find the ancestor of $node which is a child of a p-wrapper, and
+ * reparent that node so that it is placed after the end of the p-wrapper
+ *
+ * Does nothing if the p-wrapper is not the last child of its parent.
+ *
+ * @param SerializerNode $node A node whose snData->ancestorPNode is set
+ * @param int $sourceStart Source position passed to the reparenting insert
+ */
+ private function disablePWrapper( SerializerNode $node, $sourceStart ) {
+ $nodeData = $node->snData;
+ $pWrapNode = $nodeData->ancestorPNode;
+ $newParent = $this->serializer->getParentNode( $pWrapNode );
+ if ( $pWrapNode !== $this->serializer->getLastChild( $newParent ) ) {
+ // Fostering or something? Abort!
+ return;
+ }
+
+ // Walk up from $node, clearing ancestorPNode on every node visited, until
+ // the direct child of the p-wrapper is reached; that child is $victim
+ $nextParent = $node;
+ do {
+ $victim = $nextParent;
+ $victim->snData->ancestorPNode = null;
+ $nextParent = $this->serializer->getParentNode( $victim );
+ } while ( $nextParent !== $pWrapNode );
+
+ // Make a fake Element to use in a reparenting operation
+ $victimElement = new Element( $victim->namespace, $victim->name, $victim->attrs );
+ $victimElement->userData = $victim;
+
+ // Reparent
+ $this->serializer->insertElement( TreeBuilder::UNDER, $newParent, $victimElement,
+ false, $sourceStart, 0 );
+
+ // Decrement nonblank node count
+ $pWrapNode->snData->nonblankNodeCount--;
+
+ // Cancel the diversion so that no more elements are inserted under this p-wrap
+ $newParent->snData->childPElement = null;
+ }
+
+ /**
+ * Forward an end tag event to the serializer unchanged.
+ *
+ * @param Element $element
+ * @param int $sourceStart
+ * @param int $sourceLength
+ */
+ public function endTag( Element $element, $sourceStart, $sourceLength ) {
+ $this->serializer->endTag( $element, $sourceStart, $sourceLength );
+ }
+
+ /**
+ * Forward a doctype event to the serializer unchanged.
+ *
+ * @param string $name
+ * @param string $public
+ * @param string $system
+ * @param int $quirks
+ * @param int $sourceStart
+ * @param int $sourceLength
+ */
+ public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
+ $this->serializer->doctype( $name, $public, $system, $quirks,
+ $sourceStart, $sourceLength );
+ }
+
+ /**
+ * Insert a comment. The reference node is resolved through
+ * getParentForInsert(), so clone links and open p-wrappers are followed,
+ * but no p-wrapper is created and no tag stack split is done.
+ *
+ * @param int $preposition One of the TreeBuilder preposition constants
+ * @param Element|null $refElement The reference element
+ * @param string $text The comment text
+ * @param int $sourceStart Passed through to the serializer
+ * @param int $sourceLength Passed through to the serializer
+ */
+ public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
+ 	// Only the reference node is needed here, not the parent
+ 	list( , $refNode ) = $this->getParentForInsert( $preposition, $refElement );
+ 	$this->serializer->comment( $preposition, $refNode, $text,
+ 		$sourceStart, $sourceLength );
+ }
+
+ /**
+ * Forward a parse error to the serializer unchanged.
+ *
+ * @param string $text
+ * @param int $pos
+ */
+ public function error( $text, $pos ) {
+ $this->serializer->error( $text, $pos );
+ }
+
+ /**
+ * Forward an attribute merge to the serializer unchanged.
+ *
+ * @param Element $element
+ * @param Attributes $attrs
+ * @param int $sourceStart
+ */
+ public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
+ $this->serializer->mergeAttributes( $element, $attrs, $sourceStart );
+ }
+
+ /**
+ * Forward a node removal to the serializer unchanged.
+ *
+ * @param Element $element
+ * @param int $sourceStart
+ */
+ public function removeNode( Element $element, $sourceStart ) {
+ $this->serializer->removeNode( $element, $sourceStart );
+ }
+
+ /**
+ * Move all children of $element under $newParent, and insert $newParent
+ * as the sole child of $element.
+ *
+ * $newParent is inserted through this class's own insertElement(), so the
+ * p-wrapping rules apply to it; the children are then moved by editing the
+ * serializer nodes directly.
+ *
+ * @param Element $element The element whose children are moved
+ * @param Element $newParent The element that receives the children
+ * @param int $sourceStart Source position passed to insertElement()
+ */
+ public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
+ $self = $element->userData;
+ // Detach the children before inserting, so $newParent becomes the only child
+ $children = $self->children;
+ $self->children = [];
+ $this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
+ $newParentNode = $newParent->userData;
+ $newParentId = $newParentNode->id;
+ foreach ( $children as $child ) {
+ // Only object children carry a parentId; other entries are presumably
+ // serialized text and are left untouched (TODO confirm)
+ if ( is_object( $child ) ) {
+ $child->parentId = $newParentId;
+ }
+ }
+ $newParentNode->children = $children;
+ }
+}
--- /dev/null
+<?php
+
+class RemexDriverTest extends MediaWikiTestCase {
+ /**
+ * Test cases for testTidy(). Each entry is a list of
+ * [ description, input HTML, expected tidied HTML ].
+ *
+ * @var array[]
+ */
+ static private $remexTidyTestData = [
+ // Tests from Html5Depurate
+ [
+ 'Empty string',
+ "",
+ ""
+ ],
+ [
+ 'Simple p-wrap',
+ "x",
+ "<p>x</p>"
+ ],
+ [
+ 'No p-wrap of blank node',
+ " ",
+ " "
+ ],
+ [
+ 'p-wrap terminated by div',
+ "x<div></div>",
+ "<p>x</p><div></div>"
+ ],
+ [
+ 'p-wrap not terminated by span',
+ "x<span></span>",
+ "<p>x<span></span></p>"
+ ],
+ [
+ 'An element is non-blank and so gets p-wrapped',
+ "<span></span>",
+ "<p><span></span></p>"
+ ],
+ [
+ 'The blank flag is set after a block-level element',
+ "<div></div> ",
+ "<div></div> "
+ ],
+ [
+ 'Blank detection between two block-level elements',
+ "<div></div> <div></div>",
+ "<div></div> <div></div>"
+ ],
+ [
+ 'But p-wrapping of non-blank content works after an element',
+ "<div></div>x",
+ "<div></div><p>x</p>"
+ ],
+ [
+ 'p-wrapping between two block-level elements',
+ "<div></div>x<div></div>",
+ "<div></div><p>x</p><div></div>"
+ ],
+ [
+ 'p-wrap inside blockquote',
+ "<blockquote>x</blockquote>",
+ "<blockquote><p>x</p></blockquote>"
+ ],
+ [
+ 'A comment is blank for p-wrapping purposes',
+ "<!-- x -->",
+ "<!-- x -->"
+ ],
+ [
+ 'A comment is blank even when a p-wrap was opened by a text node',
+ " <!-- x -->",
+ " <!-- x -->"
+ ],
+ [
+ 'A comment does not open a p-wrap',
+ "<!-- x -->x",
+ "<!-- x --><p>x</p>"
+ ],
+ [
+ 'A comment does not close a p-wrap',
+ "x<!-- x -->",
+ "<p>x<!-- x --></p>"
+ ],
+ [
+ 'Empty li',
+ "<ul><li></li></ul>",
+ "<ul><li class=\"mw-empty-elt\"></li></ul>"
+ ],
+ [
+ 'li with element',
+ "<ul><li><span></span></li></ul>",
+ "<ul><li><span></span></li></ul>"
+ ],
+ [
+ 'li with text',
+ "<ul><li>x</li></ul>",
+ "<ul><li>x</li></ul>"
+ ],
+ [
+ 'Empty tr',
+ "<table><tbody><tr></tr></tbody></table>",
+ "<table><tbody><tr class=\"mw-empty-elt\"></tr></tbody></table>"
+ ],
+ [
+ 'Empty p',
+ "<p>\n</p>",
+ "<p class=\"mw-empty-elt\">\n</p>"
+ ],
+ [
+ 'No p-wrapping of an inline element which contains a block element (T150317)',
+ "<small><div>x</div></small>",
+ "<small><div>x</div></small>"
+ ],
+ [
+ 'p-wrapping of an inline element which contains an inline element',
+ "<small><b>x</b></small>",
+ "<p><small><b>x</b></small></p>"
+ ],
+ [
+ 'p-wrapping is enabled in a blockquote in an inline element',
+ "<small><blockquote>x</blockquote></small>",
+ "<small><blockquote><p>x</p></blockquote></small>"
+ ],
+ [
+ 'All bare text should be p-wrapped even when surrounded by block tags',
+ "<small><blockquote>x</blockquote></small>y<div></div>z",
+ "<small><blockquote><p>x</p></blockquote></small><p>y</p><div></div><p>z</p>"
+ ],
+ [
+ 'Split tag stack 1',
+ "<small>x<div>y</div>z</small>",
+ "<p><small>x</small></p><small><div>y</div></small><p><small>z</small></p>"
+ ],
+ [
+ 'Split tag stack 2',
+ "<small><div>y</div>z</small>",
+ "<small><div>y</div></small><p><small>z</small></p>"
+ ],
+ [
+ 'Split tag stack 3',
+ "<small>x<div>y</div></small>",
+ "<p><small>x</small></p><small><div>y</div></small>"
+ ],
+ [
+ 'Split tag stack 4 (modified to use splittable tag)',
+ "a<code>b<i>c<div>d</div></i>e</code>",
+ "<p>a<code>b<i>c</i></code></p><code><i><div>d</div></i></code><p><code>e</code></p>"
+ ],
+ [
+ "Split tag stack regression check 1",
+ "x<span><div>y</div></span>",
+ "<p>x</p><span><div>y</div></span>"
+ ],
+ [
+ "Split tag stack regression check 2 (modified to use splittable tag)",
+ "a<code><i><div>d</div></i>e</code>",
+ "<p>a</p><code><i><div>d</div></i></code><p><code>e</code></p>"
+ ],
+ // Simple tests from pwrap.js
+ [
+ 'Simple pwrap test 1',
+ 'a',
+ '<p>a</p>'
+ ],
+ [
+ '<span> is not a splittable tag, but gets p-wrapped in simple wrapping scenarios',
+ '<span>a</span>',
+ '<p><span>a</span></p>'
+ ],
+ [
+ 'Simple pwrap test 3',
+ 'x <div>a</div> <div>b</div> y',
+ '<p>x </p><div>a</div> <div>b</div><p> y</p>'
+ ],
+ [
+ 'Simple pwrap test 4',
+ 'x<!--c--> <div>a</div> <div>b</div> <!--c-->y',
+ '<p>x<!--c--> </p><div>a</div> <div>b</div> <!--c--><p>y</p>'
+ ],
+ // Complex tests from pwrap.js
+ [
+ 'Complex pwrap test 1',
+ '<i>x<div>a</div>y</i>',
+ '<p><i>x</i></p><i><div>a</div></i><p><i>y</i></p>'
+ ],
+ [
+ 'Complex pwrap test 2',
+ 'a<small>b</small><i>c<div>d</div>e</i>f',
+ '<p>a<small>b</small><i>c</i></p><i><div>d</div></i><p><i>e</i>f</p>'
+ ],
+ [
+ 'Complex pwrap test 3',
+ 'a<small>b<i>c<div>d</div></i>e</small>',
+ '<p>a<small>b<i>c</i></small></p><small><i><div>d</div></i></small><p><small>e</small></p>'
+ ],
+ [
+ 'Complex pwrap test 4',
+ 'x<small><div>y</div></small>',
+ '<p>x</p><small><div>y</div></small>'
+ ],
+ [
+ 'Complex pwrap test 5',
+ 'a<small><i><div>d</div></i>e</small>',
+ '<p>a</p><small><i><div>d</div></i></small><p><small>e</small></p>'
+ ],
+ [
+ 'Complex pwrap test 6',
+ '<i>a<div>b</div>c<b>d<div>e</div>f</b>g</i>',
+ // @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
+ // PHP 5 does not allow concatenation in initialisation of a class static variable
+ '<p><i>a</i></p><i><div>b</div></i><p><i>c<b>d</b></i></p><i><b><div>e</div></b></i><p><i><b>f</b>g</i></p>'
+ // @codingStandardsIgnoreEnd
+ ],
+ /* FIXME the second <b> causes a stack split which clones the <i> even
+ * though no <p> is actually generated
+ [
+ 'Complex pwrap test 7',
+ '<i><b><font><div>x</div></font></b><div>y</div><b><font><div>z</div></font></b></i>',
+ '<i><b><font><div>x</div></font></b><div>y</div><b><font><div>z</div></font></b></i>'
+ ],
+ */
+ // New local tests
+ [
+ 'Blank text node after block end',
+ '<small>x<div>y</div> <b>z</b></small>',
+ '<p><small>x</small></p><small><div>y</div></small><p><small> <b>z</b></small></p>'
+ ],
+ [
+ 'Text node fostering (FIXME: wrap missing)',
+ '<table>x</table>',
+ 'x<table></table>'
+ ],
+ [
+ 'Blockquote fostering',
+ '<table><blockquote>x</blockquote></table>',
+ '<blockquote><p>x</p></blockquote><table></table>'
+ ],
+ [
+ 'Block element fostering',
+ '<table><div>x',
+ '<div>x</div><table></table>'
+ ],
+ [
+ 'Formatting element fostering (FIXME: wrap missing)',
+ '<table><b>x',
+ '<b>x</b><table></table>'
+ ],
+ [
+ 'AAA clone of p-wrapped element (FIXME: empty b)',
+ '<b>x<p>y</b>z</p>',
+ '<p><b>x</b></p><b></b><p><b>y</b>z</p>',
+ ],
+ [
+ 'AAA with fostering (FIXME: wrap missing)',
+ '<table><b>1<p>2</b>3</p>',
+ '<b>1</b><p><b>2</b>3</p><table></table>'
+ ],
+ ];
+
+ /**
+ * Data provider for testTidy(). Static, because it only reads static
+ * state and newer PHPUnit versions require static data providers.
+ *
+ * @return array[] List of [ description, input, expected output ]
+ */
+ public static function provider() {
+ 	return self::$remexTidyTestData;
+ }
+
+ /**
+ * Tidy each input with a default-configured RemexDriver and compare the
+ * result with the expected HTML.
+ *
+ * @dataProvider provider
+ * @covers MediaWiki\Tidy\RemexCompatFormatter
+ * @covers MediaWiki\Tidy\RemexCompatMunger
+ * @covers MediaWiki\Tidy\RemexDriver
+ * @covers MediaWiki\Tidy\RemexMungerData
+ *
+ * @param string $desc Description, used as the assertion message
+ * @param string $input HTML passed to tidy()
+ * @param string $expected Expected output of tidy()
+ */
+ public function testTidy( $desc, $input, $expected ) {
+ 	$driver = new MediaWiki\Tidy\RemexDriver( [] );
+ 	$actual = $driver->tidy( $input );
+ 	$this->assertEquals( $expected, $actual, $desc );
+ }
+
+ /**
+ * Data provider for testHtml5Lib(). Flattens html5lib-tests.json, which
+ * maps a file name to a list of tests, into [ "file:index", input ] pairs.
+ *
+ * @return array[] List of [ description, input ]
+ * @throws RuntimeException If the fixture cannot be read or does not
+ *   decode to an array
+ */
+ public static function html5libProvider() {
+ 	$path = __DIR__ . '/html5lib-tests.json';
+ 	$json = file_get_contents( $path );
+ 	if ( $json === false ) {
+ 		throw new RuntimeException( "Unable to read $path" );
+ 	}
+ 	$files = json_decode( $json, true );
+ 	if ( !is_array( $files ) ) {
+ 		// Fail loudly rather than iterating over null and running no tests
+ 		throw new RuntimeException( "Invalid JSON in $path" );
+ 	}
+
+ 	$tests = [];
+ 	foreach ( $files as $file => $fileTests ) {
+ 		foreach ( $fileTests as $i => $test ) {
+ 			$tests[] = [ "$file:$i", $test['data'] ];
+ 		}
+ 	}
+ 	return $tests;
+ }
+
+ /**
+ * This is a quick and dirty test to make sure none of the html5lib tests
+ * generate exceptions. We don't really know what the expected output is.
+ *
+ * @dataProvider html5libProvider
+ * @coversNothing
+ *
+ * @param string $desc Description, used as the assertion message
+ * @param string $input HTML passed to tidy()
+ */
+ public function testHtml5Lib( $desc, $input ) {
+ 	$r = new MediaWiki\Tidy\RemexDriver( [] );
+ 	// The output is deliberately discarded; only the absence of an
+ 	// exception is being tested
+ 	$r->tidy( $input );
+ 	$this->assertTrue( true, $desc );
+ }
+}