From 9341a00ed1b27edb86618cf7bbfaf8a9f720c124 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Fri, 17 Feb 2017 15:10:15 +1100 Subject: [PATCH] RemexHtml tidy driver with p-wrapping Pull in the RemexHtml library, which is an HTML 5 library I recently created. RemexCompatMunger mutates the event stream, inserting p elements where necessary, and occasionally taking even more invasive action such as reparenting and removing nodes maintained in Serializer's tree. RemexCompatFormatter produces a MediaWiki-style serialization which is relatively compatible with existing parser tests. It also does final empty element handling, including translating mw:p-wrap elements to p elements.

Tests are imported from both Html5Depurate and Subbu's pwrap.js. Depends-On: I864f31d9afdffdde49bfd39f07a0fb7f4df5c5d9 Change-Id: I900155b7dd199b0ae2a3b9cdb6db5136fc4f35a8 --- autoload.php | 4 + composer.json | 1 + includes/tidy/RemexCompatFormatter.php | 71 +++ includes/tidy/RemexCompatMunger.php | 468 ++++++++++++++++++ includes/tidy/RemexDriver.php | 57 +++ includes/tidy/RemexMungerData.php | 78 +++ .../phpunit/includes/tidy/RemexDriverTest.php | 297 +++++++++++ 7 files changed, 976 insertions(+) create mode 100644 includes/tidy/RemexCompatFormatter.php create mode 100644 includes/tidy/RemexCompatMunger.php create mode 100644 includes/tidy/RemexDriver.php create mode 100644 includes/tidy/RemexMungerData.php create mode 100644 tests/phpunit/includes/tidy/RemexDriverTest.php diff --git a/autoload.php b/autoload.php index 0e719ae5b5..5cf9b2e494 100644 --- a/autoload.php +++ b/autoload.php @@ -914,6 +914,10 @@ $wgAutoloadLocalClasses = [ 'MediaWiki\\Tidy\\RaggettInternalHHVM' => __DIR__ . '/includes/tidy/RaggettInternalHHVM.php', 'MediaWiki\\Tidy\\RaggettInternalPHP' => __DIR__ . '/includes/tidy/RaggettInternalPHP.php', 'MediaWiki\\Tidy\\RaggettWrapper' => __DIR__ . '/includes/tidy/RaggettWrapper.php', + 'MediaWiki\\Tidy\\RemexCompatFormatter' => __DIR__ . '/includes/tidy/RemexCompatFormatter.php', + 'MediaWiki\\Tidy\\RemexCompatMunger' => __DIR__ . '/includes/tidy/RemexCompatMunger.php', + 'MediaWiki\\Tidy\\RemexDriver' => __DIR__ . '/includes/tidy/RemexDriver.php', + 'MediaWiki\\Tidy\\RemexMungerData' => __DIR__ . '/includes/tidy/RemexMungerData.php', 'MediaWiki\\Tidy\\TidyDriverBase' => __DIR__ . '/includes/tidy/TidyDriverBase.php', 'MediaWiki\\Widget\\ComplexNamespaceInputWidget' => __DIR__ . '/includes/widget/ComplexNamespaceInputWidget.php', 'MediaWiki\\Widget\\ComplexTitleInputWidget' => __DIR__ . 
'/includes/widget/ComplexTitleInputWidget.php', diff --git a/composer.json b/composer.json index d41492ef6b..17abc59c69 100644 --- a/composer.json +++ b/composer.json @@ -38,6 +38,7 @@ "wikimedia/ip-set": "1.1.0", "wikimedia/php-session-serializer": "1.0.4", "wikimedia/relpath": "1.0.3", + "wikimedia/remex-html": "1.0.0", "wikimedia/running-stat": "1.1.0", "wikimedia/scoped-callback": "1.0.0", "wikimedia/utfnormal": "1.1.0", diff --git a/includes/tidy/RemexCompatFormatter.php b/includes/tidy/RemexCompatFormatter.php new file mode 100644 index 0000000000..3dc727bc89 --- /dev/null +++ b/includes/tidy/RemexCompatFormatter.php @@ -0,0 +1,71 @@ + true, + 'p' => true, + 'tr' => true, + ]; + + public function __construct( $options = [] ) { + parent::__construct( $options ); + $this->attributeEscapes["\xc2\xa0"] = ' '; + unset( $this->attributeEscapes["&"] ); + $this->textEscapes["\xc2\xa0"] = ' '; + unset( $this->textEscapes["&"] ); + } + + public function startDocument( $fragmentNamespace, $fragmentName ) { + return ''; + } + + public function element( SerializerNode $parent, SerializerNode $node, $contents ) { + $data = $node->snData; + if ( $data && $data->isPWrapper ) { + if ( $data->nonblankNodeCount ) { + return "

$contents

"; + } else { + return $contents; + } + } + + $name = $node->name; + $attrs = $node->attrs; + if ( isset( self::$markedEmptyElements[$name] ) && $attrs->count() === 0 ) { + if ( strspn( $contents, "\t\n\f\r " ) === strlen( $contents ) ) { + return "<{$name} class=\"mw-empty-elt\">$contents"; + } + } + + $s = "<$name"; + foreach ( $attrs->getValues() as $attrName => $attrValue ) { + $encValue = strtr( $attrValue, $this->attributeEscapes ); + $s .= " $attrName=\"$encValue\""; + } + if ( $node->namespace === HTMLData::NS_HTML && isset( $this->voidElements[$name] ) ) { + $s .= ' />'; + return $s; + } + + $s .= '>'; + if ( $node->namespace === HTMLData::NS_HTML + && isset( $contents[0] ) && $contents[0] === "\n" + && isset( $this->prefixLfElements[$name] ) + ) { + $s .= "\n$contents"; + } else { + $s .= "$contents"; + } + return $s; + } +} diff --git a/includes/tidy/RemexCompatMunger.php b/includes/tidy/RemexCompatMunger.php new file mode 100644 index 0000000000..d5f5c281c2 --- /dev/null +++ b/includes/tidy/RemexCompatMunger.php @@ -0,0 +1,468 @@ + true, + "abbr" => true, + "acronym" => true, + "applet" => true, + "b" => true, + "basefont" => true, + "bdo" => true, + "big" => true, + "br" => true, + "button" => true, + "cite" => true, + "code" => true, + "dfn" => true, + "em" => true, + "font" => true, + "i" => true, + "iframe" => true, + "img" => true, + "input" => true, + "kbd" => true, + "label" => true, + "legend" => true, + "map" => true, + "object" => true, + "param" => true, + "q" => true, + "rb" => true, + "rbc" => true, + "rp" => true, + "rt" => true, + "rtc" => true, + "ruby" => true, + "s" => true, + "samp" => true, + "select" => true, + "small" => true, + "span" => true, + "strike" => true, + "strong" => true, + "sub" => true, + "sup" => true, + "textarea" => true, + "tt" => true, + "u" => true, + "var" => true, + ]; + + private static $formattingElements = [ + 'a' => true, + 'b' => true, + 'big' => true, + 'code' => true, + 'em' => true, + 'font' => true, + 
'i' => true, + 'nobr' => true, + 's' => true, + 'small' => true, + 'strike' => true, + 'strong' => true, + 'tt' => true, + 'u' => true, + ]; + + /** + * Constructor + * + * @param Serializer $serializer + */ + public function __construct( Serializer $serializer ) { + $this->serializer = $serializer; + } + + public function startDocument( $fragmentNamespace, $fragmentName ) { + $this->serializer->startDocument( $fragmentNamespace, $fragmentName ); + $root = $this->serializer->getRootNode(); + $root->snData = new RemexMungerData; + $root->snData->needsPWrapping = true; + } + + public function endDocument( $pos ) { + $this->serializer->endDocument( $pos ); + } + + private function getParentForInsert( $preposition, $refElement ) { + if ( $preposition === TreeBuilder::ROOT ) { + return [ $this->serializer->getRootNode(), null ]; + } elseif ( $preposition === TreeBuilder::BEFORE ) { + $refNode = $refElement->userData; + return [ $this->serializer->getParentNode( $refNode ), $refNode ]; + } else { + $refNode = $refElement->userData; + $refData = $refNode->snData; + if ( $refData->currentCloneElement ) { + // Follow a chain of clone links if necessary + $origRefData = $refData; + while ( $refData->currentCloneElement ) { + $refElement = $refData->currentCloneElement; + $refNode = $refElement->userData; + $refData = $refNode->snData; + } + // Cache the end of the chain in the requested element + $origRefData->currentCloneElement = $refElement; + } elseif ( $refData->childPElement ) { + $refElement = $refData->childPElement; + $refNode = $refElement->userData; + } + return [ $refNode, $refNode ]; + } + } + + /** + * Insert a p-wrapper + * + * @param SerializerNode $parent + * @param integer $sourceStart + * @return SerializerNode + */ + private function insertPWrapper( SerializerNode $parent, $sourceStart ) { + $pWrap = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes ); + $this->serializer->insertElement( TreeBuilder::UNDER, $parent, $pWrap, false, + 
$sourceStart, 0 ); + $data = new RemexMungerData; + $data->isPWrapper = true; + $data->wrapBaseNode = $parent; + $pWrap->userData->snData = $data; + $parent->snData->childPElement = $pWrap; + return $pWrap->userData; + } + + public function characters( $preposition, $refElement, $text, $start, $length, + $sourceStart, $sourceLength + ) { + $isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length; + + list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement ); + $parentData = $parent->snData; + + if ( $preposition === TreeBuilder::UNDER ) { + if ( $parentData->needsPWrapping && !$isBlank ) { + // Add a p-wrapper for bare text under body/blockquote + $refNode = $this->insertPWrapper( $refNode, $sourceStart ); + $parent = $refNode; + $parentData = $parent->snData; + } elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) { + // The parent is splittable and in block mode, so split the tag stack + $refNode = $this->splitTagStack( $refNode, true, $sourceStart ); + $parent = $refNode; + $parentData = $parent->snData; + } + } + + if ( !$isBlank ) { + // Non-whitespace characters detected + $parentData->nonblankNodeCount++; + } + $this->serializer->characters( $preposition, $refNode, $text, $start, + $length, $sourceStart, $sourceLength ); + } + + /** + * Insert or reparent an element. Create p-wrappers or split the tag stack + * as necessary. + * + * Consider the following insertion locations. 
The parent may be: + * + * - A: A body or blockquote (!!needsPWrapping) + * - B: A p-wrapper (!!isPWrapper) + * - C: A descendant of a p-wrapper (!!ancestorPNode) + * - CS: With splittable formatting elements in the stack region up to + * the p-wrapper + * - CU: With one or more unsplittable elements in the stack region up + * to the p-wrapper + * - D: Not a descendant of a p-wrapper (!ancestorPNode) + * - DS: With splittable formatting elements in the stack region up to + * the body or blockquote + * - DU: With one or more unsplittable elements in the stack region up + * to the body or blockquote + * + * And consider that we may insert two types of element: + * - b: block + * - i: inline + * + * We handle the insertion as follows: + * + * - A/i: Create a p-wrapper, insert under it + * - A/b: Insert as normal + * - B/i: Insert as normal + * - B/b: Close the p-wrapper, insert under the body/blockquote (wrap + * base) instead + * - C/i: Insert as normal + * - CS/b: Split the tag stack, insert the block under cloned formatting + * elements which have the wrap base (the parent of the p-wrap) as + * their ultimate parent. + * - CU/b: Disable the p-wrap, by reparenting the currently open child + * of the p-wrap under the p-wrap's parent. Then insert the block as + * normal. + * - D/b: Insert as normal + * - DS/i: Split the tag stack, creating a new p-wrapper as the ultimate + * parent of the formatting elements thus cloned. The parent of the + * p-wrapper is the body or blockquote. + * - DU/i: Insert as normal + * + * FIXME: fostering ($preposition == BEFORE) is mostly done by inserting as + * normal, the full algorithm is not followed. 
+ * + * @param integer $preposition + * @param Element|SerializerNode|null $refElement + * @param Element $element + * @param bool $void + * @param integer $sourceStart + * @param integer $sourceLength + */ + + public function insertElement( $preposition, $refElement, Element $element, $void, + $sourceStart, $sourceLength + ) { + list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement ); + $parentData = $parent->snData; + $parentNs = $parent->namespace; + $parentName = $parent->name; + $elementName = $element->htmlName; + + $inline = isset( self::$onlyInlineElements[$elementName] ); + $under = $preposition === TreeBuilder::UNDER; + + if ( $under && $parentData->isPWrapper && !$inline ) { + // [B/b] The element is non-inline and the parent is a p-wrapper, + // close the parent and insert into its parent instead + $newParent = $this->serializer->getParentNode( $parent ); + $parent = $newParent; + $parentData = $parent->snData; + $parentData->childPElement = null; + $newRef = $refElement->userData; + // FIXME cannot call endTag() since we don't have an Element + } elseif ( $under && $parentData->isSplittable + && (bool)$parentData->ancestorPNode !== $inline + ) { + // [CS/b, DS/i] The parent is splittable and the current element is + // inline in block context, or if the current element is a block + // under a p-wrapper, split the tag stack. 
+ $newRef = $this->splitTagStack( $newRef, $inline, $sourceStart ); + $parent = $newRef; + $parentData = $parent->snData; + } elseif ( $under && $parentData->needsPWrapping && $inline ) { + // [A/i] If the element is inline and we are in body/blockquote, + // we need to create a p-wrapper + $newRef = $this->insertPWrapper( $newRef, $sourceStart ); + $parent = $newRef; + $parentData = $parent->snData; + } elseif ( $parentData->ancestorPNode && !$inline ) { + // [CU/b] If the element is non-inline and (despite attempting to + // split above) there is still an ancestor p-wrap, disable that + // p-wrap + $this->disablePWrapper( $parent, $sourceStart ); + } + // else [A/b, B/i, C/i, D/b, DU/i] insert as normal + + // An element with element children is a non-blank element + $parentData->nonblankNodeCount++; + + // Insert the element downstream and so initialise its userData + $this->serializer->insertElement( $preposition, $newRef, + $element, $void, $sourceStart, $sourceLength ); + + // Initialise snData + if ( !$element->userData->snData ) { + $elementData = $element->userData->snData = new RemexMungerData; + } else { + $elementData = $element->userData->snData; + } + if ( ( $parentData->isPWrapper || $parentData->isSplittable ) + && isset( self::$formattingElements[$elementName] ) + ) { + $elementData->isSplittable = true; + } + if ( $parentData->isPWrapper ) { + $elementData->ancestorPNode = $parent; + } elseif ( $parentData->ancestorPNode ) { + $elementData->ancestorPNode = $parentData->ancestorPNode; + } + if ( $parentData->wrapBaseNode ) { + $elementData->wrapBaseNode = $parentData->wrapBaseNode; + } elseif ( $parentData->needsPWrapping ) { + $elementData->wrapBaseNode = $parent; + } + if ( $elementName === 'body' + || $elementName === 'blockquote' + || $elementName === 'html' + ) { + $elementData->needsPWrapping = true; + } + } + + /** + * Clone nodes in a stack range and return the new parent + * + * @param SerializerNode $parentNode + * @param bool $inline + * 
@param integer $pos The source position + * @return SerializerNode + */ + private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) { + $parentData = $parentNode->snData; + $wrapBase = $parentData->wrapBaseNode; + $pWrap = $parentData->ancestorPNode; + if ( !$pWrap ) { + $cloneEnd = $wrapBase; + } else { + $cloneEnd = $parentData->ancestorPNode; + } + + $serializer = $this->serializer; + $node = $parentNode; + $root = $serializer->getRootNode(); + $nodes = []; + $removableNodes = []; + $haveContent = false; + while ( $node !== $cloneEnd ) { + $nextParent = $serializer->getParentNode( $node ); + if ( $nextParent === $root ) { + throw new \Exception( 'Did not find end of clone range' ); + } + $nodes[] = $node; + if ( $node->snData->nonblankNodeCount === 0 ) { + $removableNodes[] = $node; + $nextParent->snData->nonblankNodeCount--; + } + $node = $nextParent; + } + + if ( $inline ) { + $pWrap = $this->insertPWrapper( $wrapBase, $pos ); + $node = $pWrap; + } else { + if ( $pWrap ) { + // End the p-wrap which was open, cancel the diversion + $wrapBase->snData->childPElement = null; + } + $pWrap = null; + $node = $wrapBase; + } + + for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) { + $oldNode = $nodes[$i]; + $oldData = $oldNode->snData; + $nodeParent = $node; + $element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs ); + $this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent, + $element, false, $pos, 0 ); + $oldData->currentCloneElement = $element; + + $newNode = $element->userData; + $newData = $newNode->snData = new RemexMungerData; + if ( $pWrap ) { + $newData->ancestorPNode = $pWrap; + } + $newData->isSplittable = true; + $newData->wrapBaseNode = $wrapBase; + $newData->isPWrapper = $oldData->isPWrapper; + + $nodeParent->snData->nonblankNodeCount++; + + $node = $newNode; + } + foreach ( $removableNodes as $rNode ) { + $fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs ); + $fakeElement->userData = 
$rNode; + $this->serializer->removeNode( $fakeElement, $pos ); + } + return $node; + } + + /** + * Find the ancestor of $node which is a child of a p-wrapper, and + * reparent that node so that it is placed after the end of the p-wrapper + */ + private function disablePWrapper( SerializerNode $node, $sourceStart ) { + $nodeData = $node->snData; + $pWrapNode = $nodeData->ancestorPNode; + $newParent = $this->serializer->getParentNode( $pWrapNode ); + if ( $pWrapNode !== $this->serializer->getLastChild( $newParent ) ) { + // Fostering or something? Abort! + return; + } + + $nextParent = $node; + do { + $victim = $nextParent; + $victim->snData->ancestorPNode = null; + $nextParent = $this->serializer->getParentNode( $victim ); + } while ( $nextParent !== $pWrapNode ); + + // Make a fake Element to use in a reparenting operation + $victimElement = new Element( $victim->namespace, $victim->name, $victim->attrs ); + $victimElement->userData = $victim; + + // Reparent + $this->serializer->insertElement( TreeBuilder::UNDER, $newParent, $victimElement, + false, $sourceStart, 0 ); + + // Decrement nonblank node count + $pWrapNode->snData->nonblankNodeCount--; + + // Cancel the diversion so that no more elements are inserted under this p-wrap + $newParent->snData->childPElement = null; + } + + public function endTag( Element $element, $sourceStart, $sourceLength ) { + $this->serializer->endTag( $element, $sourceStart, $sourceLength ); + } + + public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) { + $this->serializer->doctype( $name, $public, $system, $quirks, + $sourceStart, $sourceLength ); + } + + public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) { + list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement ); + $this->serializer->comment( $preposition, $refNode, $text, + $sourceStart, $sourceLength ); + } + + public function error( $text, $pos ) { + $this->serializer->error( 
$text, $pos ); + } + + public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) { + $this->serializer->mergeAttributes( $element, $attrs, $sourceStart ); + } + + public function removeNode( Element $element, $sourceStart ) { + $this->serializer->removeNode( $element, $sourceStart ); + } + + public function reparentChildren( Element $element, Element $newParent, $sourceStart ) { + $self = $element->userData; + $children = $self->children; + $self->children = []; + $this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 ); + $newParentNode = $newParent->userData; + $newParentId = $newParentNode->id; + foreach ( $children as $child ) { + if ( is_object( $child ) ) { + $child->parentId = $newParentId; + } + } + $newParentNode->children = $children; + } +} diff --git a/includes/tidy/RemexDriver.php b/includes/tidy/RemexDriver.php new file mode 100644 index 0000000000..e02af88fd9 --- /dev/null +++ b/includes/tidy/RemexDriver.php @@ -0,0 +1,57 @@ + false, + 'pwrap' => true + ]; + $this->trace = $config['treeMutationTrace']; + $this->pwrap = $config['pwrap']; + parent::__construct( $config ); + } + + public function tidy( $text ) { + $formatter = new RemexCompatFormatter; + $serializer = new Serializer( $formatter ); + if ( $this->pwrap ) { + $munger = new RemexCompatMunger( $serializer ); + } else { + $munger = $serializer; + } + if ( $this->trace ) { + $tracer = new TreeMutationTracer( $munger, function ( $msg ) { + wfDebug( "RemexHtml: $msg" ); + } ); + } else { + $tracer = $munger; + } + $treeBuilder = new TreeBuilder( $tracer, [ + 'ignoreErrors' => true, + 'ignoreNulls' => true, + ] ); + $dispatcher = new Dispatcher( $treeBuilder ); + $tokenizer = new Tokenizer( $dispatcher, $text, [ + 'ignoreErrors' => true, + 'ignoreCharRefs' => true, + 'ignoreNulls' => true, + 'skipPreprocess' => true, + ] ); + $tokenizer->execute( [ + 'fragmentNamespace' => \RemexHtml\HTMLData::NS_HTML, + 'fragmentName' => 'body' + ] ); + 
return $serializer->getResult(); + } +} diff --git a/includes/tidy/RemexMungerData.php b/includes/tidy/RemexMungerData.php new file mode 100644 index 0000000000..d614a38183 --- /dev/null +++ b/includes/tidy/RemexMungerData.php @@ -0,0 +1,78 @@ +x

" + ], + [ + 'No p-wrap of blank node', + " ", + " " + ], + [ + 'p-wrap terminated by div', + "x
", + "

x

" + ], + [ + 'p-wrap not terminated by span', + "x", + "

x

" + ], + [ + 'An element is non-blank and so gets p-wrapped', + "", + "

" + ], + [ + 'The blank flag is set after a block-level element', + "
", + "
" + ], + [ + 'Blank detection between two block-level elements', + "
", + "
" + ], + [ + 'But p-wrapping of non-blank content works after an element', + "
x", + "

x

" + ], + [ + 'p-wrapping between two block-level elements', + "
x
", + "

x

" + ], + [ + 'p-wrap inside blockquote', + "
x
", + "

x

" + ], + [ + 'A comment is blank for p-wrapping purposes', + "", + "" + ], + [ + 'A comment is blank even when a p-wrap was opened by a text node', + " ", + " " + ], + [ + 'A comment does not open a p-wrap', + "x", + "

x

" + ], + [ + 'A comment does not close a p-wrap', + "x", + "

x

" + ], + [ + 'Empty li', + "
", + "
" + ], + [ + 'li with element', + "
", + "
" + ], + [ + 'li with text', + "
  • x
", + "
  • x
" + ], + [ + 'Empty tr', + "
", + "
" + ], + [ + 'Empty p', + "

\n

", + "

\n

" + ], + [ + 'No p-wrapping of an inline element which contains a block element (T150317)', + "
x
", + "
x
" + ], + [ + 'p-wrapping of an inline element which contains an inline element', + "x", + "

x

" + ], + [ + 'p-wrapping is enabled in a blockquote in an inline element', + "
x
", + "

x

" + ], + [ + 'All bare text should be p-wrapped even when surrounded by block tags', + "
x
y
z", + "

x

y

z

" + ], + [ + 'Split tag stack 1', + "x
y
z
", + "

x

y

z

" + ], + [ + 'Split tag stack 2', + "
y
z
", + "
y

z

" + ], + [ + 'Split tag stack 3', + "x
y
", + "

x

y
" + ], + [ + 'Split tag stack 4 (modified to use splittable tag)', + "abc
d
e
", + "

abc

d

e

" + ], + [ + "Split tag stack regression check 1", + "x
y
", + "

x

y
" + ], + [ + "Split tag stack regression check 2 (modified to use splittable tag)", + "a
d
e
", + "

a

d

e

" + ], + // Simple tests from pwrap.js + [ + 'Simple pwrap test 1', + 'a', + '

a

' + ], + [ + ' is not a splittable tag, but gets p-wrapped in simple wrapping scenarios', + 'a', + '

a

' + ], + [ + 'Simple pwrap test 3', + 'x
a
b
y', + '

x

a
b

y

' + ], + [ + 'Simple pwrap test 4', + 'x
a
b
y', + '

x

a
b

y

' + ], + // Complex tests from pwrap.js + [ + 'Complex pwrap test 1', + 'x
a
y
', + '

x

a

y

' + ], + [ + 'Complex pwrap test 2', + 'abc
d
e
f', + '

abc

d

ef

' + ], + [ + 'Complex pwrap test 3', + 'abc
d
e
', + '

abc

d

e

' + ], + [ + 'Complex pwrap test 4', + 'x
y
', + '

x

y
' + ], + [ + 'Complex pwrap test 5', + 'a
d
e
', + '

a

d

e

' + ], + [ + 'Complex pwrap test 6', + 'a
b
cd
e
f
g
', + // @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong + // PHP 5 does not allow concatenation in initialisation of a class static variable + '

a

b

cd

e

fg

' + // @codingStandardsIgnoreEnd + ], + /* FIXME the second causes a stack split which clones the even + * though no

is actually generated + [ + 'Complex pwrap test 7', + '

x
y
z
', + '
x
y
z
' + ], + */ + // New local tests + [ + 'Blank text node after block end', + 'x
y
z
', + '

x

y

z

' + ], + [ + 'Text node fostering (FIXME: wrap missing)', + 'x
', + 'x
' + ], + [ + 'Blockquote fostering', + '
x
', + '

x

' + ], + [ + 'Block element fostering', + '
x', + '
x
' + ], + [ + 'Formatting element fostering (FIXME: wrap missing)', + 'x', + 'x
' + ], + [ + 'AAA clone of p-wrapped element (FIXME: empty b)', + 'x

yz

', + '

x

yz

', + ], + [ + 'AAA with fostering (FIXME: wrap missing)', + '1

23

', + '1

23

' + ], + ]; + + public function provider() { + return self::$remexTidyTestData; + } + + /** + * @dataProvider provider + * @covers MediaWiki\Tidy\RemexCompatFormatter + * @covers MediaWiki\Tidy\RemexCompatMunger + * @covers MediaWiki\Tidy\RemexDriver + * @covers MediaWiki\Tidy\RemexMungerData + */ + public function testTidy( $desc, $input, $expected ) { + $r = new MediaWiki\Tidy\RemexDriver( [] ); + $result = $r->tidy( $input ); + $this->assertEquals( $expected, $result, $desc ); + } + + public function html5libProvider() { + $files = json_decode( file_get_contents( __DIR__ . '/html5lib-tests.json' ), true ); + $tests = []; + foreach ( $files as $file => $fileTests ) { + foreach ( $fileTests as $i => $test ) { + $tests[] = [ "$file:$i", $test['data'] ]; + } + } + return $tests; + } + + /** + * This is a quick and dirty test to make sure none of the html5lib tests + * generate exceptions. We don't really know what the expected output is. + * + * @dataProvider html5libProvider + * @coversNothing + */ + public function testHtml5Lib( $desc, $input ) { + $r = new MediaWiki\Tidy\RemexDriver( [] ); + $result = $r->tidy( $input ); + $this->assertTrue( true, $desc ); + } +} -- 2.20.1