From 9341a00ed1b27edb86618cf7bbfaf8a9f720c124 Mon Sep 17 00:00:00 2001
From: Tim Starling
Date: Fri, 17 Feb 2017 15:10:15 +1100
Subject: [PATCH] RemexHtml tidy driver with p-wrapping
Pull in the RemexHtml library, which is an HTML 5 library I recently
created.
RemexCompatMunger mutates the event stream, inserting <p>
elements where necessary, and occasionally taking even more invasive
action such as reparenting and removing nodes maintained in Serializer's
tree.
RemexCompatFormatter produces a MediaWiki-style serialization which is
relatively compatible with existing parser tests. It also does final
empty element handling, including translating <mw:p-wrap> to <p>.
Tests are imported from both Html5Depurate and Subbu's pwrap.js.
Depends-On: I864f31d9afdffdde49bfd39f07a0fb7f4df5c5d9
Change-Id: I900155b7dd199b0ae2a3b9cdb6db5136fc4f35a8
---
autoload.php | 4 +
composer.json | 1 +
includes/tidy/RemexCompatFormatter.php | 71 +++
includes/tidy/RemexCompatMunger.php | 468 ++++++++++++++++++
includes/tidy/RemexDriver.php | 57 +++
includes/tidy/RemexMungerData.php | 78 +++
.../phpunit/includes/tidy/RemexDriverTest.php | 297 +++++++++++
7 files changed, 976 insertions(+)
create mode 100644 includes/tidy/RemexCompatFormatter.php
create mode 100644 includes/tidy/RemexCompatMunger.php
create mode 100644 includes/tidy/RemexDriver.php
create mode 100644 includes/tidy/RemexMungerData.php
create mode 100644 tests/phpunit/includes/tidy/RemexDriverTest.php
diff --git a/autoload.php b/autoload.php
index 0e719ae5b5..5cf9b2e494 100644
--- a/autoload.php
+++ b/autoload.php
@@ -914,6 +914,10 @@ $wgAutoloadLocalClasses = [
'MediaWiki\\Tidy\\RaggettInternalHHVM' => __DIR__ . '/includes/tidy/RaggettInternalHHVM.php',
'MediaWiki\\Tidy\\RaggettInternalPHP' => __DIR__ . '/includes/tidy/RaggettInternalPHP.php',
'MediaWiki\\Tidy\\RaggettWrapper' => __DIR__ . '/includes/tidy/RaggettWrapper.php',
+ 'MediaWiki\\Tidy\\RemexCompatFormatter' => __DIR__ . '/includes/tidy/RemexCompatFormatter.php',
+ 'MediaWiki\\Tidy\\RemexCompatMunger' => __DIR__ . '/includes/tidy/RemexCompatMunger.php',
+ 'MediaWiki\\Tidy\\RemexDriver' => __DIR__ . '/includes/tidy/RemexDriver.php',
+ 'MediaWiki\\Tidy\\RemexMungerData' => __DIR__ . '/includes/tidy/RemexMungerData.php',
'MediaWiki\\Tidy\\TidyDriverBase' => __DIR__ . '/includes/tidy/TidyDriverBase.php',
'MediaWiki\\Widget\\ComplexNamespaceInputWidget' => __DIR__ . '/includes/widget/ComplexNamespaceInputWidget.php',
'MediaWiki\\Widget\\ComplexTitleInputWidget' => __DIR__ . '/includes/widget/ComplexTitleInputWidget.php',
diff --git a/composer.json b/composer.json
index d41492ef6b..17abc59c69 100644
--- a/composer.json
+++ b/composer.json
@@ -38,6 +38,7 @@
"wikimedia/ip-set": "1.1.0",
"wikimedia/php-session-serializer": "1.0.4",
"wikimedia/relpath": "1.0.3",
+ "wikimedia/remex-html": "1.0.0",
"wikimedia/running-stat": "1.1.0",
"wikimedia/scoped-callback": "1.0.0",
"wikimedia/utfnormal": "1.1.0",
diff --git a/includes/tidy/RemexCompatFormatter.php b/includes/tidy/RemexCompatFormatter.php
new file mode 100644
index 0000000000..3dc727bc89
--- /dev/null
+++ b/includes/tidy/RemexCompatFormatter.php
@@ -0,0 +1,71 @@
+ true,
+ 'p' => true,
+ 'tr' => true,
+ ];
+
+	/**
+	 * Set up the escape tables used when serializing text and attributes.
+	 *
+	 * @param array $options Passed through unchanged to the parent constructor
+	 */
+	public function __construct( $options = [] ) {
+		parent::__construct( $options );
+		// A literal no-break space (UTF-8 bytes C2 A0) is written out as a
+		// numeric character reference, so that it remains visible in the
+		// output and is not confused with an ordinary space.
+		$this->attributeEscapes["\xc2\xa0"] = '&#160;';
+		// Ampersands are passed through without escaping.
+		// NOTE(review): presumably because the tokenizer is run with
+		// character reference decoding disabled, so "&" in the input is
+		// already in serialized form -- confirm against the caller.
+		unset( $this->attributeEscapes["&"] );
+		$this->textEscapes["\xc2\xa0"] = '&#160;';
+		unset( $this->textEscapes["&"] );
+	}
+
+ /**
+ * Produce the serialization for the start of the document. This formatter
+ * emits nothing here, so no doctype or other preamble appears in the output.
+ *
+ * @param mixed $fragmentNamespace Ignored
+ * @param mixed $fragmentName Ignored
+ * @return string Always the empty string
+ */
+ public function startDocument( $fragmentNamespace, $fragmentName ) {
+ return '';
+ }
+
+	/**
+	 * Serialize a single element, given the already-serialized contents.
+	 *
+	 * - A p-wrapper node becomes a <p> element if it has any non-blank
+	 *   content; otherwise only its contents are emitted, with no tag.
+	 * - An attribute-less element listed in self::$markedEmptyElements whose
+	 *   contents are only whitespace gets class="mw-empty-elt".
+	 * - HTML void elements are closed with " />".
+	 * - For elements in $this->prefixLfElements, a leading line feed in the
+	 *   contents is doubled.
+	 *   NOTE(review): presumably because an HTML parser drops the first
+	 *   line feed in such elements -- confirm against the HTML specification.
+	 *
+	 * @param SerializerNode $parent The parent node (not used here)
+	 * @param SerializerNode $node The node being serialized
+	 * @param string $contents The serialized child contents
+	 * @return string
+	 */
+	public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
+		$data = $node->snData;
+		if ( $data && $data->isPWrapper ) {
+			if ( $data->nonblankNodeCount ) {
+				return "<p>$contents</p>";
+			} else {
+				// A wrapper with only blank content is dropped
+				return $contents;
+			}
+		}
+
+		$name = $node->name;
+		$attrs = $node->attrs;
+		if ( isset( self::$markedEmptyElements[$name] ) && $attrs->count() === 0 ) {
+			// strspn() gives the length of the leading whitespace run, so
+			// equality with strlen() means the contents are entirely blank
+			if ( strspn( $contents, "\t\n\f\r " ) === strlen( $contents ) ) {
+				return "<{$name} class=\"mw-empty-elt\">$contents</{$name}>";
+			}
+		}
+
+		$s = "<$name";
+		foreach ( $attrs->getValues() as $attrName => $attrValue ) {
+			$encValue = strtr( $attrValue, $this->attributeEscapes );
+			$s .= " $attrName=\"$encValue\"";
+		}
+		if ( $node->namespace === HTMLData::NS_HTML && isset( $this->voidElements[$name] ) ) {
+			// Void elements have no contents and no end tag
+			$s .= ' />';
+			return $s;
+		}
+
+		$s .= '>';
+		if ( $node->namespace === HTMLData::NS_HTML
+			&& isset( $contents[0] ) && $contents[0] === "\n"
+			&& isset( $this->prefixLfElements[$name] )
+		) {
+			$s .= "\n$contents</$name>";
+		} else {
+			$s .= "$contents</$name>";
+		}
+		return $s;
+	}
+}
diff --git a/includes/tidy/RemexCompatMunger.php b/includes/tidy/RemexCompatMunger.php
new file mode 100644
index 0000000000..d5f5c281c2
--- /dev/null
+++ b/includes/tidy/RemexCompatMunger.php
@@ -0,0 +1,468 @@
+ true,
+ "abbr" => true,
+ "acronym" => true,
+ "applet" => true,
+ "b" => true,
+ "basefont" => true,
+ "bdo" => true,
+ "big" => true,
+ "br" => true,
+ "button" => true,
+ "cite" => true,
+ "code" => true,
+ "dfn" => true,
+ "em" => true,
+ "font" => true,
+ "i" => true,
+ "iframe" => true,
+ "img" => true,
+ "input" => true,
+ "kbd" => true,
+ "label" => true,
+ "legend" => true,
+ "map" => true,
+ "object" => true,
+ "param" => true,
+ "q" => true,
+ "rb" => true,
+ "rbc" => true,
+ "rp" => true,
+ "rt" => true,
+ "rtc" => true,
+ "ruby" => true,
+ "s" => true,
+ "samp" => true,
+ "select" => true,
+ "small" => true,
+ "span" => true,
+ "strike" => true,
+ "strong" => true,
+ "sub" => true,
+ "sup" => true,
+ "textarea" => true,
+ "tt" => true,
+ "u" => true,
+ "var" => true,
+ ];
+
+ /**
+ * Element names which may be flagged as splittable when inserted under a
+ * p-wrapper or under another splittable element. The keys are tag names;
+ * the values are always true so that isset() can be used for lookup.
+ */
+ private static $formattingElements = [
+ 'a' => true,
+ 'b' => true,
+ 'big' => true,
+ 'code' => true,
+ 'em' => true,
+ 'font' => true,
+ 'i' => true,
+ 'nobr' => true,
+ 's' => true,
+ 'small' => true,
+ 'strike' => true,
+ 'strong' => true,
+ 'tt' => true,
+ 'u' => true,
+ ];
+
+ /**
+ * Constructor
+ *
+ * @param Serializer $serializer The serializer which receives the
+ * (possibly modified) tree mutation events, and whose node tree is
+ * inspected and altered by this class
+ */
+ public function __construct( Serializer $serializer ) {
+ $this->serializer = $serializer;
+ }
+
+	/**
+	 * Forward the start of the document to the serializer, then attach
+	 * munger data to the root node with p-wrapping enabled.
+	 *
+	 * @param mixed $fragmentNamespace Passed through to the serializer
+	 * @param mixed $fragmentName Passed through to the serializer
+	 */
+	public function startDocument( $fragmentNamespace, $fragmentName ) {
+		$serializer = $this->serializer;
+		$serializer->startDocument( $fragmentNamespace, $fragmentName );
+
+		$rootNode = $serializer->getRootNode();
+		$rootData = new RemexMungerData;
+		$rootNode->snData = $rootData;
+		$rootData->needsPWrapping = true;
+	}
+
+ /**
+ * Forward the end of the document to the serializer unchanged.
+ *
+ * @param int $pos The position passed on to the serializer
+ */
+ public function endDocument( $pos ) {
+ $this->serializer->endDocument( $pos );
+ }
+
+	/**
+	 * Work out where an insertion should really go, taking into account
+	 * cloned elements and open p-wrappers.
+	 *
+	 * @param int $preposition One of the TreeBuilder preposition constants
+	 * @param Element|null $refElement The reference element; not used when
+	 *   the preposition is TreeBuilder::ROOT
+	 * @return array A two-element list: the effective parent node, and the
+	 *   reference node to pass to the serializer (null for ROOT)
+	 */
+	private function getParentForInsert( $preposition, $refElement ) {
+		if ( $preposition === TreeBuilder::ROOT ) {
+			return [ $this->serializer->getRootNode(), null ];
+		}
+
+		$refNode = $refElement->userData;
+		if ( $preposition === TreeBuilder::BEFORE ) {
+			// The parent is whatever currently contains the reference node
+			return [ $this->serializer->getParentNode( $refNode ), $refNode ];
+		}
+
+		$refData = $refNode->snData;
+		if ( $refData->currentCloneElement ) {
+			// Walk to the end of the chain of clone links
+			$firstData = $refData;
+			do {
+				$cloneElement = $refData->currentCloneElement;
+				$refNode = $cloneElement->userData;
+				$refData = $refNode->snData;
+			} while ( $refData->currentCloneElement );
+			// Remember the end of the chain so the next lookup is direct
+			$firstData->currentCloneElement = $cloneElement;
+		} elseif ( $refData->childPElement ) {
+			// Divert the insertion into the open p-wrapper
+			$refNode = $refData->childPElement->userData;
+		}
+		return [ $refNode, $refNode ];
+	}
+
+	/**
+	 * Create an "mw:p-wrap" element as a child of the given node and
+	 * register it as that node's open p-wrapper.
+	 *
+	 * @param SerializerNode $parent The node which will contain the wrapper
+	 * @param int $sourceStart The source position passed to the serializer
+	 * @return SerializerNode The node of the new p-wrapper
+	 */
+	private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
+		$wrapperElement = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
+		// Inserting through the serializer is what populates userData
+		$this->serializer->insertElement(
+			TreeBuilder::UNDER, $parent, $wrapperElement, false, $sourceStart, 0
+		);
+		$wrapperNode = $wrapperElement->userData;
+
+		$wrapperData = new RemexMungerData;
+		$wrapperData->isPWrapper = true;
+		$wrapperData->wrapBaseNode = $parent;
+		$wrapperNode->snData = $wrapperData;
+
+		// Subsequent insertions under $parent are diverted into the wrapper
+		$parent->snData->childPElement = $wrapperElement;
+		return $wrapperNode;
+	}
+
+	/**
+	 * Insert a text node, first creating a p-wrapper or splitting the tag
+	 * stack if the text would otherwise end up unwrapped.
+	 *
+	 * @param int $preposition One of the TreeBuilder preposition constants
+	 * @param Element|null $refElement The reference element
+	 * @param string $text The string containing the text
+	 * @param int $start Offset of the text within $text
+	 * @param int $length Length of the text within $text
+	 * @param int $sourceStart The source position
+	 * @param int $sourceLength The source length
+	 */
+	public function characters( $preposition, $refElement, $text, $start, $length,
+		$sourceStart, $sourceLength
+	) {
+		// The text is blank if the whitespace run covers the whole range
+		$whitespaceLength = strspn( $text, "\t\n\f\r ", $start, $length );
+		$isBlank = ( $whitespaceLength === $length );
+
+		list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
+		$parentData = $parent->snData;
+
+		if ( $preposition === TreeBuilder::UNDER ) {
+			$replacement = null;
+			if ( $parentData->needsPWrapping && !$isBlank ) {
+				// Bare non-blank text in a p-wrapping context gets a wrapper
+				$replacement = $this->insertPWrapper( $refNode, $sourceStart );
+			} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
+				// Splittable parent with no p-wrapper above it: split the
+				// tag stack so that the text lands inside a new wrapper
+				$replacement = $this->splitTagStack( $refNode, true, $sourceStart );
+			}
+			if ( $replacement !== null ) {
+				$refNode = $replacement;
+				$parent = $replacement;
+				$parentData = $replacement->snData;
+			}
+		}
+
+		if ( !$isBlank ) {
+			// Record that the parent has non-whitespace content
+			$parentData->nonblankNodeCount++;
+		}
+		$this->serializer->characters(
+			$preposition, $refNode, $text, $start, $length, $sourceStart, $sourceLength
+		);
+	}
+
+	/**
+	 * Insert or reparent an element. Create p-wrappers or split the tag stack
+	 * as necessary.
+	 *
+	 * Consider the following insertion locations. The parent may be:
+	 *
+	 *   - A: A body or blockquote (!!needsPWrapping)
+	 *   - B: A p-wrapper (!!isPWrapper)
+	 *   - C: A descendant of a p-wrapper (!!ancestorPNode)
+	 *     - CS: With splittable formatting elements in the stack region up to
+	 *       the p-wrapper
+	 *     - CU: With one or more unsplittable elements in the stack region up
+	 *       to the p-wrapper
+	 *   - D: Not a descendant of a p-wrapper (!ancestorPNode)
+	 *     - DS: With splittable formatting elements in the stack region up to
+	 *       the body or blockquote
+	 *     - DU: With one or more unsplittable elements in the stack region up
+	 *       to the body or blockquote
+	 *
+	 * And consider that we may insert two types of element:
+	 *   - b: block
+	 *   - i: inline
+	 *
+	 * We handle the insertion as follows:
+	 *
+	 *   - A/i: Create a p-wrapper, insert under it
+	 *   - A/b: Insert as normal
+	 *   - B/i: Insert as normal
+	 *   - B/b: Close the p-wrapper, insert under the body/blockquote (wrap
+	 *     base) instead
+	 *   - C/i: Insert as normal
+	 *   - CS/b: Split the tag stack, insert the block under cloned formatting
+	 *     elements which have the wrap base (the parent of the p-wrap) as
+	 *     their ultimate parent.
+	 *   - CU/b: Disable the p-wrap, by reparenting the currently open child
+	 *     of the p-wrap under the p-wrap's parent. Then insert the block as
+	 *     normal.
+	 *   - D/b: Insert as normal
+	 *   - DS/i: Split the tag stack, creating a new p-wrapper as the ultimate
+	 *     parent of the formatting elements thus cloned. The parent of the
+	 *     p-wrapper is the body or blockquote.
+	 *   - DU/i: Insert as normal
+	 *
+	 * FIXME: fostering ($preposition == BEFORE) is mostly done by inserting as
+	 * normal, the full algorithm is not followed.
+	 *
+	 * @param int $preposition
+	 * @param Element|SerializerNode|null $refElement
+	 * @param Element $element
+	 * @param bool $void
+	 * @param int $sourceStart
+	 * @param int $sourceLength
+	 */
+	public function insertElement( $preposition, $refElement, Element $element, $void,
+		$sourceStart, $sourceLength
+	) {
+		list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
+		$parentData = $parent->snData;
+		$elementName = $element->htmlName;
+
+		// Anything not listed as inline-only is treated as a block
+		$inline = isset( self::$onlyInlineElements[$elementName] );
+		$under = $preposition === TreeBuilder::UNDER;
+
+		if ( $under && $parentData->isPWrapper && !$inline ) {
+			// [B/b] The element is non-inline and the parent is a p-wrapper,
+			// close the parent and insert into its parent instead
+			$newParent = $this->serializer->getParentNode( $parent );
+			$parent = $newParent;
+			$parentData = $parent->snData;
+			$parentData->childPElement = null;
+			$newRef = $refElement->userData;
+			// FIXME cannot call endTag() since we don't have an Element
+		} elseif ( $under && $parentData->isSplittable
+			&& (bool)$parentData->ancestorPNode !== $inline
+		) {
+			// [CS/b, DS/i] The parent is splittable and the current element is
+			// inline in block context, or if the current element is a block
+			// under a p-wrapper, split the tag stack.
+			$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
+			$parent = $newRef;
+			$parentData = $parent->snData;
+		} elseif ( $under && $parentData->needsPWrapping && $inline ) {
+			// [A/i] If the element is inline and we are in body/blockquote,
+			// we need to create a p-wrapper
+			$newRef = $this->insertPWrapper( $newRef, $sourceStart );
+			$parent = $newRef;
+			$parentData = $parent->snData;
+		} elseif ( $parentData->ancestorPNode && !$inline ) {
+			// [CU/b] If the element is non-inline and (despite attempting to
+			// split above) there is still an ancestor p-wrap, disable that
+			// p-wrap
+			$this->disablePWrapper( $parent, $sourceStart );
+		}
+		// else [A/b, B/i, C/i, D/b, DU/i] insert as normal
+
+		// An element with element children is a non-blank element
+		$parentData->nonblankNodeCount++;
+
+		// Insert the element downstream and so initialise its userData
+		$this->serializer->insertElement( $preposition, $newRef,
+			$element, $void, $sourceStart, $sourceLength );
+
+		// Initialise snData, keeping any data which is already attached
+		// (the node may be one that is being reparented)
+		if ( !$element->userData->snData ) {
+			$elementData = $element->userData->snData = new RemexMungerData;
+		} else {
+			$elementData = $element->userData->snData;
+		}
+		if ( ( $parentData->isPWrapper || $parentData->isSplittable )
+			&& isset( self::$formattingElements[$elementName] )
+		) {
+			$elementData->isSplittable = true;
+		}
+		if ( $parentData->isPWrapper ) {
+			$elementData->ancestorPNode = $parent;
+		} elseif ( $parentData->ancestorPNode ) {
+			$elementData->ancestorPNode = $parentData->ancestorPNode;
+		}
+		if ( $parentData->wrapBaseNode ) {
+			$elementData->wrapBaseNode = $parentData->wrapBaseNode;
+		} elseif ( $parentData->needsPWrapping ) {
+			$elementData->wrapBaseNode = $parent;
+		}
+		if ( $elementName === 'body'
+			|| $elementName === 'blockquote'
+			|| $elementName === 'html'
+		) {
+			// Bare inline content directly under these elements is p-wrapped
+			$elementData->needsPWrapping = true;
+		}
+	}
+
+	/**
+	 * Clone nodes in a stack range and return the new parent.
+	 *
+	 * The range runs from $parentNode upwards to (but not including) the
+	 * enclosing p-wrapper if there is one, or otherwise the wrap base. Each
+	 * node in the range is cloned, outermost first, under either a new
+	 * p-wrapper ($inline true) or the wrap base itself ($inline false).
+	 * Original nodes with no non-blank content are removed afterwards.
+	 *
+	 * @param SerializerNode $parentNode The innermost node of the range
+	 * @param bool $inline Whether the clones should go under a new p-wrapper
+	 * @param int $pos The source position
+	 * @return SerializerNode The innermost clone, or the clone target itself
+	 *   if the range was empty
+	 * @throws \Exception If the root is reached before the end of the range
+	 */
+	private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
+		$parentData = $parentNode->snData;
+		$wrapBase = $parentData->wrapBaseNode;
+		$pWrap = $parentData->ancestorPNode;
+		// Stop at the p-wrapper if we are inside one, else at the wrap base
+		$cloneEnd = $pWrap ? $pWrap : $wrapBase;
+
+		$serializer = $this->serializer;
+		$node = $parentNode;
+		$root = $serializer->getRootNode();
+		$nodes = [];
+		$removableNodes = [];
+		while ( $node !== $cloneEnd ) {
+			$nextParent = $serializer->getParentNode( $node );
+			if ( $nextParent === $root ) {
+				throw new \Exception( 'Did not find end of clone range' );
+			}
+			$nodes[] = $node;
+			if ( $node->snData->nonblankNodeCount === 0 ) {
+				// This node will be removed, so it no longer counts towards
+				// its parent's non-blank content
+				$removableNodes[] = $node;
+				$nextParent->snData->nonblankNodeCount--;
+			}
+			$node = $nextParent;
+		}
+
+		if ( $inline ) {
+			$pWrap = $this->insertPWrapper( $wrapBase, $pos );
+			$node = $pWrap;
+		} else {
+			if ( $pWrap ) {
+				// End the p-wrap which was open, cancel the diversion
+				$wrapBase->snData->childPElement = null;
+			}
+			$pWrap = null;
+			$node = $wrapBase;
+		}
+
+		// $nodes is ordered innermost first, so iterate backwards in order
+		// to recreate the outermost element first
+		for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
+			$oldNode = $nodes[$i];
+			$oldData = $oldNode->snData;
+			$nodeParent = $node;
+			$element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
+			$this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
+				$element, false, $pos, 0 );
+			// Later insertions which refer to the old element are diverted
+			// to the clone
+			$oldData->currentCloneElement = $element;
+
+			$newNode = $element->userData;
+			$newData = $newNode->snData = new RemexMungerData;
+			if ( $pWrap ) {
+				$newData->ancestorPNode = $pWrap;
+			}
+			$newData->isSplittable = true;
+			$newData->wrapBaseNode = $wrapBase;
+			$newData->isPWrapper = $oldData->isPWrapper;
+
+			$nodeParent->snData->nonblankNodeCount++;
+
+			$node = $newNode;
+		}
+		foreach ( $removableNodes as $rNode ) {
+			// removeNode() takes an Element, so wrap the node in a fake one
+			$fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs );
+			$fakeElement->userData = $rNode;
+			$this->serializer->removeNode( $fakeElement, $pos );
+		}
+		return $node;
+	}
+
+ /**
+ * Find the ancestor of $node which is a child of a p-wrapper, and
+ * reparent that node so that it is placed after the end of the p-wrapper
+ *
+ * Nothing is done if the p-wrapper is not the last child of its parent.
+ *
+ * @param SerializerNode $node A node which has a p-wrapper ancestor
+ * @param int $sourceStart The source position passed to the serializer
+ */
+ private function disablePWrapper( SerializerNode $node, $sourceStart ) {
+ $nodeData = $node->snData;
+ $pWrapNode = $nodeData->ancestorPNode;
+ $newParent = $this->serializer->getParentNode( $pWrapNode );
+ if ( $pWrapNode !== $this->serializer->getLastChild( $newParent ) ) {
+ // Fostering or something? Abort!
+ return;
+ }
+
+ // Walk up from $node to the direct child of the p-wrapper, clearing the
+ // p-wrapper link of every node on the way. After the loop, $victim is
+ // the direct child of the p-wrapper.
+ $nextParent = $node;
+ do {
+ $victim = $nextParent;
+ $victim->snData->ancestorPNode = null;
+ $nextParent = $this->serializer->getParentNode( $victim );
+ } while ( $nextParent !== $pWrapNode );
+
+ // Make a fake Element to use in a reparenting operation
+ $victimElement = new Element( $victim->namespace, $victim->name, $victim->attrs );
+ $victimElement->userData = $victim;
+
+ // Reparent
+ $this->serializer->insertElement( TreeBuilder::UNDER, $newParent, $victimElement,
+ false, $sourceStart, 0 );
+
+ // Decrement nonblank node count
+ $pWrapNode->snData->nonblankNodeCount--;
+
+ // Cancel the diversion so that no more elements are inserted under this p-wrap
+ $newParent->snData->childPElement = null;
+ }
+
+ /**
+ * Forward an end tag event to the serializer unchanged.
+ *
+ * @param Element $element The element being closed
+ * @param int $sourceStart The source position
+ * @param int $sourceLength The source length
+ */
+ public function endTag( Element $element, $sourceStart, $sourceLength ) {
+ $this->serializer->endTag( $element, $sourceStart, $sourceLength );
+ }
+
+ /**
+ * Forward a doctype event to the serializer unchanged.
+ *
+ * @param mixed $name Passed through
+ * @param mixed $public Passed through
+ * @param mixed $system Passed through
+ * @param mixed $quirks Passed through
+ * @param int $sourceStart The source position
+ * @param int $sourceLength The source length
+ */
+ public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
+ $this->serializer->doctype( $name, $public, $system, $quirks,
+ $sourceStart, $sourceLength );
+ }
+
+	/**
+	 * Insert a comment. The insertion point is adjusted for clones and open
+	 * p-wrappers in the same way as for other nodes, but a comment never
+	 * opens or closes a p-wrapper and does not count as non-blank content.
+	 *
+	 * @param int $preposition One of the TreeBuilder preposition constants
+	 * @param Element|null $refElement The reference element
+	 * @param string $text The comment text
+	 * @param int $sourceStart The source position
+	 * @param int $sourceLength The source length
+	 */
+	public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
+		// Only the adjusted reference node is needed, not the parent
+		list( , $target ) = $this->getParentForInsert( $preposition, $refElement );
+		$this->serializer->comment(
+			$preposition, $target, $text, $sourceStart, $sourceLength
+		);
+	}
+
+ /**
+ * Forward a parse error to the serializer unchanged.
+ *
+ * @param string $text The error message
+ * @param int $pos The source position
+ */
+ public function error( $text, $pos ) {
+ $this->serializer->error( $text, $pos );
+ }
+
+ /**
+ * Forward an attribute merge to the serializer unchanged.
+ *
+ * @param Element $element The element to receive the attributes
+ * @param Attributes $attrs The attributes to merge
+ * @param int $sourceStart The source position
+ */
+ public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
+ $this->serializer->mergeAttributes( $element, $attrs, $sourceStart );
+ }
+
+ /**
+ * Forward a node removal to the serializer unchanged.
+ *
+ * @param Element $element The element to remove
+ * @param int $sourceStart The source position
+ */
+ public function removeNode( Element $element, $sourceStart ) {
+ $this->serializer->removeNode( $element, $sourceStart );
+ }
+
+ /**
+ * Move all children of $element to $newParent, and insert $newParent as
+ * a child of $element. The insertion goes through this class's own
+ * insertElement(), so the usual p-wrapping rules apply to $newParent.
+ *
+ * @param Element $element The element whose children are moved
+ * @param Element $newParent The element which receives the children
+ * @param int $sourceStart The source position
+ */
+ public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
+ $self = $element->userData;
+ // Detach the children first, so that $newParent is inserted into an
+ // empty child list
+ $children = $self->children;
+ $self->children = [];
+ $this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
+ $newParentNode = $newParent->userData;
+ $newParentId = $newParentNode->id;
+ foreach ( $children as $child ) {
+ // NOTE(review): non-object children are presumably serialized text
+ // strings, which carry no parent link -- confirm against Serializer
+ if ( is_object( $child ) ) {
+ $child->parentId = $newParentId;
+ }
+ }
+ $newParentNode->children = $children;
+ }
+}
diff --git a/includes/tidy/RemexDriver.php b/includes/tidy/RemexDriver.php
new file mode 100644
index 0000000000..e02af88fd9
--- /dev/null
+++ b/includes/tidy/RemexDriver.php
@@ -0,0 +1,57 @@
+ false,
+ 'pwrap' => true
+ ];
+ $this->trace = $config['treeMutationTrace'];
+ $this->pwrap = $config['pwrap'];
+ parent::__construct( $config );
+ }
+
+	/**
+	 * Parse the given HTML as a fragment in a body context and return it
+	 * reserialized.
+	 *
+	 * The pipeline is tokenizer -> dispatcher -> tree builder -> (optional
+	 * tracer) -> (optional p-wrapping munger) -> serializer.
+	 *
+	 * @param string $text The HTML to tidy
+	 * @return string The serialized result
+	 */
+	public function tidy( $text ) {
+		$serializer = new Serializer( new RemexCompatFormatter );
+
+		// With p-wrapping enabled, the munger sits in front of the serializer
+		$handler = $this->pwrap ? new RemexCompatMunger( $serializer ) : $serializer;
+		if ( $this->trace ) {
+			// Log every tree mutation event to the debug log
+			$handler = new TreeMutationTracer( $handler, function ( $msg ) {
+				wfDebug( "RemexHtml: $msg" );
+			} );
+		}
+
+		$treeBuilderOptions = [
+			'ignoreErrors' => true,
+			'ignoreNulls' => true,
+		];
+		$tokenizerOptions = [
+			'ignoreErrors' => true,
+			'ignoreCharRefs' => true,
+			'ignoreNulls' => true,
+			'skipPreprocess' => true,
+		];
+		$treeBuilder = new TreeBuilder( $handler, $treeBuilderOptions );
+		$tokenizer = new Tokenizer( new Dispatcher( $treeBuilder ), $text, $tokenizerOptions );
+		$tokenizer->execute( [
+			'fragmentNamespace' => \RemexHtml\HTMLData::NS_HTML,
+			'fragmentName' => 'body'
+		] );
+
+		// The result is always read from the serializer at the end of the
+		// chain, whichever handlers were placed in front of it
+		return $serializer->getResult();
+	}
diff --git a/includes/tidy/RemexMungerData.php b/includes/tidy/RemexMungerData.php
new file mode 100644
index 0000000000..d614a38183
--- /dev/null
+++ b/includes/tidy/RemexMungerData.php
@@ -0,0 +1,78 @@
+x
"
+ ],
+ [
+ 'No p-wrap of blank node',
+ " ",
+ " "
+ ],
+ [
+ 'p-wrap terminated by div',
+ "x",
+ "x
"
+ ],
+ [
+ 'p-wrap not terminated by span',
+ "x",
+ "x
"
+ ],
+ [
+ 'An element is non-blank and so gets p-wrapped',
+ "",
+ "
"
+ ],
+ [
+ 'The blank flag is set after a block-level element',
+ " ",
+ " "
+ ],
+ [
+ 'Blank detection between two block-level elements',
+ " ",
+ " "
+ ],
+ [
+ 'But p-wrapping of non-blank content works after an element',
+ "x",
+ "x
"
+ ],
+ [
+ 'p-wrapping between two block-level elements',
+ "x",
+ "x
"
+ ],
+ [
+ 'p-wrap inside blockquote',
+ "x
",
+ "x
"
+ ],
+ [
+ 'A comment is blank for p-wrapping purposes',
+ "",
+ ""
+ ],
+ [
+ 'A comment is blank even when a p-wrap was opened by a text node',
+ " ",
+ " "
+ ],
+ [
+ 'A comment does not open a p-wrap',
+ "x",
+ "x
"
+ ],
+ [
+ 'A comment does not close a p-wrap',
+ "x",
+ "x
"
+ ],
+ [
+ 'Empty li',
+ "",
+ ""
+ ],
+ [
+ 'li with element',
+ "",
+ ""
+ ],
+ [
+ 'li with text',
+ "",
+ ""
+ ],
+ [
+ 'Empty tr',
+ "",
+ ""
+ ],
+ [
+ 'Empty p',
+ "\n
",
+ "\n
"
+ ],
+ [
+ 'No p-wrapping of an inline element which contains a block element (T150317)',
+ "x
",
+ "x
"
+ ],
+ [
+ 'p-wrapping of an inline element which contains an inline element',
+ "x",
+ "x
"
+ ],
+ [
+ 'p-wrapping is enabled in a blockquote in an inline element',
+ "x
",
+ "x
"
+ ],
+ [
+ 'All bare text should be p-wrapped even when surrounded by block tags',
+ "x
yz",
+ "x
y
z
"
+ ],
+ [
+ 'Split tag stack 1',
+ "xy
z",
+ "x
y
z
"
+ ],
+ [
+ 'Split tag stack 2',
+ "y
z",
+ "y
z
"
+ ],
+ [
+ 'Split tag stack 3',
+ "xy
",
+ "x
y
"
+ ],
+ [
+ 'Split tag stack 4 (modified to use splittable tag)',
+ "abcd
e
",
+ "abc
d
e
"
+ ],
+ [
+ "Split tag stack regression check 1",
+ "xy
",
+ "x
y
"
+ ],
+ [
+ "Split tag stack regression check 2 (modified to use splittable tag)",
+ "ad
e
",
+ "a
d
e
"
+ ],
+ // Simple tests from pwrap.js
+ [
+ 'Simple pwrap test 1',
+ 'a',
+ 'a
'
+ ],
+ [
+ ' is not a splittable tag, but gets p-wrapped in simple wrapping scenarios',
+ 'a',
+ 'a
'
+ ],
+ [
+ 'Simple pwrap test 3',
+ 'x a
b
y',
+ 'x
a
b
y
'
+ ],
+ [
+ 'Simple pwrap test 4',
+ 'x a
b
y',
+ 'x
a
b
y
'
+ ],
+ // Complex tests from pwrap.js
+ [
+ 'Complex pwrap test 1',
+ 'xa
y',
+ 'x
a
y
'
+ ],
+ [
+ 'Complex pwrap test 2',
+ 'abcd
ef',
+ 'abc
d
ef
'
+ ],
+ [
+ 'Complex pwrap test 3',
+ 'abcd
e',
+ 'abc
d
e
'
+ ],
+ [
+ 'Complex pwrap test 4',
+ 'xy
',
+ 'x
y
'
+ ],
+ [
+ 'Complex pwrap test 5',
+ 'ad
e',
+ 'a
d
e
'
+ ],
+ [
+ 'Complex pwrap test 6',
+ 'ab
cde
fg',
+ // @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
+ // PHP 5 does not allow concatenation in initialisation of a class static variable
+ 'a
b
cd
e
fg
'
+ // @codingStandardsIgnoreEnd
+ ],
+ /* FIXME the second causes a stack split which clones the even
+ * though no is actually generated
+ [
+ 'Complex pwrap test 7',
+ 'x
y
z
',
+ 'x
y
z
'
+ ],
+ */
+ // New local tests
+ [
+ 'Blank text node after block end',
+ 'xy
z',
+ 'x
y
z
'
+ ],
+ [
+ 'Text node fostering (FIXME: wrap missing)',
+ '',
+ 'x'
+ ],
+ [
+ 'Blockquote fostering',
+ '',
+ 'x
'
+ ],
+ [
+ 'Block element fostering',
+ 'x',
+ '
x
'
+ ],
+ [
+ 'Formatting element fostering (FIXME: wrap missing)',
+ '
x',
+ 'x'
+ ],
+ [
+ 'AAA clone of p-wrapped element (FIXME: empty b)',
+ 'xyz
',
+ 'x
yz
',
+ ],
+ [
+ 'AAA with fostering (FIXME: wrap missing)',
+ '123
',
+ '123
'
+ ],
+ ];
+
+ /**
+ * Data provider for testTidy().
+ *
+ * @return array The static table of test cases; testTidy() receives each
+ * row as a description, an input string and the expected output
+ */
+ public function provider() {
+ return self::$remexTidyTestData;
+ }
+
+ /**
+ * Run one input through a RemexDriver constructed with an empty
+ * configuration, and compare the result with the expected output.
+ *
+ * @dataProvider provider
+ * @covers MediaWiki\Tidy\RemexCompatFormatter
+ * @covers MediaWiki\Tidy\RemexCompatMunger
+ * @covers MediaWiki\Tidy\RemexDriver
+ * @covers MediaWiki\Tidy\RemexMungerData
+ *
+ * @param string $desc Description of the case, used as the failure message
+ * @param string $input The HTML to tidy
+ * @param string $expected The expected serialization
+ */
+ public function testTidy( $desc, $input, $expected ) {
+ $r = new MediaWiki\Tidy\RemexDriver( [] );
+ $result = $r->tidy( $input );
+ $this->assertEquals( $expected, $result, $desc );
+ }
+
+	/**
+	 * Data provider for testHtml5Lib(). Reads html5lib-tests.json from the
+	 * directory of this file and flattens it into a list of cases.
+	 *
+	 * The JSON is expected to decode to an array of file name => list of
+	 * tests, where each test has a 'data' member holding the input.
+	 *
+	 * @return array List of [ description, input ] pairs
+	 * @throws \RuntimeException If the fixture is unreadable or is not a
+	 *   JSON array or object
+	 */
+	public function html5libProvider() {
+		$path = __DIR__ . '/html5lib-tests.json';
+		// Fail with an explicit message rather than letting a warning from
+		// file_get_contents() or foreach surface as an opaque provider error
+		if ( !is_readable( $path ) ) {
+			throw new \RuntimeException( "Cannot read html5lib test fixture: $path" );
+		}
+		$files = json_decode( file_get_contents( $path ), true );
+		if ( !is_array( $files ) ) {
+			throw new \RuntimeException( "Invalid JSON in html5lib test fixture: $path" );
+		}
+
+		$tests = [];
+		foreach ( $files as $file => $fileTests ) {
+			foreach ( $fileTests as $i => $test ) {
+				$tests[] = [ "$file:$i", $test['data'] ];
+			}
+		}
+		return $tests;
+	}
+
+	/**
+	 * Smoke test: feed each html5lib input through the driver and check only
+	 * that no exception is thrown. The output is not inspected, since the
+	 * expected serialization is not known.
+	 *
+	 * @dataProvider html5libProvider
+	 * @coversNothing
+	 */
+	public function testHtml5Lib( $desc, $input ) {
+		$driver = new MediaWiki\Tidy\RemexDriver( [] );
+		$driver->tidy( $input );
+		// Reaching this point without an exception is the success criterion
+		$this->assertTrue( true, $desc );
+	}
+}
--
2.20.1