3 namespace MediaWiki\Tidy
;
5 use RemexHtml\HTMLData
;
6 use RemexHtml\Serializer\Serializer
;
7 use RemexHtml\Serializer\SerializerNode
;
8 use RemexHtml\Tokenizer\Attributes
;
9 use RemexHtml\Tokenizer\PlainAttributes
;
10 use RemexHtml\TreeBuilder\TreeBuilder
;
11 use RemexHtml\TreeBuilder\TreeHandler
;
12 use RemexHtml\TreeBuilder\Element
;
17 class RemexCompatMunger
implements TreeHandler
{
18 private static $onlyInlineElements = [
66 private static $formattingElements = [
/**
 * Remember the downstream serializer; every tree event handled by this
 * class is ultimately forwarded to it.
 *
 * @param Serializer $serializer
 */
public function __construct( Serializer $serializer ) {
	$this->serializer = $serializer;
}
/**
 * Start the downstream document, then attach munger state to its root
 * node and flag that root as needing p-wrapping.
 *
 * @param mixed $fragmentNamespace Forwarded unchanged to the serializer
 * @param mixed $fragmentName Forwarded unchanged to the serializer
 */
public function startDocument( $fragmentNamespace, $fragmentName ) {
	$this->serializer->startDocument( $fragmentNamespace, $fragmentName );

	$rootData = new RemexMungerData;
	$rootData->needsPWrapping = true;
	$this->serializer->getRootNode()->snData = $rootData;
}
/**
 * Forward the end-of-document event to the serializer.
 *
 * @param int $pos The source position
 */
public function endDocument( $pos ) {
	$this->serializer->endDocument( $pos );
}
/**
 * Work out which serializer node new content will actually land in.
 *
 * For insertions under an element, the requested element may have been
 * superseded by a clone or may have an open p-wrapper child; in both
 * cases the insertion is diverted to that replacement.
 *
 * @param int $preposition One of the TreeBuilder preposition constants
 * @param Element|SerializerNode|null $refElement
 * @return array Two entries: the effective parent node, and the reference
 *   node to hand to the serializer (null for ROOT)
 */
private function getParentForInsert( $preposition, $refElement ) {
	if ( $preposition === TreeBuilder::ROOT ) {
		return [ $this->serializer->getRootNode(), null ];
	}

	$refNode = $refElement->userData;
	if ( $preposition === TreeBuilder::BEFORE ) {
		return [ $this->serializer->getParentNode( $refNode ), $refNode ];
	}

	$requestedData = $refNode->snData;
	if ( $requestedData->currentCloneElement ) {
		// Walk the clone links until we reach an element with no newer clone
		$linkData = $requestedData;
		do {
			$latestClone = $linkData->currentCloneElement;
			$refNode = $latestClone->userData;
			$linkData = $refNode->snData;
		} while ( $linkData->currentCloneElement );
		// Shortcut future lookups by pointing straight at the end of the chain
		$requestedData->currentCloneElement = $latestClone;
	} elseif ( $requestedData->childPElement ) {
		$refNode = $requestedData->childPElement->userData;
	}

	return [ $refNode, $refNode ];
}
131 * @param SerializerNode $parent
132 * @param int $sourceStart
133 * @return SerializerNode
// Create an 'mw:p-wrap' element under $parent, register it as $parent's
// current p-wrapper, and return the serializer node of the new wrapper.
135 private function insertPWrapper( SerializerNode
$parent, $sourceStart ) {
// The wrapper is a synthetic HTML-namespace element with no attributes.
136 $pWrap = new Element( HTMLData
::NS_HTML
, 'mw:p-wrap', new PlainAttributes
);
// NOTE(review): the remaining arguments of this call (embedded line 138) are
// not present in this listing; presumably the source position and a zero
// length, as in the other synthetic insertions in this class. Confirm.
137 $this->serializer
->insertElement( TreeBuilder
::UNDER
, $parent, $pWrap, false,
// Munger state for the wrapper: it is a p-wrapper whose wrap base is $parent.
139 $data = new RemexMungerData
;
140 $data->isPWrapper
= true;
141 $data->wrapBaseNode
= $parent;
// userData is dereferenced here, so the insertElement() call above must have
// populated it with the wrapper's serializer node.
142 $pWrap->userData
->snData
= $data;
// Record the wrapper on the parent; getParentForInsert() reads childPElement
// to divert later insertions under $parent into the wrapper.
143 $parent->snData
->childPElement
= $pWrap;
144 return $pWrap->userData
;
// Handle a run of text: decide whether it needs a p-wrapper or a tag stack
// split, update the parent's non-blank count, then forward it to the serializer.
147 public function characters( $preposition, $refElement, $text, $start, $length,
148 $sourceStart, $sourceLength
// True when the $length bytes starting at $start consist solely of
// tab, LF, FF, CR or space.
150 $isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;
152 list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
153 $parentData = $parent->snData
;
// Wrapping and splitting are only considered for insertion under a parent.
155 if ( $preposition === TreeBuilder
::UNDER
) {
156 if ( $parentData->needsPWrapping
&& !$isBlank ) {
157 // Add a p-wrapper for bare text under body/blockquote
158 $refNode = $this->insertPWrapper( $refNode, $sourceStart );
// NOTE(review): embedded line 159 is missing from this listing; presumably
// $parent is rebound to the new $refNode before being re-read. Confirm.
160 $parentData = $parent->snData
;
161 } elseif ( $parentData->isSplittable
&& !$parentData->ancestorPNode
) {
162 // The parent is splittable and in block mode, so split the tag stack
163 $refNode = $this->splitTagStack( $refNode, true, $sourceStart );
// NOTE(review): embedded line 164 is missing here as well; same assumption
// as for the p-wrapper branch above.
165 $parentData = $parent->snData
;
// NOTE(review): embedded lines 166-169 are missing; the comment below suggests
// the increment is guarded by a non-blank check. Confirm against the full file.
170 // Non-whitespace characters detected
171 $parentData->nonblankNodeCount++
;
// Forward the text, using the possibly updated reference node.
173 $this->serializer
->characters( $preposition, $refNode, $text, $start,
174 $length, $sourceStart, $sourceLength );
178 * Insert or reparent an element. Create p-wrappers or split the tag stack
181 * Consider the following insertion locations. The parent may be:
183 * - A: A body or blockquote (!!needsPWrapping)
184 * - B: A p-wrapper (!!isPWrapper)
185 * - C: A descendant of a p-wrapper (!!ancestorPNode)
186 * - CS: With splittable formatting elements in the stack region up to
188 * - CU: With one or more unsplittable elements in the stack region up
190 * - D: Not a descendant of a p-wrapper (!ancestorPNode)
191 * - DS: With splittable formatting elements in the stack region up to
192 * the body or blockquote
193 * - DU: With one or more unsplittable elements in the stack region up
194 * to the body or blockquote
196 * And consider that we may insert two types of element:
200 * We handle the insertion as follows:
202 * - A/i: Create a p-wrapper, insert under it
203 * - A/b: Insert as normal
204 * - B/i: Insert as normal
205 * - B/b: Close the p-wrapper, insert under the body/blockquote (wrap
207 * - C/i: Insert as normal
208 * - CS/b: Split the tag stack, insert the block under cloned formatting
209 * elements which have the wrap base (the parent of the p-wrap) as
210 * their ultimate parent.
211 * - CU/b: Disable the p-wrap, by reparenting the currently open child
212 * of the p-wrap under the p-wrap's parent. Then insert the block as
214 * - D/b: Insert as normal
215 * - DS/i: Split the tag stack, creating a new p-wrapper as the ultimate
216 * parent of the formatting elements thus cloned. The parent of the
217 * p-wrapper is the body or blockquote.
218 * - DU/i: Insert as normal
220 * FIXME: fostering ($preposition == BEFORE) is mostly done by inserting as
221 * normal, the full algorithm is not followed.
223 * @param int $preposition
224 * @param Element|SerializerNode|null $refElement
225 * @param Element $element
227 * @param int $sourceStart
228 * @param int $sourceLength
// Insert an element, first adjusting the insertion point (closing, creating
// or disabling a p-wrapper, or splitting the tag stack), then initialise the
// munger state attached to the new element's serializer node.
230 public function insertElement( $preposition, $refElement, Element
$element, $void,
231 $sourceStart, $sourceLength
// $parent is the effective parent; $newRef is the reference node that will
// be passed to the serializer.
233 list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
234 $parentData = $parent->snData
;
// NOTE(review): $parentNs and $parentName are not referenced anywhere in the
// visible part of this method.
235 $parentNs = $parent->namespace;
236 $parentName = $parent->name
;
237 $elementName = $element->htmlName
;
// "Inline" means the element name is a key of self::$onlyInlineElements.
239 $inline = isset( self
::$onlyInlineElements[$elementName] );
240 $under = $preposition === TreeBuilder
::UNDER
;
242 if ( $under && $parentData->isPWrapper
&& !$inline ) {
243 // [B/b] The element is non-inline and the parent is a p-wrapper,
244 // close the parent and insert into its parent instead
245 $newParent = $this->serializer
->getParentNode( $parent );
246 $parent = $newParent;
247 $parentData = $parent->snData
;
// Detach the p-wrapper from its base before ending it, so that endTag() and
// later insertions are no longer diverted into it.
248 $pElement = $parentData->childPElement
;
249 $parentData->childPElement
= null;
250 $newRef = $refElement->userData
;
251 $this->endTag( $pElement, $sourceStart, 0 );
// The cast compares "has an ancestor p-wrapper" with "is inline": a split
// happens only when the two differ.
252 } elseif ( $under && $parentData->isSplittable
253 && (bool)$parentData->ancestorPNode
!== $inline
255 // [CS/b, DS/i] The parent is splittable and the current element is
256 // inline in block context, or if the current element is a block
257 // under a p-wrapper, split the tag stack.
258 $newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
// NOTE(review): embedded line 259 is missing from this listing; presumably
// $parent is rebound to $newRef before being re-read. Confirm.
260 $parentData = $parent->snData
;
261 } elseif ( $under && $parentData->needsPWrapping
&& $inline ) {
262 // [A/i] If the element is inline and we are in body/blockquote,
263 // we need to create a p-wrapper
264 $newRef = $this->insertPWrapper( $newRef, $sourceStart );
// NOTE(review): embedded line 265 is missing; same assumption as above.
266 $parentData = $parent->snData
;
// This branch does not test $under, so it also applies to other prepositions.
267 } elseif ( $parentData->ancestorPNode
&& !$inline ) {
268 // [CU/b] If the element is non-inline and (despite attempting to
269 // split above) there is still an ancestor p-wrap, disable that
271 $this->disablePWrapper( $parent, $sourceStart );
273 // else [A/b, B/i, C/i, D/b, DU/i] insert as normal
275 // An element with element children is a non-blank element
276 $parentData->nonblankNodeCount++
;
278 // Insert the element downstream and so initialise its userData
279 $this->serializer
->insertElement( $preposition, $newRef,
280 $element, $void, $sourceStart, $sourceLength );
// Create munger state for the element's node unless it already has some.
283 if ( !$element->userData
->snData
) {
284 $elementData = $element->userData
->snData
= new RemexMungerData
;
286 $elementData = $element->userData
->snData
;
// A formatting element is splittable only when its parent is a p-wrapper or
// is itself splittable.
288 if ( ( $parentData->isPWrapper ||
$parentData->isSplittable
)
289 && isset( self
::$formattingElements[$elementName] )
291 $elementData->isSplittable
= true;
// Propagate the nearest p-wrapper: the parent itself, or the one it inherited.
293 if ( $parentData->isPWrapper
) {
294 $elementData->ancestorPNode
= $parent;
295 } elseif ( $parentData->ancestorPNode
) {
296 $elementData->ancestorPNode
= $parentData->ancestorPNode
;
// Propagate the wrap base: inherited from the parent, or the parent itself
// when the parent is a node that needs p-wrapping.
298 if ( $parentData->wrapBaseNode
) {
299 $elementData->wrapBaseNode
= $parentData->wrapBaseNode
;
300 } elseif ( $parentData->needsPWrapping
) {
301 $elementData->wrapBaseNode
= $parent;
// body, blockquote and html are the elements whose content needs p-wrapping.
303 if ( $elementName === 'body'
304 ||
$elementName === 'blockquote'
305 ||
$elementName === 'html'
307 $elementData->needsPWrapping
= true;
312 * Clone nodes in a stack range and return the new parent
314 * @param SerializerNode $parentNode
315 * @param bool $inline
316 * @param int $pos The source position
317 * @return SerializerNode
// Clone the chain of nodes between $parentNode and the end of the clone range
// under a new location, and remove originals that turned out to be empty.
// NOTE(review): several lines of this method are missing from this listing
// (e.g. the initialisation of $node, $nodes and $nodeParent and the branch
// structure around the p-wrapper handling); comments describe only what is visible.
319 private function splitTagStack( SerializerNode
$parentNode, $inline, $pos ) {
320 $parentData = $parentNode->snData
;
321 $wrapBase = $parentData->wrapBaseNode
;
322 $pWrap = $parentData->ancestorPNode
;
// Two candidate ends of the clone range are assigned: the wrap base and the
// ancestor p-wrapper. The condition choosing between them is not visible here.
324 $cloneEnd = $wrapBase;
326 $cloneEnd = $parentData->ancestorPNode
;
329 $serializer = $this->serializer
;
331 $root = $serializer->getRootNode();
333 $removableNodes = [];
334 $haveContent = false;
// Walk up from the starting node until the end of the clone range is reached.
335 while ( $node !== $cloneEnd ) {
336 $nextParent = $serializer->getParentNode( $node );
// Reaching the root means $cloneEnd was never encountered on the way up.
337 if ( $nextParent === $root ) {
338 throw new \
Exception( 'Did not find end of clone range' );
// A node with no non-blank content is queued for removal, and its parent's
// count is reduced to match.
341 if ( $node->snData
->nonblankNodeCount
=== 0 ) {
342 $removableNodes[] = $node;
343 $nextParent->snData
->nonblankNodeCount
--;
// A new p-wrapper is created under the wrap base (the guarding condition is
// not visible in this listing).
349 $pWrap = $this->insertPWrapper( $wrapBase, $pos );
353 // End the p-wrap which was open, cancel the diversion
354 $wrapBase->snData
->childPElement
= null;
// Re-create the collected nodes in reverse collection order.
360 for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
361 $oldNode = $nodes[$i];
362 $oldData = $oldNode->snData
;
// The clone copies the namespace, name and attributes of the original node.
364 $element = new Element( $oldNode->namespace, $oldNode->name
, $oldNode->attrs
);
365 $this->serializer
->insertElement( TreeBuilder
::UNDER
, $nodeParent,
366 $element, false, $pos, 0 );
// Link the original to its clone; getParentForInsert() follows this link.
367 $oldData->currentCloneElement
= $element;
369 $newNode = $element->userData
;
370 $newData = $newNode->snData
= new RemexMungerData
;
372 $newData->ancestorPNode
= $pWrap;
374 $newData->isSplittable
= true;
375 $newData->wrapBaseNode
= $wrapBase;
376 $newData->isPWrapper
= $oldData->isPWrapper
;
// The clone counts as non-blank content of the node it was inserted under.
378 $nodeParent->snData
->nonblankNodeCount++
;
// Remove the originals found to be empty, via a temporary Element that points
// at the existing serializer node.
382 foreach ( $removableNodes as $rNode ) {
383 $fakeElement = new Element( $rNode->namespace, $rNode->name
, $rNode->attrs
);
384 $fakeElement->userData
= $rNode;
385 $this->serializer
->removeNode( $fakeElement, $pos );
391 * Find the ancestor of $node which is a child of a p-wrapper, and
392 * reparent that node so that it is placed after the end of the p-wrapper
// Move the ancestor of $node that sits directly under a p-wrapper out of that
// wrapper, re-inserting it under the wrapper's parent.
// NOTE(review): the early-return body and the head of the do/while loop
// (embedded lines 400-404) are missing from this listing.
394 private function disablePWrapper( SerializerNode
$node, $sourceStart ) {
395 $nodeData = $node->snData
;
396 $pWrapNode = $nodeData->ancestorPNode
;
397 $newParent = $this->serializer
->getParentNode( $pWrapNode );
// Only proceed when the p-wrapper is the last child of its parent.
398 if ( $pWrapNode !== $this->serializer
->getLastChild( $newParent ) ) {
399 // Fostering or something? Abort!
// Climb towards the p-wrapper, clearing ancestorPNode on each node passed;
// when the loop ends, $victim is the node directly under the p-wrapper.
405 $victim = $nextParent;
406 $victim->snData
->ancestorPNode
= null;
407 $nextParent = $this->serializer
->getParentNode( $victim );
408 } while ( $nextParent !== $pWrapNode );
410 // Make a fake Element to use in a reparenting operation
411 $victimElement = new Element( $victim->namespace, $victim->name
, $victim->attrs
);
412 $victimElement->userData
= $victim;
// Re-insert the existing node under the p-wrapper's parent.
415 $this->serializer
->insertElement( TreeBuilder
::UNDER
, $newParent, $victimElement,
416 false, $sourceStart, 0 );
418 // Decrement nonblank node count
419 $pWrapNode->snData
->nonblankNodeCount
--;
421 // Cancel the diversion so that no more elements are inserted under this p-wrap
422 $newParent->snData
->childPElement
= null;
/**
 * Close an element. Any p-wrapper still open beneath it is closed first,
 * then the event is forwarded and the element's node state is released.
 *
 * @param Element $element
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function endTag( Element $element, $sourceStart, $sourceLength ) {
	$openPWrap = $element->userData->snData->childPElement;
	if ( $openPWrap ) {
		// The wrapper has no source text of its own, hence the zero length
		$this->endTag( $openPWrap, $sourceStart, 0 );
	}

	$this->serializer->endTag( $element, $sourceStart, $sourceLength );

	// Drop the munger data first, then the link to the serializer node
	$closedNode = $element->userData;
	$closedNode->snData = null;
	$element->userData = null;
}
/**
 * Forward a doctype event to the serializer unchanged.
 *
 * @param mixed $name
 * @param mixed $public
 * @param mixed $system
 * @param mixed $quirks
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
	$this->serializer->doctype(
		$name,
		$public,
		$system,
		$quirks,
		$sourceStart,
		$sourceLength
	);
}
/**
 * Forward a comment to the serializer, placed relative to the effective
 * reference node (which may be a clone or a p-wrapper).
 *
 * @param int $preposition
 * @param Element|SerializerNode|null $refElement
 * @param string $text
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
	// Only the reference node is needed here; the parent is ignored
	list( , $target ) = $this->getParentForInsert( $preposition, $refElement );
	$this->serializer->comment(
		$preposition,
		$target,
		$text,
		$sourceStart,
		$sourceLength
	);
}
/**
 * Forward a parse error to the serializer unchanged.
 *
 * @param string $text
 * @param int $pos
 */
public function error( $text, $pos ) {
	$this->serializer->error( $text, $pos );
}
/**
 * Forward an attribute merge to the serializer unchanged.
 *
 * @param Element $element
 * @param Attributes $attrs
 * @param int $sourceStart
 */
public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
	$this->serializer->mergeAttributes( $element, $attrs, $sourceStart );
}
/**
 * Forward a node removal to the serializer unchanged.
 *
 * @param Element $element
 * @param int $sourceStart
 */
public function removeNode( Element $element, $sourceStart ) {
	$this->serializer->removeNode( $element, $sourceStart );
}
/**
 * Insert $newParent under $element and move all of $element's existing
 * children so that they become children of $newParent.
 *
 * @param Element $element The element whose children are moved
 * @param Element $newParent The element that adopts those children
 * @param int $sourceStart
 */
public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
	$self = $element->userData;

	$pWrapElement = $self->snData->childPElement;
	if ( $pWrapElement ) {
		// $element has an open p-wrapper. The insertElement() call below
		// would be diverted under that p-wrapper by getParentForInsert(),
		// while the p-wrapper itself is one of the children about to be
		// handed to $newParent. Each would then be the parent of the other.
		// Reparent the contents of the p-wrapper instead, so that
		// $newParent ends up inside the p-wrapper rather than around it.
		$this->reparentChildren( $pWrapElement, $newParent, $sourceStart );
		return;
	}

	// Detach the children before inserting, so that $newParent starts out
	// as the only child of $element.
	$children = $self->children;
	$self->children = [];
	$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );

	$newParentNode = $newParent->userData;
	$newParentId = $newParentNode->id;
	foreach ( $children as $child ) {
		// Only object entries carry a parent link; other entries are moved as-is
		if ( is_object( $child ) ) {
			$child->parentId = $newParentId;
		}
	}
	$newParentNode->children = $children;
}