3 namespace MediaWiki\Tidy
;
5 use RemexHtml\HTMLData
;
6 use RemexHtml\Serializer\Serializer
;
7 use RemexHtml\Serializer\SerializerNode
;
8 use RemexHtml\Tokenizer\Attributes
;
9 use RemexHtml\Tokenizer\PlainAttributes
;
10 use RemexHtml\TreeBuilder\TreeBuilder
;
11 use RemexHtml\TreeBuilder\TreeHandler
;
12 use RemexHtml\TreeBuilder\Element
;
17 class RemexCompatMunger
implements TreeHandler
{
18 private static $onlyInlineElements = [
66 // Those defined in tidy.conf
76 * For the purposes of this class, "metadata" elements are those that
77 * should neither trigger p-wrapping nor stop an outer p-wrapping,
78 * typically those that are themselves invisible in a browser's rendering.
79 * This isn't a complete list, it's just the tags that we're likely to
80 * encounter in practice.
83 private static $metadataElements = [
90 private static $formattingElements = [
107 /** @var Serializer */
/**
 * Remember the downstream serializer and the tracing flag.
 *
 * @param Serializer $serializer Serializer that the handler methods of this
 *   class forward their events to
 * @param bool $trace When truthy, trace() writes its messages to the debug log
 */
public function __construct( Serializer $serializer, $trace = false ) {
	$this->trace = $trace;
	$this->serializer = $serializer;
}
/**
 * Start the document downstream, then give the root node fresh munger data
 * flagged as needing p-wrapping.
 *
 * @param mixed $fragmentNamespace Passed through to the serializer unchanged
 * @param mixed $fragmentName Passed through to the serializer unchanged
 */
public function startDocument( $fragmentNamespace, $fragmentName ) {
	$this->serializer->startDocument( $fragmentNamespace, $fragmentName );

	$rootNode = $this->serializer->getRootNode();
	$rootData = new RemexMungerData;
	$rootNode->snData = $rootData;
	// Bare inline content directly under the root must be wrapped
	$rootData->needsPWrapping = true;
}
/**
 * Forward the end-of-document event to the serializer unchanged.
 *
 * @param int $pos Source position, passed through
 */
public function endDocument( $pos ) {
	$this->serializer->endDocument( $pos );
}
/**
 * Translate a (preposition, reference element) pair from the tree builder
 * into the serializer nodes an insertion should actually use, following any
 * diversion recorded in the reference node's munger data.
 *
 * @param int $preposition TreeBuilder::ROOT, TreeBuilder::BEFORE or another
 *   TreeBuilder preposition
 * @param Element|null $refElement Reference element; not read for ROOT
 * @return array Two-element list: [ parent node, reference node ]. The
 *   reference node is null for ROOT.
 */
private function getParentForInsert( $preposition, $refElement ) {
	if ( $preposition === TreeBuilder::ROOT ) {
		return [ $this->serializer->getRootNode(), null ];
	}

	$refNode = $refElement->userData;
	if ( $preposition === TreeBuilder::BEFORE ) {
		return [ $this->serializer->getParentNode( $refNode ), $refNode ];
	}

	$refData = $refNode->snData;
	if ( $refData->currentCloneElement ) {
		// The element was cloned: walk the clone links to the newest clone
		$requestedData = $refData;
		do {
			$refElement = $refData->currentCloneElement;
			$refNode = $refElement->userData;
			$refData = $refNode->snData;
		} while ( $refData->currentCloneElement );
		// Short-circuit the chain so that the next lookup is a single hop
		$requestedData->currentCloneElement = $refElement;
	} elseif ( $refData->childPElement ) {
		// A p-wrapper is registered under the element: divert into it
		$refNode = $refData->childPElement->userData;
	}

	return [ $refNode, $refNode ];
}
/**
 * Insert an 'mw:p-wrap' element under $parent and register it as the
 * parent's current p-wrapper, so that later insertions under the parent are
 * diverted into it (see getParentForInsert()).
 *
 * @param SerializerNode $parent Node to insert under; recorded as the
 *   wrapper's wrap base
 * @param int $sourceStart Source position reported to the serializer
 * @return SerializerNode The serializer node of the new p-wrapper
 */
private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
	$pWrap = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
	// NOTE(review): the trailing arguments of this call were cut off in the
	// reviewed copy. They are completed the same way as the other synthetic
	// insertions in this class (source position, zero source length).
	// Confirm against the upstream file.
	$this->serializer->insertElement( TreeBuilder::UNDER, $parent, $pWrap, false,
		$sourceStart, 0 );

	$data = new RemexMungerData;
	$data->isPWrapper = true;
	$data->wrapBaseNode = $parent;
	// insertElement() above has set $pWrap->userData to the serializer node
	$pWrap->userData->snData = $data;
	$parent->snData->childPElement = $pWrap;
	return $pWrap->userData;
}
/**
 * Forward a run of text to the serializer, first opening a p-wrapper or
 * splitting the tag stack where the insertion point calls for it.
 *
 * The run is "blank" when the $length bytes of $text starting at $start
 * consist only of tab, LF, FF, CR and space.
 *
 * @param int $preposition TreeBuilder preposition
 * @param Element|null $refElement Reference element
 * @param string $text Buffer containing the text
 * @param int $start Offset of the run within $text
 * @param int $length Length of the run
 * @param int $sourceStart Source position, passed through
 * @param int $sourceLength Source length, passed through
 */
public function characters( $preposition, $refElement, $text, $start, $length,
	$sourceStart, $sourceLength
) {
	$isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;

	list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
	$parentData = $parent->snData;

	if ( $preposition === TreeBuilder::UNDER ) {
		if ( $parentData->needsPWrapping && !$isBlank ) {
			// Add a p-wrapper for bare text under body/blockquote
			$refNode = $this->insertPWrapper( $refNode, $sourceStart );
			// The text now lives under the wrapper, so bookkeeping below
			// must target the wrapper rather than the old parent
			$parent = $refNode;
			$parentData = $parent->snData;
		} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
			// The parent is splittable and in block mode, so split the tag stack
			$refNode = $this->splitTagStack( $refNode, true, $sourceStart );
			// As above: account against the node the text is inserted under
			$parent = $refNode;
			$parentData = $parent->snData;
		}
	}

	if ( !$isBlank ) {
		// Non-whitespace characters detected
		$parentData->nonblankNodeCount++;
	}
	$this->serializer->characters( $preposition, $refNode, $text, $start,
		$length, $sourceStart, $sourceLength );
}
/**
 * Write a message to the debug log, prefixed with "[RCM] ", when tracing
 * was enabled in the constructor. Does nothing otherwise.
 *
 * @param string $msg
 */
private function trace( $msg ) {
	if ( !$this->trace ) {
		return;
	}
	wfDebug( "[RCM] $msg" );
}
/**
 * Insert or reparent an element. Create p-wrappers or split the tag stack
 * as necessary.
 *
 * Consider the following insertion locations. The parent may be:
 *
 *   - A: A body or blockquote (!!needsPWrapping)
 *   - B: A p-wrapper (!!isPWrapper)
 *   - C: A descendant of a p-wrapper (!!ancestorPNode)
 *     - CS: With splittable formatting elements in the stack region up to
 *       the p-wrap
 *     - CU: With one or more unsplittable elements in the stack region up
 *       to the p-wrap
 *   - D: Not a descendant of a p-wrapper (!ancestorPNode)
 *     - DS: With splittable formatting elements in the stack region up to
 *       the body or blockquote
 *     - DU: With one or more unsplittable elements in the stack region up
 *       to the body or blockquote
 *
 * And consider that we may insert two types of element:
 *
 *   - i: inline, i.e. the tag name is listed in self::$onlyInlineElements
 *   - b: block, i.e. any other tag name
 *
 * Elements listed in self::$metadataElements are inserted as normal in
 * every context. Otherwise we handle the insertion as follows:
 *
 *   - A/i: Create a p-wrapper, insert under it
 *   - A/b: Insert as normal
 *   - B/i: Insert as normal
 *   - B/b: Close the p-wrapper, insert under the body/blockquote (wrap
 *     base)
 *   - C/i: Insert as normal
 *   - CS/b: Split the tag stack, insert the block under cloned formatting
 *     elements which have the wrap base (the parent of the p-wrap) as
 *     their ultimate parent.
 *   - CU/b: Disable the p-wrap, by reparenting the currently open child
 *     of the p-wrap under the p-wrap's parent. Then insert the block as
 *     normal.
 *   - D/b: Insert as normal
 *   - DS/i: Split the tag stack, creating a new p-wrapper as the ultimate
 *     parent of the formatting elements thus cloned. The parent of the
 *     p-wrapper is the body or blockquote.
 *   - DU/i: Insert as normal
 *
 * FIXME: fostering ($preposition == BEFORE) is mostly done by inserting as
 * normal, the full algorithm is not followed.
 *
 * @param int $preposition
 * @param Element|SerializerNode|null $refElement
 * @param Element $element
 * @param bool $void Passed through to the serializer
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function insertElement( $preposition, $refElement, Element $element, $void,
	$sourceStart, $sourceLength
) {
	list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
	$parentData = $parent->snData;
	$elementName = $element->htmlName;

	$inline = isset( self::$onlyInlineElements[$elementName] );
	$under = $preposition === TreeBuilder::UNDER;

	if ( isset( self::$metadataElements[$elementName] ) ) {
		// The element is a metadata element, that we allow to appear in
		// both inline and block contexts.
		$this->trace( 'insert metadata' );
	} elseif ( $under && $parentData->isPWrapper && !$inline ) {
		// [B/b] The element is non-inline and the parent is a p-wrapper,
		// close the parent and insert into its parent instead
		$this->trace( 'insert B/b' );
		$parent = $this->serializer->getParentNode( $parent );
		$parentData = $parent->snData;
		// Cancel the diversion into the p-wrapper
		$parentData->childPElement = null;
		// Use the node of the element that was actually requested
		$newRef = $refElement->userData;
	} elseif ( $under && $parentData->isSplittable
		&& (bool)$parentData->ancestorPNode !== $inline
	) {
		// [CS/b, DS/i] The parent is splittable and the current element is
		// inline in block context, or if the current element is a block
		// under a p-wrapper, split the tag stack.
		$this->trace( $inline ? 'insert DS/i' : 'insert CS/b' );
		$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
		// Everything below must refer to the node we now insert under
		$parent = $newRef;
		$parentData = $parent->snData;
	} elseif ( $under && $parentData->needsPWrapping && $inline ) {
		// [A/i] If the element is inline and we are in body/blockquote,
		// we need to create a p-wrapper
		$this->trace( 'insert A/i' );
		$newRef = $this->insertPWrapper( $newRef, $sourceStart );
		// Everything below must refer to the new p-wrapper
		$parent = $newRef;
		$parentData = $parent->snData;
	} elseif ( $parentData->ancestorPNode && !$inline ) {
		// [CU/b] If the element is non-inline and (despite attempting to
		// split above) there is still an ancestor p-wrap, disable that
		// p-wrap
		$this->trace( 'insert CU/b' );
		$this->disablePWrapper( $parent, $sourceStart );
	} else {
		// [A/b, B/i, C/i, D/b, DU/i] insert as normal
		$this->trace( 'insert normal' );
	}

	// An element with element children is a non-blank element
	$parentData->nonblankNodeCount++;

	// Insert the element downstream and so initialise its userData
	$this->serializer->insertElement( $preposition, $newRef,
		$element, $void, $sourceStart, $sourceLength );

	// Initialise snData, keeping any data that is already attached
	if ( !$element->userData->snData ) {
		$elementData = $element->userData->snData = new RemexMungerData;
	} else {
		$elementData = $element->userData->snData;
	}

	// A formatting element is splittable only directly under a p-wrapper or
	// under another splittable element
	if ( ( $parentData->isPWrapper || $parentData->isSplittable )
		&& isset( self::$formattingElements[$elementName] )
	) {
		$elementData->isSplittable = true;
	}

	// Propagate the enclosing p-wrapper, if any
	if ( $parentData->isPWrapper ) {
		$elementData->ancestorPNode = $parent;
	} elseif ( $parentData->ancestorPNode ) {
		$elementData->ancestorPNode = $parentData->ancestorPNode;
	}

	// Propagate the wrap base; a node that needs p-wrapping is itself the base
	if ( $parentData->wrapBaseNode ) {
		$elementData->wrapBaseNode = $parentData->wrapBaseNode;
	} elseif ( $parentData->needsPWrapping ) {
		$elementData->wrapBaseNode = $parent;
	}

	if ( $elementName === 'body'
		|| $elementName === 'blockquote'
		|| $elementName === 'html'
	) {
		$elementData->needsPWrapping = true;
	}
}
// splitTagStack(): visits nodes from $node upwards until $cloneEnd, re-creates
// the nodes held in $nodes as fresh elements, and removes the originals that
// have no non-blank content.
// NOTE(review): this copy of the method is incomplete. $node, $nodes and
// $nodeParent are used but never assigned in the lines shown, $inline is
// never read, the two $cloneEnd assignments appear back to back with no
// visible condition, and no return statement is visible although the doc
// comment promises a SerializerNode. The embedded line numbers jump at each
// of those places (368/370/372, 375/377/379, 383/386, 388/394, 399/405,
// 407/409, 415/417/419, 421/423, 430), so statements have been dropped.
// Restore the method from the upstream file before relying on it.
358 * Clone nodes in a stack range and return the new parent
360 * @param SerializerNode $parentNode
361 * @param bool $inline
362 * @param int $pos The source position
363 * @return SerializerNode
365 private function splitTagStack( SerializerNode
$parentNode, $inline, $pos ) {
366 $parentData = $parentNode->snData
;
367 $wrapBase = $parentData->wrapBaseNode
;
368 $pWrap = $parentData->ancestorPNode
;
// NOTE(review): the condition choosing between the next two assignments is
// not visible; presumably one applies per value of $inline. Confirm.
370 $cloneEnd = $wrapBase;
372 $cloneEnd = $parentData->ancestorPNode
;
375 $serializer = $this->serializer
;
377 $root = $serializer->getRootNode();
379 $removableNodes = [];
// Climb towards $cloneEnd; reaching the root first means the range end was
// never found, which is reported as an exception.
380 while ( $node !== $cloneEnd ) {
381 $nextParent = $serializer->getParentNode( $node );
382 if ( $nextParent === $root ) {
383 throw new \
Exception( 'Did not find end of clone range' );
// A node without non-blank content is queued for removal, and it stops
// counting as non-blank content of its parent.
386 if ( $node->snData
->nonblankNodeCount
=== 0 ) {
387 $removableNodes[] = $node;
388 $nextParent->snData
->nonblankNodeCount
--;
// NOTE(review): the branch structure around the next two statements is not
// visible (new p-wrapper under the wrap base vs. cancelling the diversion).
394 $pWrap = $this->insertPWrapper( $wrapBase, $pos );
398 // End the p-wrap which was open, cancel the diversion
399 $wrapBase->snData
->childPElement
= null;
// Walk $nodes from last to first. Each entry is re-created as a new Element
// with the same namespace, name and attrs, inserted under $nodeParent, and
// recorded on the old node as its current clone.
405 for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
406 $oldNode = $nodes[$i];
407 $oldData = $oldNode->snData
;
409 $element = new Element( $oldNode->namespace, $oldNode->name
, $oldNode->attrs
);
410 $this->serializer
->insertElement( TreeBuilder
::UNDER
, $nodeParent,
411 $element, false, $pos, 0 );
412 $oldData->currentCloneElement
= $element;
// The clone gets fresh munger data: splittable, same wrap base.
414 $newNode = $element->userData
;
415 $newData = $newNode->snData
= new RemexMungerData
;
417 $newData->ancestorPNode
= $pWrap;
419 $newData->isSplittable
= true;
420 $newData->wrapBaseNode
= $wrapBase;
421 $newData->isPWrapper
= $oldData->isPWrapper
;
// The clone counts as non-blank content of the node it was inserted under.
423 $nodeParent->snData
->nonblankNodeCount++
;
// removeNode() takes an Element, so each queued node is wrapped in a
// throwaway Element whose userData points at that node.
427 foreach ( $removableNodes as $rNode ) {
428 $fakeElement = new Element( $rNode->namespace, $rNode->name
, $rNode->attrs
);
429 $fakeElement->userData
= $rNode;
430 $this->serializer
->removeNode( $fakeElement, $pos );
/**
 * Find the ancestor of $node which is a child of a p-wrapper, and
 * reparent that node so that it is placed after the end of the p-wrapper.
 *
 * Does nothing when the p-wrapper is not the last child of its parent.
 *
 * @param SerializerNode $node A node whose snData->ancestorPNode is set
 * @param int $sourceStart Source position reported to the serializer
 */
private function disablePWrapper( SerializerNode $node, $sourceStart ) {
	$nodeData = $node->snData;
	$pWrapNode = $nodeData->ancestorPNode;
	$newParent = $this->serializer->getParentNode( $pWrapNode );
	if ( $pWrapNode !== $this->serializer->getLastChild( $newParent ) ) {
		// Fostering or something? Abort!
		return;
	}

	// Climb from $node to the direct child of the p-wrapper, detaching every
	// node on the way from the wrapper.
	// NOTE(review): the loop's starting point was missing from the reviewed
	// copy; starting at $node follows from the method description. Confirm.
	$nextParent = $node;
	do {
		$victim = $nextParent;
		$victim->snData->ancestorPNode = null;
		$nextParent = $this->serializer->getParentNode( $victim );
	} while ( $nextParent !== $pWrapNode );

	// Make a fake Element to use in a reparenting operation
	$victimElement = new Element( $victim->namespace, $victim->name, $victim->attrs );
	$victimElement->userData = $victim;

	// Reparent: inserting an element that already has a node moves that node
	$this->serializer->insertElement( TreeBuilder::UNDER, $newParent, $victimElement,
		false, $sourceStart, 0 );

	// Decrement nonblank node count
	$pWrapNode->snData->nonblankNodeCount--;

	// Cancel the diversion so that no more elements are inserted under this p-wrap
	$newParent->snData->childPElement = null;
}
/**
 * Close an element. A p-wrapper still registered under the element is
 * closed first, with a source length of zero. Afterwards the element's
 * munger data and its link to the serializer node are cleared.
 *
 * @param Element $element
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function endTag( Element $element, $sourceStart, $sourceLength ) {
	$openPWrap = $element->userData->snData->childPElement;
	if ( $openPWrap ) {
		$this->endTag( $openPWrap, $sourceStart, 0 );
	}

	$this->serializer->endTag( $element, $sourceStart, $sourceLength );

	// Drop the references in both directions
	$element->userData->snData = null;
	$element->userData = null;
}
/**
 * Forward a doctype to the serializer unchanged.
 *
 * @param mixed $name
 * @param mixed $public
 * @param mixed $system
 * @param mixed $quirks
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
	$this->serializer->doctype(
		$name, $public, $system, $quirks, $sourceStart, $sourceLength
	);
}
/**
 * Forward a comment to the serializer, using the reference node that
 * getParentForInsert() resolves for the requested position.
 *
 * @param int $preposition
 * @param Element|null $refElement
 * @param string $text
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
	$target = $this->getParentForInsert( $preposition, $refElement );
	// Only the reference node (second entry) is needed here
	$this->serializer->comment( $preposition, $target[1], $text,
		$sourceStart, $sourceLength );
}
/**
 * Forward a parse error to the serializer unchanged.
 *
 * @param string $text
 * @param int $pos
 */
public function error( $text, $pos ) {
	$this->serializer->error( $text, $pos );
}
/**
 * Forward an attribute merge to the serializer unchanged.
 *
 * @param Element $element
 * @param Attributes $attrs
 * @param int $sourceStart
 */
public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
	$this->serializer->mergeAttributes( $element, $attrs, $sourceStart );
}
/**
 * Forward a node removal to the serializer unchanged.
 *
 * @param Element $element
 * @param int $sourceStart
 */
public function removeNode( Element $element, $sourceStart ) {
	$this->serializer->removeNode( $element, $sourceStart );
}
/**
 * Insert $newParent under $element and move all existing children of
 * $element under $newParent.
 *
 * When a p-wrapper is registered under $element, the operation is applied
 * to the p-wrapper instead and nothing else is done for $element itself.
 *
 * @param Element $element Element whose children are moved
 * @param Element $newParent Element that becomes their parent
 * @param int $sourceStart Source position reported downstream
 */
public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
	$self = $element->userData;
	if ( $self->snData->childPElement ) {
		// Reparent under the p-wrapper instead, so that e.g.
		//   <blockquote><mw:p-wrap>...</mw:p-wrap></blockquote>
		// becomes
		//   <blockquote><mw:p-wrap><i>...</i></mw:p-wrap></blockquote>
		//
		// The formatting element should not be the parent of the p-wrap.
		// Without this special case, the insertElement() of the <i> below
		// would be diverted into the p-wrapper, causing infinite recursion
		$this->reparentChildren( $self->snData->childPElement, $newParent, $sourceStart );
		// The p-wrapper has taken over; do not reparent $element's own children
		return;
	}

	// Detach the children before inserting the new parent, then hand them over
	$children = $self->children;
	$self->children = [];
	$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
	$newParentNode = $newParent->userData;
	$newParentId = $newParentNode->id;
	foreach ( $children as $child ) {
		// Only object entries carry a parent link to update
		if ( is_object( $child ) ) {
			$this->trace( "reparent <{$child->name}>" );
			$child->parentId = $newParentId;
		}
	}
	$newParentNode->children = $children;
}