X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2Fparser%2FRemexStripTagHandler.php;h=41c6bf41dfb33b7e37341b032e57a0ad22f697cf;hb=69d27cc0b6d779af8d870fd0e8ccb5cd41f887ab;hp=2839147d4f5ffada99ef6eb33798275caf6843e9;hpb=15f6eff90c305d405fe4331c8a8dc8caa842e5b3;p=lhc%2Fweb%2Fwiklou.git
diff --git a/includes/parser/RemexStripTagHandler.php b/includes/parser/RemexStripTagHandler.php
index 2839147d4f..41c6bf41df 100644
--- a/includes/parser/RemexStripTagHandler.php
+++ b/includes/parser/RemexStripTagHandler.php
@@ -26,10 +26,18 @@ class RemexStripTagHandler implements TokenHandler {
$this->text .= substr( $text, $start, $length );
}
function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
- // Do nothing.
+ // Inject whitespace for typical block-level tags to
+ // prevent merging unrelated<br/>words.
+ if ( $this->isBlockLevelTag( $name ) ) {
+ $this->text .= ' ';
+ }
}
function endTag( $name, $sourceStart, $sourceLength ) {
- // Do nothing.
+ // Inject whitespace for typical block-level tags to
+ // prevent merging unrelated<br/>words.
+ if ( $this->isBlockLevelTag( $name ) ) {
+ $this->text .= ' ';
+ }
}
function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
// Do nothing.
@@ -37,4 +45,63 @@ class RemexStripTagHandler implements TokenHandler {
function comment( $text, $sourceStart, $sourceLength ) {
// Do nothing.
}
+
+ // Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
+ // retrieved on sept 12, 2018. <br> is not block level but was added anyways.
+ // The following is a complete list of all HTML block level elements
+ // (although "block-level" is not technically defined for elements that are
+ // new in HTML5).
+ // Structured as tag => true to allow O(1) membership test.
+ static private $BLOCK_LEVEL_TAGS = [
+ 'address' => true,
+ 'article' => true,
+ 'aside' => true,
+ 'blockquote' => true,
+ 'br' => true,
+ 'canvas' => true,
+ 'dd' => true,
+ 'div' => true,
+ 'dl' => true,
+ 'dt' => true,
+ 'fieldset' => true,
+ 'figcaption' => true,
+ 'figure' => true,
+ 'figcaption' => true,
+ 'footer' => true,
+ 'form' => true,
+ 'h1' => true,
+ 'h2' => true,
+ 'h3' => true,
+ 'h4' => true,
+ 'h5' => true,
+ 'h6' => true,
+ 'header' => true,
+ 'hgroup' => true,
+ 'hr' => true,
+ 'li' => true,
+ 'main' => true,
+ 'nav' => true,
+ 'noscript' => true,
+ 'ol' => true,
+ 'output' => true,
+ 'p' => true,
+ 'pre' => true,
+ 'section' => true,
+ 'table' => true,
+ 'tfoot' => true,
+ 'ul' => true,
+ 'video' => true,
+ ];
+
+ /**
+ * Detect block level tags. Of course css can make anything a block
+ * level tag, but this is still better than nothing.
+ *
+ * @param string $tagName HTML tag name
+ * @return bool True when tag is an html block level element
+ */
+ private function isBlockLevelTag( $tagName ) {
+ $key = strtolower( trim( $tagName ) );
+ return isset( self::$BLOCK_LEVEL_TAGS[$key] );
+ }
}