X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2Fparser%2FRemexStripTagHandler.php;h=41c6bf41dfb33b7e37341b032e57a0ad22f697cf;hb=69d27cc0b6d779af8d870fd0e8ccb5cd41f887ab;hp=2839147d4f5ffada99ef6eb33798275caf6843e9;hpb=15f6eff90c305d405fe4331c8a8dc8caa842e5b3;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/parser/RemexStripTagHandler.php b/includes/parser/RemexStripTagHandler.php index 2839147d4f..41c6bf41df 100644 --- a/includes/parser/RemexStripTagHandler.php +++ b/includes/parser/RemexStripTagHandler.php @@ -26,10 +26,18 @@ class RemexStripTagHandler implements TokenHandler { $this->text .= substr( $text, $start, $length ); } function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) { - // Do nothing. + // Inject whitespace for typical block-level tags to + // prevent merging unrelated
<br>words. + if ( $this->isBlockLevelTag( $name ) ) { + $this->text .= ' '; + } } function endTag( $name, $sourceStart, $sourceLength ) { - // Do nothing. + // Inject whitespace for typical block-level tags to + // prevent merging unrelated
<br>words. + if ( $this->isBlockLevelTag( $name ) ) { + $this->text .= ' '; + } } function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) { // Do nothing. @@ -37,4 +45,63 @@ class RemexStripTagHandler implements TokenHandler { function comment( $text, $sourceStart, $sourceLength ) { // Do nothing. } + + // Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements + // retrieved on sept 12, 2018.
<br> is not block level but was added anyways. + // The following is a complete list of all HTML block level elements + // (although "block-level" is not technically defined for elements that are + // new in HTML5). + // Structured as tag => true to allow O(1) membership test. + static private $BLOCK_LEVEL_TAGS = [ + 'address' => true, + 'article' => true, + 'aside' => true, + 'blockquote' => true, + 'br' => true, + 'canvas' => true, + 'dd' => true, + 'div' => true, + 'dl' => true, + 'dt' => true, + 'fieldset' => true, + 'figcaption' => true, + 'figure' => true, + 'figcaption' => true, + 'footer' => true, + 'form' => true, + 'h1' => true, + 'h2' => true, + 'h3' => true, + 'h4' => true, + 'h5' => true, + 'h6' => true, + 'header' => true, + 'hgroup' => true, + 'hr' => true, + 'li' => true, + 'main' => true, + 'nav' => true, + 'noscript' => true, + 'ol' => true, + 'output' => true, + 'p' => true, + 'pre' => true, + 'section' => true, + 'table' => true, + 'tfoot' => true, + 'ul' => true, + 'video' => true, + ]; + + /** + * Detect block level tags. Of course css can make anything a block + * level tag, but this is still better than nothing. + * + * @param string $tagName HTML tag name + * @return bool True when tag is an html block level element + */ + private function isBlockLevelTag( $tagName ) { + $key = strtolower( trim( $tagName ) ); + return isset( self::$BLOCK_LEVEL_TAGS[$key] ); + } }