From aef02d516d0fb20571120ada7278d1658d4d9222 Mon Sep 17 00:00:00 2001
From: Erik Bernhardson
Date: Thu, 14 Mar 2019 13:06:27 -0700
Subject: [PATCH] Improve RemexStripTagHandler's handling of tables

HTML generated by some infoboxes, and perhaps other places, gets
stripped in a way that merges words together that should not be
merged. Add tr, th, and td to the list of tags that should force
word separation.

Bug: T218001
Change-Id: Ib374339628b1f543ea4e07f24aa3e3b76f3117b5
---
 includes/parser/RemexStripTagHandler.php        | 3 +++
 tests/phpunit/includes/parser/SanitizerTest.php | 1 +
 2 files changed, 4 insertions(+)

diff --git a/includes/parser/RemexStripTagHandler.php b/includes/parser/RemexStripTagHandler.php
index bf4c09876d..2d75c869ec 100644
--- a/includes/parser/RemexStripTagHandler.php
+++ b/includes/parser/RemexStripTagHandler.php
@@ -87,7 +87,10 @@ class RemexStripTagHandler implements TokenHandler {
 		'pre' => true,
 		'section' => true,
 		'table' => true,
+		'td' => true,
 		'tfoot' => true,
+		'th' => true,
+		'tr' => true,
 		'ul' => true,
 		'video' => true,
 	];
diff --git a/tests/phpunit/includes/parser/SanitizerTest.php b/tests/phpunit/includes/parser/SanitizerTest.php
index ad8aa1e7eb..1f6f4e873b 100644
--- a/tests/phpunit/includes/parser/SanitizerTest.php
+++ b/tests/phpunit/includes/parser/SanitizerTest.php
@@ -527,6 +527,7 @@ class SanitizerTest extends MediaWikiTestCase {
 			],
 			[ '123', '123' ],
 			[ '123', '123' ],
+			[ '12', '1 2' ],
 		];
 	}
 
-- 
2.20.1