From f8b7cc890d9fa6fbb6c9673391f37e81abde274e Mon Sep 17 00:00:00 2001
From: "C. Scott Ananian" tags (bug 40670), and <s> *is* actually a valid
synonym for <strike>.)
Fix the sanitizer.
Bug: 17663
Change-Id: Iceec404f46703065bf080dd2cbfed1f88c204fa5
---
includes/Sanitizer.php | 2 +-
tests/parser/parserTests.txt | 43 ++++++++++++++++++++++++++++++++----
2 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index f3a5281845..1432a8b02d 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -448,7 +448,7 @@ class Sanitizer {
# $params: String between element name and >
# $brace: Ending '>' or '/>'
# $rest: Everything until the next element of $bits
- if ( preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) {
+ if ( preg_match( '!^(/?)([^\\s/>]+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) {
list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
} else {
$slash = $t = $params = $brace = $rest = null;
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index f4a85bc325..cdd7eed68b 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -874,6 +874,43 @@ Non-html5 tags should be accepted
strike
+
s
+
<bâ> doesn't work! </b> +
<bä> doesn't work! </b> +
<boo> works fine </b> +
<s.foo>foo</s> +
<s.foo>s.foo</s.foo> +
<sub-ID#1> +
+!! end
+
###
### Special characters
###
@@ -16129,12 +16166,10 @@ a>b
!! end

-# This fails in the PHP parser (see bug 40670,
-# https://bugzilla.wikimedia.org/show_bug.cgi?id=40670), so disabled for it.
+# This was a bug in the PHP parser (see bug 17663 and its dups,
+# https://bugzilla.wikimedia.org/show_bug.cgi?id=17663)
!! test
Tag names followed by punctuation should not be recognized as tags
-!! options
-parsoid
!! input