From 89ad2617e3c0bb0587c69cf15ab7def2807492da Mon Sep 17 00:00:00 2001 From: umherirrender Date: Tue, 1 Jul 2014 22:58:41 +0200 Subject: [PATCH] Sanitizer::escapeId: Decode entity before replacing spaces Having a space character reference (e.g. &#32;) inside a header should not lead to ids with a plus. This was correct when using the experimental ids, because there the decode was done first and then spaces were replaced by underscores. The non-experimental way replaced spaces with underscores and then decoded the character reference, which results in a space that is URL-encoded to +. Also added a parser test for headers with space, plus and underscore as entity. Change-Id: I455e38c7a9777a42a5cef2dc80bebb3c19ac4700 --- includes/Sanitizer.php | 5 +-- tests/parser/parserTests.txt | 69 ++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 2cdbe15f2c..ce70047ee5 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -1097,8 +1097,9 @@ class Sanitizer { global $wgExperimentalHtmlIds; $options = (array)$options; + $id = Sanitizer::decodeCharReferences( $id ); + if ( $wgExperimentalHtmlIds && !in_array( 'legacy', $options ) ) { - $id = Sanitizer::decodeCharReferences( $id ); $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id ); $id = trim( $id, '_' ); if ( $id === '' ) { @@ -1115,7 +1116,7 @@ class Sanitizer { '%' => '.' ); - $id = urlencode( Sanitizer::decodeCharReferences( strtr( $id, ' ', '_' ) ) ); + $id = urlencode( strtr( $id, ' ', '_' ) ); $id = str_replace( array_keys( $replace ), array_values( $replace ), $id ); if ( !preg_match( '/^[a-zA-Z]/', $id ) diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index e164b12f1a..c6566d1bcc 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -12938,6 +12938,75 @@ section 5

!! end +!! test +Header with space, plus and underscore as entity +!! wikitext +Id should not contain + for spaces + +== Space between Text == +section 1 + +== Space-Entity between Text == +section 2 + +== Plus+between+Text == +section 3 + +== Plus-Entity+between+Text == +section 4 + +== Underscore_between_Text == +section 5 + +== Underscore-Entity_between_Text == +section 6 + +[[#Space between Text]] +[[#Space-Entity between Text]] +[[#Plus+between+Text]] +[[#Plus-Entity+between+Text]] +[[#Underscore_between_Text]] +[[#Underscore-Entity_between_Text]] +!! html +

Id should not contain + for spaces +

+

Contents

+ +
+ +

Space between Text[edit]

+

section 1 +

+

Space-Entity between Text[edit]

+

section 2 +

+

Plus+between+Text[edit]

+

section 3 +

+

Plus-Entity+between+Text[edit]

+

section 4 +

+

Underscore_between_Text[edit]

+

section 5 +

+

Underscore-Entity_between_Text[edit]

+

section 6 +

#Space between Text +#Space-Entity between Text +#Plus+between+Text +#Plus-Entity+between+Text +#Underscore_between_Text +#Underscore-Entity_between_Text +

+!! end + !! test Headers with excess '=' characters (Are similar tests necessary beyond the 1st level?) -- 2.20.1