From: Aryeh Gregor
Date: Wed, 25 Aug 2010 17:56:03 +0000 (+0000)
Subject: Blacklist % in HTML id's
X-Git-Tag: 1.31.0-rc.0~35324
X-Git-Url: http://git.cyclocoop.org/?a=commitdiff_plain;h=839491a18c687f4619e8b243c359e0b8c5e1fdc7;p=lhc%2Fweb%2Fwiklou.git

Blacklist % in HTML id's

As pointed out by entlinkt at bug 24918, this could cause problems --
browsers don't interpret percent escapes in fragments consistently, so
any percent sign that might be part of an escape could be a problem.
For simplicity, just strip all of them.
---

diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index 122a11e733..7fdd1df5f0 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -981,7 +981,9 @@ class Sanitizer {
 *
 * To ensure we don't have to bother escaping anything, we also strip ', ",
 * & even if $wgExperimentalIds is true. TODO: Is this the best tactic?
- * We also strip # because it upsets IE6.
+ * We also strip # because it upsets IE, and % because it could be
+ * ambiguous if it's part of something that looks like a percent escape
+ * (which don't work reliably in fragments cross-browser).
 *
 * @see http://www.w3.org/TR/html401/types.html#type-name Valid characters
 * in the id and
@@ -1007,7 +1009,7 @@ class Sanitizer {
 if ( $wgHtml5 && $wgExperimentalHtmlIds && !in_array( 'legacy', $options ) ) {
 $id = Sanitizer::decodeCharReferences( $id );
- $id = preg_replace( '/[ \t\n\r\f_\'"&#]+/', '_', $id );
+ $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id );
 $id = trim( $id, '_' );
 if ( $id === '' ) {
 # Must have been all whitespace to start with.