$value = self::normalizeCss( $value );
// Reject problematic keywords and control characters
- if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
+ if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ||
+ strpos( $value, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
return '/* invalid control char */';
} elseif ( preg_match(
'! expression
}
/**
- * Returns true if a given Unicode codepoint is a valid character in XML.
+ * Returns true if a given Unicode codepoint is a valid character in
+ * both HTML5 and XML.
+ *
+ * Accepted codepoints: U+0009 (tab), U+000A (line feed),
+ * U+0020 - U+007E, U+00A0 - U+D7FF, U+E000 - U+FFFD and
+ * U+10000 - U+10FFFF.
+ *
+ * Everything else is rejected: the remaining C0 controls (including
+ * U+0000, U+000C and U+000D), U+007F - U+009F, the surrogate range
+ * U+D800 - U+DFFF, U+FFFE / U+FFFF, and any value above U+10FFFF.
+ *
* @param int $codepoint
* @return bool
*/
private static function validateCodepoint( $codepoint ) {
+ # U+000C is valid in HTML5 but not allowed in XML.
+ # U+000D is valid in XML but not allowed in HTML5.
+ # U+007F - U+009F are disallowed in HTML5 (control characters).
return $codepoint == 0x09
|| $codepoint == 0x0a
- || $codepoint == 0x0d
- || ( $codepoint >= 0x20 && $codepoint <= 0xd7ff )
+ || ( $codepoint >= 0x20 && $codepoint <= 0x7e )
+ || ( $codepoint >= 0xa0 && $codepoint <= 0xd7ff )
|| ( $codepoint >= 0xe000 && $codepoint <= 0xfffd )
|| ( $codepoint >= 0x10000 && $codepoint <= 0x10ffff );
}
!! wikitext
<div style="background-image: u\&#13;rl(test.jpg); "></div>
!! html
-<div style="/* insecure input */"></div>
+<div style="/* invalid control char */"></div>
!! end
<p><span typeof="mw:Entity">î</span><span typeof="mw:Entity">î</span></p>
!! end
+# See: http://www.w3.org/TR/html5/syntax.html#character-references
+# Note that U+000C (form feed) is not a valid XML character, so
+# it is banned even though allowed in HTML5.
+!! test
+Illegal character references (T106578)
+!! wikitext
+; Null: &#00;
+; FF: &#xC;
+; CR: &#xD;
+; Control (low): &#8;
+; Control (high): &#x7F; &#x9F;
+; Surrogate: &#xD83D;&#xDCA9;
+; This is an okay astral character: &#x1F4A9;
+!! html+tidy
+<dl>
+<dt>Null</dt>
+<dd>&amp;#00;</dd>
+<dt>FF</dt>
+<dd>&amp;#xC;</dd>
+<dt>CR</dt>
+<dd>&amp;#xD;</dd>
+<dt>Control (low)</dt>
+<dd>&amp;#8;</dd>
+<dt>Control (high)</dt>
+<dd>&amp;#x7F; &amp;#x9F;</dd>
+<dt>Surrogate</dt>
+<dd>&amp;#xD83D;&amp;#xDCA9;</dd>
+<dt>This is an okay astral character</dt>
+<dd>&#x1f4a9;</dd>
+</dl>
+!! end
+
!! test
__FORCETOC__ override
!! wikitext