dépôts
/
lhc
/
web
/
wiklou.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Move ResultWrapper subclasses to Rdbms
[lhc/web/wiklou.git]
/
includes
/
Sanitizer.php
diff --git
a/includes/Sanitizer.php
b/includes/Sanitizer.php
index
44e4e3e
..
5f6abee
100644
(file)
--- a/
includes/Sanitizer.php
+++ b/
includes/Sanitizer.php
@@
-344,12
+344,12
@@
class Sanitizer {
$space = '[\x09\x0a\x0c\x0d\x20]';
self::$attribsRegex =
"/(?:^|$space)({$attribFirst}{$attrib}*)
$space = '[\x09\x0a\x0c\x0d\x20]';
self::$attribsRegex =
"/(?:^|$space)({$attribFirst}{$attrib}*)
- ($space*=$space*
+
($space*=$space*
(?:
(?:
- # The attribute value: quoted or alone
- \"([^\"]*)(?:\"|\$)
- | '([^']*)(?:'|\$)
-
|
(((?!$space|>).)*)
+
# The attribute value: quoted or alone
+
\"([^\"]*)(?:\"|\$)
+
| '([^']*)(?:'|\$)
+
|
(((?!$space|>).)*)
)
)?(?=$space|\$)/sx";
}
)
)?(?=$space|\$)/sx";
}
@@
-545,7
+545,7
@@
class Sanitizer {
$badtag = true;
} elseif ( in_array( $t, $tagstack ) && !isset( $htmlnest[$t] ) ) {
$badtag = true;
$badtag = true;
} elseif ( in_array( $t, $tagstack ) && !isset( $htmlnest[$t] ) ) {
$badtag = true;
- # Is it a self closed htmlpair ? (bug 5487)
+ # Is it a self closed htmlpair ? (T7487)
} elseif ( $brace == '/>' && isset( $htmlpairs[$t] ) ) {
// Eventually we'll just remove the self-closing
// slash, in order to be consistent with HTML5
} elseif ( $brace == '/>' && isset( $htmlpairs[$t] ) ) {
// Eventually we'll just remove the self-closing
// slash, in order to be consistent with HTML5
@@
-922,7
+922,7
@@
class Sanitizer {
// Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat as ascii
$value = preg_replace_callback(
// Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat as ascii
$value = preg_replace_callback(
- '/[！-［］-ｚ]/u', // U+FF01 to U+FF5A, excluding U+FF3C (bug 58088)
+ '/[！-［］-ｚ]/u', // U+FF01 to U+FF5A, excluding U+FF3C (T60088)
function ( $matches ) {
$cp = UtfNormal\Utils::utf8ToCodepoint( $matches[0] );
if ( $cp === false ) {
function ( $matches ) {
$cp = UtfNormal\Utils::utf8ToCodepoint( $matches[0] );
if ( $cp === false ) {
@@
-1119,6
+1119,7
@@
class Sanitizer {
'>' => '&gt;', // we've received invalid input
'"' => '&quot;', // which should have been escaped.
'{' => '&#123;',
'>' => '&gt;', // we've received invalid input
'"' => '&quot;', // which should have been escaped.
'{' => '&#123;',
+ '}' => '&#125;', // prevent unpaired language conversion syntax
'[' => '&#91;',
"''" => '&#39;&#39;',
'ISBN' => '&#73;SBN',
'[' => '&#91;',
"''" => '&#39;&#39;',
'ISBN' => '&#73;SBN',
@@
-1262,8
+1263,9
@@
class Sanitizer {
static function escapeHtmlAllowEntities( $html ) {
$html = Sanitizer::decodeCharReferences( $html );
# It seems wise to escape ' as well as ", as a matter of course. Can't
static function escapeHtmlAllowEntities( $html ) {
$html = Sanitizer::decodeCharReferences( $html );
# It seems wise to escape ' as well as ", as a matter of course. Can't
- # hurt.
- $html = htmlspecialchars( $html, ENT_QUOTES );
+ # hurt. Use ENT_SUBSTITUTE so that incorrectly truncated multibyte characters
+ # don't cause the entire string to disappear.
+ $html = htmlspecialchars( $html, ENT_QUOTES | ENT_SUBSTITUTE );
return $html;
}
return $html;
}
@@
-1506,7
+1508,7
@@
class Sanitizer {
/**
* Decode any character references, numeric or named entities,
/**
* Decode any character references, numeric or named entities,
- * in the next and normalize the resulting string. (bug 14952)
+ * in the next and normalize the resulting string. (T16952)
*
* This is useful for page titles, not for text to be displayed,
* MediaWiki allows HTML entities to escape normalization as a feature.
*
* This is useful for page titles, not for text to be displayed,
* MediaWiki allows HTML entities to escape normalization as a feature.
@@
-1924,7
+1926,7
@@
class Sanitizer {
* 3.5.
*
* This function is an implementation of the specification as requested in
* 3.5.
*
* This function is an implementation of the specification as requested in
- * bug 22449.
+ * T24449.
*
* Client-side forms will use the same standard validation rules via JS or
* HTML 5 validation; additional restrictions can be enforced server-side
*
* Client-side forms will use the same standard validation rules via JS or
* HTML 5 validation; additional restrictions can be enforced server-side
@@
-1947,7
+1949,7
@@
class Sanitizer {
// Please note strings below are enclosed in brackets [], this make the
// hyphen "-" a range indicator. Hence it is double backslashed below.
// Please note strings below are enclosed in brackets [], this make the
// hyphen "-" a range indicator. Hence it is double backslashed below.
- // See bug 26948
+ // See T28948
$rfc5322_atext = "a-z0-9!#$%&'*+\\-\/=?^_`{|}~";
$rfc1034_ldh_str = "a-z0-9\\-";
$rfc5322_atext = "a-z0-9!#$%&'*+\\-\/=?^_`{|}~";
$rfc1034_ldh_str = "a-z0-9\\-";