From: Tim Starling Date: Sun, 29 Apr 2007 08:28:34 +0000 (+0000) Subject: Added character entity aliases &רלמ; and &رلم; as per http://lists.wikimedia.org... X-Git-Tag: 1.31.0-rc.0~53183 X-Git-Url: http://git.cyclocoop.org//%22%22.str_replace%28%27%22%27%2C?a=commitdiff_plain;h=e42ff9ee7908a968d4a53c1587938a2de2e8e544;p=lhc%2Fweb%2Fwiklou.git Added character entity aliases &רלמ; and &رلم; as per lists.wikimedia.org/pipermail/wikitech-l/2007-April/031064.html --- diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 572dafddec..fa5416dc26 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -28,7 +28,7 @@ * Sanitizer::normalizeCharReferences and Sanitizer::decodeCharReferences */ define( 'MW_CHAR_REFS_REGEX', - '/&([A-Za-z0-9]+); + '/&([A-Za-z0-9\x80-\xff]+); |&\#([0-9]+); |&\#x([0-9A-Za-z]+); |&\#X([0-9A-Za-z]+); @@ -315,6 +315,16 @@ $wgHtmlEntities = array( 'zwj' => 8205, 'zwnj' => 8204 ); +/** + * Character entity aliases accepted by MediaWiki + */ +global $wgHtmlEntityAliases; +$wgHtmlEntityAliases = array( + 'רלמ' => 'rlm', + 'رلم' => 'rlm', +); + + /** * XHTML sanitizer for MediaWiki * @addtogroup Parser @@ -902,16 +912,19 @@ class Sanitizer { /** * If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD, - * return the named entity reference as is. Otherwise, returns - * HTML-escaped text of pseudo-entity source (eg &foo;) + * return the named entity reference as is. If the entity is a + * MediaWiki-specific alias, returns the HTML equivalent. 
Otherwise, + * returns HTML-escaped text of pseudo-entity source (eg &amp;foo;) * * @param string $name * @return string * @static */ static function normalizeEntity( $name ) { - global $wgHtmlEntities; - if( isset( $wgHtmlEntities[$name] ) ) { + global $wgHtmlEntities, $wgHtmlEntityAliases; + if ( isset( $wgHtmlEntityAliases[$name] ) ) { + return "&{$wgHtmlEntityAliases[$name]};"; + } elseif( isset( $wgHtmlEntities[$name] ) ) { return "&$name;"; } else { return "&amp;$name;"; @@ -1008,7 +1021,10 @@ class Sanitizer { * @return string */ static function decodeEntity( $name ) { - global $wgHtmlEntities; + global $wgHtmlEntities, $wgHtmlEntityAliases; + if ( isset( $wgHtmlEntityAliases[$name] ) ) { + $name = $wgHtmlEntityAliases[$name]; + } if( isset( $wgHtmlEntities[$name] ) ) { return codepointToUtf8( $wgHtmlEntities[$name] ); } else {