From e42ff9ee7908a968d4a53c1587938a2de2e8e544 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Sun, 29 Apr 2007 08:28:34 +0000 Subject: [PATCH] =?utf8?q?Added=20character=20entity=20aliases=20&=D7=A8?= =?utf8?q?=D7=9C=D7=9E;=20and=20&=D8=B1=D9=84=D9=85;=20as=20per=20http://l?= =?utf8?q?ists.wikimedia.org/pipermail/wikitech-l/2007-April/031064.html?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- includes/Sanitizer.php | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 572dafddec..fa5416dc26 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -28,7 +28,7 @@ * Sanitizer::normalizeCharReferences and Sanitizer::decodeCharReferences */ define( 'MW_CHAR_REFS_REGEX', - '/&([A-Za-z0-9]+); + '/&([A-Za-z0-9\x80-\xff]+); |&\#([0-9]+); |&\#x([0-9A-Za-z]+); |&\#X([0-9A-Za-z]+); @@ -315,6 +315,16 @@ $wgHtmlEntities = array( 'zwj' => 8205, 'zwnj' => 8204 ); +/** + * Character entity aliases accepted by MediaWiki + */ +global $wgHtmlEntityAliases; +$wgHtmlEntityAliases = array( + 'רלמ' => 'rlm', + 'رلم' => 'rlm', +); + + /** * XHTML sanitizer for MediaWiki * @addtogroup Parser @@ -902,16 +912,19 @@ class Sanitizer { /** * If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD, - * return the named entity reference as is. Otherwise, returns - * HTML-escaped text of pseudo-entity source (eg &amp;foo;) + * return the named entity reference as is. If the entity is a + * MediaWiki-specific alias, returns the HTML equivalent. 
Otherwise, + * returns HTML-escaped text of pseudo-entity source (eg &amp;foo;) * * @param string $name * @return string * @static */ static function normalizeEntity( $name ) { - global $wgHtmlEntities; - if( isset( $wgHtmlEntities[$name] ) ) { + global $wgHtmlEntities, $wgHtmlEntityAliases; + if ( isset( $wgHtmlEntityAliases[$name] ) ) { + return "&{$wgHtmlEntityAliases[$name]};"; + } elseif( isset( $wgHtmlEntities[$name] ) ) { return "&$name;"; } else { return "&amp;$name;"; @@ -1008,7 +1021,10 @@ class Sanitizer { * @return string */ static function decodeEntity( $name ) { - global $wgHtmlEntities; + global $wgHtmlEntities, $wgHtmlEntityAliases; + if ( isset( $wgHtmlEntityAliases[$name] ) ) { + $name = $wgHtmlEntityAliases[$name]; + } if( isset( $wgHtmlEntities[$name] ) ) { return codepointToUtf8( $wgHtmlEntities[$name] ); } else { -- 2.20.1