From 239e96a01ebf655f21cd9b7a1d3af032b2971d73 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Tue, 3 May 2005 07:53:37 +0000 Subject: [PATCH] * Changed do_html_entity_decode()'s default character set from ISO-8859-1 to UTF-8 and documented the function. * Cleaned up wfUtf8Sequence() (messy indenting) and documented it * Documented wfMsg() --- includes/GlobalFunctions.php | 41 +++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 769d0578a5..82911018b5 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -84,10 +84,11 @@ if ( !function_exists( 'mb_substr' ) ) { * with no UTF-8 support. * * @param string $string String having html entities - * @param $quote_style - * @param string $charset Encoding set to use (default 'ISO-8859-1') + * @param $quote_style the quote style to pass as the second argument to + * get_html_translation_table() + * @param string $charset Encoding set to use (default 'UTF-8') */ -function do_html_entity_decode( $string, $quote_style=ENT_COMPAT, $charset='ISO-8859-1' ) { +function do_html_entity_decode( $string, $quote_style=ENT_COMPAT, $charset='UTF-8' ) { $fname = 'do_html_entity_decode'; wfProfileIn( $fname ); @@ -183,23 +184,26 @@ function wfUrlencode ( $s ) { /** * Return the UTF-8 sequence for a given Unicode code point. - * Currently doesn't work for values outside the Basic Multilingual Plane. + * Doesn't work for values outside the Basic Multilingual Plane. * * @param string $codepoint UTF-8 code point. - * @return string HTML UTF-8 Entitie such as '&#1234;'. + * @return string An UTF-8 character if the codepoint is in the BMP and + * &#$codepoint if it isn't; */ function wfUtf8Sequence( $codepoint ) { - if($codepoint < 0x80) return chr($codepoint); - if($codepoint < 0x800) return chr($codepoint >> 6 & 0x3f | 0xc0) . 
- chr($codepoint & 0x3f | 0x80); - if($codepoint < 0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) . - chr($codepoint >> 6 & 0x3f | 0x80) . - chr($codepoint & 0x3f | 0x80); - if($codepoint < 0x110000) return chr($codepoint >> 18 & 0x07 | 0xf0) . - chr($codepoint >> 12 & 0x3f | 0x80) . - chr($codepoint >> 6 & 0x3f | 0x80) . - chr($codepoint & 0x3f | 0x80); - + if($codepoint < 0x80) + return chr($codepoint); + if($codepoint < 0x800) + return chr($codepoint >> 6 & 0x3f | 0xc0) . chr($codepoint & 0x3f | 0x80); + if($codepoint < 0x10000) + return chr($codepoint >> 12 & 0x0f | 0xe0) . + chr($codepoint >> 6 & 0x3f | 0x80) . + chr($codepoint & 0x3f | 0x80); + if($codepoint < 0x110000) + return chr($codepoint >> 18 & 0x07 | 0xf0) . + chr($codepoint >> 12 & 0x3f | 0x80) . + chr($codepoint >> 6 & 0x3f | 0x80) . + chr($codepoint & 0x3f | 0x80); # There should be no assigned code points outside this range, but... return "&#$codepoint;"; } @@ -207,6 +211,8 @@ function wfUtf8Sequence( $codepoint ) { /** * Converts numeric character entities to UTF-8 * + * @todo Do named entities + * * @param string $string String to convert. * @return string Converted string. */ @@ -215,7 +221,6 @@ function wfMungeToUtf8( $string ) { #$string = iconv($wgInputEncoding, "UTF-8", $string); $string = preg_replace ( '/&#0*([0-9]+);/e', 'wfUtf8Sequence($1)', $string ); $string = preg_replace ( '/&#x([0-9a-f]+);/ie', 'wfUtf8Sequence(0x$1)', $string ); - # Should also do named entities here return $string; } @@ -331,6 +336,8 @@ function wfReadOnly() { /** * Get a message from anywhere, for the UI elements + * + * @param string */ function wfMsg( $key ) { $args = func_get_args(); -- 2.20.1