From bae4503ec845c91cad6d3f6e9fb1f2d6de307b11 Mon Sep 17 00:00:00 2001
From: Aaron Schulz
Date: Sat, 20 Dec 2008 00:01:34 +0000
Subject: [PATCH] Delay $wgContLang unstubbing

---
 includes/WebRequest.php | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/includes/WebRequest.php b/includes/WebRequest.php
index 467471250d..5d2bd265b0 100644
--- a/includes/WebRequest.php
+++ b/includes/WebRequest.php
@@ -220,13 +220,10 @@ class WebRequest {
 	 */
 	function getGPCVal( $arr, $name, $default ) {
 		if( isset( $arr[$name] ) ) {
-			global $wgContLang;
 			$data = $arr[$name];
 			if( isset( $_GET[$name] ) && !is_array( $data ) ) {
 				# Check for alternate/legacy character encoding.
-				if( isset( $wgContLang ) ) {
-					$data = $wgContLang->checkTitleEncoding( $data );
-				}
+				$data = $this->checkTitleEncoding( $data );
 			}
 			$data = $this->normalizeUnicode( $data );
 			return $data;
@@ -234,6 +231,36 @@ class WebRequest {
 			return $default;
 		}
 	}
+
+	/**
+	 * Convert a legacy 8-bit encoded request value to UTF-8 when needed.
+	 *
+	 * The cheap checks run first so that $wgContLang is only touched
+	 * (and therefore unstubbed) when a conversion is really required.
+	 *
+	 * @param $s string Raw value taken from the request
+	 * @return string $s unchanged if $wgContLang is unset, or if $s is
+	 *   pure ASCII or valid UTF-8; otherwise the result of converting $s
+	 *   from $wgContLang's fallback 8-bit encoding to UTF-8
+	 */
+	protected function checkTitleEncoding( $s ) {
+		global $wgContLang;
+		if( !isset( $wgContLang ) ) return $s;
+		# Pure ASCII needs no conversion
+		$ishigh = preg_match( '/[\x80-\xff]/', $s );
+		if( !$ishigh ) return $s;
+
+		# Validate with the PCRE "u" modifier instead of a repeated
+		# alternation group: on long values the old pattern could hit the
+		# PCRE backtrack/recursion limit, making preg_match() return false
+		# and valid UTF-8 be re-encoded as if it were 8-bit text.
+		# Note this is stricter: overlong forms and lead bytes F5-F7,
+		# which the old byte-range pattern accepted, now count as invalid.
+		$isutf8 = ( preg_match( '//u', $s ) === 1 );
+		if( $isutf8 ) return $s;
+		# Do the heavy lifting by unstubbing $wgContLang
+		return $wgContLang->iconv( $wgContLang->fallback8bitEncoding(), "utf-8", $s );
+	}
 
 	/**
 	 * Fetch a scalar from the input or return $default if it's not set.
-- 
2.20.1