From 87b8cf9ac7294ec40f4aaa6fc415d9a26a8bb324 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sat, 30 Aug 2003 09:39:34 +0000 Subject: [PATCH] Break UTF-8 support class from the case conversion tables; grab tables from memcached if available --- includes/Setup.php | 7 +++-- includes/Utf8Case.php | 52 ++------------------------------- languages/Language.php | 2 +- languages/LanguageAr.php | 2 +- languages/LanguageEo.php | 4 +-- languages/LanguageHe.php | 2 +- languages/LanguageHi.php | 2 +- languages/LanguageHu.php | 2 +- languages/LanguageJa.php | 2 +- languages/LanguageKo.php | 2 +- languages/LanguageMl.php | 2 +- languages/LanguageUtf8.php | 60 ++++++++++++++++++++++++++++++++++++++ 12 files changed, 76 insertions(+), 63 deletions(-) create mode 100644 languages/LanguageUtf8.php diff --git a/includes/Setup.php b/includes/Setup.php index 3974d364cd..039d72ce24 100644 --- a/includes/Setup.php +++ b/includes/Setup.php @@ -7,7 +7,6 @@ global $IP; include_once( "$IP/GlobalFunctions.php" ); -include_once( "$IP/Language.php" ); include_once( "$IP/Namespace.php" ); include_once( "$IP/Skin.php" ); include_once( "$IP/OutputPage.php" ); @@ -17,7 +16,7 @@ include_once( "$IP/User.php" ); include_once( "$IP/LinkCache.php" ); include_once( "$IP/Title.php" ); include_once( "$IP/Article.php" ); -require( "$IP/MemCachedClient.inc.php" ); +include_once( "$IP/MemCachedClient.inc.php" ); wfDebug( "\n\n" ); @@ -37,10 +36,12 @@ if( $wgUseMemCached ) { $wgMemc->set_debug( $wgMemCachedDebug ); } +include_once( "$IP/Language.php" ); + $wgOut = new OutputPage(); $wgLangClass = "Language" . ucfirst( $wgLanguageCode ); if( ! 
class_exists( $wgLangClass ) ) { - include_once( "$IP/Utf8Case.php" ); + include_once( "$IP/LanguageUtf8.php" ); $wgLangClass = "LanguageUtf8"; } $wgLang = new $wgLangClass(); diff --git a/includes/Utf8Case.php b/includes/Utf8Case.php index bcf4eeb732..996070af59 100644 --- a/includes/Utf8Case.php +++ b/includes/Utf8Case.php @@ -1,12 +1,11 @@ "A", @@ -1494,49 +1493,4 @@ $wikiLowerChars = array ( "\xf0\x90\x90\xa5" => "\xf0\x90\x91\x8d" ); -# Base stuff useful to all UTF-8 based language files -class LanguageUtf8 extends Language { - - function ucfirst( $string ) { - # For most languages, this is a wrapper for ucfirst() - # But that doesn't work right in a UTF-8 locale - global $wikiUpperChars, $wikiLowerChars; - return preg_replace ( - "/^([\\x00-\\x7f]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", - "strtr ( \"\$1\" , \$wikiUpperChars )", - $string ); - } - - function stripForSearch( $string ) { - # MySQL fulltext index doesn't grok utf-8, so we - # need to fold cases and convert to hex - global $wikiLowerChars; - return preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )", - $string ); - } - - function fallback8bitEncoding() { - # Windows codepage 1252 is a superset of iso 8859-1 - # override this to use difference source encoding to - # translate incoming 8-bit URLs. - return "windows-1252"; - } - - function checkTitleEncoding( $s ) { - global $wgInputEncoding; - - # Check for non-UTF-8 URLs - $ishigh = preg_match( '/[\x80-\xff]/', $s); - if(!$ishigh) return $s; - - $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' . 
- '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s ); - if( $isutf8 ) return $s; - - return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s ); - } -} - ?> \ No newline at end of file diff --git a/languages/Language.php b/languages/Language.php index eb5699d6fa..f93afd2559 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -276,7 +276,7 @@ this (alternative: like this?).", "Recentchangeslinked" => "", "Movepage" => "", "Booksources" => "External book sources", -"Categories" => "Page categories", +"Categories" => "Page categories" ); /* private */ $wgSysopSpecialPagesEn = array( diff --git a/languages/LanguageAr.php b/languages/LanguageAr.php index 59ffc48c83..7cca4f6b63 100644 --- a/languages/LanguageAr.php +++ b/languages/LanguageAr.php @@ -1,6 +1,6 @@ "-" diff --git a/languages/LanguageHi.php b/languages/LanguageHi.php index 2d94bee6e1..7bb997c969 100644 --- a/languages/LanguageHi.php +++ b/languages/LanguageHi.php @@ -1,6 +1,6 @@ "-" diff --git a/languages/LanguageHu.php b/languages/LanguageHu.php index 2ade0664a1..009a0d49ad 100644 --- a/languages/LanguageHu.php +++ b/languages/LanguageHu.php @@ -1,6 +1,6 @@ "-" diff --git a/languages/LanguageJa.php b/languages/LanguageJa.php index ce09f9da51..7e45e5ae35 100644 --- a/languages/LanguageJa.php +++ b/languages/LanguageJa.php @@ -1,6 +1,6 @@ get( $key1 = "$wgDBname:utf8:upper" ); +$wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" ); + +if(($wikiUpperChars === false) || ($wikiLowerChars === false)) { + include_once("$IP/Utf8Case.php"); + $wgMemc->set( $key1, $wikiUpperChars ); + $wgMemc->set( $key2, $wikiLowerChars ); +} + +# Base stuff useful to all UTF-8 based language files +class LanguageUtf8 extends Language { + + function ucfirst( $string ) { + # For most languages, this is a wrapper for ucfirst() + # But that doesn't work right in a UTF-8 locale + global $wikiUpperChars, $wikiLowerChars; + return preg_replace ( + 
"/^([\\x00-\\x7f]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", + "strtr ( \"\$1\" , \$wikiUpperChars )", + $string ); + } + + function stripForSearch( $string ) { + # MySQL fulltext index doesn't grok utf-8, so we + # need to fold cases and convert to hex + global $wikiLowerChars; + return preg_replace( + "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", + "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )", + $string ); + } + + function fallback8bitEncoding() { + # Windows codepage 1252 is a superset of iso 8859-1 + # override this to use different source encoding to + # translate incoming 8-bit URLs. + return "windows-1252"; + } + + function checkTitleEncoding( $s ) { + global $wgInputEncoding; + + # Check for non-UTF-8 URLs + $ishigh = preg_match( '/[\x80-\xff]/', $s); + if(!$ishigh) return $s; + + $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' . + '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s ); + if( $isutf8 ) return $s; + + return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s ); + } +} + +?> -- 2.20.1