Fix hardcoded path to the languages directory
[lhc/web/wiklou.git] / languages / LanguageUtf8.php
index 3c68f8e..05e7986 100644 (file)
@@ -1,24 +1,40 @@
 <?php
-#$Id$
+/**
+  * @package MediaWiki
+  * @subpackage Language
+  */
+
 if( defined( "MEDIAWIKI" ) ) {
 
+# This file and LanguageLatin1.php may be included from within functions, so
+# the globals assigned below must be declared with explicit global statements.
+
+global $wgInputEncoding, $wgOutputEncoding, $wikiUpperChars, $wikiLowerChars;
+global $wgDBname, $wgMemc;
+
 $wgInputEncoding    = "UTF-8";
 $wgOutputEncoding      = "UTF-8";
 
-if (function_exists('mb_internal_encoding')) {
+if( function_exists( 'mb_strtoupper' ) ) {
        mb_internal_encoding('UTF-8');
+} else {
+       # No mb_strtoupper available: fall back to our own case conversion tables
+       
+       # Loading cached serialized arrays is faster than parsing Utf8Case.php, so try the cache first
+       $wikiUpperChars = $wgMemc->get( $key1 = "$wgDBname:utf8:upper" );
+       $wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" );
+       
+       if(empty( $wikiUpperChars) || empty($wikiLowerChars )) {
+               require_once( "includes/Utf8Case.php" );
+               $wgMemc->set( $key1, $wikiUpperChars );
+               $wgMemc->set( $key2, $wikiLowerChars );
+       }
 }
 
-$wikiUpperChars = $wgMemc->get( $key1 = "$wgDBname:utf8:upper" );
-$wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" );
-
-if(empty( $wikiUpperChars) || empty($wikiLowerChars )) {
-       require_once( "includes/Utf8Case.php" );
-       $wgMemc->set( $key1, $wikiUpperChars );
-       $wgMemc->set( $key2, $wikiLowerChars );
-}
-
-# Base stuff useful to all UTF-8 based language files
+/**
+ * Base stuff useful to all UTF-8 based language files
+ * @package MediaWiki
+ */
 class LanguageUtf8 extends Language {
 
        # These two functions use mbstring library, if it is loaded
@@ -27,15 +43,24 @@ class LanguageUtf8 extends Language {
        # it should be dealt with in Language classes.
 
        function ucfirst( $string ) {
-               if (function_exists('mb_strtoupper')) {
-                       return mb_strtoupper(mb_substr($string,0,1)).mb_substr($string,1);
-               } else {
-                   global $wikiUpperChars;
-                   return preg_replace (
-                   "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                   "strtr ( \"\$1\" , \$wikiUpperChars )",
-                   $string );
+               /**
+                * On pages with many links we can get called a lot.
+                * The multibyte uppercase functions are relatively
+                * slow, so check first if we can use a faster ASCII
+                * version instead; it saves a few milliseconds.
+                */
+               if( preg_match( '/^[\x80-\xff]/', $string ) ) {
+                       if (function_exists('mb_strtoupper')) {
+                               return mb_strtoupper(mb_substr($string,0,1)).mb_substr($string,1);
+                       } else {
+                               global $wikiUpperChars;
+                               return preg_replace (
+                                       "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                                       "strtr ( \"\$1\" , \$wikiUpperChars )",
+                                       $string );
+                       }
                }
+               return ucfirst( $string );
        }
        
        function lcfirst( $string ) {
@@ -53,11 +78,27 @@ class LanguageUtf8 extends Language {
        function stripForSearch( $string ) {
                # MySQL fulltext index doesn't grok utf-8, so we
                # need to fold cases and convert to hex
-               global $wikiLowerChars;
-               return preg_replace(
-                 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                 "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
-                 $string );
+
+               # Language::stripForSearch just returns the string lowercased.
+               # Maybe every strtolower call on the stripped output or on the
+               # argument should be removed, and all stripForSearch methods
+               # adjusted accordingly.
+               
+               wfProfileIn( "LanguageUtf8::stripForSearch" );
+               if( function_exists( 'mb_strtolower' ) ) {
+                       $out = preg_replace(
+                               "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                               "'U8' . bin2hex( \"$1\" )",
+                               mb_strtolower( $string ) );
+               } else {
+                       global $wikiLowerChars;
+                       $out = preg_replace(
+                               "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                               "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
+                               $string );
+               }
+               wfProfileOut( "LanguageUtf8::stripForSearch" );
+               return $out;
        }
 
        function fallback8bitEncoding() {
@@ -70,6 +111,9 @@ class LanguageUtf8 extends Language {
        function checkTitleEncoding( $s ) {
                global $wgInputEncoding;
 
+               if( is_array( $s ) ) {
+                       wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
+               }
                # Check for non-UTF-8 URLs
                $ishigh = preg_match( '/[\x80-\xff]/', $s);
                if(!$ishigh) return $s;