Got rid of about 20 regex calls in braceSubstitution() relating to function-like...

[lhc/web/wiklou.git] / languages / LanguageUtf8.php
diff --git a/languages/LanguageUtf8.php b/languages/LanguageUtf8.php

index acdd886..d738624 100644 (file)
--- a/languages/LanguageUtf8.php
+++ b/languages/LanguageUtf8.php
@@ -1,4 +1,8 @@
  <?php
+/**
+  * @package MediaWiki
+  * @subpackage Language
+  */
  
  if( defined( "MEDIAWIKI" ) ) {
  
@@ -15,11 +19,11 @@ if( function_exists( 'mb_strtoupper' ) ) {
         mb_internal_encoding('UTF-8');
  } else {
         # Hack our own case conversion routines
-       
+
         # Loading serialized arrays is faster than parsing code :P
         $wikiUpperChars = $wgMemc->get( $key1 = "$wgDBname:utf8:upper" );
         $wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" );
-       
+
         if(empty( $wikiUpperChars) || empty($wikiLowerChars )) {
                 require_once( "includes/Utf8Case.php" );
                 $wgMemc->set( $key1, $wikiUpperChars );
@@ -27,45 +31,71 @@ if( function_exists( 'mb_strtoupper' ) ) {
         }
  }
  
-# Base stuff useful to all UTF-8 based language files
+/**
+ * Base stuff useful to all UTF-8 based language files
+ * @package MediaWiki
+ */
  class LanguageUtf8 extends Language {
  
-       # These two functions use mbstring library, if it is loaded
-       # or compiled and character mapping arrays otherwise. 
+       # These functions use the mbstring library if it is loaded or
+       # compiled in, and fall back to character mapping arrays otherwise.
         # In case of language-specific character mismatch
         # it should be dealt with in Language classes.
  
-       function ucfirst( $string ) {
-               /**
-                * On pages with many links we can get called a lot.
-                * The multibyte uppercase functions are relatively
-                * slow, so check first if we can use a faster ASCII
-                * version instead; it saves a few milliseconds.
-                */
-               if( preg_match( '/^[\x80-\xff]/', $string ) ) {
-                       if (function_exists('mb_strtoupper')) {
-                               return mb_strtoupper(mb_substr($string,0,1)).mb_substr($string,1);
-                       } else {
+       function ucfirst( $str ) {
+               return LanguageUtf8::uc( $str, true );
+       }
+
+       function uc( $str, $first = false ) {
+               if ( function_exists( 'mb_strtoupper' ) )
+                       if ( $first )
+                               if ( LanguageUtf8::isMultibyte( $str ) )
+                                       return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
+                               else
+                                       return ucfirst( $str );
+                       else
+                               return LanguageUtf8::isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str );
+               else
+                       if ( LanguageUtf8::isMultibyte( $str ) ) {
                                 global $wikiUpperChars;
-                               return preg_replace (
-                                       "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                                       "strtr ( \"\$1\" , \$wikiUpperChars )",
-                                       $string );
-                       }
-               }
-               return ucfirst( $string );
+                               $x = $first ? '^' : '';
+                               return preg_replace(
+                                       "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                                       "strtr( \"\$1\" , \$wikiUpperChars )",
+                                       $str
+                               );
+                       } else
+                               return $first ? ucfirst( $str ) : strtoupper( $str );
         }
-       
-       function lcfirst( $string ) {
-               if (function_exists('mb_strtolower')) {
-                       return mb_strtolower(mb_substr($string,0,1)).mb_substr($string,1);
-               } else {
-                   global $wikiLowerChars;
-                   return preg_replace (
-                   "/^([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                   "strtr ( \"\$1\" , \$wikiLowerChars )",
-                   $string );
-               }
+
+       function lcfirst( $str ) {
+               return LanguageUtf8::lc( $str, true );
+       }
+
+       function lc( $str, $first = false ) {
+               if ( function_exists( 'mb_strtolower' ) )
+                       if ( $first )
+                               if ( LanguageUtf8::isMultibyte( $str ) )
+                                       return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
+                               else
+                                       return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
+                       else
+                               return LanguageUtf8::isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
+               else
+                       if ( LanguageUtf8::isMultibyte( $str ) ) {
+                               global $wikiLowerChars;
+                               $x = $first ? '^' : '';
+                               return preg_replace(
+                                       "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                                       "strtr( \"\$1\" , \$wikiLowerChars )",
+                                       $str
+                               );
+                       } else
+                               return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
+       }
+
+       function isMultibyte( $str ) {
+               return (bool)preg_match( '/^[\x80-\xff]/', $str );
         }
  
         function stripForSearch( $string ) {
@@ -76,7 +106,7 @@ class LanguageUtf8 extends Language {
                 # all strtolower on stripped output or argument
                 # should be removed and all stripForSearch
                 # methods adjusted to that.
-               
+
                 wfProfileIn( "LanguageUtf8::stripForSearch" );
                 if( function_exists( 'mb_strtolower' ) ) {
                         $out = preg_replace(
@@ -110,7 +140,7 @@ class LanguageUtf8 extends Language {
                 # Check for non-UTF-8 URLs
                 $ishigh = preg_match( '/[\x80-\xff]/', $s);
                 if(!$ishigh) return $s;
-               
+
                 $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
                  '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
                 if( $isutf8 ) return $s;
@@ -121,7 +151,7 @@ class LanguageUtf8 extends Language {
         function firstChar( $s ) {
                 preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches);
-               
+
                 return isset( $matches[1] ) ? $matches[1] : "";
         }