Restructured the languages directory, to avoid problems when people untar MW 1.8...
[lhc/web/wiklou.git] / languages / Language.php
index 2644e58..c7f7076 100644 (file)
@@ -49,9 +49,10 @@ class FakeConverter {
        function findVariantLink(&$l, &$n) {}
        function getExtraHashOptions() {return '';}
        function getParsedTitle() {return '';}
-       function markNoConversion($text) {return $text;}
+       function markNoConversion($text, $noParse=false) {return $text;}
        function convertCategoryKey( $key ) {return $key; }
-
+       /**
+        * Returns a one-element array keyed by $this->mLang->getCode() whose
+        * value is the text exactly as passed in.
+        */
+       function convertLinkToAllVariants($text){
+               $code = $this->mLang->getCode();
+               return array( $code => $text );
+       }
+       function setNoTitleConvert(){
+               // No-op: takes no arguments, does nothing, returns nothing.
+       }
 }
 
 #--------------------------------------------------------------------------
@@ -104,7 +105,6 @@ class Language {
         * Create a language object for a given language code
         */
        static function factory( $code ) {
-               global $IP;
                static $recursionLevel = 0;
 
                if ( $code == 'en' ) {
@@ -112,11 +112,11 @@ class Language {
                } else {
                        $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
                        // Preload base classes to work around APC/PHP5 bug
-                       if ( file_exists( "$IP/languages/$class.deps.php" ) ) {
-                               include_once("$IP/languages/$class.deps.php");
+                       if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
+                               include_once("$IP/languages/classes/$class.deps.php");
                        }
-                       if ( file_exists( "$IP/languages/$class.php" ) ) {
-                               include_once("$IP/languages/$class.php");
+                       if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
+                               include_once("$IP/languages/classes/$class.php");
                        }
                }
 
@@ -311,7 +311,7 @@ class Language {
                }
                
                global $IP;
-               $messageFiles = glob( "$IP/languages/Messages*.php" );
+               $messageFiles = glob( "$IP/languages/messages/Messages*.php" );
                $names = array();
                foreach ( $messageFiles as $file ) {
                        if( preg_match( '/Messages([A-Z][a-z_]+)\.php$/', $file, $m ) ) {
@@ -422,10 +422,12 @@ class Language {
         * internationalisation, a reduced set of format characters, and a better 
         * escaping format.
         *
-        * Supported format characters are dDjlFmMnYyHis. See the PHP manual for 
-        * definitions. There are a number of extensions, which start with "x":
+        * Supported format characters are dDjlNwzWFmMntLYyaAgGhHiscrU. See the 
+        * PHP manual for definitions. There are a number of extensions, which 
+        * start with "x":
         *
         *    xn   Do not translate digits of the next numeric format character
+        *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
         *    xr   Use roman numerals for the next numeric format character
         *    xx   Literal x
         *    xg   Genitive month name
@@ -448,6 +450,8 @@ class Language {
                $s = '';
                $raw = false;
                $roman = false;
+               $unix = false;
+               $rawToggle = false;
                for ( $p = 0; $p < strlen( $format ); $p++ ) {
                        $num = false;
                        $code = $format[$p];
@@ -462,6 +466,9 @@ class Language {
                                case 'xn':
                                        $raw = true;
                                        break;
+                               case 'xN':
+                                       $rawToggle = !$rawToggle;
+                                       break;
                                case 'xr':
                                        $roman = true;
                                        break;
@@ -472,14 +479,33 @@ class Language {
                                        $num = substr( $ts, 6, 2 );
                                        break;
                                case 'D':
-                                       $s .= $this->getWeekdayAbbreviation( self::calculateWeekday( $ts ) );
+                                       if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
+                                       $s .= $this->getWeekdayAbbreviation( date( 'w', $unix ) + 1 );
                                        break;
                                case 'j':
                                        $num = intval( substr( $ts, 6, 2 ) );
                                        break;
                                case 'l':
-                                       $s .= $this->getWeekdayName( self::calculateWeekday( $ts ) );
+                                       if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
+                                       $s .= $this->getWeekdayName( date( 'w', $unix ) + 1 );
+                                       break;
+                               case 'N':
+                                       if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
+                                       $w = date( 'w', $unix );
+                                       $num = $w ? $w : 7;
+                                       break;
+                               case 'w':
+                                       if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
+                                       $num = date( 'w', $unix );
                                        break;
+                               case 'z':
+                                       if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
+                                       $num = date( 'z', $unix );
+                                       break;
+                               case 'W':
+                                       if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
+                                       $num = date( 'W', $unix );
+                                       break;                                  
                                case 'F':
                                        $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
                                        break;
@@ -492,24 +518,58 @@ class Language {
                                case 'n':
                                        $num = intval( substr( $ts, 4, 2 ) );
                                        break;
+                               case 't':
+                                       if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
+                                       $num = date( 't', $unix );
+                                       break;
+                               case 'L':
+                                       if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
+                                       $num = date( 'L', $unix );
+                                       break;                                  
                                case 'Y':
                                        $num = substr( $ts, 0, 4 );
                                        break;
                                case 'y':
                                        $num = substr( $ts, 2, 2 );
                                        break;
-                               case 'H':
-                                       $num = substr( $ts, 8, 2 );
+                               case 'a':
+                                       $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
+                                       break;
+                               case 'A':
+                                       $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
+                                       break;
+                               case 'g':
+                                       $h = substr( $ts, 8, 2 );
+                                       $num = $h % 12 ? $h % 12 : 12;
                                        break;
                                case 'G':
                                        $num = intval( substr( $ts, 8, 2 ) );
                                        break;
+                               case 'h':
+                                       $h = substr( $ts, 8, 2 );
+                                       $num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
+                                       break;                                  
+                               case 'H':
+                                       $num = substr( $ts, 8, 2 );
+                                       break;
                                case 'i':
                                        $num = substr( $ts, 10, 2 );
                                        break;
                                case 's':
                                        $num = substr( $ts, 12, 2 );
                                        break;
+                               case 'c':
+                                       if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
+                                       $s .= date( 'c', $unix );
+                                       break;
+                               case 'r':
+                                       if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
+                                       $s .= date( 'r', $unix );
+                                       break;
+                               case 'U':
+                                       if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
+                                       $num = $unix;
+                                       break;
                                case '\\':
                                        # Backslash escaping
                                        if ( $p < strlen( $format ) - 1 ) {
@@ -538,11 +598,11 @@ class Language {
                                        $s .= $format[$p];
                        }
                        if ( $num !== false ) {
-                               if ( $raw ) {
+                               if ( $rawToggle || $raw ) {
                                        $s .= $num;
                                        $raw = false;
                                } elseif ( $roman ) {
-                                       $s .= Language::romanNumeral( $num );
+                                       $s .= self::romanNumeral( $num );
                                        $roman = false;
                                } else {
                                        $s .= $this->formatNum( $num, true );
@@ -554,35 +614,31 @@ class Language {
        }
 
        /**
-        * Roman number formatting up to 100
+        * Roman number formatting up to 3000
         */
        static function romanNumeral( $num ) {
-               static $units = array( 0, 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' );
-               static $decades = array( 0, 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' );
+               static $table = array(
+                       array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
+                       array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
+                       array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
+                       array( '', 'M', 'MM', 'MMM' )
+               );
+                       
                $num = intval( $num );
-               if ( $num > 100 || $num <= 0 ) {
+               if ( $num > 3000 || $num <= 0 ) {
                        return $num;
                }
+
                $s = '';
-               if ( $num >= 10 ) {
-                       $s .= $decades[floor( $num / 10 )];
-                       $num = $num % 10;
-               }
-               if ( $num >= 1 ) {
-                       $s .= $units[$num];
+               for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
+                       if ( $num >= $pow10 ) {
+                               $s .= $table[$i][floor($num / $pow10)];
+                       }
+                       $num = $num % $pow10;
                }
                return $s;
        }
 
-       /**
-        * Calculate the day of the week for a 14-character timestamp
-        * 1 for Sunday through to 7 for Saturday
-        * This takes about 100us on a slow computer
-        */
-       static function calculateWeekday( $ts ) {
-               return date( 'w', wfTimestamp( TS_UNIX, $ts ) ) + 1;
-       }
-
        /**
         * This is meant to be used by time(), date(), and timeanddate() to get
         * the date preference they're supposed to use, it should be used in
@@ -712,6 +768,34 @@ class Language {
                return iconv( $in, $out, $string );
        }
 
+       // Callbacks handed to preg_replace_callback() by uc(), lc(), ucwords() and ucwordbreaks()
+       /**
+        * preg_replace_callback() handler: passes the first captured group
+        * through ucfirst().
+        *
+        * @param array $matches Match array; index 1 is the first capture group
+        * @return string
+        */
+       function ucwordbreaksCallbackAscii($matches){
+               $word = $matches[1];
+               return $this->ucfirst( $word );
+       }
+       
+       /**
+        * preg_replace_callback() handler: upper-cases the whole matched text
+        * with mb_strtoupper().
+        *
+        * @param array $matches Match array; index 0 is the full match
+        * @return string
+        */
+       function ucwordbreaksCallbackMB($matches){
+               $matched = $matches[0];
+               return mb_strtoupper( $matched );
+       }
+       
+       /**
+        * preg_replace_callback() handler: translates the first captured group
+        * with the first table returned by getCaseMaps() (the upper-case map).
+        *
+        * @param array $matches Match array; index 1 is the first capture group
+        * @return string
+        */
+       function ucCallback($matches){
+               $maps = self::getCaseMaps();
+               $upperMap = $maps[0];
+               return strtr( $matches[1], $upperMap );
+       }
+       
+       /**
+        * preg_replace_callback() handler: translates the first captured group
+        * with the second table returned by getCaseMaps() (the lower-case map).
+        *
+        * @param array $matches Match array; index 1 is the first capture group
+        * @return string
+        */
+       function lcCallback($matches){
+               $maps = self::getCaseMaps();
+               $lowerMap = $maps[1];
+               return strtr( $matches[1], $lowerMap );
+       }
+       
+       /**
+        * preg_replace_callback() handler: upper-cases the whole matched text
+        * with mb_strtoupper().
+        *
+        * @param array $matches Match array; index 0 is the full match
+        * @return string
+        */
+       function ucwordsCallbackMB($matches){
+               $matched = $matches[0];
+               return mb_strtoupper( $matched );
+       }
+       
+       /**
+        * preg_replace_callback() handler: translates the whole matched text
+        * with the first table returned by getCaseMaps() (the upper-case map).
+        *
+        * @param array $matches Match array; index 0 is the full match
+        * @return string
+        */
+       function ucwordsCallbackWiki($matches){
+               $maps = self::getCaseMaps();
+               $upperMap = $maps[0];
+               return strtr( $matches[0], $upperMap );
+       }
+
        function ucfirst( $str ) {
                return self::uc( $str, true );
        }
@@ -729,9 +813,9 @@ class Language {
                        if ( self::isMultibyte( $str ) ) {
                                list( $wikiUpperChars ) = $this->getCaseMaps();
                                $x = $first ? '^' : '';
-                               return preg_replace(
-                                       "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                                       "strtr( \"\$1\" , \$wikiUpperChars )",
+                               return preg_replace_callback(
+                                       "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
+                                       array($this,"ucCallback"),
                                        $str
                                );
                        } else
@@ -755,9 +839,9 @@ class Language {
                        if ( self::isMultibyte( $str ) ) {
                                list( , $wikiLowerChars ) = self::getCaseMaps();
                                $x = $first ? '^' : '';
-                               return preg_replace(
-                                       "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                                       "strtr( \"\$1\" , \$wikiLowerChars )",
+                               return preg_replace_callback(
+                                       "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
+                                       array($this,"lcCallback"),
                                        $str
                                );
                        } else
@@ -765,7 +849,63 @@ class Language {
        }
 
        function isMultibyte( $str ) {
-               return (bool)preg_match( '/^[\x80-\xff]/', $str );
+               return (bool)preg_match( '/[\x80-\xff]/', $str );
+       }
+
+       /**
+        * Lower-case the string, then capitalize the first letter of the string
+        * and of every word that follows a space.
+        *
+        * @param string $str
+        * @return string
+        */
+       function ucwords($str) {
+               // Nothing flagged by isMultibyte(): PHP's built-ins are enough
+               if ( !self::isMultibyte( $str ) ) {
+                       return ucwords( strtolower( $str ) );
+               }
+
+               $str = self::lc($str);
+
+               // regexp to find first letter in each word (i.e. after each space)
+               $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
+
+               // Use mbstring to capitalize when available, else the wiki case map
+               $callback = function_exists( 'mb_strtoupper' )
+                       ? "ucwordsCallbackMB"
+                       : "ucwordsCallbackWiki";
+
+               return preg_replace_callback( $replaceRegexp, array($this, $callback), $str );
+       }
+
+       # Capitalize words at word breaks
+       function ucwordbreaks($str){
+               // Nothing flagged by isMultibyte(): capitalize each \b-delimited word
+               if ( !self::isMultibyte( $str ) ) {
+                       return preg_replace_callback(
+                               '/\b([\w\x80-\xff]+)\b/',
+                               array($this,"ucwordbreaksCallbackAscii"),
+                               $str
+                       );
+               }
+
+               $str = self::lc($str);
+
+               // since \b doesn't work for UTF-8, we explicitly define word break chars
+               $breaks= "[ \-\(\)\}\{\.,\?!]";
+
+               // first letter at the start of the string, or directly after a break char
+               $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
+
+               // Use mbstring to capitalize when available, else the wiki case map
+               $callback = function_exists( 'mb_strtoupper' )
+                       ? "ucwordbreaksCallbackMB"
+                       : "ucwordsCallbackWiki";
+
+               return preg_replace_callback( $replaceRegexp, array($this, $callback), $str );
        }
 
        function checkTitleEncoding( $s ) {
@@ -936,6 +1076,9 @@ class Language {
                        }
                }
 
+               if( !is_array( $rawEntry ) ) {
+                       error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
+               }
                $mw->mCaseSensitive = $rawEntry[0];
                $mw->mSynonyms = array_slice( $rawEntry, 1 );
        }
@@ -1166,6 +1309,17 @@ class Language {
                return $this->mConverter->parserConvert( $text, $parser );
        }
 
+       # Tell the converter that it shouldn't convert titles
+       function setNoTitleConvert(){
+               // Forward the request to the converter object. PHP resolves method
+               // names case-insensitively; the spelling used here matches this
+               // wrapper's own name and FakeConverter::setNoTitleConvert().
+               $this->mConverter->setNoTitleConvert();
+       }
+
+       # Check if this is a language with variants
+       function hasVariants(){
+               // True only when getVariants() reports more than one entry
+               $variants = $this->getVariants();
+               return count( $variants ) > 1;
+       }
+
+
        /**
         * Perform output conversion on a string, and encode for safe HTML output.
         * @param string $text
@@ -1210,6 +1364,17 @@ class Language {
                $this->mConverter->findVariantLink($link, $nt);
        }
 
+       /**
+        * If a language supports multiple variants, converts text
+        * into an array of all possible variants of the text:
+        *  'variant' => text in that variant
+        */
+
+       function convertLinkToAllVariants($text){
+               // Delegated entirely to the converter object
+               $variants = $this->mConverter->convertLinkToAllVariants( $text );
+               return $variants;
+       }
+
+
        /**
         * returns language specific options used by User::getPageRenderHash()
         * for example, the preferred language variant
@@ -1239,8 +1404,8 @@ class Language {
         * @param string $text text to be tagged for no conversion
         * @return string the tagged text
        */
-       function markNoConversion( $text ) {
-               return $this->mConverter->markNoConversion( $text );
+       function markNoConversion( $text, $noParse=false ) {
+               return $this->mConverter->markNoConversion( $text, $noParse );
        }
 
        /**
@@ -1274,6 +1439,16 @@ class Language {
                return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
        }
 
+       /**
+        * Build the path of the Messages file for a language code by handing
+        * the "$IP/languages/messages/Messages" prefix and a ".php" suffix to
+        * getFileName().
+        *
+        * @param string $code Language code
+        * @return string
+        */
+       static function getMessagesFileName( $code ) {
+               global $IP;
+               $prefix = "$IP/languages/messages/Messages";
+               return self::getFileName( $prefix, $code, '.php' );
+       }
+
+       /**
+        * Build the path of the Language class file for a language code by
+        * handing the "$IP/languages/classes/Language" prefix and a ".php"
+        * suffix to getFileName().
+        *
+        * @param string $code Language code
+        * @return string
+        */
+       static function getClassFileName( $code ) {
+               global $IP;
+               $prefix = "$IP/languages/classes/Language";
+               return self::getFileName( $prefix, $code, '.php' );
+       }
+       
        static function getLocalisationArray( $code, $disableCache = false ) {
                self::loadLocalisation( $code, $disableCache );
                return self::$mLocalisationCache[$code];
@@ -1286,7 +1461,7 @@ class Language {
         */
        static function loadLocalisation( $code, $disableCache = false ) {
                static $recursionGuard = array();
-               global $wgMemc, $wgDBname, $IP;
+               global $wgMemc, $wgDBname;
 
                if ( !$code ) {
                        throw new MWException( "Invalid language code requested" );
@@ -1343,8 +1518,7 @@ class Language {
                }
                
                # Load the primary localisation from the source file
-               global $IP;
-               $filename = self::getFileName( "$IP/languages/Messages", $code, '.php' );
+               $filename = self::getMessagesFileName( $code );
                if ( !file_exists( $filename ) ) {
                        wfDebug( "No localisation file for $code, using implicit fallback to en\n" );
                        $cache = array();
@@ -1362,7 +1536,7 @@ class Language {
                                throw new MWException( "Error: Circular fallback reference in language code $code" );
                        }
                        $recursionGuard[$code] = true;
-                       $newDeps = self::loadLocalisation( $fallback );
+                       $newDeps = self::loadLocalisation( $fallback, $disableCache );
                        unset( $recursionGuard[$code] );
 
                        $secondary = self::$mLocalisationCache[$fallback];
@@ -1395,6 +1569,9 @@ class Language {
                # Add dependencies to the cache entry
                $cache['deps'] = $deps;
 
+               # Replace spaces with underscores in namespace names
+               $cache['namespaceNames'] = str_replace( ' ', '_', $cache['namespaceNames'] );
+               
                # Save to both caches
                self::$mLocalisationCache[$code] = $cache;
                if ( !$disableCache ) {
@@ -1531,7 +1708,6 @@ class Language {
 
        static function getCaseMaps() {
                static $wikiUpperChars, $wikiLowerChars;
-               global $IP;
                if ( isset( $wikiUpperChars ) ) {
                        return array( $wikiUpperChars, $wikiLowerChars );
                }