Revert "merged master"
[lhc/web/wiklou.git] / languages / LanguageConverter.php
index 42b9660..4cf95c1 100644 (file)
  * @maintainers fdcn <fdcn64@gmail.com>, shinjiman <shinjiman@gmail.com>, PhiLiP <philip.npc@gmail.com>
  */
 class LanguageConverter {
+
+       /**
+        * Codes of the languages that support variants.
+        * @since 1.20
+        * @var array list of language code strings
+        */
+       static public $languagesWithVariants = array(
+               'gan',
+               'iu',
+               'kk',
+               'ku',
+               'shi',
+               'sr',
+               'tg',
+               'uz',
+               'zh',
+       );
+
        var $mMainLanguageCode;
        var $mVariants, $mVariantFallbacks, $mVariantNames;
        var $mTablesLoaded = false;
@@ -72,7 +90,7 @@ class LanguageConverter {
                $this->mMainLanguageCode = $maincode;
                $this->mVariants = array_diff( $variants, $wgDisabledVariants );
                $this->mVariantFallbacks = $variantfallbacks;
-               $this->mVariantNames = Language::getLanguageNames();
+               $this->mVariantNames = Language::fetchLanguageNames();
                $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
                $defaultflags = array(
                        // 'S' show converted text
@@ -117,7 +135,7 @@ class LanguageConverter {
         * in this case. Right now this is only used by zh.
         *
         * @param $variant String: the language code of the variant
-        * @return String: The code of the fallback language or the
+        * @return String|array: The code of the fallback language or the
         *                               main code if there is no fallback
         */
        public function getVariantFallbacks( $variant ) {
@@ -158,7 +176,7 @@ class LanguageConverter {
                // not memoized (i.e. the return value is not cached) since
                // new information might appear during processing after this
                // is first called.
-               if ( $req ) {
+               if ( $this->validateVariant( $req ) ) {
                        return $req;
                }
                return $this->mMainLanguageCode;
@@ -189,7 +207,7 @@ class LanguageConverter {
         * @param $variant String: the variant to validate
         * @return Mixed: returns the variant if it is valid, null otherwise
         */
-       protected function validateVariant( $variant = null ) {
+       public function validateVariant( $variant = null ) {
                if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
                        return $variant;
                }
@@ -305,9 +323,9 @@ class LanguageConverter {
         * If you want to parse rules, try to use convert() or
         * convertTo().
         *
-        * @param $text String: the text to be converted
-        * @param $toVariant String: the target language code
-        * @return String: the converted text
+        * @param $text String the text to be converted
+        * @param $toVariant bool|string the target language code
+        * @return String the converted text
         */
        public function autoConvert( $text, $toVariant = false ) {
                wfProfileIn( __METHOD__ );
@@ -322,6 +340,11 @@ class LanguageConverter {
                        }
                }
 
+               if( $this->guessVariant( $text, $toVariant ) ) {
+                       wfProfileOut( __METHOD__ );
+                       return $text;
+               }
+
                /* we convert everything except:
                   1. HTML markups (anything between < and >)
                   2. HTML entities
@@ -368,11 +391,11 @@ class LanguageConverter {
                        $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";
 
                        // Advance to the next position
-                       $startPos = $elementPos + strlen( $element );           
+                       $startPos = $elementPos + strlen( $element );
 
                        // Translate any alt or title attributes inside the matched element
-                       if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, 
-                               $elementMatches ) ) 
+                       if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
+                               $elementMatches ) )
                        {
                                $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
                                $changed = false;
@@ -385,7 +408,7 @@ class LanguageConverter {
                                        if ( !strpos( $attr, '://' ) ) {
                                                $attr = $this->translate( $attr, $toVariant );
                                        }
-                                       
+
                                        // Remove HTML tags to avoid disrupting the layout
                                        $attr = preg_replace( '/<[^>]+>/', '', $attr );
                                        if ( $attr !== $attrs[$attrName] ) {
@@ -394,7 +417,7 @@ class LanguageConverter {
                                        }
                                }
                                if ( $changed ) {
-                                       $element = $elementMatches[1] . Html::expandAttributes( $attrs ) . 
+                                       $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
                                                $elementMatches[3];
                                }
                        }
@@ -468,7 +491,7 @@ class LanguageConverter {
         *
         * @param $text String: the text to be converted
         * @return Array: variant => converted text
-        * @deprecated Use autoConvertToAllVariants() instead
+        * @deprecated since 1.17 Use autoConvertToAllVariants() instead
         */
        public function convertLinkToAllVariants( $text ) {
                return $this->autoConvertToAllVariants( $text );
@@ -477,7 +500,7 @@ class LanguageConverter {
        /**
         * Apply manual conversion rules.
         *
-        * @param $convRule Object: Object of ConverterRule
+        * @param $convRule ConverterRule Object of ConverterRule
         */
        protected function applyManualConv( $convRule ) {
                // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
@@ -517,7 +540,7 @@ class LanguageConverter {
         * Auto convert a Title object to a readable string in the
         * preferred variant.
         *
-        * @param $title Object: a object of Title
+        * @param $title Title an object of Title
         * @return String: converted title text
         */
        public function convertTitle( $title ) {
@@ -527,9 +550,9 @@ class LanguageConverter {
                        $text = '';
                } else {
                        // first let's check if a message has given us a converted name
-                       $nsConvKey = 'conversion-ns' . $index;
-                       if ( !wfEmptyMsg( $nsConvKey ) ) {
-                               $text = wfMsgForContentNoTrans( $nsConvKey );
+                       $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
+                       if ( $nsConvMsg->exists() ) {
+                               $text = $nsConvMsg->plain();
                        } else {
                                // the message does not exist, try to retrieve it from the current
                                // variant's namespace names.
@@ -590,18 +613,22 @@ class LanguageConverter {
                $startPos = 0;
                $out = '';
                $length = strlen( $text );
+               $shouldConvert = !$this->guessVariant( $text, $variant );
+
                while ( $startPos < $length ) {
                        $pos = strpos( $text, '-{', $startPos );
 
                        if ( $pos === false ) {
                                // No more markup, append final segment
-                               $out .= $this->autoConvert( substr( $text, $startPos ), $variant );
+                               $fragment = substr( $text, $startPos );
+                               $out .= $shouldConvert? $this->autoConvert( $fragment, $variant ): $fragment;
                                return $out;
                        }
 
                        // Markup found
                        // Append initial segment
-                       $out .= $this->autoConvert( substr( $text, $startPos, $pos - $startPos ), $variant );
+                       $fragment = substr( $text, $startPos, $pos - $startPos );
+                       $out .= $shouldConvert? $this->autoConvert( $fragment, $variant ): $fragment;
 
                        // Advance position
                        $startPos = $pos;
@@ -618,7 +645,10 @@ class LanguageConverter {
         *
         * @param $text String: text to be converted
         * @param $variant String: the target variant code
+        * @param $startPos int
         * @param $depth Integer: depth of recursion
+        *
+        * @throws MWException
         * @return String: converted text
         */
        protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
@@ -657,8 +687,8 @@ class LanguageConverter {
                                                $inner .= '-{';
                                                if ( !$warningDone ) {
                                                        $inner .= '<span class="error">' .
-                                                               wfMsgForContent( 'language-converter-depth-warning',
-                                                                       $this->mMaxDepth ) .
+                                                               wfMessage( 'language-converter-depth-warning' )
+                                                                       ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
                                                                '</span>';
                                                        $warningDone = true;
                                                }
@@ -762,21 +792,38 @@ class LanguageConverter {
 
        /**
         * Returns language specific hash options: '!' followed by the preferred variant.
+        *
+        * @return string
         */
        public function getExtraHashOptions() {
                $variant = $this->getPreferredVariant();
                return '!' . $variant;
        }
 
+       /**
+        * Guess if a text is written in a variant. This base version always returns false; subclasses should override it.
+        *
+        * @param string        $text the text to be checked
+        * @param string        $variant language code of the variant to be checked for
+        * @return bool true if $text appears to be written in $variant, false if not
+        *
+        * @author Nikola Smolenski <smolensk@eunet.rs>
+        * @since 1.19
+        */
+       public function guessVariant($text, $variant) {
+               return false;
+       }
+
        /**
         * Load default conversion tables.
         * Derived classes must override this method; the base version only reports the missing override.
         *
         * @private
+        * @throws MWException always, with a message naming the class that lacks the override
         */
        function loadDefaultTables() {
                $name = get_class( $this );
-               wfDie( "Must implement loadDefaultTables() method in class $name" );
+               throw new MWException( "Must implement loadDefaultTables() method in class $name" );
        }
 
        /**
@@ -785,16 +832,18 @@ class LanguageConverter {
         * @param $fromCache Boolean: load from memcached? Defaults to true.
         */
        function loadTables( $fromCache = true ) {
+               global $wgLangConvMemc;
+
                if ( $this->mTablesLoaded ) {
                        return;
                }
-               global $wgMemc;
+
                wfProfileIn( __METHOD__ );
                $this->mTablesLoaded = true;
                $this->mTables = false;
                if ( $fromCache ) {
                        wfProfileIn( __METHOD__ . '-cache' );
-                       $this->mTables = $wgMemc->get( $this->mCacheKey );
+                       $this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
                        wfProfileOut( __METHOD__ . '-cache' );
                }
                if ( !$this->mTables
@@ -812,7 +861,7 @@ class LanguageConverter {
                        $this->postLoadTables();
                        $this->mTables[self::CACHE_VERSION_KEY] = true;
 
-                       $wgMemc->set( $this->mCacheKey, $this->mTables, 43200 );
+                       $wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
                        wfProfileOut( __METHOD__ . '-recache' );
                }
                wfProfileOut( __METHOD__ );
@@ -852,6 +901,8 @@ class LanguageConverter {
         * @param $code String: language code
         * @param $subpage String: subpage name
         * @param $recursive Boolean: parse subpages recursively? Defaults to true.
+        *
+        * @return array
         */
        function parseCachedTable( $code, $subpage = '', $recursive = true ) {
                static $parsed = array();
@@ -864,26 +915,30 @@ class LanguageConverter {
                        return array();
                }
 
-               if ( strpos( $code, '/' ) === false ) {
-                       $txt = MessageCache::singleton()->get( 'Conversiontable', true, $code );
-                       if ( $txt === false ) {
-                               # FIXME: this method doesn't seem to be expecting
-                               # this possible outcome...
-                               $txt = '&lt;Conversiontable&gt;';
-                       }
+               $parsed[$key] = true;
+
+               if ( $subpage === '' ) {
+                       $txt = MessageCache::singleton()->get( 'conversiontable', true, $code );
                } else {
-                       $title = Title::makeTitleSafe(
-                               NS_MEDIAWIKI,
-                               "Conversiontable/$code"
-                       );
+                       $txt = false;
+                       $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
                        if ( $title && $title->exists() ) {
-                               $article = new Article( $title );
-                               $txt = $article->getContents();
-                       } else {
-                               $txt = '';
+                               $revision = Revision::newFromTitle( $title );
+                               if ( $revision ) {
+                                       if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
+                                               $txt = $revision->getContent( Revision::RAW )->getNativeData();
+                                       }
+
+                                       //@todo: in the future, use a specialized content model, perhaps based on json!
+                               }
                        }
                }
 
+               # Nothing to parse if there's no text
+               if ( $txt === false || $txt === null || $txt === '' ) {
+                       return array();
+               }
+
                // get all subpage links of the form
                // [[MediaWiki:Conversiontable/zh-xx/...|...]]
                $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
@@ -932,7 +987,6 @@ class LanguageConverter {
                                $ret[trim( $m[0] )] = trim( $tt[0] );
                        }
                }
-               $parsed[$key] = true;
 
                // recursively parse the subpages
                if ( $recursive ) {
@@ -971,6 +1025,10 @@ class LanguageConverter {
        /**
         * Convert the sorting key for category links. This should make different
         * keys that are variants of each other map to the same key.
+        *
+        * @param $key string
+        *
+        * @return string
         */
        function convertCategoryKey( $key ) {
                return $key;
@@ -981,14 +1039,14 @@ class LanguageConverter {
         * MediaWiki:Conversiontable* is updated.
         * @private
         *
-        * @param $article Object: Article object
+        * @param $article Article object
         * @param $user Object: User object for the current user
         * @param $text String: article text (?)
         * @param $summary String: edit summary of the edit
         * @param $isMinor Boolean: was the edit marked as minor?
         * @param $isWatch Boolean: did the user watch this page or not?
-        * @param $section Unused
-        * @param $flags Bitfield
+        * @param $section
+        * @param $flags int Bitfield
         * @param $revision Object: new Revision object or null
         * @return Boolean: true
         */
@@ -1061,7 +1119,6 @@ class LanguageConverter {
 class ConverterRule {
        var $mText; // original text in -{text}-
        var $mConverter; // LanguageConverter object
-       var $mManualCodeError = '<strong class="error">code error!</strong>';
        var $mRuleDisplay = '';
        var $mRuleTitle = false;
        var $mRules = '';// string : the text of the rules
@@ -1215,6 +1272,8 @@ class ConverterRule {
 
        /**
         * @private
+        *
+        * @return string
         */
        function getRulesDesc() {
                $codesep = $this->mConverter->mDescCodeSep;
@@ -1235,6 +1294,10 @@ class ConverterRule {
        /**
         * Parse rules conversion.
         * @private
+        *
+        * @param $variant
+        *
+        * @return string
         */
        function getRuleConvertedStr( $variant ) {
                $bidtable = $this->mBidtable;
@@ -1348,19 +1411,21 @@ class ConverterRule {
                        if ( isset( $this->mVariantFlags[$variant] ) ) {
                                // then convert <text to convert> to current language
                                $this->mRules = $this->mConverter->autoConvert( $this->mRules,
-                                                                                                                               $variant );
+                                       $variant );
                        } else { // if current variant no in flags,
                                   // then we check its fallback variants.
                                $variantFallbacks =
                                        $this->mConverter->getVariantFallbacks( $variant );
-                               foreach ( $variantFallbacks as $variantFallback ) {
-                                       // if current variant's fallback exist in flags
-                                       if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
-                                               // then convert <text to convert> to fallback language
-                                               $this->mRules =
-                                                       $this->mConverter->autoConvert( $this->mRules,
-                                                                                                                       $variantFallback );
-                                               break;
+                               if( is_array( $variantFallbacks ) ) {
+                                       foreach ( $variantFallbacks as $variantFallback ) {
+                                               // if current variant's fallback exist in flags
+                                               if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
+                                                       // then convert <text to convert> to fallback language
+                                                       $this->mRules =
+                                                               $this->mConverter->autoConvert( $this->mRules,
+                                                                       $variantFallback );
+                                                       break;
+                                               }
                                        }
                                }
                        }
@@ -1429,7 +1494,9 @@ class ConverterRule {
                        }
                }
                if ( $this->mRuleDisplay === false ) {
-                       $this->mRuleDisplay = $this->mManualCodeError;
+                       $this->mRuleDisplay = '<span class="error">'
+                               . wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
+                               . '</span>';
                }
 
                $this->generateConvTable();
@@ -1444,6 +1511,7 @@ class ConverterRule {
 
        /**
         * Get display text on markup -{...}-
+        * @return string
         */
        public function getDisplay() {
                return $this->mRuleDisplay;
@@ -1451,6 +1519,7 @@ class ConverterRule {
 
        /**
         * Get converted title.
+        * @return string
         */
        public function getTitle() {
                return $this->mRuleTitle;
@@ -1458,6 +1527,7 @@ class ConverterRule {
 
        /**
         * Return how to deal with conversion rules.
+        * @return string
         */
        public function getRulesAction() {
                return $this->mRulesAction;
@@ -1466,6 +1536,7 @@ class ConverterRule {
        /**
         * Get conversion table. (bidirectional and unidirectional
         * conversion table)
+        * @return array
         */
        public function getConvTable() {
                return $this->mConvTable;
@@ -1473,6 +1544,7 @@ class ConverterRule {
 
        /**
         * Get conversion rules string.
+        * @return string
         */
        public function getRules() {
                return $this->mRules;
@@ -1480,6 +1552,7 @@ class ConverterRule {
 
        /**
         * Get conversion flags.
+        * @return array
         */
        public function getFlags() {
                return $this->mFlags;