Merge "When file sha1 populator fixes wrong values, make it fix the other metadata."
[lhc/web/wiklou.git] / languages / LanguageConverter.php
index 28f125f..18d1dbc 100644 (file)
@@ -1,11 +1,24 @@
 <?php
-
 /**
  * Contains the LanguageConverter class and ConverterRule class
- * @ingroup Language
  *
- * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
  * @file
+ * @ingroup Language
  */
 
 /**
@@ -22,7 +35,12 @@ class LanguageConverter {
        var $mTables;
        // 'bidirectional' 'unidirectional' 'disable' for each variant
        var $mManualLevel;
+
+       /**
+        * @var String: memcached key name
+        */
        var $mCacheKey;
+
        var $mLangObj;
        var $mFlags;
        var $mDescCodeSep = ':', $mDescVarSep = ';';
@@ -39,24 +57,22 @@ class LanguageConverter {
        /**
         * Constructor
         *
-        * @param $langobj The Language Object
+        * @param $langobj Language: the Language Object
         * @param $maincode String: the main language code of this language
         * @param $variants Array: the supported variants of this language
         * @param $variantfallbacks Array: the fallback language of each variant
         * @param $flags Array: defining the custom strings that maps to the flags
         * @param $manualLevel Array: limit for supported variants
         */
-       public function __construct( $langobj, $maincode,
-                                                               $variants = array(),
-                                                               $variantfallbacks = array(),
-                                                               $flags = array(),
+       public function __construct( $langobj, $maincode, $variants = array(),
+                                                               $variantfallbacks = array(), $flags = array(),
                                                                $manualLevel = array() ) {
-               global $wgDisabledVariants, $wgLanguageNames;
+               global $wgDisabledVariants;
                $this->mLangObj = $langobj;
                $this->mMainLanguageCode = $maincode;
                $this->mVariants = array_diff( $variants, $wgDisabledVariants );
                $this->mVariantFallbacks = $variantfallbacks;
-               $this->mVariantNames = $wgLanguageNames;
+               $this->mVariantNames = Language::fetchLanguageNames();
                $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
                $defaultflags = array(
                        // 'S' show converted text
@@ -69,7 +85,7 @@ class LanguageConverter {
                        'D' => 'D',       // convert description (subclass implement)
                        '-' => '-',       // remove convert (not implement)
                        'H' => 'H',       // add rule for convert code
-                                                 // (but no display in placed code )
+                                                 // (but no display in placed code)
                        'N' => 'N'        // current variant name
                );
                $this->mFlags = array_merge( $defaultflags, $flags );
@@ -101,7 +117,7 @@ class LanguageConverter {
         * in this case. Right now this is only used by zh.
         *
         * @param $variant String: the language code of the variant
-        * @return String: The code of the fallback language or the
+        * @return String|array: The code of the fallback language or the
         *                               main code if there is no fallback
         */
        public function getVariantFallbacks( $variant ) {
@@ -121,20 +137,16 @@ class LanguageConverter {
 
        /**
         * Get preferred language variant.
-        * @param $fromUser Boolean: get it from $wgUser's preferences
-        * @param $fromHeader Boolean: get it from Accept-Language
         * @return String: the preferred language code
         */
-       public function getPreferredVariant( $fromUser = true, $fromHeader = false ) {
-               global $wgDefaultLanguageVariant;
+       public function getPreferredVariant() {
+               global $wgDefaultLanguageVariant, $wgUser;
 
                $req = $this->getURLVariant();
 
-               if ( $fromUser && !$req ) {
+               if ( $wgUser->isLoggedIn() && !$req ) {
                        $req = $this->getUserVariant();
-               }
-
-               if ( $fromHeader && !$req ) {
+               } elseif ( !$req ) {
                        $req = $this->getHeaderVariant();
                }
 
@@ -146,6 +158,26 @@ class LanguageConverter {
                // not memoized (i.e. the return value is not cached) since
                // new information might appear during processing after this
                // is first called.
+               if ( $this->validateVariant( $req ) ) {
+                       return $req;
+               }
+               return $this->mMainLanguageCode;
+       }
+
+       /**
+        * Get default variant.
+        * This function is not affected by the user's settings or request headers
+        * @return String: the default variant code
+        */
+       public function getDefaultVariant() {
+               global $wgDefaultLanguageVariant;
+
+               $req = $this->getURLVariant();
+
+               if ( $wgDefaultLanguageVariant && !$req ) {
+                       $req = $this->validateVariant( $wgDefaultLanguageVariant );
+               }
+
                if ( $req ) {
                        return $req;
                }
@@ -157,9 +189,8 @@ class LanguageConverter {
         * @param $variant String: the variant to validate
         * @return Mixed: returns the variant if it is valid, null otherwise
         */
-       protected function validateVariant( $variant = null ) {
-               if ( $variant !== null &&
-                        in_array( $variant, $this->mVariants ) ) {
+       public function validateVariant( $variant = null ) {
+               if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
                        return $variant;
                }
                return null;
@@ -170,9 +201,8 @@ class LanguageConverter {
         *
         * @return Mixed: variant if one found, false otherwise.
         */
-       protected function getURLVariant() {
+       public function getURLVariant() {
                global $wgRequest;
-               $ret = null;
 
                if ( $this->mURLVariant ) {
                        return $this->mURLVariant;
@@ -195,20 +225,20 @@ class LanguageConverter {
         */
        protected function getUserVariant() {
                global $wgUser;
-               $ret = null;
 
                // memoizing this function wreaks havoc on parserTest.php
-               /* if ( $this->mUserVariant ) { */
-               /*      return $this->mUserVariant; */
-               /* } */
+               /*
+               if ( $this->mUserVariant ) {
+                       return $this->mUserVariant;
+               }
+               */
 
-               // get language variant preference from logged in users
+               // Get language variant preference from logged in users
                // Don't call this on stub objects because that causes infinite
                // recursion during initialisation
                if ( $wgUser->isLoggedIn() )  {
                        $ret = $wgUser->getOption( 'variant' );
-               }
-               else {
+               } else {
                        // figure out user lang without constructing wgLang to avoid
                        // infinite recursion
                        $ret = $wgUser->getOption( 'language' );
@@ -217,7 +247,6 @@ class LanguageConverter {
                return $this->mUserVariant = $this->validateVariant( $ret );
        }
 
-
        /**
         * Determine the language variant from the Accept-Language header.
         *
@@ -225,20 +254,19 @@ class LanguageConverter {
         */
        protected function getHeaderVariant() {
                global $wgRequest;
-               $ret = null;
 
                if ( $this->mHeaderVariant ) {
                        return $this->mHeaderVariant;
                }
 
                // see if some supported language variant is set in the
-               // http header.
+               // HTTP header.
                $languages = array_keys( $wgRequest->getAcceptLang() );
                if ( empty( $languages ) ) {
                        return null;
                }
 
-               $fallback_languages = array();
+               $fallbackLanguages = array();
                foreach ( $languages as $language ) {
                        $this->mHeaderVariant = $this->validateVariant( $language );
                        if ( $this->mHeaderVariant ) {
@@ -250,17 +278,16 @@ class LanguageConverter {
                        // them later.
                        $fallbacks = $this->getVariantFallbacks( $language );
                        if ( is_string( $fallbacks ) ) {
-                               $fallback_languages[] = $fallbacks;
+                               $fallbackLanguages[] = $fallbacks;
                        } elseif ( is_array( $fallbacks ) ) {
-                               $fallback_languages =
-                                       array_merge( $fallback_languages,
-                                                                $fallbacks );
+                               $fallbackLanguages =
+                                       array_merge( $fallbackLanguages, $fallbacks );
                        }
                }
 
                if ( !$this->mHeaderVariant ) {
                        // process fallback languages now
-                       $fallback_languages = array_unique( $fallback_languages );
+                       $fallback_languages = array_unique( $fallbackLanguages );
                        foreach ( $fallback_languages as $language ) {
                                $this->mHeaderVariant = $this->validateVariant( $language );
                                if ( $this->mHeaderVariant ) {
@@ -272,34 +299,15 @@ class LanguageConverter {
                return $this->mHeaderVariant;
        }
 
-       /**
-        * Caption convert, base on preg_replace_callback.
-        *
-        * To convert text in "title" or "alt", like '<img alt="text" ... '
-        * or '<span title="text" ... '
-        *
-        * @return String like ' alt="yyyy"' or ' title="yyyy"'
-        */
-       protected function captionConvert( $matches ) {
-               $toVariant = $this->getPreferredVariant();
-               $title = $matches[1];
-               $text  = $matches[2];
-               // we convert captions except URL
-               if ( !strpos( $text, '://' ) ) {
-                       $text = $this->translate( $text, $toVariant );
-               }
-               return " $title=\"$text\"";
-       }
-
        /**
         * Dictionary-based conversion.
         * This function does not parse the conversion rules.
         * To have the rules parsed as well, use convert() or
         * convertTo() instead.
         *
-        * @param $text String: the text to be converted
-        * @param $toVariant String: the target language code
-        * @return String: the converted text
+        * @param $text String the text to be converted
+        * @param $toVariant bool|string the target language code
+        * @return String the converted text
         */
        public function autoConvert( $text, $toVariant = false ) {
                wfProfileIn( __METHOD__ );
@@ -309,14 +317,20 @@ class LanguageConverter {
                if ( !$toVariant ) {
                        $toVariant = $this->getPreferredVariant();
                        if ( !$toVariant ) {
+                               wfProfileOut( __METHOD__ );
                                return $text;
                        }
                }
 
+               if( $this->guessVariant( $text, $toVariant ) ) {
+                       wfProfileOut( __METHOD__ );
+                       return $text;
+               }
+
                /* we convert everything except:
-                  1. html markups (anything between < and >)
-                  2. html entities
-                  3. place holders created by the parser
+                  1. HTML markups (anything between < and >)
+                  2. HTML entities
+                  3. placeholders created by the parser
                */
                global $wgParser;
                if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
@@ -325,7 +339,7 @@ class LanguageConverter {
                        $marker = '';
                }
 
-               // this one is needed when the text is inside an html markup
+               // this one is needed when the text is inside an HTML markup
                $htmlfix = '|<[^>]+$|^[^<>]*>';
 
                // disable convert to variants between <code></code> tags
@@ -337,41 +351,77 @@ class LanguageConverter {
 
                $reg = '/' . $codefix . $scriptfix . $prefix .
                        '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
+               $startPos = 0;
+               $sourceBlob = '';
+               $literalBlob = '';
+
+               // Guard against delimiter nulls in the input
+               $text = str_replace( "\000", '', $text );
+
+               $markupMatches = null;
+               $elementMatches = null;
+               while ( $startPos < strlen( $text ) ) {
+                       if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
+                               $elementPos = $markupMatches[0][1];
+                               $element = $markupMatches[0][0];
+                       } else {
+                               $elementPos = strlen( $text );
+                               $element = '';
+                       }
+
+                       // Queue the part before the markup for translation in a batch
+                       $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";
+
+                       // Advance to the next position
+                       $startPos = $elementPos + strlen( $element );
+
+                       // Translate any alt or title attributes inside the matched element
+                       if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
+                               $elementMatches ) )
+                       {
+                               $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
+                               $changed = false;
+                               foreach ( array( 'title', 'alt' ) as $attrName ) {
+                                       if ( !isset( $attrs[$attrName] ) ) {
+                                               continue;
+                                       }
+                                       $attr = $attrs[$attrName];
+                                       // Don't convert URLs
+                                       if ( !strpos( $attr, '://' ) ) {
+                                               $attr = $this->translate( $attr, $toVariant );
+                                       }
 
-               $matches = preg_split( $reg, $text, - 1, PREG_SPLIT_OFFSET_CAPTURE );
-
-               $m = array_shift( $matches );
-
-               $ret = $this->translate( $m[0], $toVariant );
-               $mstart = $m[1] + strlen( $m[0] );
-
-               // enable convertsion of '<img alt="xxxx" ... '
-               // or '<span title="xxxx" ... '
-               $captionpattern  = '/\s(title|alt)\s*=\s*"([\s\S]*?)"/';
-
-               $trtext = '';
-               $trtextmark = "\0";
-               $notrtext = array();
-               foreach ( $matches as $m ) {
-                       $mark = substr( $text, $mstart, $m[1] - $mstart );
-                       $mark = preg_replace_callback( $captionpattern,
-                                                                                  array( &$this, 'captionConvert' ),
-                                                                                  $mark );
-                       // Let's convert the trtext only once,
-                       // it would give us more performance improvement
-                       $notrtext[] = $mark;
-                       $trtext .= $m[0] . $trtextmark;
-                       $mstart = $m[1] + strlen( $m[0] );
-               }
-               $notrtext[] = '';
-               $trtext = $this->translate( $trtext, $toVariant );
-               $trtext = StringUtils::explode( $trtextmark, $trtext );
-               foreach ( $trtext as $t ) {
-                       $ret .= array_shift( $notrtext );
-                       $ret .= $t;
+                                       // Remove HTML tags to avoid disrupting the layout
+                                       $attr = preg_replace( '/<[^>]+>/', '', $attr );
+                                       if ( $attr !== $attrs[$attrName] ) {
+                                               $attrs[$attrName] = $attr;
+                                               $changed = true;
+                                       }
+                               }
+                               if ( $changed ) {
+                                       $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
+                                               $elementMatches[3];
+                               }
+                       }
+                       $literalBlob .= $element . "\000";
+               }
+
+               // Do the main translation batch
+               $translatedBlob = $this->translate( $sourceBlob, $toVariant );
+
+               // Put the output back together
+               $translatedIter = StringUtils::explode( "\000", $translatedBlob );
+               $literalIter = StringUtils::explode( "\000", $literalBlob );
+               $output = '';
+               while ( $translatedIter->valid() && $literalIter->valid() ) {
+                       $output .= $translatedIter->current();
+                       $output .= $literalIter->current();
+                       $translatedIter->next();
+                       $literalIter->next();
                }
+
                wfProfileOut( __METHOD__ );
-               return $ret;
+               return $output;
        }
 
        /**
@@ -383,7 +433,7 @@ class LanguageConverter {
         * @param $variant String: variant language code
         * @return String: translated text
         */
-       protected function translate( $text, $variant ) {
+       public function translate( $text, $variant ) {
                wfProfileIn( __METHOD__ );
                // If $text is empty or only includes spaces, do nothing
                // Otherwise translate it
@@ -423,7 +473,7 @@ class LanguageConverter {
         *
         * @param $text String: the text to be converted
         * @return Array: variant => converted text
-        * @deprecated Use autoConvertToAllVariants() instead
+        * @deprecated since 1.17 Use autoConvertToAllVariants() instead
         */
        public function convertLinkToAllVariants( $text ) {
                return $this->autoConvertToAllVariants( $text );
@@ -432,7 +482,7 @@ class LanguageConverter {
        /**
         * Apply manual conversion rules.
         *
-        * @param $convRule Object: Object of ConverterRule
+        * @param $convRule ConverterRule Object of ConverterRule
         */
        protected function applyManualConv( $convRule ) {
                // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
@@ -472,8 +522,8 @@ class LanguageConverter {
         * Auto convert a Title object to a readable string in the
         * preferred variant.
         *
-        *@param $title Object: a object of Title
-        *@return String: converted title text
+        * @param $title Title the Title object to be converted
+        * @return String: converted title text
         */
        public function convertTitle( $title ) {
                $variant = $this->getPreferredVariant();
@@ -482,9 +532,9 @@ class LanguageConverter {
                        $text = '';
                } else {
                        // first let's check if a message has given us a converted name
-                       $nsConvKey = 'conversion-ns' . $index;
-                       if ( !wfEmptyMsg( $nsConvKey ) ) {
-                               $text = wfMsgForContentNoTrans( $nsConvKey );
+                       $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
+                       if ( $nsConvMsg->exists() ) {
+                               $text = $nsConvMsg->plain();
                        } else {
                                // the message does not exist, try to retrieve it from the current
                                // variant's namespace names.
@@ -526,7 +576,9 @@ class LanguageConverter {
         */
        public function convertTo( $text, $variant ) {
                global $wgDisableLangConversion;
-               if ( $wgDisableLangConversion ) return $text;
+               if ( $wgDisableLangConversion ) {
+                       return $text;
+               }
                return $this->recursiveConvertTopLevel( $text, $variant );
        }
 
@@ -543,20 +595,22 @@ class LanguageConverter {
                $startPos = 0;
                $out = '';
                $length = strlen( $text );
+               $shouldConvert = !$this->guessVariant( $text, $variant );
+
                while ( $startPos < $length ) {
-                       $m = false;
                        $pos = strpos( $text, '-{', $startPos );
 
                        if ( $pos === false ) {
                                // No more markup, append final segment
-                               $out .= $this->autoConvert( substr( $text, $startPos ), $variant );
-                               $startPos = $length;
+                               $fragment = substr( $text, $startPos );
+                               $out .= $shouldConvert? $this->autoConvert( $fragment, $variant ): $fragment;
                                return $out;
                        }
 
                        // Markup found
                        // Append initial segment
-                       $out .= $this->autoConvert( substr( $text, $startPos, $pos - $startPos ), $variant );
+                       $fragment = substr( $text, $startPos, $pos - $startPos );
+                       $out .= $shouldConvert? $this->autoConvert( $fragment, $variant ): $fragment;
 
                        // Advance position
                        $startPos = $pos;
@@ -573,7 +627,10 @@ class LanguageConverter {
         *
         * @param $text String: text to be converted
         * @param $variant String: the target variant code
+        * @param $startPos int
         * @param $depth Integer: depth of recursion
+        *
+        * @throws MWException
         * @return String: converted text
         */
        protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
@@ -644,10 +701,9 @@ class LanguageConverter {
        }
 
        /**
-        * If a language supports multiple variants, it is
-        * possible that non-existing link in one variant
-        * actually exists in another variant. This function
-        * tries to find it. See e.g. LanguageZh.php
+        * If a language supports multiple variants, it is possible that a
+        * non-existing link in one variant actually exists in another variant.
+        * This function tries to find it. See e.g. LanguageZh.php
         *
         * @param $link String: the name of the link
         * @param $nt Mixed: the title object of the link
@@ -718,10 +774,26 @@ class LanguageConverter {
 
        /**
         * Returns language specific hash options.
+        *
+        * @return string
         */
        public function getExtraHashOptions() {
                $variant = $this->getPreferredVariant();
-               return '!' . $variant ;
+               return '!' . $variant;
+       }
+
+       /**
+        * Guess if a text is written in a variant. This should be implemented in subclasses.
+        *
+        * @param string        $text the text to be checked
+        * @param string        $variant language code of the variant to be checked for
+        * @return bool true if $text appears to be written in $variant, false if not
+        *
+        * @author Nikola Smolenski <smolensk@eunet.rs>
+        * @since 1.19
+        */
+       public function guessVariant($text, $variant) {
+               return false;
        }
 
        /**
@@ -729,35 +801,39 @@ class LanguageConverter {
         * This method must be implemented in derived class.
         *
         * @private
+        * @throws MWException
         */
        function loadDefaultTables() {
                $name = get_class( $this );
-               wfDie( "Must implement loadDefaultTables() method in class $name" );
+               throw new MWException( "Must implement loadDefaultTables() method in class $name" );
        }
 
        /**
         * Load conversion tables either from the cache or the disk.
         * @private
+        * @param $fromCache Boolean: load from memcached? Defaults to true.
         */
-       function loadTables( $fromcache = true ) {
+       function loadTables( $fromCache = true ) {
+               global $wgLangConvMemc;
+
                if ( $this->mTablesLoaded ) {
                        return;
                }
-               global $wgMemc;
+
                wfProfileIn( __METHOD__ );
                $this->mTablesLoaded = true;
                $this->mTables = false;
-               if ( $fromcache ) {
+               if ( $fromCache ) {
                        wfProfileIn( __METHOD__ . '-cache' );
-                       $this->mTables = $wgMemc->get( $this->mCacheKey );
+                       $this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
                        wfProfileOut( __METHOD__ . '-cache' );
                }
                if ( !$this->mTables
                         || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
                        wfProfileIn( __METHOD__ . '-recache' );
                        // not in cache, or we need a fresh reload.
-                       // we will first load the default tables
-                       // then update them using things in MediaWiki:Zhconversiontable/*
+                       // We will first load the default tables
+                       // then update them using things in MediaWiki:Conversiontable/*
                        $this->loadDefaultTables();
                        foreach ( $this->mVariants as $var ) {
                                $cached = $this->parseCachedTable( $var );
@@ -767,15 +843,14 @@ class LanguageConverter {
                        $this->postLoadTables();
                        $this->mTables[self::CACHE_VERSION_KEY] = true;
 
-                       $wgMemc->set( $this->mCacheKey, $this->mTables, 43200 );
+                       $wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
                        wfProfileOut( __METHOD__ . '-recache' );
                }
                wfProfileOut( __METHOD__ );
        }
 
        /**
-        * Hook for post processig after conversion tables are loaded.
-        *
+        * Hook for post processing after conversion tables are loaded.
         */
        function postLoadTables() { }
 
@@ -792,7 +867,6 @@ class LanguageConverter {
                $this->loadTables( false );
        }
 
-
        /**
         * Parse the conversion table stored in the cache.
         *
@@ -803,18 +877,18 @@ class LanguageConverter {
         *                      ...
         *              }-
         *
-        *      To make the tables more manageable, subpages are allowed
-        *      and will be parsed recursively if $recursive == true.
+        * To make the tables more manageable, subpages are allowed
+        * and will be parsed recursively if $recursive == true.
+        *
+        * @param $code String: language code
+        * @param $subpage String: subpage name
+        * @param $recursive Boolean: parse subpages recursively? Defaults to true.
         *
+        * @return array
         */
        function parseCachedTable( $code, $subpage = '', $recursive = true ) {
-               global $wgMessageCache;
                static $parsed = array();
 
-               if ( !is_object( $wgMessageCache ) ) {
-                       return array();
-               }
-
                $key = 'Conversiontable/' . $code;
                if ( $subpage ) {
                        $key .= '/' . $subpage;
@@ -823,26 +897,28 @@ class LanguageConverter {
                        return array();
                }
 
-               if ( strpos( $code, '/' ) === false ) {
-                       $txt = $wgMessageCache->get( 'Conversiontable', true, $code );
-                       if ( $txt === false ) {
-                               # FIXME: this method doesn't seem to be expecting
-                               # this possible outcome...
-                               $txt = '&lt;Conversiontable&gt;';
-                       }
+               $parsed[$key] = true;
+
+               if ( $subpage === '' ) {
+                       $txt = MessageCache::singleton()->get( 'conversiontable', true, $code );
                } else {
-                       $title = Title::makeTitleSafe( NS_MEDIAWIKI,
-                                                                                  "Conversiontable/$code" );
+                       $txt = false;
+                       $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
                        if ( $title && $title->exists() ) {
-                               $article = new Article( $title );
-                               $txt = $article->getContents();
-                       } else {
-                               $txt = '';
+                               $revision = Revision::newFromTitle( $title );
+                               if ( $revision ) {
+                                       $txt = $revision->getRawText();
+                               }
                        }
                }
 
+               # Nothing to parse if there's no text
+               if ( $txt === false || $txt === null || $txt === '' ) {
+                       return array();
+               }
+
                // get all subpage links of the form
-               // [[MediaWiki:conversiontable/zh-xx/...|...]]
+               // [[MediaWiki:Conversiontable/zh-xx/...|...]]
                $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
                        ':Conversiontable';
                $subs = StringUtils::explode( '[[', $txt );
@@ -881,14 +957,14 @@ class LanguageConverter {
                        $table = StringUtils::explode( ';', $stripped );
                        foreach ( $table as $t ) {
                                $m = explode( '=>', $t, 3 );
-                               if ( count( $m ) != 2 )
+                               if ( count( $m ) != 2 ) {
                                        continue;
+                               }
                                // trim any trailling comments starting with '//'
                                $tt = explode( '//', $m[1], 2 );
                                $ret[trim( $m[0] )] = trim( $tt[0] );
                        }
                }
-               $parsed[$key] = true;
 
                // recursively parse the subpages
                if ( $recursive ) {
@@ -911,7 +987,7 @@ class LanguageConverter {
         * various functions in the Parser.
         *
         * @param $text String: text to be tagged for no conversion
-        * @param $noParse Unused (?)
+        * @param $noParse Boolean: unused
         * @return String: the tagged text
         */
        public function markNoConversion( $text, $noParse = false ) {
@@ -927,6 +1003,10 @@ class LanguageConverter {
        /**
         * Convert the sorting key for category links. This should make different
         * keys that are variants of each other map to the same key.
+        *
+        * @param $key string
+        *
+        * @return string
         */
        function convertCategoryKey( $key ) {
                return $key;
@@ -934,11 +1014,22 @@ class LanguageConverter {
 
        /**
         * Hook to refresh the cache of conversion tables when
-        * MediaWiki:conversiontable* is updated.
+        * MediaWiki:Conversiontable* is updated.
         * @private
+        *
+        * @param $article Article object
+        * @param $user Object: User object for the current user
+        * @param $text String: article text (?)
+        * @param $summary String: edit summary of the edit
+        * @param $isMinor Boolean: was the edit marked as minor?
+        * @param $isWatch Boolean: did the user watch this page or not?
+        * @param $section
+        * @param $flags int Bitfield
+        * @param $revision Object: new Revision object or null
+        * @return Boolean: true
         */
-       function OnArticleSaveComplete( $article, $user, $text, $summary, $isminor,
-                       $iswatch, $section, $flags, $revision ) {
+       function OnArticleSaveComplete( $article, $user, $text, $summary, $isMinor,
+                       $isWatch, $section, $flags, $revision ) {
                $titleobj = $article->getTitle();
                if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
                        $title = $titleobj->getDBkey();
@@ -955,14 +1046,17 @@ class LanguageConverter {
 
        /**
         * Armour rendered math against conversion.
-        * Wrap math into rawoutput -{R| math }- syntax.
+        * Escape special chars in parsed math text (which in most cases consists of img elements).
+        *
+        * @param $text String: text to armour against conversion
+        * @return String: armoured text where -{ and }- have been converted to
+        *                 -&#123; and &#125;-
         */
        public function armourMath( $text ) {
-               // we need to convert '-{' and '}-' to '-&#123;' and '&#125;-'
-               // to avoid a unwanted '}-' appeared after the math-image.
+               // convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
+               // any unwanted markup appearing in the math image tag.
                $text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
-               $ret = "-{R|$text}-";
-               return $ret;
+               return $text;
        }
 
        /**
@@ -1157,6 +1251,8 @@ class ConverterRule {
 
        /**
         * @private
+        *
+        * @return string
         */
        function getRulesDesc() {
                $codesep = $this->mConverter->mDescCodeSep;
@@ -1177,6 +1273,10 @@ class ConverterRule {
        /**
         * Parse rules conversion.
         * @private
+        *
+        * @param $variant
+        *
+        * @return string
         */
        function getRuleConvertedStr( $variant ) {
                $bidtable = $this->mBidtable;
@@ -1258,7 +1358,7 @@ class ConverterRule {
                                }
                                $vmarked[] = $v;
                        }
-                       /*for unidirectional array fill to convert tables */
+                       /* for unidirectional array fill to convert tables */
                        if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
                                && isset( $unidtable[$v] ) )
                        {
@@ -1273,14 +1373,13 @@ class ConverterRule {
 
        /**
         * Parse rules and flags.
-        * @public
+        * @param $variant String: variant language code
         */
-       function parse( $variant = NULL ) {
+       public function parse( $variant = null ) {
                if ( !$variant ) {
                        $variant = $this->mConverter->getPreferredVariant();
                }
 
-               $variants = $this->mConverter->mVariants;
                $this->parseFlags();
                $flags = $this->mFlags;
 
@@ -1291,19 +1390,21 @@ class ConverterRule {
                        if ( isset( $this->mVariantFlags[$variant] ) ) {
                                // then convert <text to convert> to current language
                                $this->mRules = $this->mConverter->autoConvert( $this->mRules,
-                                                                                                                               $variant );
+                                       $variant );
                        } else { // if current variant no in flags,
                                   // then we check its fallback variants.
                                $variantFallbacks =
                                        $this->mConverter->getVariantFallbacks( $variant );
-                               foreach ( $variantFallbacks as $variantFallback ) {
-                                       // if current variant's fallback exist in flags
-                                       if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
-                                               // then convert <text to convert> to fallback language
-                                               $this->mRules =
-                                                       $this->mConverter->autoConvert( $this->mRules,
-                                                                                                                       $variantFallback );
-                                               break;
+                               if( is_array( $variantFallbacks ) ) {
+                                       foreach ( $variantFallbacks as $variantFallback ) {
+                                               // if current variant's fallback exist in flags
+                                               if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
+                                                       // then convert <text to convert> to fallback language
+                                                       $this->mRules =
+                                                               $this->mConverter->autoConvert( $this->mRules,
+                                                                       $variantFallback );
+                                                       break;
+                                               }
                                        }
                                }
                        }
@@ -1379,58 +1480,58 @@ class ConverterRule {
        }
 
        /**
-        * @public
+        * @todo FIXME: code this function :)
         */
-       function hasRules() {
+       public function hasRules() {
                // TODO:
        }
 
        /**
         * Get display text on markup -{...}-
-        * @public
+        * @return string
         */
-       function getDisplay() {
+       public function getDisplay() {
                return $this->mRuleDisplay;
        }
 
        /**
         * Get converted title.
-        * @public
+        * @return string
         */
-       function getTitle() {
+       public function getTitle() {
                return $this->mRuleTitle;
        }
 
        /**
         * Return how deal with conversion rules.
-        * @public
+        * @return string
         */
-       function getRulesAction() {
+       public function getRulesAction() {
                return $this->mRulesAction;
        }
 
        /**
-        * Get conversion table. ( bidirectional and unidirectional
-        * conversion table )
-        * @public
+        * Get conversion table. (bidirectional and unidirectional
+        * conversion table)
+        * @return array
         */
-       function getConvTable() {
+       public function getConvTable() {
                return $this->mConvTable;
        }
 
        /**
         * Get conversion rules string.
-        * @public
+        * @return string
         */
-       function getRules() {
+       public function getRules() {
                return $this->mRules;
        }
 
        /**
         * Get conversion flags.
-        * @public
+        * @return array
         */
-       function getFlags() {
+       public function getFlags() {
                return $this->mFlags;
        }
 }