Revert 15733 and 15719 for the moment; I see some eval'd string code and other things...
author Brion Vibber <brion@users.mediawiki.org>
Wed, 19 Jul 2006 20:13:39 +0000 (20:13 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Wed, 19 Jul 2006 20:13:39 +0000 (20:13 +0000)
includes/Parser.php
includes/SearchEngine.php
includes/SpecialSearch.php
languages/Language.php
languages/LanguageConverter.php
languages/LanguageSr.php
languages/LanguageUtf8.php

index 74b926f..1079f5b 100644 (file)
@@ -1408,6 +1408,7 @@ class Parser
                $selflink = $this->mTitle->getPrefixedText();
                wfProfileOut( $fname.'-setup' );
 
+               $checkVariantLink = sizeof($wgContLang->getVariants())>1;
                $useSubpages = $this->areSubpagesAllowed();
 
                # Loop for each link
@@ -1491,6 +1492,13 @@ class Parser
                                continue;
                        }
 
+                       #check other language variants of the link
+                       #if the article does not exist
+                       if( $checkVariantLink
+                           && $nt->getArticleID() == 0 ) {
+                               $wgContLang->findVariantLink($link, $nt);
+                       }
+
                        $ns = $nt->getNamespace();
                        $iw = $nt->getInterWiki();
 
@@ -3822,7 +3830,6 @@ class Parser
        function replaceLinkHolders( &$text, $options = 0 ) {
                global $wgUser;
                global $wgOutputReplace;
-               global $wgContLang, $wgLanguageCode;
 
                $fname = 'Parser::replaceLinkHolders';
                wfProfileIn( $fname );
@@ -3913,86 +3920,6 @@ class Parser
                        }
                        wfProfileOut( $fname.'-check' );
 
-                       # Do a second query for links in different language variants (if needed)
-                       if(sizeof($wgContLang->getVariants())>1){
-                               $linkBatch = new LinkBatch(); 
-
-                               // Add variants of links to link batch
-                               foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) {
-                                       $title = $this->mLinkHolders['titles'][$key];
-                                       if ( is_null( $title ) )
-                                               continue;
-
-                                       $pdbk = $title->getPrefixedDBkey();
-
-                                       // generate all variants of the link title text
-                                       $allTextVariants = $wgContLang->convertLinkToAllVariants($title->getText());
-
-                                       // if link was not found (in first query), add all variants to query
-                                       if ( !isset($colours[$pdbk]) ){
-                                               foreach($allTextVariants as $textVariant){
-                                                       $linkBatch->addObj( Title::makeTitleSafe( $ns, $textVariant ) );
-                                               }
-                                       }
-                                       // if link was found add only variant with fixed title
-                                       else if($colours[$pdbk] == 1){
-                                               $fixedCode = $wgLanguageCode.'-fixed';
-                                               if( isset($allTextVariants[$fixedCode]) ){
-                                                       $linkBatch->addObj( Title::makeTitleSafe( $ns, $allTextVariants[$fixedCode]  ) );
-                                               }
-                                       }
-                               }
-
-                               # fetch link variants into cache
-                               $linkBatch->execute();
-
-                               # check if links are found in some of the variants
-                               foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) {
-                                       $title = $this->mLinkHolders['titles'][$key];
-                                       if ( is_null( $title ) ) 
-                                               continue;
-
-                                       $pdbk = $title->getPrefixedDBkey();                                     
-                                       $allTextVariants = $wgContLang->convertLinkToAllVariants($title->getText());
-
-                                       // If link has already been found, check only fixed variant
-                                       if(isset($colours[$pdbk]) && $colours[$pdbk] == 1){
-                                               $fixedCode = $wgLanguageCode.'-fixed';
-
-                                               if( isset($allTextVariants[$fixedCode]) ){
-                                                       $allTextVariants = array($fixedCode => $allTextVariants[$fixedCode]);
-                                               }
-                                       }
-
-                                       // process the link variants
-                                       if ( !isset($colours[$pdbk]) || $colours[$pdbk] == 1 ){
-                                               foreach($allTextVariants as $variantCode => $textVariant){
-                                                       $variantTitle=Title::makeTitleSafe( $ns, $textVariant );
-                                                       if(is_null($variantTitle)) continue;
-
-                                                       $varpdbk = $variantTitle->getPrefixedDBkey();
-
-                                                       if($linkCache->getGoodLinkID( $varpdbk ) != 0){
-
-                                                               // found link in some of the variants, replace the link holder data
-                                                               $this->mLinkHolders['titles'][$key] = $variantTitle;
-                                                               $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey();
-
-                                                               if($wgContLang->getPreferredVariant() == $wgLanguageCode)
-                                                                       $this->mLinkHolders['texts'][$key] = $this->mLinkHolders['texts'][$key];
-                                                               else
-                                                                       $this->mLinkHolders['texts'][$key] = $variantTitle->getText();
-
-                                                               $pdbks[$key] = $varpdbk;
-                                                               $colours[$varpdbk] = 1;
-
-                                                               break;
-                                                       }
-                                               }       
-                                       }
-                               }
-                       }
-
                        # Construct search and replace arrays
                        wfProfileIn( $fname.'-construct' );
                        $wgOutputReplace = array();
index 6af1e41..c3b3851 100644 (file)
@@ -51,7 +51,6 @@ class SearchEngine {
         * @private
         */
        function getNearMatch( $term ) {
-               global $wgContLang;
                # Exact match? No need to look further.
                $title = Title::newFromText( $term );
                if (is_null($title))
@@ -63,27 +62,33 @@ class SearchEngine {
 
                # Now try all lower case (i.e. first letter capitalized)
                #
-               $title = Title::newFromText( $wgContLang->lc( $term ) );
+               $title = Title::newFromText( strtolower( $term ) );
                if ( $title->exists() ) {
                        return $title;
                }
 
                # Now try capitalized string
                #
-               $title = Title::newFromText( $wgContLang->ucwords( $term ) );
+               $title = Title::newFromText( ucwords( strtolower( $term ) ) );
                if ( $title->exists() ) {
                        return $title;
                }
 
                # Now try all upper case
                #
-               $title = Title::newFromText( $wgContLang->uc( $term ) );
+               $title = Title::newFromText( strtoupper( $term ) );
                if ( $title->exists() ) {
                        return $title;
                }
 
                # Now try Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc
-               $title = Title::newFromText( $wgContLang->ucwordbreaks($term) );
+               $title = Title::newFromText( preg_replace_callback(
+                       '/\b([\w\x80-\xff]+)\b/',
+                       create_function( '$matches', '
+                               global $wgContLang;
+                               return $wgContLang->ucfirst($matches[1]);
+                               ' ),
+                       $term ) );
                if ( $title->exists() ) {
                        return $title;
                }
index a8aadfa..c466985 100644 (file)
@@ -77,7 +77,6 @@ class SpecialSearch {
        function goResult( $term ) {
                global $wgOut;
                global $wgGoToEdit;
-               global $wgContLang;
 
                $this->setupPage( $term );
 
@@ -97,20 +96,6 @@ class SpecialSearch {
                        return;
                }
 
-               # if language supports variants, search in all variants
-               if(sizeof($wgContLang->getVariants())>1){
-                       $allTermVariants = $wgContLang->convertLinkToAllVariants($term);
-
-                       foreach($allTermVariants as $termVariant){
-                               $t = SearchEngine::getNearMatch( $termVariant );
-                               if( !is_null( $t ) ) {
-                                       $wgOut->redirect( $t->getFullURL() );
-                                       wfProfileOut( $fname );
-                                       return;
-                               }
-                       }
-               }
-
                # No match, generate an edit URL
                $t = Title::newFromText( $term );
                if( is_null( $t ) ) {
index f001ee6..ba4e713 100644 (file)
@@ -315,7 +315,6 @@ class fakeConverter {
        function getParsedTitle() {return '';}
        function markNoConversion($text) {return $text;}
        function convertCategoryKey( $key ) {return $key; }
-       function convertLinkToAllVariants($text){ return array( $this->mLang->getCode() => $text); }
 
 }
 
@@ -755,21 +754,6 @@ class Language {
                return strtolower( $str );
        }
 
-       function ucwords($str) {
-               return ucwords( strtolower( $str ) );
-       }
-
-  # capitalize words at word breaks
-       function ucwordbreaks($str){
-               return preg_replace_callback(
-                       '/\b([\w\x80-\xff]+)\b/',
-                       create_function( '$matches', '
-                               global $wgContLang;
-                               return $wgContLang->ucfirst($matches[1]);
-                               ' ),
-                       $str );
-       }
-
        function checkTitleEncoding( $s ) {
                global $wgInputEncoding;
 
@@ -1169,17 +1153,6 @@ class Language {
                $this->mConverter->findVariantLink($link, $nt);
        }
 
-       /**
-        * If a language supports multiple variants, converts text
-        * into an array of all possible variants of the text:
-        *  'variant' => text in that variant
-        */
-
-       function convertLinkToAllVariants($text){
-               return $this->mConverter->convertLinkToAllVariants($text);
-       }
-
-
        /**
         * returns language specific options used by User::getPageRenderHash()
         * for example, the preferred language variant
index afbde79..c194ace 100644 (file)
@@ -36,7 +36,6 @@ class LanguageConverter {
                                                                $markup=array(),
                                                                $flags = array()) {
                global $wgDBname;
-               global $wgLegalTitleChars;
                $this->mLangObj = $langobj;
                $this->mMainLanguageCode = $maincode;
                $this->mVariants = $variants;
@@ -47,11 +46,6 @@ class LanguageConverter {
                $this->mMarkup = array_merge($m, $markup);
                $f = array('A'=>'A', 'T'=>'T');
                $this->mFlags = array_merge($f, $flags);
-
-               // enable escape characters -{ }- in titles
-               if(!preg_match('/\{/',$wgLegalTitleChars)) $wgLegalTitleChars.='\{';
-               if(!preg_match('/\}/',$wgLegalTitleChars)) $wgLegalTitleChars.='\}';
-
        }
 
        /**
@@ -120,34 +114,6 @@ class LanguageConverter {
                }
        }
 
-       /**
-        *  This function should be called on bare text
-        *  It translates text into variant, specials:
-        *    - ommiting roman numbers
-        */
-       function translateText($text, $toVariant){
-               $breaks = '[^\w\x80-\xff]';
-
-               // regexp for roman numbers
-               $roman = 'M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})';
-
-               $reg = '/^'.$roman.'$|^'.$roman.$breaks.'|'.$breaks.$roman.'$|'.$breaks.$roman.$breaks.'/';
-
-               $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
-
-               
-               $m = array_shift($matches);
-               $ret = strtr($m[0], $this->mTables[$toVariant]);
-               $mstart = $m[1]+strlen($m[0]);
-               foreach($matches as $m) {
-                       $ret .= substr($text, $mstart, $m[1]-$mstart);
-                       $ret .= strtr($m[0], $this->mTables[$toVariant]);
-                       $mstart = $m[1] + strlen($m[0]);
-               }
-
-               return $ret;
-       }
-
        /**
      * dictionary-based conversion
      *
@@ -181,21 +147,19 @@ class LanguageConverter {
                        $marker = "";
 
                // this one is needed when the text is inside an html markup
-               $htmlfix = '|<[^>]+$|^[^<>]*>';
+               $htmlfix = '|<[^>]+=\"[^(>=)]*$|^[^(<>=\")]*\"[^>]*>';
 
-               // disable convert to variants between <code></code> tags
-               $codefix = '<code>.+?<\/code>|';
-
-               $reg = '/'.$codefix.'<[^>]+>|&[a-z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
+               $reg = '/<[^>]+>|&[a-z#][a-z0-9]+;' . $marker . $htmlfix . '/';
        
                $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
 
+
                $m = array_shift($matches);
-               $ret = $this->translateText($m[0],$toVariant);
+               $ret = strtr($m[0], $this->mTables[$toVariant]);
                $mstart = $m[1]+strlen($m[0]);
                foreach($matches as $m) {
                        $ret .= substr($text, $mstart, $m[1]-$mstart);
-                       $ret .= $this->translateText($m[0],$toVariant);
+                       $ret .= strtr($m[0], $this->mTables[$toVariant]);
                        $mstart = $m[1] + strlen($m[0]);
                }
                wfProfileOut( $fname );
@@ -207,9 +171,9 @@ class LanguageConverter {
      *
      * @param string $text the text to be converted
      * @return array of string
-     * @public
+     * @private
      */
-       function autoConvertToAllVariants($text, $includeFixedVariant=true) {
+       function autoConvertToAllVariants($text) {
                $fname="LanguageConverter::autoConvertToAllVariants";
                wfProfileIn( $fname );
                if( !$this->mTablesLoaded )
@@ -217,51 +181,12 @@ class LanguageConverter {
 
                $ret = array();
                foreach($this->mVariants as $variant) {
-                       $ret[$variant] = $this->translateText($text,$variant);
+                       $ret[$variant] = strtr($text, $this->mTables[$variant]);
                }
-               if($includeFixedVariant)
-                       $ret[$this->mMainLanguageCode.'-fixed'] = $this->mMarkup['begin'].$text.$this->mMarkup['end'];
-
                wfProfileOut( $fname );
                return $ret;
        }
 
-       /**
-     * convert link text to all supported variants
-     *
-     * @param string $text the text to be converted
-     * @return array of string
-     * @public
-     */
-       function convertLinkToAllVariants($text,$includeFixedVariant=true) {
-               if( !$this->mTablesLoaded )
-                       $this->loadTables();
-
-               $ret = array();
-               $tarray = explode($this->mMarkup['begin'], $text);
-               $tfirst = array_shift($tarray);
-
-               foreach($this->mVariants as $variant)
-                       $ret[$variant] = $this->translateText($tfirst,$variant);
-
-               foreach($tarray as $txt) {
-                       $marked = explode($this->mMarkup['end'], $txt, 2);
-
-                       foreach($this->mVariants as $variant){
-                               $ret[$variant] .= $this->mMarkup['begin'].$marked[0].$this->mMarkup['end'];
-                               if(array_key_exists(1, $marked))
-                                       $ret[$variant] .= $this->translateText($marked[1],$variant);
-                       }
-                       
-               }
-
-               if($includeFixedVariant)
-                       $ret[$this->mMainLanguageCode.'-fixed'] = $this->mMarkup['begin'].$text.$this->mMarkup['end'];
-
-               return $ret;
-       }
-
-
        /**
         * Convert text using a parser object for context
         */
@@ -269,7 +194,7 @@ class LanguageConverter {
                global $wgDisableLangConversion;
                /* don't do anything if this is the conversion table */
                if ( $parser->mTitle->getNamespace() == NS_MEDIAWIKI &&
-                                strpos($parser->mTitle->getText(), "Conversiontable") !== false ) 
+                       strpos($parser->mTitle->getText, "Conversiontable") !== false ) 
                {
                        return $text;
                }
@@ -327,7 +252,7 @@ class LanguageConverter {
                                return $text;
                        }
                        else {
-                               $this->mTitleDisplay = $this->convert($text);
+                               $this->mTitleDisplay = $this->autoConvert($text);
                                return $this->mTitleDisplay;
                        }
                }
@@ -364,7 +289,7 @@ class LanguageConverter {
                        else
                                $rules = $marked[0];
 
-                       //FIXME: may cause trouble here...
+#FIXME: may cause trouble here...
                        //strip &nbsp; since it interferes with the parsing, plus,
                        //all spaces should be stripped in this tag anyway.
                        $rules = str_replace('&nbsp;', '', $rules);
@@ -456,16 +381,23 @@ class LanguageConverter {
      * @access public
         */
        function findVariantLink( &$link, &$nt ) {
+               static $count=0; //used to limit this operation
+               static $cache=array();
                global $wgDisableLangConversion;
                $pref = $this->getPreferredVariant();
                $ns=0;
                if(is_object($nt))
                        $ns = $nt->getNamespace();
-
+               if( $count > 50 && $ns != NS_CATEGORY )
+                       return;
+               $count++;
                $variants = $this->autoConvertToAllVariants($link);
                if($variants == false) //give up
                        return;
                foreach( $variants as $v ) {
+                       if(isset($cache[$v]))
+                               continue;
+                       $cache[$v] = 1;
                        $varnt = Title::newFromText( $v, $ns );
                        if( $varnt && $varnt->getArticleID() > 0 ) {
                                $nt = $varnt;
index d9063e2..2eede4b 100644 (file)
@@ -196,17 +196,14 @@ class SrConverter extends LanguageConverter {
 class LanguageSr extends LanguageSr_ec {
        function __construct() {
                global $wgHooks;
-
-               // these variants are currently UNUSED:
-               // 'sr-jc', 'sr-jl' 
-               $variants = array('sr', 'sr-ec', 'sr-el');
+               $variants = array('sr', 'sr-ec', 'sr-jc', 'sr-el', 'sr-jl');
                $variantfallbacks = array(
                        'sr'    => 'sr-ec',
-                       'sr-ec' => 'sr-ec',
-                       'sr-el' => 'sr-el',
-                       ); 
-
-
+                       'sr-ec' => 'sr-jc',
+                       'sr-jc' => 'sr-ec',
+                       'sr-el' => 'sr-jl',
+                       'sr-jl' => 'sr-el'
+               );
                $marker = array();//don't mess with these, leave them as they are
                $flags = array(
                        'S' => 'S', 'писмо' => 'S', 'pismo' => 'S',
index c75dd7e..d738624 100644 (file)
@@ -94,51 +94,6 @@ class LanguageUtf8 extends Language {
                                return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
        }
 
-       function ucwords($str) {
-               global $wikiUpperChars;
-
-               if ( LanguageUtf8::isMultibyte( $str ) ) {
-                       $str = LanguageUtf8::lc($str);
-
-                       if ( function_exists( 'mb_strtoupper' ) )
-                               $replaceCall = "mb_strtoupper(\"\$0\")";
-                       else 
-                               $replaceCall = "strtr( \"\$0\" , \$wikiUpperChars )";
-
-                       return preg_replace(
-                                       "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                                       $replaceCall,
-                                       $str
-                               );
-               }
-               else
-                       return ucwords( strtolower( $str ) );
-       }       
-
-       function ucwordbreaks($str){
-               global $wikiUpperChars;
-
-               if (LanguageUtf8::isMultibyte( $str ) ) {
-                       $str = LanguageUtf8::lc($str);
-
-                       if ( function_exists( 'mb_strtoupper' ) )
-                               $replaceCall = "mb_strtoupper(\"\$0\")";
-                       else 
-                               $replaceCall = "strtr( \"\$0\" , \$wikiUpperChars )";
-
-                       // since \b doesn't work for UTF-8, we explicitely define word break chars
-                       $breaks= "[ \-\(\)\}\{\.,\?!]";
-
-                       return preg_replace(
-                                       "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                                       $replaceCall,
-                                       $str
-                               );
-               }
-               else
-                       return Language::ucwordbreaks($str);
-       }
-
        function isMultibyte( $str ) {
                return (bool)preg_match( '/^[\x80-\xff]/', $str );
        }