From: Brion Vibber Date: Wed, 19 Jul 2006 20:13:39 +0000 (+0000) Subject: Revert 15733 and 15719 for the moment; I see some eval'd string code and other things... X-Git-Tag: 1.31.0-rc.0~56180 X-Git-Url: http://git.cyclocoop.org/%24image?a=commitdiff_plain;h=ba78b052b03dfe8fb23aeae9d9d423f078f59d6c;p=lhc%2Fweb%2Fwiklou.git Revert 15733 and 15719 for the moment; I see some eval'd string code and other things which make me nervous and I don't think anybody's reviewed this --- diff --git a/includes/Parser.php b/includes/Parser.php index 74b926f078..1079f5b822 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -1408,6 +1408,7 @@ class Parser $selflink = $this->mTitle->getPrefixedText(); wfProfileOut( $fname.'-setup' ); + $checkVariantLink = sizeof($wgContLang->getVariants())>1; $useSubpages = $this->areSubpagesAllowed(); # Loop for each link @@ -1491,6 +1492,13 @@ class Parser continue; } + #check other language variants of the link + #if the article does not exist + if( $checkVariantLink + && $nt->getArticleID() == 0 ) { + $wgContLang->findVariantLink($link, $nt); + } + $ns = $nt->getNamespace(); $iw = $nt->getInterWiki(); @@ -3822,7 +3830,6 @@ class Parser function replaceLinkHolders( &$text, $options = 0 ) { global $wgUser; global $wgOutputReplace; - global $wgContLang, $wgLanguageCode; $fname = 'Parser::replaceLinkHolders'; wfProfileIn( $fname ); @@ -3913,86 +3920,6 @@ class Parser } wfProfileOut( $fname.'-check' ); - # Do a second query for links in different language variants (if needed) - if(sizeof($wgContLang->getVariants())>1){ - $linkBatch = new LinkBatch(); - - // Add variants of links to link batch - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { - $title = $this->mLinkHolders['titles'][$key]; - if ( is_null( $title ) ) - continue; - - $pdbk = $title->getPrefixedDBkey(); - - // generate all variants of the link title text - $allTextVariants = $wgContLang->convertLinkToAllVariants($title->getText()); - - // if link was not found (in 
first query), add all variants to query - if ( !isset($colours[$pdbk]) ){ - foreach($allTextVariants as $textVariant){ - $linkBatch->addObj( Title::makeTitleSafe( $ns, $textVariant ) ); - } - } - // if link was found add only variant with fixed title - else if($colours[$pdbk] == 1){ - $fixedCode = $wgLanguageCode.'-fixed'; - if( isset($allTextVariants[$fixedCode]) ){ - $linkBatch->addObj( Title::makeTitleSafe( $ns, $allTextVariants[$fixedCode] ) ); - } - } - } - - # fetch link variants into cache - $linkBatch->execute(); - - # check if links are found in some of the variants - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { - $title = $this->mLinkHolders['titles'][$key]; - if ( is_null( $title ) ) - continue; - - $pdbk = $title->getPrefixedDBkey(); - $allTextVariants = $wgContLang->convertLinkToAllVariants($title->getText()); - - // If link has already been found, check only fixed variant - if(isset($colours[$pdbk]) && $colours[$pdbk] == 1){ - $fixedCode = $wgLanguageCode.'-fixed'; - - if( isset($allTextVariants[$fixedCode]) ){ - $allTextVariants = array($fixedCode => $allTextVariants[$fixedCode]); - } - } - - // process the link variants - if ( !isset($colours[$pdbk]) || $colours[$pdbk] == 1 ){ - foreach($allTextVariants as $variantCode => $textVariant){ - $variantTitle=Title::makeTitleSafe( $ns, $textVariant ); - if(is_null($variantTitle)) continue; - - $varpdbk = $variantTitle->getPrefixedDBkey(); - - if($linkCache->getGoodLinkID( $varpdbk ) != 0){ - - // found link in some of the variants, replace the link holder data - $this->mLinkHolders['titles'][$key] = $variantTitle; - $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey(); - - if($wgContLang->getPreferredVariant() == $wgLanguageCode) - $this->mLinkHolders['texts'][$key] = $this->mLinkHolders['texts'][$key]; - else - $this->mLinkHolders['texts'][$key] = $variantTitle->getText(); - - $pdbks[$key] = $varpdbk; - $colours[$varpdbk] = 1; - - break; - } - } - } - } - } - # Construct 
search and replace arrays wfProfileIn( $fname.'-construct' ); $wgOutputReplace = array(); diff --git a/includes/SearchEngine.php b/includes/SearchEngine.php index 6af1e41596..c3b38519a7 100644 --- a/includes/SearchEngine.php +++ b/includes/SearchEngine.php @@ -51,7 +51,6 @@ class SearchEngine { * @private */ function getNearMatch( $term ) { - global $wgContLang; # Exact match? No need to look further. $title = Title::newFromText( $term ); if (is_null($title)) @@ -63,27 +62,33 @@ class SearchEngine { # Now try all lower case (i.e. first letter capitalized) # - $title = Title::newFromText( $wgContLang->lc( $term ) ); + $title = Title::newFromText( strtolower( $term ) ); if ( $title->exists() ) { return $title; } # Now try capitalized string # - $title = Title::newFromText( $wgContLang->ucwords( $term ) ); + $title = Title::newFromText( ucwords( strtolower( $term ) ) ); if ( $title->exists() ) { return $title; } # Now try all upper case # - $title = Title::newFromText( $wgContLang->uc( $term ) ); + $title = Title::newFromText( strtoupper( $term ) ); if ( $title->exists() ) { return $title; } # Now try Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc - $title = Title::newFromText( $wgContLang->ucwordbreaks($term) ); + $title = Title::newFromText( preg_replace_callback( + '/\b([\w\x80-\xff]+)\b/', + create_function( '$matches', ' + global $wgContLang; + return $wgContLang->ucfirst($matches[1]); + ' ), + $term ) ); if ( $title->exists() ) { return $title; } diff --git a/includes/SpecialSearch.php b/includes/SpecialSearch.php index a8aadfa0f0..c4669854d6 100644 --- a/includes/SpecialSearch.php +++ b/includes/SpecialSearch.php @@ -77,7 +77,6 @@ class SpecialSearch { function goResult( $term ) { global $wgOut; global $wgGoToEdit; - global $wgContLang; $this->setupPage( $term ); @@ -97,20 +96,6 @@ class SpecialSearch { return; } - # if language supports variants, search in all variants - if(sizeof($wgContLang->getVariants())>1){ - $allTermVariants = 
$wgContLang->convertLinkToAllVariants($term); - - foreach($allTermVariants as $termVariant){ - $t = SearchEngine::getNearMatch( $termVariant ); - if( !is_null( $t ) ) { - $wgOut->redirect( $t->getFullURL() ); - wfProfileOut( $fname ); - return; - } - } - } - # No match, generate an edit URL $t = Title::newFromText( $term ); if( is_null( $t ) ) { diff --git a/languages/Language.php b/languages/Language.php index f001ee6f5b..ba4e713fb1 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -315,7 +315,6 @@ class fakeConverter { function getParsedTitle() {return '';} function markNoConversion($text) {return $text;} function convertCategoryKey( $key ) {return $key; } - function convertLinkToAllVariants($text){ return array( $this->mLang->getCode() => $text); } } @@ -755,21 +754,6 @@ class Language { return strtolower( $str ); } - function ucwords($str) { - return ucwords( strtolower( $str ) ); - } - - # capitalize words at word breaks - function ucwordbreaks($str){ - return preg_replace_callback( - '/\b([\w\x80-\xff]+)\b/', - create_function( '$matches', ' - global $wgContLang; - return $wgContLang->ucfirst($matches[1]); - ' ), - $str ); - } - function checkTitleEncoding( $s ) { global $wgInputEncoding; @@ -1169,17 +1153,6 @@ class Language { $this->mConverter->findVariantLink($link, $nt); } - /** - * If a language supports multiple variants, converts text - * into an array of all possible variants of the text: - * 'variant' => text in that variant - */ - - function convertLinkToAllVariants($text){ - return $this->mConverter->convertLinkToAllVariants($text); - } - - /** * returns language specific options used by User::getPageRenderHash() * for example, the preferred language variant diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index afbde79032..c194acee62 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -36,7 +36,6 @@ class LanguageConverter { $markup=array(), $flags = array()) { 
global $wgDBname; - global $wgLegalTitleChars; $this->mLangObj = $langobj; $this->mMainLanguageCode = $maincode; $this->mVariants = $variants; @@ -47,11 +46,6 @@ class LanguageConverter { $this->mMarkup = array_merge($m, $markup); $f = array('A'=>'A', 'T'=>'T'); $this->mFlags = array_merge($f, $flags); - - // enable escape characters -{ }- in titles - if(!preg_match('/\{/',$wgLegalTitleChars)) $wgLegalTitleChars.='\{'; - if(!preg_match('/\}/',$wgLegalTitleChars)) $wgLegalTitleChars.='\}'; - } /** @@ -120,34 +114,6 @@ class LanguageConverter { } } - /** - * This function should be called on bare text - * It translates text into variant, specials: - * - ommiting roman numbers - */ - function translateText($text, $toVariant){ - $breaks = '[^\w\x80-\xff]'; - - // regexp for roman numbers - $roman = 'M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'; - - $reg = '/^'.$roman.'$|^'.$roman.$breaks.'|'.$breaks.$roman.'$|'.$breaks.$roman.$breaks.'/'; - - $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE); - - - $m = array_shift($matches); - $ret = strtr($m[0], $this->mTables[$toVariant]); - $mstart = $m[1]+strlen($m[0]); - foreach($matches as $m) { - $ret .= substr($text, $mstart, $m[1]-$mstart); - $ret .= strtr($m[0], $this->mTables[$toVariant]); - $mstart = $m[1] + strlen($m[0]); - } - - return $ret; - } - /** * dictionary-based conversion * @@ -181,21 +147,19 @@ class LanguageConverter { $marker = ""; // this one is needed when the text is inside an html markup - $htmlfix = '|<[^>]+$|^[^<>]*>'; + $htmlfix = '|<[^>]+=\"[^(>=)]*$|^[^(<>=\")]*\"[^>]*>'; - // disable convert to variants between tags - $codefix = '.+?<\/code>|'; - - $reg = '/'.$codefix.'<[^>]+>|&[a-z#][a-z0-9]+;' . $marker . $htmlfix . '/s'; + $reg = '/<[^>]+>|&[a-z#][a-z0-9]+;' . $marker . $htmlfix . 
'/'; $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE); + $m = array_shift($matches); - $ret = $this->translateText($m[0],$toVariant); + $ret = strtr($m[0], $this->mTables[$toVariant]); $mstart = $m[1]+strlen($m[0]); foreach($matches as $m) { $ret .= substr($text, $mstart, $m[1]-$mstart); - $ret .= $this->translateText($m[0],$toVariant); + $ret .= strtr($m[0], $this->mTables[$toVariant]); $mstart = $m[1] + strlen($m[0]); } wfProfileOut( $fname ); @@ -207,9 +171,9 @@ class LanguageConverter { * * @param string $text the text to be converted * @return array of string - * @public + * @private */ - function autoConvertToAllVariants($text, $includeFixedVariant=true) { + function autoConvertToAllVariants($text) { $fname="LanguageConverter::autoConvertToAllVariants"; wfProfileIn( $fname ); if( !$this->mTablesLoaded ) @@ -217,51 +181,12 @@ class LanguageConverter { $ret = array(); foreach($this->mVariants as $variant) { - $ret[$variant] = $this->translateText($text,$variant); + $ret[$variant] = strtr($text, $this->mTables[$variant]); } - if($includeFixedVariant) - $ret[$this->mMainLanguageCode.'-fixed'] = $this->mMarkup['begin'].$text.$this->mMarkup['end']; - wfProfileOut( $fname ); return $ret; } - /** - * convert link text to all supported variants - * - * @param string $text the text to be converted - * @return array of string - * @public - */ - function convertLinkToAllVariants($text,$includeFixedVariant=true) { - if( !$this->mTablesLoaded ) - $this->loadTables(); - - $ret = array(); - $tarray = explode($this->mMarkup['begin'], $text); - $tfirst = array_shift($tarray); - - foreach($this->mVariants as $variant) - $ret[$variant] = $this->translateText($tfirst,$variant); - - foreach($tarray as $txt) { - $marked = explode($this->mMarkup['end'], $txt, 2); - - foreach($this->mVariants as $variant){ - $ret[$variant] .= $this->mMarkup['begin'].$marked[0].$this->mMarkup['end']; - if(array_key_exists(1, $marked)) - $ret[$variant] .= 
$this->translateText($marked[1],$variant); - } - - } - - if($includeFixedVariant) - $ret[$this->mMainLanguageCode.'-fixed'] = $this->mMarkup['begin'].$text.$this->mMarkup['end']; - - return $ret; - } - - /** * Convert text using a parser object for context */ @@ -269,7 +194,7 @@ class LanguageConverter { global $wgDisableLangConversion; /* don't do anything if this is the conversion table */ if ( $parser->mTitle->getNamespace() == NS_MEDIAWIKI && - strpos($parser->mTitle->getText(), "Conversiontable") !== false ) + strpos($parser->mTitle->getText, "Conversiontable") !== false ) { return $text; } @@ -327,7 +252,7 @@ class LanguageConverter { return $text; } else { - $this->mTitleDisplay = $this->convert($text); + $this->mTitleDisplay = $this->autoConvert($text); return $this->mTitleDisplay; } } @@ -364,7 +289,7 @@ class LanguageConverter { else $rules = $marked[0]; - //FIXME: may cause trouble here... +#FIXME: may cause trouble here... //strip   since it interferes with the parsing, plus, //all spaces should be stripped in this tag anyway. 
$rules = str_replace(' ', '', $rules); @@ -456,16 +381,23 @@ class LanguageConverter { * @access public */ function findVariantLink( &$link, &$nt ) { + static $count=0; //used to limit this operation + static $cache=array(); global $wgDisableLangConversion; $pref = $this->getPreferredVariant(); $ns=0; if(is_object($nt)) $ns = $nt->getNamespace(); - + if( $count > 50 && $ns != NS_CATEGORY ) + return; + $count++; $variants = $this->autoConvertToAllVariants($link); if($variants == false) //give up return; foreach( $variants as $v ) { + if(isset($cache[$v])) + continue; + $cache[$v] = 1; $varnt = Title::newFromText( $v, $ns ); if( $varnt && $varnt->getArticleID() > 0 ) { $nt = $varnt; diff --git a/languages/LanguageSr.php b/languages/LanguageSr.php index d9063e20ca..2eede4b632 100644 --- a/languages/LanguageSr.php +++ b/languages/LanguageSr.php @@ -196,17 +196,14 @@ class SrConverter extends LanguageConverter { class LanguageSr extends LanguageSr_ec { function __construct() { global $wgHooks; - - // these variants are currently UNUSED: - // 'sr-jc', 'sr-jl' - $variants = array('sr', 'sr-ec', 'sr-el'); + $variants = array('sr', 'sr-ec', 'sr-jc', 'sr-el', 'sr-jl'); $variantfallbacks = array( 'sr' => 'sr-ec', - 'sr-ec' => 'sr-ec', - 'sr-el' => 'sr-el', - ); - - + 'sr-ec' => 'sr-jc', + 'sr-jc' => 'sr-ec', + 'sr-el' => 'sr-jl', + 'sr-jl' => 'sr-el' + ); $marker = array();//don't mess with these, leave them as they are $flags = array( 'S' => 'S', 'писмо' => 'S', 'pismo' => 'S', diff --git a/languages/LanguageUtf8.php b/languages/LanguageUtf8.php index c75dd7e6ee..d738624b77 100644 --- a/languages/LanguageUtf8.php +++ b/languages/LanguageUtf8.php @@ -94,51 +94,6 @@ class LanguageUtf8 extends Language { return $first ? strtolower( substr( $str, 0, 1 ) ) . 
substr( $str, 1 ) : strtolower( $str ); } - function ucwords($str) { - global $wikiUpperChars; - - if ( LanguageUtf8::isMultibyte( $str ) ) { - $str = LanguageUtf8::lc($str); - - if ( function_exists( 'mb_strtoupper' ) ) - $replaceCall = "mb_strtoupper(\"\$0\")"; - else - $replaceCall = "strtr( \"\$0\" , \$wikiUpperChars )"; - - return preg_replace( - "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", - $replaceCall, - $str - ); - } - else - return ucwords( strtolower( $str ) ); - } - - function ucwordbreaks($str){ - global $wikiUpperChars; - - if (LanguageUtf8::isMultibyte( $str ) ) { - $str = LanguageUtf8::lc($str); - - if ( function_exists( 'mb_strtoupper' ) ) - $replaceCall = "mb_strtoupper(\"\$0\")"; - else - $replaceCall = "strtr( \"\$0\" , \$wikiUpperChars )"; - - // since \b doesn't work for UTF-8, we explicitely define word break chars - $breaks= "[ \-\(\)\}\{\.,\?!]"; - - return preg_replace( - "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", - $replaceCall, - $str - ); - } - else - return Language::ucwordbreaks($str); - } - function isMultibyte( $str ) { return (bool)preg_match( '/^[\x80-\xff]/', $str ); }