From 5b2a644f2e3ff010d06789167f43d9c37d940269 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Robert=20Stojni=C4=87?= Date: Mon, 1 Jan 2007 17:20:19 +0000 Subject: [PATCH] More lang variant stuff: * More parser tests, also added 'variant' option for testing * Add global $wgDefaultLanguageVariant, can be used to set a default fallback variant * Restructured some of the parser code in LanguageConverter, fix flag processing and add some comments to despookify the old zh code :) * Cleanup of some old hacks in LanguageSr and Kk (do not use the global title object, but the one passed to the parser) --- RELEASE-NOTES | 4 ++ includes/DefaultSettings.php | 3 + languages/LanguageConverter.php | 94 +++++++++++++++++++++--------- languages/classes/LanguageKk.php | 20 +++---- languages/classes/LanguageSr.php | 44 ++++++-------- maintenance/parserTests.inc | 8 +++ maintenance/parserTests.txt | 99 +++++++++++++++++++++++++++++++- 7 files changed, 206 insertions(+), 66 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index a958171e19..ef064c0fc6 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -443,6 +443,10 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN maybe not even then, but it does) * (bug 8447) Fix SQL typo breaking non-default $wgHitcounterUpdateFreq * Do not allow previews of deleted images to be cached +* Add global variable $wgDefaultLanguageVariant used to set the default language + variant of a wiki to something different than the main language code +* Add 'variant' option to parserTests - runs test with the given variant as + preferred, utilize it for more parser tests of language variants code == Languages updated == diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 01acc5138c..bef233b35b 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -685,6 +685,9 @@ $wgMsgCacheExpiry = 86400; # Whether to enable language variant conversion. 
$wgDisableLangConversion = false; +# Default variant code, if false, the default will be the language code +$wgDefaultLanguageVariant = false; + /** * Show a bar of language selection links in the user login and user * registration forms; edit the "loginlanguagelinks" message to diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 5f2e2fc380..07a174c1b7 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -15,6 +15,7 @@ class LanguageConverter { var $mTables; var $mTitleDisplay=''; var $mDoTitleConvert=true, $mDoContentConvert=true; + var $mTitleFromFlag = false; var $mCacheKey; var $mLangObj; var $mMarkup; @@ -78,7 +79,7 @@ class LanguageConverter { * @access public */ function getPreferredVariant( $fromUser = true ) { - global $wgUser, $wgRequest, $wgVariantArticlePath; + global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant; if($this->mPreferredVariant) return $this->mPreferredVariant; @@ -109,6 +110,12 @@ class LanguageConverter { return $this->mPreferredVariant; } + // see if default variant is globally set + if($wgDefaultLanguageVariant != false && in_array( $wgDefaultLanguageVariant, $this->mVariants )){ + $this->mPreferredVariant = $wgDefaultLanguageVariant; + return $this->mPreferredVariant; + } + # FIXME rewrite code for parsing http header. The current code # is written specific for detecting zh- variants if( !$this->mPreferredVariant ) { @@ -124,8 +131,13 @@ class LanguageConverter { $pv = substr($zh,0,5); } } - return $pv; + // don't try to return bad variant + if(in_array( $pv, $this->mVariants )) + return $pv; } + + return $this->mMainLanguageCode; + } /** @@ -277,6 +289,40 @@ class LanguageConverter { return $text; } + /** + * Parse flags with syntax -{FLAG| ... }- + * + */ + function parseFlags($marked){ + $flags = array(); + + // process flag only if the flag is valid + if(! 
( in_array($marked[0],$this->mFlags) && $marked[1]=='|' ) ) + return array($marked,array()); + + $tt = explode($this->mMarkup['flagsep'], $marked, 2); + + if(sizeof($tt) == 2) { + $f = explode($this->mMarkup['varsep'], $tt[0]); + foreach($f as $ff) { + $ff = trim($ff); + if(array_key_exists($ff, $this->mFlags) && + !array_key_exists($this->mFlags[$ff], $flags)) + $flags[] = $this->mFlags[$ff]; + } + $rules = $tt[1]; + } + else + $rules = $marked; + + //FIXME: may cause trouble here... + //strip   since it interferes with the parsing, plus, + //all spaces should be stripped in this tag anyway. + $rules = str_replace(' ', '', $rules); + + return array($rules,$flags); + } + /** * convert text to different variants of a language. the automatic * conversion is done in autoConvert(). here we parse the text @@ -308,6 +354,14 @@ class LanguageConverter { return $text; if( $isTitle ) { + + // use the title from the T flag if any + if($this->mTitleFromFlag){ + $this->mTitleFromFlag = false; + return $this->mTitleDisplay; + } + + // check for __NOTC__ tag if( !$this->mDoTitleConvert ) { $this->mTitleDisplay = $text; return $text; @@ -332,50 +386,38 @@ class LanguageConverter { if( isset( $this->mVariantFallbacks[$plang] ) ) { $fallback = $this->mVariantFallbacks[$plang]; } else { - // This sounds... bad? 
- $fallback = ''; + $fallback = $this->mMainLanguageCode; } $tarray = explode($this->mMarkup['begin'], $text); $tfirst = array_shift($tarray); $text = $this->autoConvert($tfirst); - foreach($tarray as $txt) { + foreach($tarray as $txt) { $marked = explode($this->mMarkup['end'], $txt, 2); - $flags = array(); - $tt = explode($this->mMarkup['flagsep'], $marked[0], 2); - if(sizeof($tt) == 2) { - $f = explode($this->mMarkup['varsep'], $tt[0]); - foreach($f as $ff) { - $ff = trim($ff); - if(array_key_exists($ff, $this->mFlags) && - !array_key_exists($this->mFlags[$ff], $flags)) - $flags[] = $this->mFlags[$ff]; - } - $rules = $tt[1]; - } - else - $rules = $marked[0]; - - //FIXME: may cause trouble here... - //strip   since it interferes with the parsing, plus, - //all spaces should be stripped in this tag anyway. - $rules = str_replace(' ', '', $rules); + // strip the flags from syntax like -{T| ... }- + list($rules,$flags) = $this->parseFlags($marked[0]); + // parse the contents -{ ... }- $carray = $this->parseManualRule($rules, $flags); + $disp = ''; if(array_key_exists($plang, $carray)) $disp = $carray[$plang]; else if(array_key_exists($fallback, $carray)) $disp = $carray[$fallback]; if($disp) { - if(in_array('T', $flags)) + // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title + if(in_array('T', $flags)){ + $this->mTitleFromFlag = true; $this->mTitleDisplay = $disp; + } else $text .= $disp; + // use syntax -{A|zh:WordZh;zh-tw:WordTw}- to introduce a custom mapping between + // words WordZh and WordTw in the whole text if(in_array('A', $flags)) { - /* modify the conversion table for this session*/ /* fill in the missing variants, if any, with fallbacks */ diff --git a/languages/classes/LanguageKk.php b/languages/classes/LanguageKk.php index df060d23aa..46162e01ad 100644 --- a/languages/classes/LanguageKk.php +++ b/languages/classes/LanguageKk.php @@ -97,17 +97,15 @@ class KkConverter extends LanguageConverter { ); } - /* - * Override function from 
LanguageConvertor - * Additional checks: - * - There should be no conversion for Talk pages - */ - function getPreferredVariant(){ - global $wgTitle; - if( is_object( $wgTitle ) && $wgTitle->isTalkPage()) { - return $this->mMainLanguageCode; - } - return parent::getPreferredVariant(); + + // Do not convert content on talk pages + function parserConvert( $text, &$parser ){ + if(is_object($parser->mTitle) && $parser->mTitle->isTalkPage()) + $this->mDoContentConvert=false; + else + $this->mDoContentConvert=true; + + return parent::parserConvert($text, $parser ); } /* diff --git a/languages/classes/LanguageSr.php b/languages/classes/LanguageSr.php index 2d56aff959..d7c75ed191 100644 --- a/languages/classes/LanguageSr.php +++ b/languages/classes/LanguageSr.php @@ -52,8 +52,6 @@ class SrConverter extends LanguageConverter { 'Nj' => 'Њ', 'n!j' => 'нј', 'N!j'=> 'Нј', 'N!J'=> 'НЈ' ); - var $mParsingContent=false; - function loadDefaultTables() { $this->mTables = array( 'sr-ec' => new ReplacementArray( $this->mToCyrillics ), @@ -70,38 +68,28 @@ class SrConverter extends LanguageConverter { currently, and just produces a couple of bugs */ function parseManualRule($rule, $flags=array()) { - // ignore all formatting - foreach($this->mVariants as $v) { - $carray[$v] = $rule; - } + if(in_array('T',$flags)){ + return parent::parseManualRule($rule, $flags); + } + // otherwise ignore all formatting + foreach($this->mVariants as $v) { + $carray[$v] = $rule; + } + return $carray; } - // Set a flag when parsing content, this is used to prevent - // conversion of content within talk pages + // Do not convert content on talk pages function parserConvert( $text, &$parser ){ - $this->mParsingContent = true; - $output = parent::parserConvert($text, $parser ); - $this->mParsingContent = false; - return $output; - - } + if(is_object($parser->mTitle) && $parser->mTitle->isTalkPage()) + $this->mDoContentConvert=false; + else + $this->mDoContentConvert=true; - /* - * Override function from 
LanguageConvertor - * Additional checks: - * - There should be no conversion for Talk pages - */ - function getPreferredVariant( $fromUser=true ){ - global $wgTitle; - if(is_object($wgTitle) && $wgTitle->isTalkPage() && $this->mParsingContent){ - return $this->mMainLanguageCode; - } - return parent::getPreferredVariant($fromUser); + return parent::parserConvert($text, $parser ); } - /* * A function wrapper: * - if there is no selected variant, leave the link @@ -191,8 +179,8 @@ class LanguageSr extends LanguageSr_ec { $variants = array('sr', 'sr-ec', 'sr-el'); $variantfallbacks = array( 'sr' => 'sr-ec', - 'sr-ec' => 'sr-ec', - 'sr-el' => 'sr-el', + 'sr-ec' => 'sr', + 'sr-el' => 'sr', ); diff --git a/maintenance/parserTests.inc b/maintenance/parserTests.inc index 0f5a4bd125..c85220d031 100644 --- a/maintenance/parserTests.inc +++ b/maintenance/parserTests.inc @@ -356,6 +356,12 @@ class ParserTest { $lang = 'en'; } + if( preg_match( '/variant=([a-z]+(?:-[a-z]+)?)/', $opts, $m ) ) + $variant = $m[1]; + else + $variant = false; + + $settings = array( 'wgServer' => 'http://localhost', 'wgScript' => '/index.php', @@ -382,6 +388,8 @@ class ParserTest { 'wgLocaltimezone' => 'UTC', 'wgAllowExternalImages' => true, 'wgUseTidy' => false, + 'wgDefaultLanguageVariant' => $variant, + 'wgVariantArticlePath' => false, ); $this->savedGlobals = array(); foreach( $settings as $var => $val ) { diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index fea83ed0dd..7cd62e07ec 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -18,7 +18,8 @@ # subpage enable subpages (disabled by default) # noxml don't check for XML well formdness # title=[[XXX]] run test using article title XXX -# language=XXX set content language to XXX for this test +# language=XXX set content language to XXX for this test +# variant=XXX set the variant of language for this test (eg zh-tw) # disabled do not run test # # For testing purposes, temporary articles can created: 
@@ -6265,6 +6266,102 @@ Latin proverb: -{Ne nuntium necare}- !! end +!! test +Prevent conversion with -{}- tags (language variants) +!! options +language=sr variant=sr-ec +!! input +Latinski: -{Ne nuntium necare}- +!! result +

Латински: Ne nuntium necare +

+!! end + + +!! test +Prevent conversion of text with -{}- tags (language variants) +!! options +language=sr variant=sr-ec +!! input +Latinski: -{Ne nuntium necare}- +!! result +

Латински: Ne nuntium necare +

+!! end + + +!! test +Prevent conversion of links with -{}- tags (language variants) +!! options +language=sr variant=sr-ec +!! input +-{[[Main Page]]}- +!! result +

Main Page +

+!! end + + +!! test +-{}- tags within headlines (within html for parserConvert()) +!! options +language=sr variant=sr-ec +!! input +== -{Naslov}- == +!! result +

[уреди] Naslov

+ +!! end + + +!! test +Explicit definition of language variant alternatives +!! options +language=zh variant=zh-tw +!! input +-{zh:China;zh-tw:Taiwan}-, not China +!! result +

Taiwan, not China +

+!! end + + +!! test +Adding explicit session-wise language variant mapping (A flag) +!! options +language=zh variant=zh-tw +!! input +-{A|zh:China;zh-tw:Taiwan}- is China +!! result +

Taiwan is Taiwan +

+!! end + + +!! test +Adding explicit conversion rule for title (T flag) +!! options +language=zh variant=zh-tw +!! input +Should be stripped-{T|zh:China;zh-tw:Taiwan}-! +!! result +

Should be stripped! +

+!! end + + +!! test +Do not convert roman numbers to language variants +!! options +language=sr variant=sr-ec +!! input +Fridrih IV je car. +!! result +

Фридрих IV је цар. +

+!! end + + # # # -- 2.20.1