More lang variant stuff:
author Robert Stojnić <rainman@users.mediawiki.org>
Mon, 1 Jan 2007 17:20:19 +0000 (17:20 +0000)
committer Robert Stojnić <rainman@users.mediawiki.org>
Mon, 1 Jan 2007 17:20:19 +0000 (17:20 +0000)
* More parser tests, also added 'variant' option for testing
* Add global $wgDefaultLanguageVariant, can be used to set a default
  fallback variant
* Restructured some of the parser code in LanguageConverter, fix flag
  processing and add some comments to despookify the old zh code :)
* Cleanup of some old hacks in LanguageSr and Kk (do not use the global
  title object, but the one passed to the parser)

RELEASE-NOTES
includes/DefaultSettings.php
languages/LanguageConverter.php
languages/classes/LanguageKk.php
languages/classes/LanguageSr.php
maintenance/parserTests.inc
maintenance/parserTests.txt

index a958171..ef064c0 100644 (file)
@@ -443,6 +443,10 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
   maybe not even then, but it does)
 * (bug 8447) Fix SQL typo breaking non-default $wgHitcounterUpdateFreq
 * Do not allow previews of deleted images to be cached
+* Add global variable $wgDefaultLanguageVariant, used to set the default language
+  variant of a wiki to something other than the main language code
+* Add 'variant' option to parserTests: runs the test with the given variant as
+  preferred; used for additional parser tests of the language variant code
 
 == Languages updated ==
 
index 01acc51..bef233b 100644 (file)
@@ -685,6 +685,9 @@ $wgMsgCacheExpiry   = 86400;
 # Whether to enable language variant conversion.
 $wgDisableLangConversion = false;
 
+# Default variant code; if false, the default will be the language code
+$wgDefaultLanguageVariant = false;
+
 /**
  * Show a bar of language selection links in the user login and user
  * registration forms; edit the "loginlanguagelinks" message to
index 5f2e2fc..07a174c 100644 (file)
@@ -15,6 +15,7 @@ class LanguageConverter {
        var $mTables;
        var $mTitleDisplay='';
        var $mDoTitleConvert=true, $mDoContentConvert=true;
+       var $mTitleFromFlag = false;
        var $mCacheKey;
        var $mLangObj;
        var $mMarkup;
@@ -78,7 +79,7 @@ class LanguageConverter {
      * @access public
        */
        function getPreferredVariant( $fromUser = true ) {
-               global $wgUser, $wgRequest, $wgVariantArticlePath;
+               global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant;
 
                if($this->mPreferredVariant)
                        return $this->mPreferredVariant;
@@ -109,6 +110,12 @@ class LanguageConverter {
                        return $this->mPreferredVariant;
                }
 
+               // see if default variant is globally set
+               if($wgDefaultLanguageVariant != false  &&  in_array( $wgDefaultLanguageVariant, $this->mVariants )){
+                       $this->mPreferredVariant = $wgDefaultLanguageVariant;
+                       return $this->mPreferredVariant;
+               }
+
                # FIXME rewrite code for parsing http header. The current code
                # is written specific for detecting zh- variants
                if( !$this->mPreferredVariant ) {
@@ -124,8 +131,13 @@ class LanguageConverter {
                                        $pv = substr($zh,0,5);
                                }
                        }
-                       return $pv;
+                       // don't try to return bad variant
+                       if(in_array( $pv, $this->mVariants ))
+                               return $pv;
                }
+
+               return $this->mMainLanguageCode;
+
        }
 
        /**
@@ -277,6 +289,40 @@ class LanguageConverter {
                return $text;
        }
 
+       /**
+        *  Parse flags with syntax -{FLAG| ... }-
+        *
+        */
+       function parseFlags($marked){
+                       $flags = array();
+
+                       // process flag only if the flag is valid
+                       if(! ( in_array($marked[0],$this->mFlags) && $marked[1]=='|' ) )
+                               return array($marked,array());
+
+                       $tt = explode($this->mMarkup['flagsep'], $marked, 2);
+
+                       if(sizeof($tt) == 2) {
+                               $f = explode($this->mMarkup['varsep'], $tt[0]);
+                               foreach($f as $ff) {
+                                       $ff = trim($ff);
+                                       if(array_key_exists($ff, $this->mFlags) &&
+                                               !array_key_exists($this->mFlags[$ff], $flags))
+                                               $flags[] = $this->mFlags[$ff];
+                               }
+                               $rules = $tt[1];
+                       }
+                       else
+                               $rules = $marked;
+
+                       //FIXME: may cause trouble here...
+                       //strip &nbsp; since it interferes with the parsing, plus,
+                       //all spaces should be stripped in this tag anyway.
+                       $rules = str_replace('&nbsp;', '', $rules);
+
+                       return array($rules,$flags);
+       }
+
        /**
         * convert text to different variants of a language. the automatic
         * conversion is done in autoConvert(). here we parse the text
@@ -308,6 +354,14 @@ class LanguageConverter {
                        return $text;
 
                if( $isTitle ) {
+
+                       // use the title from the T flag if any
+                       if($this->mTitleFromFlag){
+                               $this->mTitleFromFlag = false;
+                               return $this->mTitleDisplay;
+                       }
+
+                       // check for __NOTC__ tag
                        if( !$this->mDoTitleConvert ) {
                                $this->mTitleDisplay = $text;
                                return $text;
@@ -332,50 +386,38 @@ class LanguageConverter {
                if( isset( $this->mVariantFallbacks[$plang] ) ) {
                        $fallback = $this->mVariantFallbacks[$plang];
                } else {
-                       // This sounds... bad?
-                       $fallback = '';
+                       $fallback = $this->mMainLanguageCode;
                }
 
                $tarray = explode($this->mMarkup['begin'], $text);
                $tfirst = array_shift($tarray);
                $text = $this->autoConvert($tfirst);
-               foreach($tarray as $txt) {
+               foreach($tarray as $txt) {      
                        $marked = explode($this->mMarkup['end'], $txt, 2);
-                       $flags = array();
-                       $tt = explode($this->mMarkup['flagsep'], $marked[0], 2);
 
-                       if(sizeof($tt) == 2) {
-                               $f = explode($this->mMarkup['varsep'], $tt[0]);
-                               foreach($f as $ff) {
-                                       $ff = trim($ff);
-                                       if(array_key_exists($ff, $this->mFlags) &&
-                                               !array_key_exists($this->mFlags[$ff], $flags))
-                                               $flags[] = $this->mFlags[$ff];
-                               }
-                               $rules = $tt[1];
-                       }
-                       else
-                               $rules = $marked[0];
-
-                       //FIXME: may cause trouble here...
-                       //strip &nbsp; since it interferes with the parsing, plus,
-                       //all spaces should be stripped in this tag anyway.
-                       $rules = str_replace('&nbsp;', '', $rules);
+                       // strip the flags from syntax like -{T| ... }-
+                       list($rules,$flags) = $this->parseFlags($marked[0]);
 
+                       // parse the contents -{ ... }- 
                        $carray = $this->parseManualRule($rules, $flags);
+
                        $disp = '';
                        if(array_key_exists($plang, $carray))
                                $disp = $carray[$plang];
                        else if(array_key_exists($fallback, $carray))
                                $disp = $carray[$fallback];
                        if($disp) {
-                               if(in_array('T',  $flags))
+                               // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title
+                               if(in_array('T',  $flags)){
+                                       $this->mTitleFromFlag = true;
                                        $this->mTitleDisplay = $disp;
+                               }
                                else
                                        $text .= $disp;
 
+                               // use syntax -{A|zh:WordZh;zh-tw:WordTw}- to introduce a custom mapping between
+                               // words WordZh and WordTw in the whole text 
                                if(in_array('A', $flags)) {
-                                       /* modify the conversion table for this session*/
 
                                        /* fill in the missing variants, if any,
                                            with fallbacks */
index df060d2..46162e0 100644 (file)
@@ -97,17 +97,15 @@ class KkConverter extends LanguageConverter {
                );
        }
 
-       /*
-        * Override function from LanguageConvertor
-        * Additional checks:
-        *  - There should be no conversion for Talk pages
-        */
-       function getPreferredVariant(){
-               global $wgTitle;
-               if( is_object( $wgTitle ) && $wgTitle->isTalkPage()) {
-                       return $this->mMainLanguageCode;
-               }
-               return parent::getPreferredVariant();
+
+       // Do not convert content on talk pages
+       function parserConvert( $text, &$parser ){
+               if(is_object($parser->mTitle) && $parser->mTitle->isTalkPage())
+                       $this->mDoContentConvert=false;
+               else 
+                       $this->mDoContentConvert=true;
+
+               return parent::parserConvert($text, $parser );
        }
 
        /*
index 2d56aff..d7c75ed 100644 (file)
@@ -52,8 +52,6 @@ class SrConverter extends LanguageConverter {
                'Nj' => 'Њ', 'n!j' => 'нј', 'N!j'=> 'Нј', 'N!J'=> 'НЈ'
        );
 
-       var $mParsingContent=false;
-
        function loadDefaultTables() {
                $this->mTables = array(
                        'sr-ec' => new ReplacementArray( $this->mToCyrillics ),
@@ -70,38 +68,28 @@ class SrConverter extends LanguageConverter {
                        currently, and just produces a couple of bugs
        */
        function parseManualRule($rule, $flags=array()) {
-               // ignore all formatting
-               foreach($this->mVariants as $v) {
-                               $carray[$v] = $rule;
-                       }
+               if(in_array('T',$flags)){
+                       return parent::parseManualRule($rule, $flags);
+               }
 
+               // otherwise ignore all formatting
+               foreach($this->mVariants as $v) {
+                       $carray[$v] = $rule;
+               }
+               
                return $carray;
        }
 
-       // Set a flag when parsing content, this is used to prevent 
-       // conversion of content within talk pages
+       // Do not convert content on talk pages
        function parserConvert( $text, &$parser ){
-               $this->mParsingContent = true;
-               $output = parent::parserConvert($text, $parser );
-               $this->mParsingContent = false;
-               return $output;
-               
-       }
+               if(is_object($parser->mTitle) && $parser->mTitle->isTalkPage())
+                       $this->mDoContentConvert=false;
+               else 
+                       $this->mDoContentConvert=true;
 
-       /*
-        * Override function from LanguageConvertor
-        * Additional checks: 
-        *  - There should be no conversion for Talk pages
-        */
-       function getPreferredVariant( $fromUser=true ){
-               global $wgTitle;
-               if(is_object($wgTitle) && $wgTitle->isTalkPage() && $this->mParsingContent){
-                       return $this->mMainLanguageCode;
-               }
-               return parent::getPreferredVariant($fromUser);
+               return parent::parserConvert($text, $parser );
        }
 
-
        /*
         * A function wrapper:
         *   - if there is no selected variant, leave the link 
@@ -191,8 +179,8 @@ class LanguageSr extends LanguageSr_ec {
                $variants = array('sr', 'sr-ec', 'sr-el');
                $variantfallbacks = array(
                        'sr'    => 'sr-ec',
-                       'sr-ec' => 'sr-ec',
-                       'sr-el' => 'sr-el',
+                       'sr-ec' => 'sr',
+                       'sr-el' => 'sr',
                        ); 
 
 
index 0f5a4bd..c85220d 100644 (file)
@@ -356,6 +356,12 @@ class ParserTest {
                        $lang = 'en';
                }
 
+               if( preg_match( '/variant=([a-z]+(?:-[a-z]+)?)/', $opts, $m ) )
+                       $variant = $m[1];
+               else 
+                       $variant = false;
+
+
                $settings = array(
                        'wgServer' => 'http://localhost',
                        'wgScript' => '/index.php',
@@ -382,6 +388,8 @@ class ParserTest {
                        'wgLocaltimezone' => 'UTC',
                        'wgAllowExternalImages' => true,
                        'wgUseTidy' => false,
+                       'wgDefaultLanguageVariant' => $variant,
+                       'wgVariantArticlePath' => false,
                        );
                $this->savedGlobals = array();
                foreach( $settings as $var => $val ) {
index fea83ed..7cd62e0 100644 (file)
@@ -18,7 +18,8 @@
 #      subpage         enable subpages (disabled by default)
 #      noxml           don't check for XML well formdness
 #      title=[[XXX]]   run test using article title XXX
-#   language=XXX       set content language to XXX for this test
+#      language=XXX    set content language to XXX for this test
+#      variant=XXX     set the language variant for this test (e.g. zh-tw)
 #      disabled        do not run test
 #
 # For testing purposes, temporary articles can created:
@@ -6265,6 +6266,102 @@ Latin proverb: -{Ne nuntium necare}-
 !! end
 
 
+!! test
+Prevent conversion with -{}- tags (language variants)
+!! options
+language=sr variant=sr-ec
+!! input
+Latinski: -{Ne nuntium necare}-
+!! result
+<p>Латински: Ne nuntium necare
+</p>
+!! end
+
+
+!! test
+Prevent conversion of text with -{}- tags (language variants)
+!! options
+language=sr variant=sr-ec
+!! input
+Latinski: -{Ne nuntium necare}-
+!! result
+<p>Латински: Ne nuntium necare
+</p>
+!! end
+
+
+!! test
+Prevent conversion of links with -{}- tags (language variants)
+!! options
+language=sr variant=sr-ec
+!! input
+-{[[Main Page]]}-
+!! result
+<p><a href="/index.php?title=Main_Page&amp;variant=sr-ec" title="Main Page">Main Page</a>
+</p>
+!! end
+
+
+!! test
+-{}- tags within headlines (within html for parserConvert())
+!! options
+language=sr variant=sr-ec
+!! input
+== -{Naslov}- ==
+!! result
+<a name="-.7BNaslov.7D-"></a><h2><span class="editsection">[<a href="/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Уреди део: Naslov">уреди</a>]</span> <span class="mw-headline"> Naslov </span></h2>
+
+!! end
+
+
+!! test
+Explicit definition of language variant alternatives
+!! options
+language=zh variant=zh-tw
+!! input
+-{zh:China;zh-tw:Taiwan}-, not China
+!! result
+<p>Taiwan, not China
+</p>
+!! end
+
+
+!! test
+Adding explicit session-wise language variant mapping (A flag)
+!! options
+language=zh variant=zh-tw
+!! input
+-{A|zh:China;zh-tw:Taiwan}- is China
+!! result
+<p>Taiwan is Taiwan
+</p>
+!! end
+
+
+!! test
+Adding explicit conversion rule for title (T flag)
+!! options
+language=zh variant=zh-tw
+!! input
+Should be stripped-{T|zh:China;zh-tw:Taiwan}-!
+!! result
+<p>Should be stripped!
+</p>
+!! end
+
+
+!! test
+Do not convert roman numbers to language variants
+!! options
+language=sr variant=sr-ec
+!! input
+Fridrih IV je car.
+!! result
+<p>Фридрих IV је цар.
+</p>
+!! end
+
+
 #
 #
 #