From 0327a95ea12532ef1537316163ad2f50027cdc44 Mon Sep 17 00:00:00 2001 From: Chad Horohoe Date: Thu, 21 Apr 2011 14:02:38 +0000 Subject: [PATCH] (bug 28643) Merge Serbian language variant conversion improvements to trunk (r85224, r85239, r85308) from Nikola's branch --- languages/LanguageConverter.php | 20 +++++++++++++++++++- languages/classes/LanguageSr.php | 22 ++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 42b9660771..8807f44934 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -322,6 +322,10 @@ class LanguageConverter { } } + if( $this->guessVariant( $text, $toVariant ) ) { + return $text; + } + /* we convert everything except: 1. HTML markups (anything between < and >) 2. HTML entities @@ -571,7 +575,7 @@ class LanguageConverter { */ public function convertTo( $text, $variant ) { global $wgDisableLangConversion; - if ( $wgDisableLangConversion ) { + if ( $wgDisableLangConversion || $this->guessVariant( $text, $variant ) ) { return $text; } return $this->recursiveConvertTopLevel( $text, $variant ); @@ -768,6 +772,20 @@ class LanguageConverter { return '!' . $variant; } + /** + * Guess if a text is already written in the given variant. This base implementation always returns false; subclasses should override it. convertTo() returns $text unconverted when this returns true. + * + * @param string $text the text to be checked + * @param string $variant language code of the variant to be checked for + * @return bool true if $text appears to be written in $variant, false if not + * + * @author Nikola Smolenski + * @since 1.18 + */ + public function guessVariant($text, $variant) { + return false; + } + /** * Load default conversion tables. * This method must be implemented in derived class. 
diff --git a/languages/classes/LanguageSr.php b/languages/classes/LanguageSr.php index edcae288f0..22f9ca42b0 100644 --- a/languages/classes/LanguageSr.php +++ b/languages/classes/LanguageSr.php @@ -148,6 +148,27 @@ class SrConverter extends LanguageConverter { return $ret; } + + /** + * Guess if a text is written in Cyrillic or Latin by counting the listed script-specific letters (шђчћж / šđčćž, both cases); true only when the script matching $variant ('sr-ec' Cyrillic, 'sr-el' Latin) strictly outnumbers the other. + * + * @author Nikola Smolenski + * @since 1.18 + */ + public function guessVariant( $text, $variant ) { + $numCyrillic = preg_match_all("/[шђчћжШЂЧЋЖ]/u", $text, $dummy); + $numLatin = preg_match_all("/[šđčćžŠĐČĆŽ]/u", $text, $dummy); + + if( $variant == 'sr-ec' ) { + return $numCyrillic > $numLatin; + } else if( $variant == 'sr-el' ) { + return $numLatin > $numCyrillic; + } else { + return false; + } + + } + } /** @@ -202,4 +223,5 @@ class LanguageSr extends LanguageSr_ec { } } } + } -- 2.20.1