var $mTables;
// 'bidirectional' 'unidirectional' 'disable' for each variant
var $mManualLevel;
+
+ /**
+ * @var String: memcached key name
+ */
var $mCacheKey;
+
var $mLangObj;
var $mFlags;
var $mDescCodeSep = ':', $mDescVarSep = ';';
/**
* Constructor
*
- * @param $langobj The Language Object
+ * @param $langobj Language: the Language Object
* @param $maincode String: the main language code of this language
* @param $variants Array: the supported variants of this language
* @param $variantfallbacks Array: the fallback language of each variant
* @param $flags Array: defining the custom strings that maps to the flags
* @param $manualLevel Array: limit for supported variants
*/
- public function __construct( $langobj, $maincode,
- $variants = array(),
- $variantfallbacks = array(),
- $flags = array(),
+ public function __construct( $langobj, $maincode, $variants = array(),
+ $variantfallbacks = array(), $flags = array(),
$manualLevel = array() ) {
- global $wgDisabledVariants, $wgLanguageNames;
+ global $wgDisabledVariants;
$this->mLangObj = $langobj;
$this->mMainLanguageCode = $maincode;
$this->mVariants = array_diff( $variants, $wgDisabledVariants );
$this->mVariantFallbacks = $variantfallbacks;
- $this->mVariantNames = $wgLanguageNames;
+ $this->mVariantNames = Language::getLanguageNames();
$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
$defaultflags = array(
// 'S' show converted text
'D' => 'D', // convert description (subclass implement)
'-' => '-', // remove convert (not implement)
'H' => 'H', // add rule for convert code
- // (but no display in placed code )
+ // (but no display in placed code)
'N' => 'N' // current variant name
);
$this->mFlags = array_merge( $defaultflags, $flags );
/**
* Get preferred language variant.
- * @param $fromUser Boolean: get it from $wgUser's preferences
- * @param $fromHeader Boolean: get it from Accept-Language
* @return String: the preferred language code
*/
- public function getPreferredVariant( $fromUser = true, $fromHeader = false ) {
- global $wgDefaultLanguageVariant;
+ public function getPreferredVariant() {
+ global $wgDefaultLanguageVariant, $wgUser;
$req = $this->getURLVariant();
- if ( $fromUser && !$req ) {
+ if ( $wgUser->isLoggedIn() && !$req ) {
$req = $this->getUserVariant();
- }
-
- if ( $fromHeader && !$req ) {
+ } elseif ( !$req ) {
$req = $this->getHeaderVariant();
}
return $this->mMainLanguageCode;
}
+ /**
+ * Get default variant.
+ * This function would not be affected by user's settings or headers
+ *
+ * Resolution order, as implemented below:
+ * 1. the variant returned by getURLVariant(), if any;
+ * 2. otherwise $wgDefaultLanguageVariant, but only when it is set and
+ * validateVariant() accepts it;
+ * 3. otherwise the main language code ($this->mMainLanguageCode).
+ *
+ * @return String: the default variant code
+ */
+ public function getDefaultVariant() {
+ global $wgDefaultLanguageVariant;
+
+ $req = $this->getURLVariant();
+
+ if ( $wgDefaultLanguageVariant && !$req ) {
+ $req = $this->validateVariant( $wgDefaultLanguageVariant );
+ }
+
+ if ( $req ) {
+ return $req;
+ }
+ return $this->mMainLanguageCode;
+ }
+
/**
* Validate the variant
* @param $variant String: the variant to validate
* @return Mixed: returns the variant if it is valid, null otherwise
*/
protected function validateVariant( $variant = null ) {
- if ( $variant !== null &&
- in_array( $variant, $this->mVariants ) ) {
+ if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
return $variant;
}
return null;
*
* @return Mixed: variant if one found, false otherwise.
*/
- protected function getURLVariant() {
+ public function getURLVariant() {
global $wgRequest;
- $ret = null;
if ( $this->mURLVariant ) {
return $this->mURLVariant;
*/
protected function getUserVariant() {
global $wgUser;
- $ret = null;
// memoizing this function wreaks havoc on parserTest.php
- /* if ( $this->mUserVariant ) { */
- /* return $this->mUserVariant; */
- /* } */
+ /*
+ if ( $this->mUserVariant ) {
+ return $this->mUserVariant;
+ }
+ */
- // get language variant preference from logged in users
+ // Get language variant preference from logged in users
// Don't call this on stub objects because that causes infinite
// recursion during initialisation
if ( $wgUser->isLoggedIn() ) {
$ret = $wgUser->getOption( 'variant' );
- }
- else {
+ } else {
// figure out user lang without constructing wgLang to avoid
// infinite recursion
$ret = $wgUser->getOption( 'language' );
return $this->mUserVariant = $this->validateVariant( $ret );
}
-
/**
* Determine the language variant from the Accept-Language header.
*
}
// see if some supported language variant is set in the
- // http header.
+ // HTTP header.
$languages = array_keys( $wgRequest->getAcceptLang() );
if ( empty( $languages ) ) {
return null;
}
- $fallback_languages = array();
+ $fallbackLanguages = array();
foreach ( $languages as $language ) {
$this->mHeaderVariant = $this->validateVariant( $language );
if ( $this->mHeaderVariant ) {
// them later.
$fallbacks = $this->getVariantFallbacks( $language );
if ( is_string( $fallbacks ) ) {
- $fallback_languages[] = $fallbacks;
+ $fallbackLanguages[] = $fallbacks;
} elseif ( is_array( $fallbacks ) ) {
- $fallback_languages =
- array_merge( $fallback_languages,
- $fallbacks );
+ $fallbackLanguages =
+ array_merge( $fallbackLanguages, $fallbacks );
}
}
if ( !$this->mHeaderVariant ) {
// process fallback languages now
- $fallback_languages = array_unique( $fallback_languages );
+ $fallback_languages = array_unique( $fallbackLanguages );
foreach ( $fallback_languages as $language ) {
$this->mHeaderVariant = $this->validateVariant( $language );
if ( $this->mHeaderVariant ) {
return $this->mHeaderVariant;
}
- /**
- * Caption convert, base on preg_replace_callback.
- *
- * To convert text in "title" or "alt", like '<img alt="text" ... '
- * or '<span title="text" ... '
- *
- * @return String like ' alt="yyyy"' or ' title="yyyy"'
- */
- protected function captionConvert( $matches ) {
- $toVariant = $this->getPreferredVariant();
- $title = $matches[1];
- $text = $matches[2];
- // we convert captions except URL
- if ( !strpos( $text, '://' ) ) {
- $text = $this->translate( $text, $toVariant );
- }
- return " $title=\"$text\"";
- }
-
/**
* Dictionary-based conversion.
* This function would not parse the conversion rules.
* If you want to parse rules, try to use convert() or
* convertTo().
*
- * @param $text String: the text to be converted
- * @param $toVariant String: the target language code
- * @return String: the converted text
+ * @param $text String the text to be converted
+ * @param $toVariant bool|string the target language code
+ * @return String the converted text
*/
public function autoConvert( $text, $toVariant = false ) {
wfProfileIn( __METHOD__ );
if ( !$toVariant ) {
$toVariant = $this->getPreferredVariant();
if ( !$toVariant ) {
+ wfProfileOut( __METHOD__ );
return $text;
}
}
/* we convert everything except:
- 1. html markups (anything between < and >)
- 2. html entities
- 3. place holders created by the parser
+ 1. HTML markups (anything between < and >)
+ 2. HTML entities
+ 3. placeholders created by the parser
*/
global $wgParser;
if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
$marker = '';
}
- // this one is needed when the text is inside an html markup
+ // this one is needed when the text is inside an HTML markup
$htmlfix = '|<[^>]+$|^[^<>]*>';
// disable convert to variants between <code></code> tags
$reg = '/' . $codefix . $scriptfix . $prefix .
'<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
+ $startPos = 0;
+ $sourceBlob = '';
+ $literalBlob = '';
+
+ // Guard against delimiter nulls in the input
+ $text = str_replace( "\000", '', $text );
+
+ $markupMatches = null;
+ $elementMatches = null;
+ while ( $startPos < strlen( $text ) ) {
+ if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
+ $elementPos = $markupMatches[0][1];
+ $element = $markupMatches[0][0];
+ } else {
+ $elementPos = strlen( $text );
+ $element = '';
+ }
- $matches = preg_split( $reg, $text, - 1, PREG_SPLIT_OFFSET_CAPTURE );
-
- $m = array_shift( $matches );
-
- $ret = $this->translate( $m[0], $toVariant );
- $mstart = $m[1] + strlen( $m[0] );
-
- // enable convertsion of '<img alt="xxxx" ... '
- // or '<span title="xxxx" ... '
- $captionpattern = '/\s(title|alt)\s*=\s*"([\s\S]*?)"/';
-
- $trtext = '';
- $trtextmark = "\0";
- $notrtext = array();
- foreach ( $matches as $m ) {
- $mark = substr( $text, $mstart, $m[1] - $mstart );
- $mark = preg_replace_callback( $captionpattern,
- array( &$this, 'captionConvert' ),
- $mark );
- // Let's convert the trtext only once,
- // it would give us more performance improvement
- $notrtext[] = $mark;
- $trtext .= $m[0] . $trtextmark;
- $mstart = $m[1] + strlen( $m[0] );
- }
- $notrtext[] = '';
- $trtext = $this->translate( $trtext, $toVariant );
- $trtext = StringUtils::explode( $trtextmark, $trtext );
- foreach ( $trtext as $t ) {
- $ret .= array_shift( $notrtext );
- $ret .= $t;
+ // Queue the part before the markup for translation in a batch
+ $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";
+
+ // Advance to the next position
+ $startPos = $elementPos + strlen( $element );
+
+ // Translate any alt or title attributes inside the matched element
+ if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
+ $elementMatches ) )
+ {
+ $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
+ $changed = false;
+ foreach ( array( 'title', 'alt' ) as $attrName ) {
+ if ( !isset( $attrs[$attrName] ) ) {
+ continue;
+ }
+ $attr = $attrs[$attrName];
+ // Don't convert URLs
+ if ( !strpos( $attr, '://' ) ) {
+ $attr = $this->translate( $attr, $toVariant );
+ }
+
+ // Remove HTML tags to avoid disrupting the layout
+ $attr = preg_replace( '/<[^>]+>/', '', $attr );
+ if ( $attr !== $attrs[$attrName] ) {
+ $attrs[$attrName] = $attr;
+ $changed = true;
+ }
+ }
+ if ( $changed ) {
+ $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
+ $elementMatches[3];
+ }
+ }
+ $literalBlob .= $element . "\000";
}
+
+ // Do the main translation batch
+ $translatedBlob = $this->translate( $sourceBlob, $toVariant );
+
+ // Put the output back together
+ $translatedIter = StringUtils::explode( "\000", $translatedBlob );
+ $literalIter = StringUtils::explode( "\000", $literalBlob );
+ $output = '';
+ while ( $translatedIter->valid() && $literalIter->valid() ) {
+ $output .= $translatedIter->current();
+ $output .= $literalIter->current();
+ $translatedIter->next();
+ $literalIter->next();
+ }
+
wfProfileOut( __METHOD__ );
- return $ret;
+ return $output;
}
/**
* @param $variant String: variant language code
* @return String: translated text
*/
- protected function translate( $text, $variant ) {
+ public function translate( $text, $variant ) {
wfProfileIn( __METHOD__ );
// If $text is empty or only includes spaces, do nothing
// Otherwise translate it
*
* @param $text String: the text to be converted
* @return Array: variant => converted text
- * @deprecated Use autoConvertToAllVariants() instead
+ * @deprecated since 1.17 Use autoConvertToAllVariants() instead
*/
public function convertLinkToAllVariants( $text ) {
return $this->autoConvertToAllVariants( $text );
/**
* Apply manual conversion rules.
*
- * @param $convRule Object: Object of ConverterRule
+ * @param $convRule ConverterRule Object of ConverterRule
*/
protected function applyManualConv( $convRule ) {
// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
* Auto convert a Title object to a readable string in the
* preferred variant.
*
- *@param $title Object: a object of Title
- *@return String: converted title text
+ * @param $title Title: a Title object
+ * @return String: converted title text
*/
public function convertTitle( $title ) {
$variant = $this->getPreferredVariant();
$text = '';
} else {
// first let's check if a message has given us a converted name
- $nsConvKey = 'conversion-ns' . $index;
- if ( !wfEmptyMsg( $nsConvKey ) ) {
- $text = wfMsgForContentNoTrans( $nsConvKey );
+ $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
+ if ( $nsConvMsg->exists() ) {
+ $text = $nsConvMsg->plain();
} else {
// the message does not exist, try to retrieve it from the current
// variant's namespace names.
*/
public function convertTo( $text, $variant ) {
global $wgDisableLangConversion;
// When $wgDisableLangConversion is truthy the text is handed back
// untouched and no conversion code runs at all.
- if ( $wgDisableLangConversion ) return $text;
+ if ( $wgDisableLangConversion ) {
+ return $text;
+ }
// Otherwise the whole job is delegated to recursiveConvertTopLevel().
return $this->recursiveConvertTopLevel( $text, $variant );
}
if ( $pos === false ) {
// No more markup, append final segment
$out .= $this->autoConvert( substr( $text, $startPos ), $variant );
- $startPos = $length;
return $out;
}
*
* @param $text String: text to be converted
* @param $variant String: the target variant code
+ * @param $startPos int
* @param $depth Integer: depth of recursion
+ *
* @return String: converted text
*/
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
}
/**
- * If a language supports multiple variants, it is
- * possible that non-existing link in one variant
- * actually exists in another variant. This function
- * tries to find it. See e.g. LanguageZh.php
+ * If a language supports multiple variants, it is possible that
+ * non-existing link in one variant actually exists in another variant.
+ * This function tries to find it. See e.g. LanguageZh.php
*
* @param $link String: the name of the link
* @param $nt Mixed: the title object of the link
/**
* Returns language specific hash options.
* The value built here is '!' followed by whatever getPreferredVariant()
* returns for the current request.
+ *
+ * @return string
*/
public function getExtraHashOptions() {
$variant = $this->getPreferredVariant();
- return '!' . $variant ;
+ return '!' . $variant;
}
/**
*/
function loadDefaultTables() {
// This body only reports an error naming the runtime class; per the
// message text, concrete classes are expected to provide their own
// loadDefaultTables().
$name = get_class( $this );
- wfDie( "Must implement loadDefaultTables() method in class $name" );
+ throw new MWException( "Must implement loadDefaultTables() method in class $name" );
}
/**
* Load conversion tables either from the cache or the disk.
* @private
+ * @param $fromCache Boolean: load from memcached? Defaults to true.
*/
- function loadTables( $fromcache = true ) {
+ function loadTables( $fromCache = true ) {
if ( $this->mTablesLoaded ) {
return;
}
wfProfileIn( __METHOD__ );
$this->mTablesLoaded = true;
$this->mTables = false;
- if ( $fromcache ) {
+ if ( $fromCache ) {
wfProfileIn( __METHOD__ . '-cache' );
$this->mTables = $wgMemc->get( $this->mCacheKey );
wfProfileOut( __METHOD__ . '-cache' );
|| !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
wfProfileIn( __METHOD__ . '-recache' );
// not in cache, or we need a fresh reload.
- // we will first load the default tables
- // then update them using things in MediaWiki:Zhconversiontable/*
+ // We will first load the default tables
+ // then update them using things in MediaWiki:Conversiontable/*
$this->loadDefaultTables();
foreach ( $this->mVariants as $var ) {
$cached = $this->parseCachedTable( $var );
}
/**
- * Hook for post processig after conversion tables are loaded.
- *
+ * Hook for post processing after conversion tables are loaded.
* The body here is intentionally empty, so calling it does nothing.
* NOTE(review): presumably meant to be overridden by subclasses - confirm.
*/
function postLoadTables() { }
$this->loadTables( false );
}
-
/**
* Parse the conversion table stored in the cache.
*
* ...
* }-
*
- * To make the tables more manageable, subpages are allowed
- * and will be parsed recursively if $recursive == true.
+ * To make the tables more manageable, subpages are allowed
+ * and will be parsed recursively if $recursive == true.
+ *
+ * @param $code String: language code
+ * @param $subpage String: subpage name
+ * @param $recursive Boolean: parse subpages recursively? Defaults to true.
*
+ * @return array
*/
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
- global $wgMessageCache;
static $parsed = array();
- if ( !is_object( $wgMessageCache ) ) {
- return array();
- }
-
$key = 'Conversiontable/' . $code;
if ( $subpage ) {
$key .= '/' . $subpage;
}
if ( strpos( $code, '/' ) === false ) {
- $txt = $wgMessageCache->get( 'Conversiontable', true, $code );
+ $txt = MessageCache::singleton()->get( 'Conversiontable', true, $code );
if ( $txt === false ) {
- # FIXME: this method doesn't seem to be expecting
+ # @todo FIXME: This method doesn't seem to be expecting
# this possible outcome...
$txt = '<Conversiontable>';
}
} else {
- $title = Title::makeTitleSafe( NS_MEDIAWIKI,
- "Conversiontable/$code" );
+ $title = Title::makeTitleSafe(
+ NS_MEDIAWIKI,
+ "Conversiontable/$code"
+ );
if ( $title && $title->exists() ) {
$article = new Article( $title );
$txt = $article->getContents();
}
// get all subpage links of the form
- // [[MediaWiki:conversiontable/zh-xx/...|...]]
+ // [[MediaWiki:Conversiontable/zh-xx/...|...]]
$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
':Conversiontable';
$subs = StringUtils::explode( '[[', $txt );
$table = StringUtils::explode( ';', $stripped );
foreach ( $table as $t ) {
$m = explode( '=>', $t, 3 );
- if ( count( $m ) != 2 )
+ if ( count( $m ) != 2 ) {
continue;
+ }
// trim any trailing comments starting with '//'
$tt = explode( '//', $m[1], 2 );
$ret[trim( $m[0] )] = trim( $tt[0] );
* various functions in the Parser.
*
* @param $text String: text to be tagged for no conversion
- * @param $noParse Unused (?)
+ * @param $noParse Boolean: unused
* @return String: the tagged text
*/
public function markNoConversion( $text, $noParse = false ) {
/**
* Convert the sorting key for category links. This should make different
* keys that are variants of each other map to the same key.
+ *
+ * @param $key string
+ *
+ * @return string
*/
function convertCategoryKey( $key ) {
return $key;
/**
* Hook to refresh the cache of conversion tables when
- * MediaWiki:conversiontable* is updated.
+ * MediaWiki:Conversiontable* is updated.
* @private
+ *
+ * @param $article Article object
+ * @param $user Object: User object for the current user
+ * @param $text String: article text (?)
+ * @param $summary String: edit summary of the edit
+ * @param $isMinor Boolean: was the edit marked as minor?
+ * @param $isWatch Boolean: did the user watch this page or not?
+ * @param $section Unused
+ * @param $flags Bitfield
+ * @param $revision Object: new Revision object or null
+ * @return Boolean: true
*/
- function OnArticleSaveComplete( $article, $user, $text, $summary, $isminor,
- $iswatch, $section, $flags, $revision ) {
+ function OnArticleSaveComplete( $article, $user, $text, $summary, $isMinor,
+ $isWatch, $section, $flags, $revision ) {
$titleobj = $article->getTitle();
if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
$title = $titleobj->getDBkey();
/**
* Armour rendered math against conversion.
- * Wrap math into rawoutput -{R| math }- syntax.
+ * Escape special chars in parsed math text. (in most cases are img elements)
+ *
+ * Every '-{' is rewritten to '-&#123;' and every '}-' to '&#125;-'
+ * (&#123; and &#125; are the HTML numeric character references for the
+ * two braces), so the literal sequences '-{' and '}-' no longer occur in
+ * the returned text.
+ *
+ * @param $text String: text to armour against conversion
+ * @return String: armoured text where { and } have been converted to
+ * &#123; and &#125;
*/
public function armourMath( $text ) {
- // we need to convert '-{' and '}-' to '-{' and '}-'
- // to avoid a unwanted '}-' appeared after the math-image.
+ // convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
+ // any unwanted markup appearing in the math image tag.
$text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
- $ret = "-{R|$text}-";
- return $ret;
+ return $text;
}
/**
/**
* @private
+ *
+ * @return string
*/
function getRulesDesc() {
$codesep = $this->mConverter->mDescCodeSep;
/**
* Parse rules conversion.
* @private
+ *
+ * @param $variant
+ *
+ * @return string
*/
function getRuleConvertedStr( $variant ) {
$bidtable = $this->mBidtable;
}
$vmarked[] = $v;
}
- /*for unidirectional array fill to convert tables */
+ /* for unidirectional array fill to convert tables */
if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
&& isset( $unidtable[$v] ) )
{
/**
* Parse rules and flags.
- * @public
+ * @param $variant String: variant language code
*/
- function parse( $variant = NULL ) {
+ public function parse( $variant = null ) {
if ( !$variant ) {
$variant = $this->mConverter->getPreferredVariant();
}
}
/**
* Not implemented: the body below is empty, so a call currently yields
* null rather than a boolean.
- * @public
+ * @todo FIXME: code this function :)
*/
- function hasRules() {
+ public function hasRules() {
// TODO: no logic yet; execution falls off the end of the function.
}
/**
* Get display text on markup -{...}-
* Plain accessor: hands back $this->mRuleDisplay without computing anything.
- * @public
+ * @return string
*/
- function getDisplay() {
+ public function getDisplay() {
return $this->mRuleDisplay;
}
/**
* Get converted title.
* Plain accessor: hands back $this->mRuleTitle without computing anything.
- * @public
+ * @return string
*/
- function getTitle() {
+ public function getTitle() {
return $this->mRuleTitle;
}
/**
* Return how deal with conversion rules.
* Plain accessor: hands back $this->mRulesAction without computing anything.
- * @public
+ * @return string
*/
- function getRulesAction() {
+ public function getRulesAction() {
return $this->mRulesAction;
}
/**
- * Get conversion table. ( bidirectional and unidirectional
- * conversion table )
- * @public
+ * Get conversion table. (bidirectional and unidirectional
+ * conversion table)
* Plain accessor: hands back $this->mConvTable without computing anything.
+ * @return array
*/
- function getConvTable() {
+ public function getConvTable() {
return $this->mConvTable;
}
/**
* Get conversion rules string.
* Plain accessor: hands back $this->mRules without computing anything.
- * @public
+ * @return string
*/
- function getRules() {
+ public function getRules() {
return $this->mRules;
}
/**
* Get conversion flags.
* Plain accessor: hands back $this->mFlags without computing anything.
- * @public
+ * @return array
*/
- function getFlags() {
+ public function getFlags() {
return $this->mFlags;
}
}