From: Liangent
Date: Sat, 17 Nov 2012 19:19:47 +0000 (+0800)
Subject: Clean up Language::markNoConversion().
X-Git-Tag: 1.31.0-rc.0~21534^2
X-Git-Url: https://git.cyclocoop.org/%242?a=commitdiff_plain;h=e01adbfc0bd9f39adffc1f955ccc711e73818194;p=lhc%2Fweb%2Fwiklou.git
Clean up Language::markNoConversion().
* IRIs are becoming more and more widely used these days, so Chinese
characters in the text of external links also need to be protected
from conversion.
* So now all markNoConversion() functions in languages with variants
do the same thing. Merge them into a single function in the
Language class and drop implementations in individual languages.
* While at it, rephrase the phpdoc of that function, and (bug 24798) fix
the link detection regex to use wfUrlProtocolsWithoutProtRel().
Protocol-relative URLs are excluded to avoid false positives.
* Add parser test for it.
Change-Id: I2ec0ac2b9b11221584adb72555168498de209d57
---
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 36b682ee48..f664e4908c 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -1314,7 +1314,8 @@ class Parser {
if ( $text === false ) {
# Not an image, make a link
$text = Linker::makeExternalLink( $url,
- $this->getConverterLanguage()->markNoConversion($url), true, 'free',
+ $this->getConverterLanguage()->markNoConversion( $url, true ),
+ true, 'free',
$this->getExternalLinkAttribs( $url ) );
# Register it in the output object...
# Replace unnecessary URL escape codes with their equivalent characters
diff --git a/languages/Language.php b/languages/Language.php
index c4807a61d3..25ca3c2f72 100644
--- a/languages/Language.php
+++ b/languages/Language.php
@@ -3690,15 +3690,24 @@ class Language {
}
/**
- * Enclose a string with the "no conversion" tag. This is used by
- * various functions in the Parser
+ * Prepare external link text for conversion. When the text is
+ * a URL, it shouldn't be converted, and it'll be wrapped in
+ * the "raw" tag (-{R| }-) to prevent conversion.
*
- * @param $text String: text to be tagged for no conversion
- * @param $noParse bool
+ * This function is called "markNoConversion" for historical
+ * reasons.
+ *
+ * @param $text String: text to be used for external link
+ * @param $noParse bool: wrap it without confirming it's a real URL first
* @return string the tagged text
*/
public function markNoConversion( $text, $noParse = false ) {
- return $this->mConverter->markNoConversion( $text, $noParse );
+ // Excluding protocol-relative URLs may avoid many false positives.
+ if ( $noParse || preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
+ return $this->mConverter->markNoConversion( $text );
+ } else {
+ return $text;
+ }
}
/**
diff --git a/languages/classes/LanguageGan.php b/languages/classes/LanguageGan.php
index 6482070d08..0d652d43de 100644
--- a/languages/classes/LanguageGan.php
+++ b/languages/classes/LanguageGan.php
@@ -66,20 +66,6 @@ class GanConverter extends LanguageConverter {
);
}
- /**
- * there shouldn't be any latin text in Chinese conversion, so no need
- * to mark anything.
- * $noParse is there for compatibility with LanguageConvert::markNoConversion
- *
- * @param $text string
- * @param $noParse bool
- *
- * @return string
- */
- function markNoConversion( $text, $noParse = false ) {
- return $text;
- }
-
/**
* @param $key string
* @return String
diff --git a/languages/classes/LanguageIu.php b/languages/classes/LanguageIu.php
index 79e5582634..fe5cdf8d41 100644
--- a/languages/classes/LanguageIu.php
+++ b/languages/classes/LanguageIu.php
@@ -157,21 +157,6 @@ class IuConverter extends LanguageConverter {
$link = $oldlink;
}
- /**
- * We want our external link captions to be converted in variants,
- * so we return the original text instead -{$text}-, except for URLs
- *
- * @param $text string
- * @param $noParse bool
- *
- * @return string
- */
- function markNoConversion( $text, $noParse = false ) {
- if ( $noParse || preg_match( "/^https?:\/\/|ftp:\/\/|irc:\/\//", $text ) )
- return parent::markNoConversion( $text );
- return $text;
- }
-
/**
* An ugly function wrapper for parsing Image titles
* (to prevent image name conversion)
diff --git a/languages/classes/LanguageKk.php b/languages/classes/LanguageKk.php
index bdaf2f4f14..6dd6959fea 100644
--- a/languages/classes/LanguageKk.php
+++ b/languages/classes/LanguageKk.php
@@ -390,21 +390,6 @@ class KkConverter extends LanguageConverter {
}
}
- /**
- * We want our external link captions to be converted in variants,
- * so we return the original text instead -{$text}-, except for URLs
- *
- * @param $text string
- * @param $noParse string|bool
- *
- * @return string
- */
- function markNoConversion( $text, $noParse = false ) {
- if ( $noParse || preg_match( "/^https?:\/\/|ftp:\/\/|irc:\/\//", $text ) )
- return parent::markNoConversion( $text );
- return $text;
- }
-
/**
* @param $key string
* @return String
diff --git a/languages/classes/LanguageKu.php b/languages/classes/LanguageKu.php
index 0eac439bd2..30d98baad2 100644
--- a/languages/classes/LanguageKu.php
+++ b/languages/classes/LanguageKu.php
@@ -177,21 +177,6 @@ class KuConverter extends LanguageConverter {
$link = $oldlink;
}
- /**
- * We want our external link captions to be converted in variants,
- * so we return the original text instead -{$text}-, except for URLs
- *
- * @param $text string
- * @param $noParse bool
- *
- * @return string
- */
- function markNoConversion( $text, $noParse = false ) {
- if ( $noParse || preg_match( "/^https?:\/\/|ftp:\/\/|irc:\/\//", $text ) )
- return parent::markNoConversion( $text );
- return $text;
- }
-
/**
* An ugly function wrapper for parsing Image titles
* (to prevent image name conversion)
diff --git a/languages/classes/LanguageShi.php b/languages/classes/LanguageShi.php
index 4833d1c330..5ddcfdeffd 100644
--- a/languages/classes/LanguageShi.php
+++ b/languages/classes/LanguageShi.php
@@ -136,21 +136,6 @@ class ShiConverter extends LanguageConverter {
$link = $oldlink;
}
- /**
- * We want our external link captions to be converted in variants,
- * so we return the original text instead -{$text}-, except for URLs
- *
- * @param $text string
- * @param $noParse bool
- *
- * @return string
- */
- function markNoConversion( $text, $noParse = false ) {
- if ( $noParse || preg_match( "/^https?:\/\/|ftp:\/\/|irc:\/\//", $text ) )
- return parent::markNoConversion( $text );
- return $text;
- }
-
/**
* An ugly function wrapper for parsing Image titles
* (to prevent image name conversion)
diff --git a/languages/classes/LanguageSr.php b/languages/classes/LanguageSr.php
index b472743630..3610c1e740 100644
--- a/languages/classes/LanguageSr.php
+++ b/languages/classes/LanguageSr.php
@@ -128,21 +128,6 @@ class SrConverter extends LanguageConverter {
$link = $oldlink;
}
- /**
- * We want our external link captions to be converted in variants,
- * so we return the original text instead -{$text}-, except for URLs
- *
- * @param $text string
- * @param $noParse bool
- *
- * @return string
- */
- function markNoConversion( $text, $noParse = false ) {
- if ( $noParse || preg_match( "/^https?:\/\/|ftp:\/\/|irc:\/\//", $text ) )
- return parent::markNoConversion( $text );
- return $text;
- }
-
/**
* An ugly function wrapper for parsing Image titles
* (to prevent image name conversion)
diff --git a/languages/classes/LanguageZh.php b/languages/classes/LanguageZh.php
index 8bf66a30c1..04767f2d95 100644
--- a/languages/classes/LanguageZh.php
+++ b/languages/classes/LanguageZh.php
@@ -87,20 +87,6 @@ class ZhConverter extends LanguageConverter {
$this->mTables['zh-tw']->merge( $this->mTables['zh-hant'] );
}
- /**
- * there shouldn't be any latin text in Chinese conversion, so no need
- * to mark anything.
- * $noParse is there for compatibility with LanguageConvert::markNoConversion
- *
- * @param $text string
- * @param $noParse bool
- *
- * @return string
- */
- function markNoConversion( $text, $noParse = false ) {
- return $text;
- }
-
/**
* @param $key string
* @return String
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index b8de79d809..b1c8858dcb 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -10496,6 +10496,29 @@ Nested: -{zh-hans:Hi -{zh-cn:China;zh-sg:Singapore;}-;zh-hant:Hello -{zh-tw:Taiw
!! end
+!! test
+Proper conversion of text in external links
+!! options
+language=sr variant=sr-ec
+!! input
+http://www.google.com
+gopher://www.google.com
+[http://www.google.com http://www.google.com]
+[gopher://www.google.com gopher://www.google.com]
+[https://www.google.com irc://www.google.com]
+[ftp://www.google.com www.google.com/ftp://dir]
+[//www.google.com www.google.com]
+!! result
+http://www.google.com
+gopher://www.google.com
+http://www.google.com
+gopher://www.google.com
+irc://www.google.com
+www.гоогле.цом/фтп://дир
+www.гоогле.цом
+
+!! end
+
!! test
Do not convert roman numbers to language variants
!! options