From e01adbfc0bd9f39adffc1f955ccc711e73818194 Mon Sep 17 00:00:00 2001 From: Liangent Date: Sun, 18 Nov 2012 03:19:47 +0800 Subject: [PATCH] Clean up Language::markNoConversion(). * IRIs are getting more and more widely used these days so Chinese characters are also needed to be prevented from being converted in text of external links. * So now all markNoConversion() functions in languages with variants do the same thing. Merge them into a single function in the Language class and drop implementations in individual languages. * By the way rephrase phpdoc of that function, and (bug 24798) fix the link detection regex to use wfUrlProtocolsWithoutProtRel(). Protocol-relative regex is excluded to avoid false positives. * Add parser test for it. Change-Id: I2ec0ac2b9b11221584adb72555168498de209d57 --- includes/parser/Parser.php | 3 ++- languages/Language.php | 19 ++++++++++++++----- languages/classes/LanguageGan.php | 14 -------------- languages/classes/LanguageIu.php | 15 --------------- languages/classes/LanguageKk.php | 15 --------------- languages/classes/LanguageKu.php | 15 --------------- languages/classes/LanguageShi.php | 15 --------------- languages/classes/LanguageSr.php | 15 --------------- languages/classes/LanguageZh.php | 14 -------------- tests/parser/parserTests.txt | 23 +++++++++++++++++++++++ 10 files changed, 39 insertions(+), 109 deletions(-) diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 36b682ee48..f664e4908c 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -1314,7 +1314,8 @@ class Parser { if ( $text === false ) { # Not an image, make a link $text = Linker::makeExternalLink( $url, - $this->getConverterLanguage()->markNoConversion($url), true, 'free', + $this->getConverterLanguage()->markNoConversion( $url, true ), + true, 'free', $this->getExternalLinkAttribs( $url ) ); # Register it in the output object... 
# Replace unnecessary URL escape codes with their equivalent characters diff --git a/languages/Language.php b/languages/Language.php index c4807a61d3..25ca3c2f72 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -3690,15 +3690,24 @@ class Language { } /** - * Enclose a string with the "no conversion" tag. This is used by - * various functions in the Parser + * Prepare external link text for conversion. When the text is + * a URL, it shouldn't be converted, and it'll be wrapped in + * the "raw" tag (-{R| }-) to prevent conversion. * - * @param $text String: text to be tagged for no conversion - * @param $noParse bool + * This function is called "markNoConversion" for historical + * reasons. + * + * @param $text String: text to be used for external link + * @param $noParse bool: wrap it without confirming it's a real URL first * @return string the tagged text */ public function markNoConversion( $text, $noParse = false ) { - return $this->mConverter->markNoConversion( $text, $noParse ); + // Excluding protocol-relative URLs may avoid many false positives. + if ( $noParse || preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) { + return $this->mConverter->markNoConversion( $text ); + } else { + return $text; + } } /** diff --git a/languages/classes/LanguageGan.php b/languages/classes/LanguageGan.php index 6482070d08..0d652d43de 100644 --- a/languages/classes/LanguageGan.php +++ b/languages/classes/LanguageGan.php @@ -66,20 +66,6 @@ class GanConverter extends LanguageConverter { ); } - /** - * there shouldn't be any latin text in Chinese conversion, so no need - * to mark anything. 
- * $noParse is there for compatibility with LanguageConvert::markNoConversion - * - * @param $text string - * @param $noParse bool - * - * @return string - */ - function markNoConversion( $text, $noParse = false ) { - return $text; - } - /** * @param $key string * @return String diff --git a/languages/classes/LanguageIu.php b/languages/classes/LanguageIu.php index 79e5582634..fe5cdf8d41 100644 --- a/languages/classes/LanguageIu.php +++ b/languages/classes/LanguageIu.php @@ -157,21 +157,6 @@ class IuConverter extends LanguageConverter { $link = $oldlink; } - /** - * We want our external link captions to be converted in variants, - * so we return the original text instead -{$text}-, except for URLs - * - * @param $text string - * @param $noParse bool - * - * @return string - */ - function markNoConversion( $text, $noParse = false ) { - if ( $noParse || preg_match( "/^https?:\/\/|ftp:\/\/|irc:\/\//", $text ) ) - return parent::markNoConversion( $text ); - return $text; - } - /** * An ugly function wrapper for parsing Image titles * (to prevent image name conversion) diff --git a/languages/classes/LanguageKk.php b/languages/classes/LanguageKk.php index bdaf2f4f14..6dd6959fea 100644 --- a/languages/classes/LanguageKk.php +++ b/languages/classes/LanguageKk.php @@ -390,21 +390,6 @@ class KkConverter extends LanguageConverter { } } - /** - * We want our external link captions to be converted in variants, - * so we return the original text instead -{$text}-, except for URLs - * - * @param $text string - * @param $noParse string|bool - * - * @return string - */ - function markNoConversion( $text, $noParse = false ) { - if ( $noParse || preg_match( "/^https?:\/\/|ftp:\/\/|irc:\/\//", $text ) ) - return parent::markNoConversion( $text ); - return $text; - } - /** * @param $key string * @return String diff --git a/languages/classes/LanguageKu.php b/languages/classes/LanguageKu.php index 0eac439bd2..30d98baad2 100644 --- a/languages/classes/LanguageKu.php +++ 
b/languages/classes/LanguageKu.php @@ -177,21 +177,6 @@ class KuConverter extends LanguageConverter { $link = $oldlink; } - /** - * We want our external link captions to be converted in variants, - * so we return the original text instead -{$text}-, except for URLs - * - * @param $text string - * @param $noParse bool - * - * @return string - */ - function markNoConversion( $text, $noParse = false ) { - if ( $noParse || preg_match( "/^https?:\/\/|ftp:\/\/|irc:\/\//", $text ) ) - return parent::markNoConversion( $text ); - return $text; - } - /** * An ugly function wrapper for parsing Image titles * (to prevent image name conversion) diff --git a/languages/classes/LanguageShi.php b/languages/classes/LanguageShi.php index 4833d1c330..5ddcfdeffd 100644 --- a/languages/classes/LanguageShi.php +++ b/languages/classes/LanguageShi.php @@ -136,21 +136,6 @@ class ShiConverter extends LanguageConverter { $link = $oldlink; } - /** - * We want our external link captions to be converted in variants, - * so we return the original text instead -{$text}-, except for URLs - * - * @param $text string - * @param $noParse bool - * - * @return string - */ - function markNoConversion( $text, $noParse = false ) { - if ( $noParse || preg_match( "/^https?:\/\/|ftp:\/\/|irc:\/\//", $text ) ) - return parent::markNoConversion( $text ); - return $text; - } - /** * An ugly function wrapper for parsing Image titles * (to prevent image name conversion) diff --git a/languages/classes/LanguageSr.php b/languages/classes/LanguageSr.php index b472743630..3610c1e740 100644 --- a/languages/classes/LanguageSr.php +++ b/languages/classes/LanguageSr.php @@ -128,21 +128,6 @@ class SrConverter extends LanguageConverter { $link = $oldlink; } - /** - * We want our external link captions to be converted in variants, - * so we return the original text instead -{$text}-, except for URLs - * - * @param $text string - * @param $noParse bool - * - * @return string - */ - function markNoConversion( $text, $noParse = 
false ) { - if ( $noParse || preg_match( "/^https?:\/\/|ftp:\/\/|irc:\/\//", $text ) ) - return parent::markNoConversion( $text ); - return $text; - } - /** * An ugly function wrapper for parsing Image titles * (to prevent image name conversion) diff --git a/languages/classes/LanguageZh.php b/languages/classes/LanguageZh.php index 8bf66a30c1..04767f2d95 100644 --- a/languages/classes/LanguageZh.php +++ b/languages/classes/LanguageZh.php @@ -87,20 +87,6 @@ class ZhConverter extends LanguageConverter { $this->mTables['zh-tw']->merge( $this->mTables['zh-hant'] ); } - /** - * there shouldn't be any latin text in Chinese conversion, so no need - * to mark anything. - * $noParse is there for compatibility with LanguageConvert::markNoConversion - * - * @param $text string - * @param $noParse bool - * - * @return string - */ - function markNoConversion( $text, $noParse = false ) { - return $text; - } - /** * @param $key string * @return String diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index b8de79d809..b1c8858dcb 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -10496,6 +10496,29 @@ Nested: -{zh-hans:Hi -{zh-cn:China;zh-sg:Singapore;}-;zh-hant:Hello -{zh-tw:Taiw

!! end +!! test +Proper conversion of text in external links +!! options +language=sr variant=sr-ec +!! input +http://www.google.com +gopher://www.google.com +[http://www.google.com http://www.google.com] +[gopher://www.google.com gopher://www.google.com] +[https://www.google.com irc://www.google.com] +[ftp://www.google.com www.google.com/ftp://dir] +[//www.google.com www.google.com] +!! result +

http://www.google.com +gopher://www.google.com +http://www.google.com +gopher://www.google.com +irc://www.google.com +www.гоогле.цом/фтп://дир +www.гоогле.цом +

+!! end + !! test Do not convert roman numbers to language variants !! options -- 2.20.1