From d6028a1811f66a78d4ae8ca5ff62f45e4588260b Mon Sep 17 00:00:00 2001 From: Fran McCrory Date: Tue, 10 Jul 2012 14:49:02 -0400 Subject: [PATCH] (bug 34939) Handle mixed-case URL protocols in wikitext This patch marks the regex matching url protocol as being case insensitive. From now on, we render links like [HTTP://ww]. Tests added. Change-Id: I706acb7a0ae194b50d2318763beae4e5e83671f3 --- RELEASE-NOTES-1.20 | 1 + includes/GlobalFunctions.php | 3 +++ includes/Sanitizer.php | 2 +- includes/Skin.php | 4 ++-- includes/api/ApiFormatBase.php | 2 +- includes/parser/Parser.php | 14 +++++++------- includes/parser/Parser_LinkHooks.php | 2 +- tests/parser/parserTests.txt | 27 +++++++++++++++++++++++++++ 8 files changed, 43 insertions(+), 12 deletions(-) diff --git a/RELEASE-NOTES-1.20 b/RELEASE-NOTES-1.20 index 783d231067..01c2f9c67f 100644 --- a/RELEASE-NOTES-1.20 +++ b/RELEASE-NOTES-1.20 @@ -141,6 +141,7 @@ upgrade PHP if you have not done so prior to upgrading MediaWiki. with auto-hide, multi-message support, and message replacement tags. * jquery.messageBox which appears to be unused by both core and extensions has been removed. +* (bug 34939) made link parsing case insensitive ([HttP://]) === Bug fixes in 1.20 === * (bug 30245) Use the correct way to construct a log page title. diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 0a6051801a..98bc65fc49 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -786,6 +786,9 @@ function wfParseUrl( $url ) { return false; } + // parse_url() incorrectly handles schemes case-sensitively. Convert it to lowercase. + $bits['scheme'] = strtolower( $bits['scheme'] ); + // most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it if ( in_array( $bits['scheme'] . 
'://', $wgUrlProtocols ) ) { $bits['delimiter'] = '://'; diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index a0c77da758..734c4ec9de 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -1026,7 +1026,7 @@ class Sanitizer { # Stupid hack $encValue = preg_replace_callback( - '/(' . wfUrlProtocols() . ')/', + '/((?i)' . wfUrlProtocols() . ')/', array( 'Sanitizer', 'armorLinksCallback' ), $encValue ); return $encValue; diff --git a/includes/Skin.php b/includes/Skin.php index 00eb5e871d..968f215e11 100644 --- a/includes/Skin.php +++ b/includes/Skin.php @@ -1063,7 +1063,7 @@ abstract class Skin extends ContextSource { * @return String URL */ static function makeInternalOrExternalUrl( $name ) { - if ( preg_match( '/^(?:' . wfUrlProtocols() . ')/', $name ) ) { + if ( preg_match( '/^(?i:' . wfUrlProtocols() . ')/', $name ) ) { return $name; } else { return self::makeUrl( $name ); @@ -1227,7 +1227,7 @@ abstract class Skin extends ContextSource { $text = $line[1]; } - if ( preg_match( '/^(?:' . wfUrlProtocols() . ')/', $link ) ) { + if ( preg_match( '/^(?i:' . wfUrlProtocols() . 
')/', $link ) ) { $href = $link; // Parser::getExternalLinkAttribs won't work here because of the Namespace things diff --git a/includes/api/ApiFormatBase.php b/includes/api/ApiFormatBase.php index 54c90a6eef..8ad9b8ca35 100644 --- a/includes/api/ApiFormatBase.php +++ b/includes/api/ApiFormatBase.php @@ -271,7 +271,7 @@ See the complete documentation, // identify URLs $protos = wfUrlProtocolsWithoutProtRel(); // This regex hacks around bug 13218 (&quot; included in the URL) - $text = preg_replace( "#(($protos).*?)(&quot;)?([ \\'\"<>\n]|&lt;|&gt;|&quot;)#", '<a href="\\1">\\1</a>\\3\\4', $text ); + $text = preg_replace( "#(((?i)$protos).*?)(&quot;)?([ \\'\"<>\n]|&lt;|&gt;|&quot;)#", '<a href="\\1">\\1</a>\\3\\4', $text ); // identify requests to api.php $text = preg_replace( "#api\\.php\\?[^ <\n\t]+#", '<a href="\\0">\\0</a>', $text ); if ( $this->mHelp ) { diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 8e9444a2bd..de55de0c0f 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -207,7 +207,7 @@ class Parser { public function __construct( $conf = array() ) { $this->mConf = $conf; $this->mUrlProtocols = wfUrlProtocols(); - $this->mExtLinkBracketedRegex = '/\[((' . $this->mUrlProtocols . ')'. + $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')'. self::EXT_LINK_URL_CLASS.'+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; @@ -1187,7 +1187,7 @@ class Parser { '!(?: # Start cases (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text (<.*?>) | # m[2]: Skip stuff inside HTML elements' . " - (\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . ' + (\\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . ' (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number ISBN\s+(\b # m[5]: ISBN, capture number (?: 97[89] [\ \-]? )? 
# optional 13-digit ISBN prefix @@ -1853,7 +1853,7 @@ class Parser { # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. - if ( preg_match( '/^(?:' . $this->mUrlProtocols . ')/', $m[1] ) ) { + if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $m[1] ) ) { $s .= $prefix . '[[' . $line ; wfProfileOut( __METHOD__."-misc" ); continue; @@ -2090,7 +2090,7 @@ class Parser { * @return String: less-or-more HTML with NOPARSE bits */ function armorLinks( $text ) { - return preg_replace( '/\b(' . $this->mUrlProtocols . ')/', + return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', "{$this->mUniqPrefix}NOPARSE$1", $text ); } @@ -5095,8 +5095,8 @@ class Parser { $paramName = 'no-link'; $value = true; $validated = true; - } elseif ( preg_match( "/^$prots/", $value ) ) { - if ( preg_match( "/^($prots)$chars+$/u", $value, $m ) ) { + } elseif ( preg_match( "/^(?i)$prots/", $value ) ) { + if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) { $paramName = 'link-url'; $this->mOutput->addExternalLink( $value ); if ( $this->mOptions->getExternalLinkTarget() ) { @@ -5622,7 +5622,7 @@ class Parser { # @todo FIXME: Not tolerant to blank link text # I.E. [http://www.mediawiki.org] will render as [1] or something depending # on how many empty links there are on the page - need to figure that out. - $text = preg_replace( '/\[(?:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text ); + $text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) 
([^[]+)\]/', '$2', $text ); # Parse wikitext quotes (italics & bold) $text = $this->doQuotes( $text ); diff --git a/includes/parser/Parser_LinkHooks.php b/includes/parser/Parser_LinkHooks.php index 9555bdb93c..6bcc324d58 100644 --- a/includes/parser/Parser_LinkHooks.php +++ b/includes/parser/Parser_LinkHooks.php @@ -226,7 +226,7 @@ class Parser_LinkHooks extends Parser { # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. - if( preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $titleText) ) { + if( preg_match('/^\b(?i:' . wfUrlProtocols() . ')/', $titleText) ) { wfProfileOut( __METHOD__ ); return $wt; } diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 453e88a86f..c41811642c 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -10343,6 +10343,33 @@ abc

!! end +!!test +Bug 34939 - Case insensitive link parsing ([HttP://]) +!! input +[HttP://MediaWiki.Org/] +!! result +

[1] +

+!! end + +!!test +Bug 34939 - Case insensitive link parsing ([HttP:// title]) +!! input +[HttP://MediaWiki.Org/ MediaWiki] +!! result +

MediaWiki +

+!! end + +!!test +Bug 34939 - Case insensitive link parsing (HttP://) +!! input +HttP://MediaWiki.Org/ +!! result +

HttP://MediaWiki.Org/ +

+!! end + TODO: more images -- 2.20.1