From d6028a1811f66a78d4ae8ca5ff62f45e4588260b Mon Sep 17 00:00:00 2001
From: Fran McCrory
Date: Tue, 10 Jul 2012 14:49:02 -0400
Subject: [PATCH] (bug 34939) Handle mixed-case URL protocols in wikitext
This patch makes the regexes that match URL protocols case-insensitive.
From now on, links such as [HTTP://ww] are rendered as external links.
Tests added.
Change-Id: I706acb7a0ae194b50d2318763beae4e5e83671f3
---
RELEASE-NOTES-1.20 | 1 +
includes/GlobalFunctions.php | 3 +++
includes/Sanitizer.php | 2 +-
includes/Skin.php | 4 ++--
includes/api/ApiFormatBase.php | 2 +-
includes/parser/Parser.php | 14 +++++++-------
includes/parser/Parser_LinkHooks.php | 2 +-
tests/parser/parserTests.txt | 27 +++++++++++++++++++++++++++
8 files changed, 43 insertions(+), 12 deletions(-)
diff --git a/RELEASE-NOTES-1.20 b/RELEASE-NOTES-1.20
index 783d231067..01c2f9c67f 100644
--- a/RELEASE-NOTES-1.20
+++ b/RELEASE-NOTES-1.20
@@ -141,6 +141,7 @@ upgrade PHP if you have not done so prior to upgrading MediaWiki.
with auto-hide, multi-message support, and message replacement tags.
* jquery.messageBox which appears to be unused by both core and extensions has
been removed.
+* (bug 34939) Made link parsing case insensitive ([HttP://])
=== Bug fixes in 1.20 ===
* (bug 30245) Use the correct way to construct a log page title.
diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php
index 0a6051801a..98bc65fc49 100644
--- a/includes/GlobalFunctions.php
+++ b/includes/GlobalFunctions.php
@@ -786,6 +786,9 @@ function wfParseUrl( $url ) {
return false;
}
+ // parse_url() incorrectly handles schemes case-sensitively. Convert it to lowercase.
+ $bits['scheme'] = strtolower( $bits['scheme'] );
+
// most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it
if ( in_array( $bits['scheme'] . '://', $wgUrlProtocols ) ) {
$bits['delimiter'] = '://';
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index a0c77da758..734c4ec9de 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -1026,7 +1026,7 @@ class Sanitizer {
# Stupid hack
$encValue = preg_replace_callback(
- '/(' . wfUrlProtocols() . ')/',
+ '/((?i)' . wfUrlProtocols() . ')/',
array( 'Sanitizer', 'armorLinksCallback' ),
$encValue );
return $encValue;
diff --git a/includes/Skin.php b/includes/Skin.php
index 00eb5e871d..968f215e11 100644
--- a/includes/Skin.php
+++ b/includes/Skin.php
@@ -1063,7 +1063,7 @@ abstract class Skin extends ContextSource {
* @return String URL
*/
static function makeInternalOrExternalUrl( $name ) {
- if ( preg_match( '/^(?:' . wfUrlProtocols() . ')/', $name ) ) {
+ if ( preg_match( '/^(?i:' . wfUrlProtocols() . ')/', $name ) ) {
return $name;
} else {
return self::makeUrl( $name );
@@ -1227,7 +1227,7 @@ abstract class Skin extends ContextSource {
$text = $line[1];
}
- if ( preg_match( '/^(?:' . wfUrlProtocols() . ')/', $link ) ) {
+ if ( preg_match( '/^(?i:' . wfUrlProtocols() . ')/', $link ) ) {
$href = $link;
// Parser::getExternalLinkAttribs won't work here because of the Namespace things
diff --git a/includes/api/ApiFormatBase.php b/includes/api/ApiFormatBase.php
index 54c90a6eef..8ad9b8ca35 100644
--- a/includes/api/ApiFormatBase.php
+++ b/includes/api/ApiFormatBase.php
@@ -271,7 +271,7 @@ See the complete documentation,
// identify URLs
$protos = wfUrlProtocolsWithoutProtRel();
// This regex hacks around bug 13218 (" included in the URL)
- $text = preg_replace( "#(($protos).*?)(")?([ \\'\"<>\n]|<|>|")#", '\\1\\3\\4', $text );
+ $text = preg_replace( "#(((?i)$protos).*?)(")?([ \\'\"<>\n]|<|>|")#", '\\1\\3\\4', $text );
// identify requests to api.php
$text = preg_replace( "#api\\.php\\?[^ <\n\t]+#", '\\0', $text );
if ( $this->mHelp ) {
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 8e9444a2bd..de55de0c0f 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -207,7 +207,7 @@ class Parser {
public function __construct( $conf = array() ) {
$this->mConf = $conf;
$this->mUrlProtocols = wfUrlProtocols();
- $this->mExtLinkBracketedRegex = '/\[((' . $this->mUrlProtocols . ')'.
+ $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')'.
self::EXT_LINK_URL_CLASS.'+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
if ( isset( $conf['preprocessorClass'] ) ) {
$this->mPreprocessorClass = $conf['preprocessorClass'];
@@ -1187,7 +1187,7 @@ class Parser {
'!(?: # Start cases
(].*?) | # m[1]: Skip link text
(<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
- (\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . '
+ (\\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . '
(?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number
ISBN\s+(\b # m[5]: ISBN, capture number
(?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
@@ -1853,7 +1853,7 @@ class Parser {
# Don't allow internal links to pages containing
# PROTO: where PROTO is a valid URL protocol; these
# should be external links.
- if ( preg_match( '/^(?:' . $this->mUrlProtocols . ')/', $m[1] ) ) {
+ if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $m[1] ) ) {
$s .= $prefix . '[[' . $line ;
wfProfileOut( __METHOD__."-misc" );
continue;
@@ -2090,7 +2090,7 @@ class Parser {
* @return String: less-or-more HTML with NOPARSE bits
*/
function armorLinks( $text ) {
- return preg_replace( '/\b(' . $this->mUrlProtocols . ')/',
+ return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/',
"{$this->mUniqPrefix}NOPARSE$1", $text );
}
@@ -5095,8 +5095,8 @@ class Parser {
$paramName = 'no-link';
$value = true;
$validated = true;
- } elseif ( preg_match( "/^$prots/", $value ) ) {
- if ( preg_match( "/^($prots)$chars+$/u", $value, $m ) ) {
+ } elseif ( preg_match( "/^(?i)$prots/", $value ) ) {
+ if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
$paramName = 'link-url';
$this->mOutput->addExternalLink( $value );
if ( $this->mOptions->getExternalLinkTarget() ) {
@@ -5622,7 +5622,7 @@ class Parser {
# @todo FIXME: Not tolerant to blank link text
# I.E. [http://www.mediawiki.org] will render as [1] or something depending
# on how many empty links there are on the page - need to figure that out.
- $text = preg_replace( '/\[(?:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
+ $text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
# Parse wikitext quotes (italics & bold)
$text = $this->doQuotes( $text );
diff --git a/includes/parser/Parser_LinkHooks.php b/includes/parser/Parser_LinkHooks.php
index 9555bdb93c..6bcc324d58 100644
--- a/includes/parser/Parser_LinkHooks.php
+++ b/includes/parser/Parser_LinkHooks.php
@@ -226,7 +226,7 @@ class Parser_LinkHooks extends Parser {
# Don't allow internal links to pages containing
# PROTO: where PROTO is a valid URL protocol; these
# should be external links.
- if( preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $titleText) ) {
+ if( preg_match('/^\b(?i:' . wfUrlProtocols() . ')/', $titleText) ) {
wfProfileOut( __METHOD__ );
return $wt;
}
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 453e88a86f..c41811642c 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -10343,6 +10343,33 @@ abc
!! end
+!!test
+Bug 34939 - Case insensitive link parsing ([HttP://])
+!! input
+[HttP://MediaWiki.Org/]
+!! result
+[1]
+
+!! end
+
+!!test
+Bug 34939 - Case insensitive link parsing ([HttP:// title])
+!! input
+[HttP://MediaWiki.Org/ MediaWiki]
+!! result
+MediaWiki
+
+!! end
+
+!!test
+Bug 34939 - Case insensitive link parsing (HttP://)
+!! input
+HttP://MediaWiki.Org/
+!! result
+HttP://MediaWiki.Org/
+
+!! end
+
TODO:
more images
--
2.20.1