From 48bbe8b848fd7021a0d32729b9c8c77038762fa1 Mon Sep 17 00:00:00 2001 From: Roan Kattouw Date: Mon, 15 Aug 2011 12:20:00 +0000 Subject: [PATCH] (bug 30269) Strings like foobar//barfoo are linked to become foobar[//barfoo] * Introduce a boolean parameter to wfUrlProtocols() which, if set to false, will cause '//' to be dropped from the returned regex so it doesn't match protocol-relative URLs * Introduce wfUrlProtocolsWithoutProtRel() as a wrapper for wfUrlProtocols( false ). The latter should not be used directly because the former is much clearer * Use this new function in Parser::doMagicLinks() to fix the original bug. Also use it in ApiFormatBase::formatHTML() and CodeCommentLinker::link(), which probably had similar bugs --- includes/GlobalFunctions.php | 35 ++++++++++++++++++++++++++-------- includes/api/ApiFormatBase.php | 2 +- includes/parser/Parser.php | 2 +- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 193b84b6c4..68d6865712 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -469,14 +469,17 @@ function wfExpandUrl( $url, $defaultProto = PROTO_CURRENT ) { /** * Returns a regular expression of url protocols * + * @param $includeProtocolRelative bool If false, remove '//' from the returned protocol list. 
+ * DO NOT USE this directly, use wfUrlProtocolsWithoutProtRel() instead * @return String */ -function wfUrlProtocols() { +function wfUrlProtocols( $includeProtocolRelative = true ) { global $wgUrlProtocols; - static $retval = null; - if ( !is_null( $retval ) ) { - return $retval; + // Cache return values separately based on $includeProtocolRelative + static $retval = array( true => null, false => null ); + if ( !is_null( $retval[$includeProtocolRelative] ) ) { + return $retval[$includeProtocolRelative]; } // Support old-style $wgUrlProtocols strings, for backwards compatibility @@ -484,14 +487,30 @@ function wfUrlProtocols() { if ( is_array( $wgUrlProtocols ) ) { $protocols = array(); foreach ( $wgUrlProtocols as $protocol ) { - $protocols[] = preg_quote( $protocol, '/' ); + // Filter out '//' if !$includeProtocolRelative + if ( $includeProtocolRelative || $protocol !== '//' ) { + $protocols[] = preg_quote( $protocol, '/' ); + } } - $retval = implode( '|', $protocols ); + $retval[$includeProtocolRelative] = implode( '|', $protocols ); } else { - $retval = $wgUrlProtocols; + // Ignore $includeProtocolRelative in this case + // This case exists for pre-1.6 compatibility, and we can safely assume + // that '//' won't appear in a pre-1.6 config because protocol-relative + // URLs weren't supported until 1.18 + $retval[$includeProtocolRelative] = $wgUrlProtocols; } - return $retval; + return $retval[$includeProtocolRelative]; +} + +/** + * Like wfUrlProtocols(), but excludes '//' from the protocol list. 
Use this if + * you need a regex that matches all URL protocols but does not match protocol- + * relative URLs + */ +function wfUrlProtocolsWithoutProtRel() { + return wfUrlProtocols( false ); } /** diff --git a/includes/api/ApiFormatBase.php b/includes/api/ApiFormatBase.php index 0dd697c337..9df2c6e9c1 100644 --- a/includes/api/ApiFormatBase.php +++ b/includes/api/ApiFormatBase.php @@ -263,7 +263,7 @@ See complete documentation, or // encode all comments or tags as safe blue strings $text = preg_replace( '/\<(!--.*?--|.*?)\>/', '<\1>', $text ); // identify URLs - $protos = wfUrlProtocols(); + $protos = wfUrlProtocolsWithoutProtRel(); // This regex hacks around bug 13218 (" included in the URL) $text = preg_replace( "#(($protos).*?)(")?([ \\'\"<>\n]|<|>|")#", '\\1\\3\\4', $text ); // identify requests to api.php diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 8fe153c31d..9a511ad07a 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -1230,7 +1230,7 @@ class Parser { */ function doMagicLinks( $text ) { wfProfileIn( __METHOD__ ); - $prots = $this->mUrlProtocols; + $prots = wfUrlProtocolsWithoutProtRel(); $urlChar = self::EXT_LINK_URL_CLASS; $text = preg_replace_callback( '!(?: # Start cases -- 2.20.1