From: C. Scott Ananian Date: Thu, 8 Jan 2015 22:00:54 +0000 (-0500) Subject: Support IPv6 URLs in bracketed and auto links. X-Git-Tag: 1.31.0-rc.0~10357 X-Git-Url: https://git.cyclocoop.org/%28%28?a=commitdiff_plain;h=87eebf8dd5ec4564aa1cfca4fe7e53fbd29da3d5;p=lhc%2Fweb%2Fwiklou.git Support IPv6 URLs in bracketed and auto links. The corresponding patch for Parsoid is Ibb33188cdfe2004e469c3f6ee6f30d34d1923283. Task: T23261 Change-Id: Iff077bf31168b431febb243e2e62f2c6502616bc --- diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 387f24faa0..30981c368f 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -1809,6 +1809,11 @@ class Sanitizer { $host = preg_replace( $strip, '', $host ); + // IPv6 host names are bracketed with []. Url-decode these. + if ( substr_compare( "//%5B", $host, 0, 5 ) === 0 && preg_match( '!^//%5B(.*?)%5D((:\d+)?)$!', $host, $matches ) ) { + $host = '//[' . $matches[1] . ']' . $matches[2]; + } + // @todo FIXME: Validate hostnames here return $protocol . $host . $rest; diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 6189997a92..2eec08bf91 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -87,7 +87,11 @@ class Parser { # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20 # as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052 const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]'; - const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+) + # Simplified expression to match an IPv4 or IPv6 address, or + # at least one character of a host name (embeds EXT_LINK_URL_CLASS) + const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])'; + # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR) + const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+) \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu'; # Regular expression for a non-newline space @@ -254,7 +258,8 @@ class Parser { $this->mConf = $conf; $this->mUrlProtocols = wfUrlProtocols(); $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' . - self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; + self::EXT_LINK_ADDR . + self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; } elseif ( defined( 'HPHP_VERSION' ) ) { @@ -1378,6 +1383,7 @@ class Parser { public function doMagicLinks( $text ) { $prots = wfUrlProtocolsWithoutProtRel(); $urlChar = self::EXT_LINK_URL_CLASS; + $addr = self::EXT_LINK_ADDR; $space = self::SPACE_NOT_NL; # non-newline space $spdash = "(?:-|$space)"; # a dash or a non-newline space $spaces = "$space++"; # possessive match of 1 or more spaces @@ -1386,7 +1392,7 @@ class Parser { (].*?) | # m[1]: Skip link text (<.*?>) | # m[2]: Skip stuff inside # HTML elements' . " - (\b(?i:$prots)($urlChar+)) | # m[3]: Free external links + (\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links # m[4]: Post-protocol path \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number ([0-9]+)\b | @@ -1499,14 +1505,14 @@ class Parser { $url = substr( $url, 0, -$numSepChars ); } - $url = Sanitizer::cleanUrl( $url ); - # Verify that we still have a real URL after trail removal, and # not just lone protocol if ( strlen( $trail ) >= $numPostProto ) { return $url . $trail; } + $url = Sanitizer::cleanUrl( $url ); + # Is this an external image? $text = $this->maybeMakeExternalImage( $url ); if ( $text === false ) { @@ -5415,9 +5421,10 @@ class Parser { case 'gallery-internal-link': $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) ); $chars = self::EXT_LINK_URL_CLASS; + $addr = self::EXT_LINK_ADDR; $prots = $this->mUrlProtocols; //check to see if link matches an absolute url, if not then it must be a wiki link. - if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) { + if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) { $link = $linkValue; } else { $localLinkTitle = Title::newFromText( $linkValue ); @@ -5599,13 +5606,14 @@ class Parser { break; case 'link': $chars = self::EXT_LINK_URL_CLASS; + $addr = self::EXT_LINK_ADDR; $prots = $this->mUrlProtocols; if ( $value === '' ) { $paramName = 'no-link'; $value = true; $validated = true; } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) { - if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) { + if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) { $paramName = 'link-url'; $this->mOutput->addExternalLink( $value ); if ( $this->mOptions->getExternalLinkTarget() ) { diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index ffa435c08b..f6ca577186 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -5341,14 +5341,91 @@ http://example.com/index.php?foozoid[]=bar !! end !! test -IPv6 urls (bug 21261) -!! options -disabled +IPv6 urls, autolink format (T23261) !! wikitext http://[2404:130:0:1000::187:2]/index.php + +Examples from RFC2373, section 2.2: +* http://[1080::8:800:200C:417A]/unicast +* http://[FF01::101]/multicast +* http://[::1]/loopback +* http://[::]/unspecified +* http://[::13.1.68.3]/ipv4compat +* http://[::FFFF:129.144.52.38]/ipv4compat + +Examples from RFC 2732, section 2: +* http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html +* http://[1080:0:0:0:8:800:200C:417A]/index.html +* http://[3ffe:2a00:100:7031::1] +* http://[1080::8:800:200C:417A]/foo +* http://[::192.9.5.5]/ipng +* http://[::FFFF:129.144.52.38]:80/index.html +* http://[2010:836B:4179::836B:4179] + !! html

http://[2404:130:0:1000::187:2]/index.php -

+

Examples from RFC2373, section 2.2: +

+ +

Examples from RFC 2732, section 2: +

+ + +!! end + +!! test +IPv6 urls, bracketed format (T23261) +!! wikitext +[http://[2404:130:0:1000::187:2]/index.php test] + +Examples from RFC2373, section 2.2: +* [http://[1080::8:800:200C:417A] unicast] +* [http://[FF01::101] multicast] +* [http://[::1]/ loopback] +* [http://[::] unspecified] +* [http://[::13.1.68.3] ipv4compat] +* [http://[::FFFF:129.144.52.38] ipv4compat] + +Examples from RFC 2732, section 2: +* [http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html 1] +* [http://[1080:0:0:0:8:800:200C:417A]/index.html 2] +* [http://[3ffe:2a00:100:7031::1] 3] +* [http://[1080::8:800:200C:417A]/foo 4] +* [http://[::192.9.5.5]/ipng 5] +* [http://[::FFFF:129.144.52.38]:80/index.html 6] +* [http://[2010:836B:4179::836B:4179] 7] + +!! html +

test +

Examples from RFC2373, section 2.2: +

+ +

Examples from RFC 2732, section 2: +

+ + !! end !! test