From a05971dfc7fdd78be02edae4855ca808dc599cda Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Wed, 23 Sep 2015 15:16:24 -0400 Subject: [PATCH] Terminate free external link on &nbsp; (and numeric versions of <>) Bug: T84937 Change-Id: Ic74d8d069e08c0597c7b26755e0d942bf3a510cc --- RELEASE-NOTES-1.26 | 2 ++ includes/parser/Parser.php | 3 ++- tests/parser/parserTests.txt | 16 +++++++++++----- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/RELEASE-NOTES-1.26 b/RELEASE-NOTES-1.26 index d5b521e67f..aa32c9f6f0 100644 --- a/RELEASE-NOTES-1.26 +++ b/RELEASE-NOTES-1.26 @@ -203,6 +203,8 @@ changes to languages because of Phabricator reports. * DeferredUpdates::addHTMLCacheUpdate() was removed. * The default name of the 'suppress' group page has been changed from 'Project:Oversight' to 'Project:Suppress'. +* (T84937) Free external links ("autolinked" urls) will now be terminated + by &nbsp; and HTML entity encodings of &nbsp;, <, and >. == Compatibility == diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 677da63bd7..288e4863d0 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -1474,8 +1474,9 @@ class Parser { # The characters '<' and '>' (which were escaped by # removeHTMLtags()) should not be included in # URLs, per RFC 2396. + # Make &nbsp; terminate a URL as well (bug T84937) $m2 = array(); - if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) { + if ( preg_match( '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/', $url, $m2, PREG_OFFSET_CAPTURE ) ) { $trail = substr( $url, $m2[0][1] ) . $trail; $url = substr( $url, 0, $m2[0][1] ); } diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index c8c63f397b..4c6c9a5b31 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -4646,6 +4646,9 @@ http://example.com?
http://example.com) http://example.com/url_with_(brackets) (http://example.com/url_without_brackets) +http://example.com/url_with_entity& +http://example.com/url_with_entity& +http://example.com/url_with_entity& http://example.com/url_with_entity  http://example.com/url_with_entity  http://example.com/url_with_entity  @@ -4663,12 +4666,15 @@ http://example.com/url_with_entity< http://example.com) http://example.com/url_with_(brackets) (http://example.com/url_without_brackets) -http://example.com/url_with_entity  -http://example.com/url_with_entity  -http://example.com/url_with_entity  +http://example.com/url_with_entity& +http://example.com/url_with_entity& +http://example.com/url_with_entity& +http://example.com/url_with_entity  +http://example.com/url_with_entity  +http://example.com/url_with_entity  http://example.com/url_with_entity< -http://example.com/url_with_entity%3C -http://example.com/url_with_entity%3C +http://example.com/url_with_entity< +http://example.com/url_with_entity<

!! html/parsoid

http://example.com, -- 2.20.1