From 54a8199f870e3e7c2e95159152d295d2e63f444d Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Thu, 15 May 2014 17:35:59 -0700 Subject: [PATCH] Don't allow embedded newlines in magic links, but do allow &nbsp; This continues the work started in T67278 to make magic link parsing more consistent with wiki text parsing in general, and closes two long-standing bugs. Bug: T30950 Bug: T31025 Change-Id: I71f8b337543163569c64bbfdec154eb9b69d7264 --- RELEASE-NOTES-1.25 | 2 ++ includes/parser/Parser.php | 23 +++++++++++++------ tests/parser/parserTests.txt | 43 ++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 7 deletions(-) diff --git a/RELEASE-NOTES-1.25 b/RELEASE-NOTES-1.25 index 1956eb6133..9183e445dd 100644 --- a/RELEASE-NOTES-1.25 +++ b/RELEASE-NOTES-1.25 @@ -279,6 +279,8 @@ changes to languages because of Bugzilla reports. However, this difference is unlikely to arise in practice. * (T67278) RFC, PMID, and ISBN "magic links" must be surrounded by non-word characters on both sides. +* (T30950, T31025) RFC, PMID, and ISBN "magic links" can no longer contain + newlines; but they can contain &nbsp; and other non-newline whitespace. 
== Compatibility == diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index ecb14ed85b..e3a4ea57eb 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -90,6 +90,9 @@ class Parser { const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+) \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu'; + # Regular expression for a non-newline space + const SPACE_NOT_NL = '(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})'; + # State constants for the definition list colon extraction const COLON_STATE_TEXT = 0; const COLON_STATE_TAG = 1; @@ -1389,18 +1392,22 @@ class Parser { wfProfileIn( __METHOD__ ); $prots = wfUrlProtocolsWithoutProtRel(); $urlChar = self::EXT_LINK_URL_CLASS; + $space = self::SPACE_NOT_NL; # non-newline space + $spdash = "(?:-|$space)"; # a dash or a non-newline space + $spaces = "$space++"; # possessive match of 1 or more spaces $text = preg_replace_callback( '!(?: # Start cases (].*?) | # m[1]: Skip link text (<.*?>) | # m[2]: Skip stuff inside HTML elements' . " - (\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . ' - \b(?:RFC|PMID)\s+([0-9]+)\b |# m[4]: RFC or PMID, capture number - \bISBN\s+( # m[5]: ISBN, capture number - (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix - (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters + (\b(?i:$prots)$urlChar+) | # m[3]: Free external links + \b(?:RFC|PMID) $spaces # m[4]: RFC or PMID, capture number + ([0-9]+)\b | + \bISBN $spaces ( # m[5]: ISBN, capture number + (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix + (?: [0-9] $spdash? ){9} # 9 digits with opt. 
delimiters [0-9Xx] # check digit - )\b - )!xu', array( &$this, 'magicLinkCallback' ), $text ); + )\b + )!xu", array( &$this, 'magicLinkCallback' ), $text ); wfProfileOut( __METHOD__ ); return $text; } @@ -1441,6 +1448,8 @@ class Parser { } elseif ( isset( $m[5] ) && $m[5] !== '' ) { # ISBN $isbn = $m[5]; + $space = self::SPACE_NOT_NL; # non-newline space + $isbn = preg_replace( "/$space/", ' ', $isbn ); $num = strtr( $isbn, array( '-' => '', ' ' => '', diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index f7dc0a90b5..cf9d829975 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -8934,6 +8934,19 @@ This is RFC 822 but thisRFC 822 is not RFC 822linked.

!! end +!! test +Magic links: RFC (w/ non-newline whitespace, bug 28950/29025) +!! wikitext +RFC       822 +RFC +822 +!! html +

RFC 822 +RFC +822 +

+!! end + !! test Magic links: ISBN (bug 1937) !! wikitext @@ -8952,6 +8965,23 @@ This is ISBN 978-0-316-09811-3 but thisISBN 978-0-316-09811-3 is not ISBN 978-0-

!! end +!! test +Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025) +!! wikitext +ISBN       978 0 316 09811 3 +ISBN +9780316098113 +ISBN 978 +0316098113 +!! html +

ISBN 978 0 316 09811 3 +ISBN +9780316098113 +ISBN 978 +0316098113 +

+!! end + !! test Magic links: PMID incorrectly converts space to underscore !! wikitext @@ -8970,6 +9000,19 @@ This is PMID 1234 but thisPMID 1234 is not PMID 1234linked.

!! end +!! test +Magic links: PMID (w/ non-newline whitespace, bug 28950/29025) +!! wikitext +PMID       1234 +PMID +1234 +!! html +

PMID 1234 +PMID +1234 +

+!! end + ### ### Templates #### -- 2.20.1