From 54a8199f870e3e7c2e95159152d295d2e63f444d Mon Sep 17 00:00:00 2001
From: "C. Scott Ananian"
Date: Thu, 15 May 2014 17:35:59 -0700
Subject: [PATCH] Don't allow embedded newlines in magic links, but do allow &nbsp;
This continues the work started in T67278 to make magic link parsing
more consistent with wiki text parsing in general, and closes two
long-standing bugs.
Bug: T30950
Bug: T31025
Change-Id: I71f8b337543163569c64bbfdec154eb9b69d7264
---
RELEASE-NOTES-1.25 | 2 ++
includes/parser/Parser.php | 23 +++++++++++++------
tests/parser/parserTests.txt | 43 ++++++++++++++++++++++++++++++++++++
3 files changed, 61 insertions(+), 7 deletions(-)
diff --git a/RELEASE-NOTES-1.25 b/RELEASE-NOTES-1.25
index 1956eb6133..9183e445dd 100644
--- a/RELEASE-NOTES-1.25
+++ b/RELEASE-NOTES-1.25
@@ -279,6 +279,8 @@ changes to languages because of Bugzilla reports.
However, this difference is unlikely to arise in practice.
* (T67278) RFC, PMID, and ISBN "magic links" must be surrounded by non-word
characters on both sides.
+* (T30950, T31025) RFC, PMID, and ISBN "magic links" can no longer contain
+ newlines; but they can contain &nbsp; and other non-newline whitespace.
== Compatibility ==
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index ecb14ed85b..e3a4ea57eb 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -90,6 +90,9 @@ class Parser {
const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
+ # Regular expression for a non-newline space
+ const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
+
# State constants for the definition list colon extraction
const COLON_STATE_TEXT = 0;
const COLON_STATE_TAG = 1;
@@ -1389,18 +1392,22 @@ class Parser {
wfProfileIn( __METHOD__ );
$prots = wfUrlProtocolsWithoutProtRel();
$urlChar = self::EXT_LINK_URL_CLASS;
+ $space = self::SPACE_NOT_NL; # non-newline space
+ $spdash = "(?:-|$space)"; # a dash or a non-newline space
+ $spaces = "$space++"; # possessive match of 1 or more spaces
$text = preg_replace_callback(
'!(?: # Start cases
(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
(<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
- (\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . '
- \b(?:RFC|PMID)\s+([0-9]+)\b |# m[4]: RFC or PMID, capture number
- \bISBN\s+( # m[5]: ISBN, capture number
- (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
- (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
+ (\b(?i:$prots)$urlChar+) | # m[3]: Free external links
+ \b(?:RFC|PMID) $spaces # m[4]: RFC or PMID, capture number
+ ([0-9]+)\b |
+ \bISBN $spaces ( # m[5]: ISBN, capture number
+ (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
+ (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
[0-9Xx] # check digit
- )\b
- )!xu', array( &$this, 'magicLinkCallback' ), $text );
+ )\b
+ )!xu", array( &$this, 'magicLinkCallback' ), $text );
wfProfileOut( __METHOD__ );
return $text;
}
@@ -1441,6 +1448,8 @@ class Parser {
} elseif ( isset( $m[5] ) && $m[5] !== '' ) {
# ISBN
$isbn = $m[5];
+ $space = self::SPACE_NOT_NL; # non-newline space
+ $isbn = preg_replace( "/$space/", ' ', $isbn );
$num = strtr( $isbn, array(
'-' => '',
' ' => '',
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index f7dc0a90b5..cf9d829975 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -8934,6 +8934,19 @@ This is RFC 822 but thisRFC 822 is not RFC 822linked.
!! end
+!! test
+Magic links: RFC (w/ non-newline whitespace, bug 28950/29025)
+!! wikitext
+RFC 822
+RFC
+822
+!! html
+RFC 822
+RFC
+822
+
+!! end
+
!! test
Magic links: ISBN (bug 1937)
!! wikitext
@@ -8952,6 +8965,23 @@ This is ISBN 978-0-316-09811-3 but thisISBN 978-0-316-09811-3 is not ISBN 978-0-
!! end
+!! test
+Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)
+!! wikitext
+ISBN 978 0 316 09811 3
+ISBN
+9780316098113
+ISBN 978
+0316098113
+!! html
+ISBN 978 0 316 09811 3
+ISBN
+9780316098113
+ISBN 978
+0316098113
+
+!! end
+
!! test
Magic links: PMID incorrectly converts space to underscore
!! wikitext
@@ -8970,6 +9000,19 @@ This is PMID 1234 but thisPMID 1234 is not PMID 1234linked.
!! end
+!! test
+Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)
+!! wikitext
+PMID 1234
+PMID
+1234
+!! html
+PMID 1234
+PMID
+1234
+
+!! end
+
###
### Templates
####
--
2.20.1