From 25d35fc65c54a787ea91c5326bbb2a8de486a773 Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Thu, 15 May 2014 17:19:10 -0700 Subject: [PATCH] Enforce spaces around magic links (RFC, PMID, and ISBN). Ensure that there is a \b boundary before and after RFC, PMID, and ISBN links. (Previously we enforced \b boundaries only before free external links and after ISBN links.) Consistency is a good thing! In addition: * \b is not a PHP escape sequence, so you don't need to write \\b inside a string. * \b before the numeric part of an ISBN is pointless: by the structure of the regexp there will always be a space on the left and a word character (a digit) on the right. Bug: 65278 Change-Id: Ic315b988091a5c7530a8285b9249804db72e55db --- RELEASE-NOTES-1.25 | 2 ++ includes/parser/Parser.php | 8 +++---- tests/parser/parserTests.txt | 42 ++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/RELEASE-NOTES-1.25 b/RELEASE-NOTES-1.25 index 06d2d6619d..339e677f1b 100644 --- a/RELEASE-NOTES-1.25 +++ b/RELEASE-NOTES-1.25 @@ -271,6 +271,8 @@ changes to languages because of Bugzilla reports. ** In source text of the form '{$A}'{$B}' or `{$A}`{$B}`, where variable A does not exist yet variable B does, the latter may not be replaced. However, this difference is unlikely to arise in practice. +* (T67278) RFC, PMID, and ISBN "magic links" must be surrounded by non-word + characters on both sides. == Compatibility == diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 91d8a81ad4..5c8253a0c3 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -1393,13 +1393,13 @@ class Parser { '!(?: # Start cases (].*?) | # m[1]: Skip link text (<.*?>) | # m[2]: Skip stuff inside HTML elements' . " - (\\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . ' - (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number - ISBN\s+(\b # m[5]: ISBN, capture number + (\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . ' + \b(?:RFC|PMID)\s+([0-9]+)\b |# m[4]: RFC or PMID, capture number + \bISBN\s+( # m[5]: ISBN, capture number (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters [0-9Xx] # check digit - \b) + )\b )!xu', array( &$this, 'magicLinkCallback' ), $text ); wfProfileOut( __METHOD__ ); return $text; diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 78e1387a61..5f19e8b4ff 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -4184,6 +4184,21 @@ http://example.com/url_with_(brackets)

!! end +!! test +External links: No preceding word characters allowed (bug 65278) +!! wikitext +NOPEhttp://example.com +N0http://example.com +ok:http://example.com +ok-http://example.com +!! html +

NOPEhttp://example.com +N0http://example.com +ok:http://example.com +ok-http://example.com +

+!! end + !! test External image !! wikitext @@ -8886,6 +8901,15 @@ RFC 822

!! end +!! test +Magic links: RFC (bug 65278) +!! wikitext +This is RFC 822 but thisRFC 822 is not RFC 822linked. +!! html +

This is RFC 822 but thisRFC 822 is not RFC 822linked. +

+!! end + !! test Magic links: ISBN (bug 1937) !! wikitext @@ -8895,6 +8919,15 @@ ISBN 0-306-40615-2

!! end +!! test +Magic links: ISBN (bug 65278) +!! wikitext +This is ISBN 978-0-316-09811-3 but thisISBN 978-0-316-09811-3 is not ISBN 978-0-316-09811-3linked. +!! html +

This is ISBN 978-0-316-09811-3 but thisISBN 978-0-316-09811-3 is not ISBN 978-0-316-09811-3linked. +

+!! end + !! test Magic links: PMID incorrectly converts space to underscore !! wikitext @@ -8904,6 +8937,15 @@ PMID 1234

!! end +!! test +Magic links: PMID (bug 65278) +!! wikitext +This is PMID 1234 but thisPMID 1234 is not PMID 1234linked. +!! html +

This is PMID 1234 but thisPMID 1234 is not PMID 1234linked. +

+!! end + ### ### Templates #### -- 2.20.1