Don't allow embedded newlines in magic links, but do allow &nbsp; and other non-newline whitespace

author C. Scott Ananian <cscott@cscott.net>

Fri, 16 May 2014 00:35:59 +0000 (17:35 -0700)

committer Tim Starling <tstarling@wikimedia.org>

Mon, 22 Dec 2014 04:14:55 +0000 (04:14 +0000)
author C. Scott Ananian <cscott@cscott.net>
Fri, 16 May 2014 00:35:59 +0000 (17:35 -0700)
committer Tim Starling <tstarling@wikimedia.org>
Mon, 22 Dec 2014 04:14:55 +0000 (04:14 +0000)
diff --git a/RELEASE-NOTES-1.25 b/RELEASE-NOTES-1.25

index 1956eb6..9183e44 100644 (file)
--- a/RELEASE-NOTES-1.25
+++ b/RELEASE-NOTES-1.25
@@ -279,6 +279,8 @@ changes to languages because of Bugzilla reports.
     However, this difference is unlikely to arise in practice.
  * (T67278) RFC, PMID, and ISBN "magic links" must be surrounded by non-word
    characters on both sides.
+* (T30950, T31025) RFC, PMID, and ISBN "magic links" can no longer contain
+  newlines; but they can contain &nbsp; and other non-newline whitespace.
  
  == Compatibility ==
  
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php

index ecb14ed..e3a4ea5 100644 (file)
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -90,6 +90,9 @@ class Parser {
         const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
                 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
  
+       # Regular expression for a non-newline space
+       const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
+
         # State constants for the definition list colon extraction
         const COLON_STATE_TEXT = 0;
         const COLON_STATE_TAG = 1;
@@ -1389,18 +1392,22 @@ class Parser {
                 wfProfileIn( __METHOD__ );
                 $prots = wfUrlProtocolsWithoutProtRel();
                 $urlChar = self::EXT_LINK_URL_CLASS;
+               $space = self::SPACE_NOT_NL; #  non-newline space
+               $spdash = "(?:-|$space)"; # a dash or a non-newline space
+               $spaces = "$space++"; # possessive match of 1 or more spaces
                 $text = preg_replace_callback(
                         '!(?:                           # Start cases
                                 (<a[ \t\r\n>].*?</a>) |     # m[1]: Skip link text
                                 (<.*?>) |                   # m[2]: Skip stuff inside HTML elements' . "
-                               (\b(?i:$prots)$urlChar+) |  # m[3]: Free external links" . '
-                               \b(?:RFC|PMID)\s+([0-9]+)\b |# m[4]: RFC or PMID, capture number
-                               \bISBN\s+(                  # m[5]: ISBN, capture number
-                                       (?: 97[89] [\ \-]? )?   # optional 13-digit ISBN prefix
-                                       (?: [0-9]  [\ \-]? ){9} # 9 digits with opt. delimiters
+                               (\b(?i:$prots)$urlChar+) |  # m[3]: Free external links
+                               \b(?:RFC|PMID) $spaces      # m[4]: RFC or PMID, capture number
+                                       ([0-9]+)\b |
+                               \bISBN $spaces (            # m[5]: ISBN, capture number
+                                       (?: 97[89] $spdash? )?   # optional 13-digit ISBN prefix
+                                       (?: [0-9]  $spdash? ){9} # 9 digits with opt. delimiters
                                         [0-9Xx]                 # check digit
-                                       )\b
-                       )!xu', array( &$this, 'magicLinkCallback' ), $text );
+                               )\b
+                       )!xu", array( &$this, 'magicLinkCallback' ), $text );
                 wfProfileOut( __METHOD__ );
                 return $text;
         }
@@ -1441,6 +1448,8 @@ class Parser {
                 } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
                         # ISBN
                         $isbn = $m[5];
+                       $space = self::SPACE_NOT_NL; #  non-newline space
+                       $isbn = preg_replace( "/$space/", ' ', $isbn );
                         $num = strtr( $isbn, array(
                                 '-' => '',
                                 ' ' => '',
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt

index f7dc0a9..cf9d829 100644 (file)
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -8934,6 +8934,19 @@ This is RFC 822 but thisRFC 822 is not RFC 822linked.
  </p>
  !! end
  
+!! test
+Magic links: RFC (w/ non-newline whitespace, bug 28950/29025)
+!! wikitext
+RFC &nbsp;&#160;&#0160;&#xA0;&#Xa0; 822
+RFC
+822
+!! html
+<p><a class="external mw-magiclink-rfc" rel="nofollow" href="//tools.ietf.org/html/rfc822">RFC 822</a>
+RFC
+822
+</p>
+!! end
+
  !! test
  Magic links: ISBN (bug 1937)
  !! wikitext
@@ -8952,6 +8965,23 @@ This is ISBN 978-0-316-09811-3 but thisISBN 978-0-316-09811-3 is not ISBN 978-0-
  </p>
  !! end
  
+!! test
+Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)
+!! wikitext
+ISBN &nbsp;&#160;&#0160;&#xA0;&#Xa0; 978&nbsp;0&#160;316&#0160;09811&#xA0;3
+ISBN
+9780316098113
+ISBN 978
+0316098113
+!! html
+<p><a href="/wiki/Special:BookSources/9780316098113" class="internal mw-magiclink-isbn">ISBN 978 0 316 09811 3</a>
+ISBN
+9780316098113
+ISBN 978
+0316098113
+</p>
+!! end
+
  !! test
  Magic links: PMID incorrectly converts space to underscore
  !! wikitext
@@ -8970,6 +9000,19 @@ This is PMID 1234 but thisPMID 1234 is not PMID 1234linked.
  </p>
  !! end
  
+!! test
+Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)
+!! wikitext
+PMID &nbsp;&#160;&#0160;&#xA0;&#Xa0; 1234
+PMID
+1234
+!! html
+<p><a class="external mw-magiclink-pmid" rel="nofollow" href="//www.ncbi.nlm.nih.gov/pubmed/1234?dopt=Abstract">PMID 1234</a>
+PMID
+1234
+</p>
+!! end
+
  ###
  ### Templates
  ####
author	C. Scott Ananian <cscott@cscott.net>
	Fri, 16 May 2014 00:35:59 +0000 (17:35 -0700)
committer	Tim Starling <tstarling@wikimedia.org>
	Mon, 22 Dec 2014 04:14:55 +0000 (04:14 +0000)
RELEASE-NOTES-1.25		patch | blob | history
includes/parser/Parser.php		patch | blob | history
tests/parser/parserTests.txt		patch | blob | history