Merge "Do not ignore the 'Prevent this user from editing his own talk page while...
authorjenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Tue, 27 Nov 2018 16:28:01 +0000 (16:28 +0000)
committerGerrit Code Review <gerrit@wikimedia.org>
Tue, 27 Nov 2018 16:28:01 +0000 (16:28 +0000)
includes/parser/Parser.php
includes/parser/Sanitizer.php
includes/specials/forms/PreferencesFormOOUI.php
languages/i18n/en.json
languages/i18n/qqq.json
tests/parser/parserTests.txt

index 11825fa..81e23ad 100644 (file)
@@ -5066,9 +5066,10 @@ class Parser {
                        $ig->setShowFilename( false );
                }
                if ( isset( $params['caption'] ) ) {
-                       $caption = $params['caption'];
-                       $caption = htmlspecialchars( $caption );
-                       $caption = $this->replaceInternalLinks( $caption );
+                       // NOTE: We aren't passing a frame here or below.  Frame info
+                       // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
+                       // See T107332#4030581
+                       $caption = $this->recursiveTagParse( $params['caption'] );
                        $ig->setCaptionHtml( $caption );
                }
                if ( isset( $params['perrow'] ) ) {
@@ -5524,6 +5525,40 @@ class Parser {
                # that are later expanded to html- so expand them now and
                # remove the tags
                $tooltip = $this->mStripState->unstripBoth( $tooltip );
+               # Compatibility hack!  In HTML certain entity references not terminated
+               # by a semicolon are decoded (but not if we're in an attribute; that's
+               # how link URLs get away without properly escaping & in queries).
+               # But wikitext has always required semicolon-termination of entities,
+               # so encode & where needed to avoid decode of semicolon-less entities.
+               # See T209236 and
+               # https://www.w3.org/TR/html5/syntax.html#named-character-references
+               # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
+               $tooltip = preg_replace( "/
+                       &                       # 1. entity prefix
+                       (?=                     # 2. followed by:
+                       (?:                     #  a. one of the legacy semicolon-less named entities
+                               A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
+                               C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
+                               GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
+                               O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
+                               U(?:acute|circ|grave|uml)|Yacute|
+                               a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
+                               c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
+                               divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
+                               frac(?:1(?:2|4)|34)|
+                               gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
+                               i(?:acute|circ|excl|grave|quest|uml)|laquo|
+                               lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
+                               m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
+                               not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
+                               o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
+                               p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
+                               s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
+                               u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
+                       )
+                       (?:[^;]|$))     #  b. and not followed by a semicolon
+                       # S = study, for efficiency
+                       /Sx", '&amp;', $tooltip );
                $tooltip = Sanitizer::stripAllTags( $tooltip );
 
                return $tooltip;
index 84f8083..f8c3bc2 100644 (file)
@@ -349,18 +349,18 @@ class Sanitizer {
 
        /**
         * Regular expression to match HTML/XML attribute pairs within a tag.
-        * Allows some... latitude. Based on,
-        * https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
-        * Used in Sanitizer::fixTagAttributes and Sanitizer::decodeTagAttributes
+        * Based on https://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
+        * Used in Sanitizer::decodeTagAttributes
         * @return string
         */
        static function getAttribsRegex() {
                if ( self::$attribsRegex === null ) {
-                       $attribFirst = "[:_\p{L}\p{N}]";
-                       $attrib = "[:_\.\-\p{L}\p{N}]";
-                       $space = '[\x09\x0a\x0c\x0d\x20]';
+                       $spaceChars = '\x09\x0a\x0c\x0d\x20';
+                       $space = "[{$spaceChars}]";
+                       $attrib = "[^{$spaceChars}\/>=]";
+                       $attribFirst = "(?:{$attrib}|=)";
                        self::$attribsRegex =
-                               "/(?:^|$space)({$attribFirst}{$attrib}*)
+                               "/({$attribFirst}{$attrib}*)
                                        ($space*=$space*
                                        (?:
                                                # The attribute value: quoted or alone
@@ -368,11 +368,29 @@ class Sanitizer {
                                                | '([^']*)(?:'|\$)
                                                | (((?!$space|>).)*)
                                        )
-                               )?(?=$space|\$)/sxu";
+                               )?/sxu";
                }
                return self::$attribsRegex;
        }
 
+       /**
+        * Lazy-initialised attribute name regex, see getAttribNameRegex()
+        */
+       private static $attribNameRegex;
+
+       /**
+        * Get the regular expression an attribute name has to match in order
+        * to be accepted.
+        *
+        * An acceptable name starts with a colon, an underscore, a Unicode
+        * letter (\p{L}) or a Unicode number (\p{N}); every following character
+        * may additionally be a dot or a hyphen. The pattern is anchored with
+        * ^ and $, so it is meant to be applied to a bare attribute name rather
+        * than searched for inside a longer string. Note that without the D
+        * modifier, $ in PCRE also matches just before a trailing newline.
+        *
+        * The pattern string is built on the first call and cached in
+        * self::$attribNameRegex; later calls return the cached string.
+        *
+        * Used in Sanitizer::decodeTagAttributes to filter attributes.
+        * @return string PCRE pattern including delimiters and the "sxu" modifiers
+        */
+       static function getAttribNameRegex() {
+               if ( self::$attribNameRegex === null ) {
+                       $attribFirst = "[:_\p{L}\p{N}]";
+                       $attrib = "[:_\.\-\p{L}\p{N}]";
+                       self::$attribNameRegex = "/^({$attribFirst}{$attrib}*)$/sxu";
+               }
+               return self::$attribNameRegex;
+       }
+
        /**
         * Return the various lists of recognized tags
         * @param array $extratags For any extra tags to include
@@ -1434,18 +1452,24 @@ class Sanitizer {
                        return [];
                }
 
-               $attribs = [];
                $pairs = [];
                if ( !preg_match_all(
                        self::getAttribsRegex(),
                        $text,
                        $pairs,
                        PREG_SET_ORDER ) ) {
-                       return $attribs;
+                       return [];
                }
 
+               $attribs = [];
                foreach ( $pairs as $set ) {
                        $attribute = strtolower( $set[1] );
+
+                       // Filter attribute names with unacceptable characters
+                       if ( !preg_match( self::getAttribNameRegex(), $attribute ) ) {
+                               continue;
+                       }
+
                        $value = self::getTagAttributeCallback( $set );
 
                        // Normalize whitespace
index bf4d9af..81abf1c 100644 (file)
@@ -227,8 +227,7 @@ class PreferencesFormOOUI extends OOUIHTMLForm {
         * @return string
         */
        function getLegend( $key ) {
-               $aliasKey = ( $key === 'optoutwatchlist' || $key === 'optoutrc' ) ? 'opt-out' : $key;
-               $legend = parent::getLegend( $aliasKey );
+               $legend = parent::getLegend( $key );
                Hooks::run( 'PreferencesGetLegend', [ $this, $key, &$legend ] );
                return $legend;
        }
index 72e6716..b2a755d 100644 (file)
        "prefs-editor": "Editor",
        "prefs-preview": "Preview",
        "prefs-advancedrc": "Advanced options",
-       "prefs-opt-out": "Opt out of improvements",
        "prefs-advancedrendering": "Advanced options",
        "prefs-advancedsearchoptions": "Advanced options",
        "prefs-advancedwatchlist": "Advanced options",
index 182dc42..26fed94 100644 (file)
        "prefs-editor": "Used in [[Special:Preferences]], tab \"Editing\" ({{int:prefs-editing}}).\n\n{{Identical|Editor}}",
        "prefs-preview": "Used in [[Special:Preferences]], tab \"Editing\".\n{{Identical|Preview}}",
        "prefs-advancedrc": "Used in [[Special:Preferences]], tab \"Recent changes\".\n{{Identical|Advanced options}}",
-       "prefs-opt-out": "Used in [[Special:Preferences]], tabs \"Recent changes\" and \"Watchlist\".",
        "prefs-advancedrendering": "Used in [[Special:Preferences]], tab \"Appearence\".\n{{Identical|Advanced options}}",
        "prefs-advancedsearchoptions": "Used in [[Special:Preferences]], tab \"Search options\".\n{{Identical|Advanced options}}",
        "prefs-advancedwatchlist": "Used in [[Special:Preferences]], tab \"Watchlist\".\n{{Identical|Advanced options}}",
index d2fbd8d..fdf1d5b 100644 (file)
@@ -6273,8 +6273,6 @@ parsoid=wt2html
 
 !! end
 
-# Note that the PHP parser output appears to be broken when the table
-# end tag is not separated by a space from the style attribute
 !! test
 A table with stray table end tags on start tag line (wt2html)
 !! options
@@ -6294,13 +6292,13 @@ parsoid=wt2html
 |foo
 |}
 !! html/php+tidy
-<table style="&quot;color:">
+<table style="color: red;">
 
 </table><table style="color: red;">
 <tbody><tr>
 <td>foo
 </td></tr></tbody></table>
-<table style="&quot;color:" id="foo">
+<table style="color: red;" id="foo">
 <tbody><tr>
 <td>foo
 </td></tr></tbody></table>
@@ -9652,17 +9650,14 @@ Handling html with a div self-closing tag
 <div title=bar />
 <div title=bar/>
 <div title=bar/ >
-!! html/php
-<p>&lt;div title /&gt;
-&lt;div title/&gt;
-</p>
-<div>
-<p>&lt;div title=bar /&gt;
-&lt;div title=bar/&gt;
-</p>
-<div title="bar/"></div>
-</div>
-
+!! html/php+tidy
+<div title=""></div>
+<div title=""></div>
+<div title="">
+<div title="bar"></div>
+<div title="bar"></div>
+<div title="bar/">
+</div></div>
 !! html/parsoid
 <div title="" data-parsoid='{"stx":"html","selfClose":true}'></div>
 <div title="" data-parsoid='{"stx":"html","selfClose":true}'></div>
@@ -9703,10 +9698,10 @@ Handling html with a br self-closing tag
 <br title=bar />
 <br title=bar/>
 <br title=bar/ >
-!! html/php
+!! html/php+tidy
 <p><br title="" />
 <br title="" />
-<br />
+<br title="" />
 <br title="bar" />
 <br title="bar" />
 <br title="bar/" />
@@ -9721,6 +9716,18 @@ Handling html with a br self-closing tag
 </p>
 !! end
 
+!! test
+Quoted attributes without spaces
+!! options
+parsoid=wt2html
+!! wikitext
+<div class="foo"style="color:red">red</div>
+!! html/php+tidy
+<div class="foo" style="color:red">red</div>
+!! html/parsoid
+<div class="foo" style="color:red">red</div>
+!! end
+
 !! test
 Horizontal ruler (should it add that extra space?)
 !! wikitext
@@ -15476,6 +15483,37 @@ File:Foobar.jpg|link=Foo<nowiki>''s_bar''</nowiki>s|caption
 </ul>
 !! end
 
+!! test
+HTML entity prefix in link markup (T209236)
+!! wikitext
+[[File:Foobar.jpg|link=https://example.com?foo&params=bar]]
+
+<!-- consistency with gallery extension -->
+<gallery>
+File:Foobar.jpg|link=https://example.com?foo&params=bar
+</gallery>
+!! html/php+tidy
+<p><a href="https://example.com?foo&amp;params=bar" rel="nofollow"><img alt="Foobar.jpg" src="http://example.com/images/3/3a/Foobar.jpg" width="1941" height="220" /></a>
+</p>
+<ul class="gallery mw-gallery-traditional">
+               <li class="gallerybox" style="width: 155px"><div style="width: 155px">
+                       <div class="thumb" style="width: 150px;"><div style="margin:68px auto;"><a href="https://example.com?foo&amp;params=bar"><img alt="Foobar.jpg" src="http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg" width="120" height="14" srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg 1.5x, http://example.com/images/thumb/3/3a/Foobar.jpg/240px-Foobar.jpg 2x" /></a></div></div>
+                       <div class="gallerytext">
+                       </div>
+               </div></li>
+</ul>
+!! html/parsoid
+<p><figure-inline class="mw-default-size" typeof="mw:Image"><a href="https://example.com?foo&amp;params=bar"><img resource="./File:Foobar.jpg" src="//example.com/images/3/3a/Foobar.jpg" data-file-width="1941" data-file-height="220" data-file-type="bitmap" height="220" width="1941"/></a></figure-inline></p>
+
+<!-- consistency with gallery extension -->
+<ul class="gallery mw-gallery-traditional" typeof="mw:Extension/gallery" data-mw='{"name":"gallery","attrs":{},"body":{"extsrc":"\nFile:Foobar.jpg|link=https://example.com?foo&amp;params=bar\n"}}'>
+<li class="gallerybox">
+<div class="thumb"><figure-inline typeof="mw:Image"><a href="https://example.com?foo&amp;params=bar"><img resource="./File:Foobar.jpg" src="//example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg" data-file-width="1941" data-file-height="220" data-file-type="bitmap" height="14" width="120"/></a></figure-inline></div>
+<div class="gallerytext"></div>
+</li>
+</ul>
+!! end
+
 !! test
 Image with table with attributes in caption
 !! options
@@ -18182,8 +18220,7 @@ HTML tag with leading space is parsed as text
 </p>
 !! end
 
-## Don't expect Parsoid and PHP to match, since PHP isn't exactly following
-## the HTML5 parsing spec.
+## FIXME: The untrimmed attribute in Parsoid is T205737
 !! test
 Element with broken attribute syntax
 !! options
@@ -18192,7 +18229,7 @@ parsoid=wt2html
 <div style=" style="123">hi</div>
 <div =>ho</div>
 !! html/php
-<div style="123">hi</div>
+<div style="style=">hi</div>
 <div>ho</div>
 
 !! html/parsoid
@@ -21526,7 +21563,6 @@ image:foobar.jpg|link=Main Page#section|caption
 </ul>
 !! end
 
-## Whoops, Parsoid shouldn't be parsing templates in the attribute caption!
 !! test
 Gallery with template inside caption
 !! options
@@ -21539,7 +21575,7 @@ File:Foobar.jpg|{{echo|ho}}
 </gallery>
 !! html/php
 <ul class="gallery mw-gallery-traditional">
-       <li class='gallerycaption'>{{echo|hi}}</li>
+       <li class='gallerycaption'>hi</li>
                <li class="gallerybox" style="width: 155px"><div style="width: 155px">
                        <div class="thumb" style="width: 150px;"><div style="margin:68px auto;"><a href="/wiki/File:Foobar.jpg" class="image"><img alt="" src="http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg" width="120" height="14" srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg 1.5x, http://example.com/images/thumb/3/3a/Foobar.jpg/240px-Foobar.jpg 2x" /></a></div></div>
                        <div class="gallerytext">
@@ -21556,6 +21592,30 @@ File:Foobar.jpg|{{echo|ho}}
 </ul>
 !! end
 
+!! test
+Gallery with wikitext inside gallery caption
+!! wikitext
+<gallery caption="# List item
+
+Text '''bold''' [[link]] {{ns:-1}}
+
+[[File:Foobar.jpg|thumb|File in gallery caption]]">
+File:Foobar.jpg|Image caption
+</gallery>
+!! html/php
+<ul class="gallery mw-gallery-traditional">
+       <li class='gallerycaption'># List item Text <b>bold</b> <a href="/index.php?title=Link&amp;action=edit&amp;redlink=1" class="new" title="Link (page does not exist)">link</a> Special <div class="thumb tright"><div class="thumbinner" style="width:182px;"><a href="/wiki/File:Foobar.jpg" class="image"><img alt="" src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" width="180" height="20" class="thumbimage" srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/270px-Foobar.jpg 1.5x, http://example.com/images/thumb/3/3a/Foobar.jpg/360px-Foobar.jpg 2x" /></a>  <div class="thumbcaption"><div class="magnify"><a href="/wiki/File:Foobar.jpg" class="internal" title="Enlarge"></a></div>File in gallery caption</div></div></div></li>
+               <li class="gallerybox" style="width: 155px"><div style="width: 155px">
+                       <div class="thumb" style="width: 150px;"><div style="margin:68px auto;"><a href="/wiki/File:Foobar.jpg" class="image"><img alt="" src="http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg" width="120" height="14" srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg 1.5x, http://example.com/images/thumb/3/3a/Foobar.jpg/240px-Foobar.jpg 2x" /></a></div></div>
+                       <div class="gallerytext">
+<p>Image caption
+</p>
+                       </div>
+               </div></li>
+</ul>
+
+!! end
+
 !! test
 Gallery with wikitext inside caption
 !! options