$ig->setShowFilename( false );
}
if ( isset( $params['caption'] ) ) {
- $caption = $params['caption'];
- $caption = htmlspecialchars( $caption );
- $caption = $this->replaceInternalLinks( $caption );
+ // NOTE: We aren't passing a frame here or below. Frame info
+ // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
+ // See T107332#4030581
+ $caption = $this->recursiveTagParse( $params['caption'] );
$ig->setCaptionHtml( $caption );
}
if ( isset( $params['perrow'] ) ) {
# that are later expanded to html- so expand them now and
# remove the tags
$tooltip = $this->mStripState->unstripBoth( $tooltip );
+ # Compatibility hack! In HTML certain entity references not terminated
+ # by a semicolon are decoded (but not if we're in an attribute; that's
+ # how link URLs get away without properly escaping & in queries).
+ # But wikitext has always required semicolon-termination of entities,
+ # so encode & where needed to avoid decode of semicolon-less entities.
+ # See T209236 and
+ # https://www.w3.org/TR/html5/syntax.html#named-character-references
+ # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
+ $tooltip = preg_replace( "/
+ & # 1. entity prefix
+ (?= # 2. followed by:
+ (?: # a. one of the legacy semicolon-less named entities
+ A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
+ C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
+ GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
+ O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
+ U(?:acute|circ|grave|uml)|Yacute|
+ a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
+ c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
+ divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
+ frac(?:1(?:2|4)|34)|
+ gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
+ i(?:acute|circ|excl|grave|quest|uml)|laquo|
+ lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
+ m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
+ not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
+ o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
+ p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
+ s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
+ u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
+ )
+ (?:[^;]|$)) # b. and not followed by a semicolon
+ # S = study, for efficiency
+ /Sx", '&amp;', $tooltip );
$tooltip = Sanitizer::stripAllTags( $tooltip );
return $tooltip;
/**
* Regular expression to match HTML/XML attribute pairs within a tag.
- * Allows some... latitude. Based on,
- * https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
- * Used in Sanitizer::fixTagAttributes and Sanitizer::decodeTagAttributes
+ * Based on https://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
+ * Used in Sanitizer::decodeTagAttributes
+ * Capture groups: 1 = attribute name, 2 = the optional "=value" part,
+ * 3 = double-quoted value, 4 = single-quoted value, 5 = unquoted value.
* @return string
*/
static function getAttribsRegex() {
if ( self::$attribsRegex === null ) {
- $attribFirst = "[:_\p{L}\p{N}]";
- $attrib = "[:_\.\-\p{L}\p{N}]";
- $space = '[\x09\x0a\x0c\x0d\x20]';
+ $spaceChars = '\x09\x0a\x0c\x0d\x20';
+ $space = "[{$spaceChars}]";
+ $attrib = "[^{$spaceChars}\/>=]";
+ $attribFirst = "(?:{$attrib}|=)";
self::$attribsRegex =
- "/(?:^|$space)({$attribFirst}{$attrib}*)
+ "/({$attribFirst}{$attrib}*)
($space*=$space*
(?:
# The attribute value: quoted or alone
\"([^\"]*)(?:\"|\$)
| '([^']*)(?:'|\$)
| (((?!$space|>).)*)
)
- )?(?=$space|\$)/sxu";
+ )?/sxu";
}
return self::$attribsRegex;
}
+ /**
+ * Cached pattern built by getAttribNameRegex(); stays null until first use.
+ */
+ private static $attribNameRegex;
+
+ /**
+ * Pattern that an attribute name must match in full to be accepted.
+ * Sanitizer::decodeTagAttributes uses it to filter attributes.
+ * @return string
+ */
+ static function getAttribNameRegex() {
+ 	// Already built on an earlier call: reuse the cached string.
+ 	if ( self::$attribNameRegex !== null ) {
+ 		return self::$attribNameRegex;
+ 	}
+ 	// Character class for the first character, and for any later character.
+ 	$firstChar = '[:_\p{L}\p{N}]';
+ 	$laterChar = '[:_\.\-\p{L}\p{N}]';
+ 	self::$attribNameRegex = '/^(' . $firstChar . $laterChar . '*)$/sxu';
+ 	return self::$attribNameRegex;
+ }
+
/**
* Return the various lists of recognized tags
* @param array $extratags For any extra tags to include
return [];
}
- $attribs = [];
$pairs = [];
if ( !preg_match_all(
self::getAttribsRegex(),
$text,
$pairs,
PREG_SET_ORDER ) ) {
- return $attribs;
+ return [];
}
+ $attribs = [];
foreach ( $pairs as $set ) {
$attribute = strtolower( $set[1] );
+
+ // Filter attribute names with unacceptable characters
+ if ( !preg_match( self::getAttribNameRegex(), $attribute ) ) {
+ continue;
+ }
+
$value = self::getTagAttributeCallback( $set );
// Normalize whitespace
* @return string
*/
function getLegend( $key ) {
- $aliasKey = ( $key === 'optoutwatchlist' || $key === 'optoutrc' ) ? 'opt-out' : $key;
- $legend = parent::getLegend( $aliasKey );
+ $legend = parent::getLegend( $key );
// $legend is passed by reference so PreferencesGetLegend handlers can replace it
Hooks::run( 'PreferencesGetLegend', [ $this, $key, &$legend ] );
return $legend;
}
"prefs-editor": "Editor",
"prefs-preview": "Preview",
"prefs-advancedrc": "Advanced options",
- "prefs-opt-out": "Opt out of improvements",
"prefs-advancedrendering": "Advanced options",
"prefs-advancedsearchoptions": "Advanced options",
"prefs-advancedwatchlist": "Advanced options",
"prefs-editor": "Used in [[Special:Preferences]], tab \"Editing\" ({{int:prefs-editing}}).\n\n{{Identical|Editor}}",
"prefs-preview": "Used in [[Special:Preferences]], tab \"Editing\".\n{{Identical|Preview}}",
"prefs-advancedrc": "Used in [[Special:Preferences]], tab \"Recent changes\".\n{{Identical|Advanced options}}",
- "prefs-opt-out": "Used in [[Special:Preferences]], tabs \"Recent changes\" and \"Watchlist\".",
"prefs-advancedrendering": "Used in [[Special:Preferences]], tab \"Appearance\".\n{{Identical|Advanced options}}",
"prefs-advancedsearchoptions": "Used in [[Special:Preferences]], tab \"Search options\".\n{{Identical|Advanced options}}",
"prefs-advancedwatchlist": "Used in [[Special:Preferences]], tab \"Watchlist\".\n{{Identical|Advanced options}}",
!! end
-# Note that the PHP parser output appears to be broken when the table
-# end tag is not separated by a space from the style attribute
!! test
A table with stray table end tags on start tag line (wt2html)
!! options
|foo
|}
!! html/php+tidy
-<table style=""color:">
+<table style="color: red;">
</table><table style="color: red;">
<tbody><tr>
<td>foo
</td></tr></tbody></table>
-<table style=""color:" id="foo">
+<table style="color: red;" id="foo">
<tbody><tr>
<td>foo
</td></tr></tbody></table>
<div title=bar />
<div title=bar/>
<div title=bar/ >
-!! html/php
-<p><div title />
-<div title/>
-</p>
-<div>
-<p><div title=bar />
-<div title=bar/>
-</p>
-<div title="bar/"></div>
-</div>
-
+!! html/php+tidy
+<div title=""></div>
+<div title=""></div>
+<div title="">
+<div title="bar"></div>
+<div title="bar"></div>
+<div title="bar/">
+</div></div>
!! html/parsoid
<div title="" data-parsoid='{"stx":"html","selfClose":true}'></div>
<div title="" data-parsoid='{"stx":"html","selfClose":true}'></div>
<br title=bar />
<br title=bar/>
<br title=bar/ >
-!! html/php
+!! html/php+tidy
<p><br title="" />
<br title="" />
-<br />
+<br title="" />
<br title="bar" />
<br title="bar" />
<br title="bar/" />
</p>
!! end
+!! test
+Quoted attributes without spaces
+!! options
+parsoid=wt2html
+!! wikitext
+<div class="foo"style="color:red">red</div>
+!! html/php+tidy
+<div class="foo" style="color:red">red</div>
+!! html/parsoid
+<div class="foo" style="color:red">red</div>
+!! end
+
!! test
Horizontal ruler (should it add that extra space?)
!! wikitext
</ul>
!! end
+!! test
+HTML entity prefix in link markup (T209236)
+!! wikitext
+[[File:Foobar.jpg|link=https://example.com?foo&params=bar]]
+
+<!-- consistency with gallery extension -->
+<gallery>
+File:Foobar.jpg|link=https://example.com?foo&params=bar
+</gallery>
+!! html/php+tidy
+<p><a href="https://example.com?foo&amp;params=bar" rel="nofollow"><img alt="Foobar.jpg" src="http://example.com/images/3/3a/Foobar.jpg" width="1941" height="220" /></a>
+</p>
+<ul class="gallery mw-gallery-traditional">
+ <li class="gallerybox" style="width: 155px"><div style="width: 155px">
+ <div class="thumb" style="width: 150px;"><div style="margin:68px auto;"><a href="https://example.com?foo&amp;params=bar"><img alt="Foobar.jpg" src="http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg" width="120" height="14" srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg 1.5x, http://example.com/images/thumb/3/3a/Foobar.jpg/240px-Foobar.jpg 2x" /></a></div></div>
+ <div class="gallerytext">
+ </div>
+ </div></li>
+</ul>
+!! html/parsoid
+<p><figure-inline class="mw-default-size" typeof="mw:Image"><a href="https://example.com?foo&amp;params=bar"><img resource="./File:Foobar.jpg" src="//example.com/images/3/3a/Foobar.jpg" data-file-width="1941" data-file-height="220" data-file-type="bitmap" height="220" width="1941"/></a></figure-inline></p>
+
+<!-- consistency with gallery extension -->
+<ul class="gallery mw-gallery-traditional" typeof="mw:Extension/gallery" data-mw='{"name":"gallery","attrs":{},"body":{"extsrc":"\nFile:Foobar.jpg|link=https://example.com?foo&amp;params=bar\n"}}'>
+<li class="gallerybox">
+<div class="thumb"><figure-inline typeof="mw:Image"><a href="https://example.com?foo&amp;params=bar"><img resource="./File:Foobar.jpg" src="//example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg" data-file-width="1941" data-file-height="220" data-file-type="bitmap" height="14" width="120"/></a></figure-inline></div>
+<div class="gallerytext"></div>
+</li>
+</ul>
+!! end
+
!! test
Image with table with attributes in caption
!! options
</p>
!! end
-## Don't expect Parsoid and PHP to match, since PHP isn't exactly following
-## the HTML5 parsing spec.
+## FIXME: The untrimmed attribute in Parsoid is T205737
!! test
Element with broken attribute syntax
!! options
<div style=" style="123">hi</div>
<div =>ho</div>
!! html/php
-<div style="123">hi</div>
+<div style="style=">hi</div>
<div>ho</div>
!! html/parsoid
</ul>
!! end
-## Whoops, Parsoid shouldn't be parsing templates in the attribute caption!
!! test
Gallery with template inside caption
!! options
</gallery>
!! html/php
<ul class="gallery mw-gallery-traditional">
- <li class='gallerycaption'>{{echo|hi}}</li>
+ <li class='gallerycaption'>hi</li>
<li class="gallerybox" style="width: 155px"><div style="width: 155px">
<div class="thumb" style="width: 150px;"><div style="margin:68px auto;"><a href="/wiki/File:Foobar.jpg" class="image"><img alt="" src="http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg" width="120" height="14" srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg 1.5x, http://example.com/images/thumb/3/3a/Foobar.jpg/240px-Foobar.jpg 2x" /></a></div></div>
<div class="gallerytext">
</ul>
!! end
+!! test
+Gallery with wikitext inside gallery caption
+!! wikitext
+<gallery caption="# List item
+
+Text '''bold''' [[link]] {{ns:-1}}
+
+[[File:Foobar.jpg|thumb|File in gallery caption]]">
+File:Foobar.jpg|Image caption
+</gallery>
+!! html/php
+<ul class="gallery mw-gallery-traditional">
+ <li class='gallerycaption'># List item Text <b>bold</b> <a href="/index.php?title=Link&amp;action=edit&amp;redlink=1" class="new" title="Link (page does not exist)">link</a> Special <div class="thumb tright"><div class="thumbinner" style="width:182px;"><a href="/wiki/File:Foobar.jpg" class="image"><img alt="" src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" width="180" height="20" class="thumbimage" srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/270px-Foobar.jpg 1.5x, http://example.com/images/thumb/3/3a/Foobar.jpg/360px-Foobar.jpg 2x" /></a> <div class="thumbcaption"><div class="magnify"><a href="/wiki/File:Foobar.jpg" class="internal" title="Enlarge"></a></div>File in gallery caption</div></div></div></li>
+ <li class="gallerybox" style="width: 155px"><div style="width: 155px">
+ <div class="thumb" style="width: 150px;"><div style="margin:68px auto;"><a href="/wiki/File:Foobar.jpg" class="image"><img alt="" src="http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg" width="120" height="14" srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg 1.5x, http://example.com/images/thumb/3/3a/Foobar.jpg/240px-Foobar.jpg 2x" /></a></div></div>
+ <div class="gallerytext">
+<p>Image caption
+</p>
+ </div>
+ </div></li>
+</ul>
+
+!! end
+
!! test
Gallery with wikitext inside caption
!! options