} else {
# attempt to sanitize at least some nesting problems
# (T4702 and quite a few others)
+ # This code path is buggy and deprecated!
+ wfDeprecated( 'disabling tidy', '1.33' );
$tidyregs = [
# ''Something [http://www.cool.com cool''] -->
# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
* @return string
*/
public static function normalizeLinkUrl( $url ) {
- # First, make sure unsafe characters are encoded
+ # Test for RFC 3986 IPv6 syntax
+ $scheme = '[a-z][a-z0-9+.-]*:';
+ $userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
+ $ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
+ if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
+ IP::isValid( rawurldecode( $m[1] ) )
+ ) {
+ $isIPv6 = rawurldecode( $m[1] );
+ } else {
+ $isIPv6 = false;
+ }
+
+ # Make sure unsafe characters are encoded
$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
function ( $m ) {
return rawurlencode( $m[0] );
$ret = self::normalizeUrlComponent(
substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
+ # Fix IPv6 syntax
+ if ( $isIPv6 !== false ) {
+ $ipv6Host = "%5B({$isIPv6})%5D";
+ $ret = preg_replace(
+ "<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
+ "$1[$2]",
+ $ret
+ );
+ }
+
return $ret;
}
# $args is a list of argument nodes, starting from index 0, not including $part1
# @todo FIXME: If piece['parts'] is null then the call to getLength()
# below won't work b/c this $args isn't an object
- $args = ( null == $piece['parts'] ) ? [] : $piece['parts'];
+ $args = ( $piece['parts'] == null ) ? [] : $piece['parts'];
$profileSection = null; // profile templates
$ig->setShowFilename( false );
}
if ( isset( $params['caption'] ) ) {
- $caption = $params['caption'];
- $caption = htmlspecialchars( $caption );
- $caption = $this->replaceInternalLinks( $caption );
+ // NOTE: We aren't passing a frame here or below. Frame info
+ // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
+ // See T107332#4030581
+ $caption = $this->recursiveTagParse( $params['caption'] );
$ig->setCaptionHtml( $caption );
}
if ( isset( $params['perrow'] ) ) {
$alt = $this->stripAltText( $match, false );
break;
case 'gallery-internal-link':
- $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
+ $linkValue = $this->stripAltText( $match, false );
if ( preg_match( '/^-{R|(.*)}-$/', $linkValue ) ) {
// Result of LanguageConverter::markNoConversion
// invoked on an external link.
# * bottom
# * text-bottom
+ global $wgMediaInTargetLanguage;
+
# Protect LanguageConverter markup when splitting into parts
$parts = StringUtils::delimiterExplode(
'-{', '}-', '|', $options, true /* allow nesting */
$value = $this->stripAltText( $value, $holders );
break;
case 'link':
- list( $paramName, $value ) = $this->parseLinkParameter( $value );
+ list( $paramName, $value ) =
+ $this->parseLinkParameter(
+ $this->stripAltText( $value, $holders )
+ );
if ( $paramName ) {
$validated = true;
if ( $paramName === 'no-link' ) {
# Use the "caption" for the tooltip text
$params['frame']['title'] = $this->stripAltText( $caption, $holders );
}
+ if ( $wgMediaInTargetLanguage ) {
+ $params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();
+ }
Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );
* Adds an entry to appropriate link tables.
*
* @since 1.32
+ * @param string $value
* @return array of `[ type, target ]`, where:
* - `type` is one of:
* - `null`: Given value is not a valid link target, use default
# that are later expanded to html- so expand them now and
# remove the tags
$tooltip = $this->mStripState->unstripBoth( $tooltip );
+ # Compatibility hack! In HTML certain entity references not terminated
+ # by a semicolon are decoded (but not if we're in an attribute; that's
+ # how link URLs get away without properly escaping & in queries).
+ # But wikitext has always required semicolon-termination of entities,
+ # so encode & where needed to avoid decode of semicolon-less entities.
+ # See T209236 and
+ # https://www.w3.org/TR/html5/syntax.html#named-character-references
+ # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
+ $tooltip = preg_replace( "/
+ & # 1. entity prefix
+ (?= # 2. followed by:
+ (?: # a. one of the legacy semicolon-less named entities
+ A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
+ C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
+ GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
+ O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
+ U(?:acute|circ|grave|uml)|Yacute|
+ a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
+ c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
+ divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
+ frac(?:1(?:2|4)|34)|
+ gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
+ i(?:acute|circ|excl|grave|quest|uml)|laquo|
+ lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
+ m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
+ not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
+ o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
+ p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
+ s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
+ u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
+ )
+ (?:[^;]|$)) # b. and not followed by a semicolon
+ # S = study, for efficiency
+ /Sx", '&', $tooltip );
$tooltip = Sanitizer::stripAllTags( $tooltip );
return $tooltip;