This fixes an issue in
f21f3942 where if there was an html
element with an alt or title attribute containing an &lt;
entity, an ASCII EOT control character (0x04) could be
inserted into the text if language converter was enabled.
Due to a really old bug in language converter, self-closed tags
got turned into non-self-closed tags. However, due to a different
bug, which was fixed in
f21f3942, this code path was rarely taken,
so nobody noticed until now.
Follow-up Idbc45cac12
Bug: T180552
Change-Id: I077d30c50fcb419837fef937d27caca307153d2d
if ( $this->guessVariant( $text, $toVariant ) ) {
return $text;
}
if ( $this->guessVariant( $text, $toVariant ) ) {
return $text;
}
/* we convert everything except:
1. HTML markups (anything between < and >)
2. HTML entities
/* we convert everything except:
1. HTML markups (anything between < and >)
2. HTML entities
// Guard against delimiter nulls in the input
// (should never happen: see T159174)
$text = str_replace( "\000", '', $text );
// Guard against delimiter nulls in the input
// (should never happen: see T159174)
$text = str_replace( "\000", '', $text );
+ $text = str_replace( "\004", '', $text );
$markupMatches = null;
$elementMatches = null;
$markupMatches = null;
$elementMatches = null;
// We hit the end.
$elementPos = strlen( $text );
$element = '';
// We hit the end.
$elementPos = strlen( $text );
$element = '';
+ } elseif( substr( $element, -1 ) === "\004" ) {
+ // This can sometimes happen if we have
+ // unclosed html tags (For example
+ // when converting a title attribute
+ // during a recursive call that contains
+ // a < e.g. <div title="<">.
+ $element = substr( $element, 0, -1 );
}
} else {
// If we hit here, then Language Converter could be tricked
}
} else {
// If we hit here, then Language Converter could be tricked
if ( $element !== ''
&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
) {
if ( $element !== ''
&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
) {
+ // FIXME, this decodes entities, so if you have something
+ // like <div title="foo<bar"> the bar won't get
+ // translated since after entity decoding it looks like
+ // unclosed html and we call this method recursively
+ // on attributes.
$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
+ // Ensure self-closing tags stay self-closing.
+ $close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
$changed = false;
foreach ( [ 'title', 'alt' ] as $attrName ) {
if ( !isset( $attrs[$attrName] ) ) {
$changed = false;
foreach ( [ 'title', 'alt' ] as $attrName ) {
if ( !isset( $attrs[$attrName] ) ) {
}
if ( $changed ) {
$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
}
if ( $changed ) {
$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
+ $close . $elementMatches[3];
}
}
$literalBlob .= $element . "\000";
}
}
$literalBlob .= $element . "\000";
[[File:Foobar.jpg|alt=-{}-foAjrjvi-{}-]]
!! html
<p>
[[File:Foobar.jpg|alt=-{}-foAjrjvi-{}-]]
!! html
<p>
-</p><p><a href="/wiki/%D0%94%D0%B0%D1%82%D0%BE%D1%82%D0%B5%D0%BA%D0%B0:Foobar.jpg" class="image"><img alt="" onload="alert(1)" data-foo="" src="http://example.com/images/3/3a/Foobar.jpg" width="1941" height="220"></a>
+</p><p><a href="/wiki/%D0%94%D0%B0%D1%82%D0%BE%D1%82%D0%B5%D0%BA%D0%B0:Foobar.jpg" class="image"><img alt="" onload="alert(1)" data-foo="" src="http://example.com/images/3/3a/Foobar.jpg" width="1941" height="220" /></a>