if ( $this->guessVariant( $text, $toVariant ) ) {
return $text;
}
-
/* we convert everything except:
1. HTML markups (anything between < and >)
2. HTML entities
// Guard against delimiter nulls in the input
// (should never happen: see T159174)
$text = str_replace( "\000", '', $text );
+ $text = str_replace( "\004", '', $text );
$markupMatches = null;
$elementMatches = null;
// We hit the end.
$elementPos = strlen( $text );
$element = '';
+ } elseif( substr( $element, -1 ) === "\004" ) {
+ // This can sometimes happen if we have
+ // unclosed html tags (for example,
+ // when converting a title attribute
+ // during a recursive call that contains
+ // a &lt;, e.g. <div title="&lt;">).
+ $element = substr( $element, 0, -1 );
}
} else {
// If we hit here, then Language Converter could be tricked
if ( $element !== ''
&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
) {
+ // FIXME: this decodes entities, so if you have something
+ // like <div title="foo&lt;bar"> the bar won't get
+ // translated, since after entity decoding it looks like
+ // unclosed html and we call this method recursively
+ // on attributes.
$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
+ // Ensure self-closing tags stay self-closing.
+ $close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
$changed = false;
foreach ( [ 'title', 'alt' ] as $attrName ) {
if ( !isset( $attrs[$attrName] ) ) {
}
if ( $changed ) {
$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
- $elementMatches[3];
+ $close . $elementMatches[3];
}
}
$literalBlob .= $element . "\000";