* @ingroup Parser
*/
+use MediaWiki\MediaWikiServices;
+
/**
* HTML sanitizer for MediaWiki
* @ingroup Parser
$fixtags = [
# French spaces, last one Guillemet-left
# only if there is something before the space
- '/(.) (?=[?:;!%»])/u' => "\\1$space",
+ # and a non-word character after the punctuation.
+ '/(\S) (?=[?:;!%»›](?!\w))/u' => "\\1$space",
# French spaces, Guillemet-right
- '/(«) /u' => "\\1$space",
+ '/([«‹]) /u' => "\\1$space",
];
return preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
}
* @return string
*/
private static function normalizeWhitespace( $text ) {
// NOTE(review): this block is shown in patch form; the "-" lines look like
// the previous body and the "+" lines its replacement.
// Previous body: every CRLF pair, or every single space / CR / LF / tab,
// is replaced by one space, so runs of whitespace keep their length.
// Replacement body: the trailing "+" quantifier makes each run of those
// characters collapse into a single space, and trim() then strips
// whitespace from both ends of the result.
- return preg_replace(
- '/\r\n|[\x20\x0d\x0a\x09]/',
+ return trim( preg_replace(
+ '/(?:\r\n|[\x20\x0d\x0a\x09])+/',
' ',
- $text );
+ $text ) );
}
/**
* @return string Still normalized, without entities
*/
public static function decodeCharReferencesAndNormalize( $text ) {
- global $wgContLang;
$text = preg_replace_callback(
self::CHAR_REFS_REGEX,
[ self::class, 'decodeCharReferencesCallback' ],
);
if ( $count ) {
- return $wgContLang->normalize( $text );
+ return MediaWikiServices::getInstance()->getContentLanguage()->normalize( $text );
} else {
return $text;
}