return $encValue;
}
+ /**
+  * Armor French spaces with a replacement character
+  *
+  * A space is replaced by $space when it is preceded by any character and
+  * followed by one of ? : ; ! % or a right guillemet (bytes \302\273), or
+  * when it directly follows a left guillemet (bytes \302\253).
+  * $space is always inserted literally: any backslash or dollar sign in it
+  * is escaped so it can never act as a backreference.
+  *
+  * @since 1.32
+  * @param string $text Text to armor
+  * @param string $space Space character for the French spaces, defaults to ' '
+  * @return string Armored text
+  */
+ public static function armorFrenchSpaces( $text, $space = ' ' ) {
+ 	// Replace $ with \$ and \ with \\ unconditionally. Skipping characters
+ 	// that follow a backslash would let input such as \$1 reach preg_replace()
+ 	// as "literal backslash + backreference 1".
+ 	$space = strtr( $space, [ '\\' => '\\\\', '$' => '\\$' ] );
+ 	// Use the delimited ${1} form so that a replacement starting with a digit
+ 	// cannot extend the reference into a two-digit group number (\10, \11, ...).
+ 	$fixtags = [
+ 		# French spaces, last one Guillemet-left
+ 		# only if there is something before the space
+ 		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '${1}' . $space,
+ 		# French spaces, Guillemet-right
+ 		'/(\\302\\253) /' => '${1}' . $space,
+ 	];
+ 	return preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
+ }
+
/**
* Encode an attribute value for HTML tags, with extra armoring
* against further wiki processing.
'__' => '__',
] );
+ # Armor against French spaces detection (T5158)
+ $encValue = self::armorFrenchSpaces( $encValue, ' ' );
+
# Stupid hack
$encValue = preg_replace_callback(
'/((?i)' . wfUrlProtocols() . ')/',
/**
* Given a value, escape it so that it can be used in an id attribute and
- * return it. This will use HTML5 validation if $wgExperimentalHtmlIds is
- * true, allowing anything but ASCII whitespace. Otherwise it will use
- * HTML 4 rules, which means a narrow subset of ASCII, with bad characters
- * escaped with lots of dots.
+ * return it. This will use HTML5 validation, allowing anything but ASCII
+ * whitespace.
+ *
+ * To ensure we don't have to bother escaping anything, we also strip ', ".
+ * TODO: Is this the best tactic?
*
- * To ensure we don't have to bother escaping anything, we also strip ', ",
- * & even if $wgExperimentalIds is true. TODO: Is this the best tactic?
* We also strip # because it upsets IE, and % because it could be
* ambiguous if it's part of something that looks like a percent escape
* (which don't work reliably in fragments cross-browser).
* @param string|array $options String or array of strings (default is array()):
* 'noninitial': This is a non-initial fragment of an id, not a full id,
* so don't pay attention if the first character isn't valid at the
- * beginning of an id. Only matters if $wgExperimentalHtmlIds is
- * false.
- * 'legacy': Behave the way the old HTML 4-based ID escaping worked even
- * if $wgExperimentalHtmlIds is used, so we can generate extra
- * anchors and links won't break.
+ * beginning of an id.
* @return string
*/
static function escapeId( $id, $options = [] ) {
- global $wgExperimentalHtmlIds;
$options = (array)$options;
- if ( $wgExperimentalHtmlIds && !in_array( 'legacy', $options ) ) {
- $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id );
- $id = trim( $id, '_' );
- if ( $id === '' ) {
- // Must have been all whitespace to start with.
- return '_';
- } else {
- return $id;
- }
- }
-
// HTML4-style escaping
static $replace = [
'%3A' => ':',
$id = urlencode( str_replace( ' ', '_', $id ) );
$id = strtr( $id, $replace );
break;
- case 'html5-legacy':
- $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id );
- $id = trim( $id, '_' );
- if ( $id === '' ) {
- // Must have been all whitespace to start with.
- $id = '_';
- }
- break;
default:
throw new InvalidArgumentException( "Invalid mode '$mode' passed to '" . __METHOD__ );
}
*/
static function attributeWhitelist( $element ) {
// Return the entry that setupAttributeWhitelist() holds for $element,
// or an empty array when there is none.
// NOTE(review): presumably each entry lists the attributes permitted on
// that element — confirm against setupAttributeWhitelist().
$list = self::setupAttributeWhitelist();
- return isset( $list[$element] )
- ? $list[$element]
- : [];
+ return $list[$element] ?? []; // missing or null entry falls back to []
}
/**