=== Other changes in 1.32 ===
* Soft hyphens (U+00AD) are now automatically removed from titles; these
characters can accidentally end up in copy-and-pasted titles.
-* Strip Unicode 6.3.0 directional formatting characters (U+061C, U+2066,
- U+2067, U+2068, U+2069) from the title.
* …
== Compatibility ==
'user_case_dbkey' => $dbkey,
];
- # Strip soft hyphens (U+00AD) and Unicode directional formatting characters (U+061C, U+200E,
- # U+200F, U+202A. U+202B, U+202C, U+202D, U+202E, U+2066, U+2067, U+2068, U+2069).
+ # Strip soft hyphens (U+00AD) and Unicode bidi override characters
+ # (U+200E, U+200F, U+202A, U+202B, U+202C, U+202D, U+202E).
# Sometimes they slip into cut-n-pasted page titles, where the
# soft hyphens or override chars get included in list displays.
- $dbkey = preg_replace(
- '/\xC2\xAD|\xD8\x9C|\xE2\x80[\x8E\x8F\xAA-\xAE]|\xE2\x81[\xA6-\xA9]/S',
- '',
- $dbkey
- );
+ $dbkey = preg_replace( '/\xC2\xAD|\xE2\x80[\x8E\x8F\xAA-\xAE]/S', '', $dbkey );
# Clean up whitespace
# Note: use of the /u option on preg_replace here will cause
rWhitespace = /[ _\u00A0\u1680\u180E\u2000-\u200A\u2028\u2029\u202F\u205F\u3000]+/g,
// From MediaWikiTitleCodec::splitTitleString() in PHP
- rStripCharacters = /[\u00AD\u061C\u200E\u200F\u202A-\u202E\u2066-\u2069]/g,
+ rStripCharacters = /[\u00AD\u200E\u200F\u202A-\u202E]/g,
/**
* Slightly modified from Flinfo. Credit goes to Lupo and Flominator.
namespace = defaultNamespace === undefined ? NS_MAIN : defaultNamespace;
title = title
- // Strip soft hyphens and Unicode directional formatting characters
+ // Strip soft hyphens and Unicode bidi override characters
.replace( rStripCharacters, '' )
// Normalise whitespace to underscores and remove duplicates
.replace( rWhitespace, '_' )
# of the categories in wikitext
# Do not remove these characters in edits.
#
-# As part of the serialization, these Unicode directional formatting characters will get stripped.
+# As part of the serialization, these bidi characters will get stripped.
!! test
RTL (\u200f) and LTR (\u200e) markers around category tags should be stripped
!! options
// names ending in "a" to be female.
[ NS_USER, 'Lisa_Müller', '', '', 'de', 'Benutzerin:Lisa Müller' ],
[ NS_MAIN, 'FooBar', '', 'remotetestiw', 'en', 'remotetestiw:FooBar' ],
- // Strip soft hyphen and Unicode directional formatting characters
- [ NS_MAIN, "Foo\xC2\xAD\xD8\x9C\xE2\x80\x8E\xE2\x80\x8F\xE2\x80\xAA\xE2\x80\xAB" .
- "\xE2\x80\xAC\xE2\x80\xAD\xE2\x80\xAE\xE2\x81\xA6\xE2\x81\xA7" .
- "\xE2\x81\xA8\xE2\x81\xA9bar", '', '', 'en',
- "Foo\xC2\xAD\xD8\x9C\xE2\x80\x8E\xE2\x80\x8F\xE2\x80\xAA\xE2\x80\xAB" .
- "\xE2\x80\xAC\xE2\x80\xAD\xE2\x80\xAE\xE2\x81\xA6\xE2\x81\xA7" .
- "\xE2\x81\xA8\xE2\x81\xA9bar", 'Foobar' ],
+ // Strip soft hyphen and Unicode bidi override characters
+ [ NS_MAIN, "Foo\xC2\xAD\xE2\x80\x8E\xE2\x80\x8F\xE2\x80\xAA\xE2\x80\xAB" .
+ "\xE2\x80\xAC\xE2\x80\xAD\xE2\x80\xAEbar", '', '', 'en',
+ "Foo\xC2\xAD\xE2\x80\x8E\xE2\x80\x8F\xE2\x80\xAA\xE2\x80\xAB" .
+ "\xE2\x80\xAC\xE2\x80\xAD\xE2\x80\xAEbar", 'Foobar' ],
];
}
title = new mw.Title( 'Foo \u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000 bar' );
assert.equal( title.getMain(), 'Foo_bar', 'Merge multiple types of whitespace/underscores into a single underscore' );
- title = new mw.Title( 'Foo\u00AD\u061C\u200E\u200F\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069bar' );
- assert.equal( title.getMain(), 'Foobar', 'Strip soft hyphen and Unicode directional formatting characters' );
+ title = new mw.Title( 'Foo\u00AD\u200E\u200F\u202A\u202B\u202C\u202D\u202Ebar' );
+ assert.equal( title.getMain(), 'Foobar', 'Strip soft hyphen and Unicode bidi override characters' );
// Regression test: Previously it would only detect an extension if there is no space after it
title = new mw.Title( 'Example.js ' );
},
{
fileName: 'BI\u200EDI.jpg',
- typeOfName: 'Name containing Unicode directional formatting characters',
+ typeOfName: 'Name containing BIDI overrides',
nameText: 'BIDI',
prefixedText: 'File:BIDI.jpg'
},