'|&#x[0-9A-Fa-f]+;'
),
- // From MediaWikiTitleCodec.php#L225 @26fcab1f18c568a41
- // "Clean up whitespace" in function MediaWikiTitleCodec::splitTitleString()
- rWhitespace = /[ _\u0009\u00A0\u1680\u180E\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\s]+/g,
+ // From MediaWikiTitleCodec::splitTitleString() in PHP
+ // Note that this is not equivalent to /\s/: underscore is included, while tab and newline are not.
+ rWhitespace = /[ _\u00A0\u1680\u180E\u2000-\u200A\u2028\u2029\u202F\u205F\u3000]+/g,
+
+ // Unicode bidi override characters (implicit, embeds, overrides), from MediaWikiTitleCodec::splitTitleString() in PHP
+ rUnicodeBidi = /[\u200E\u200F\u202A-\u202E]/g,
/**
* Slightly modified from Flinfo. Credit goes to Lupo and Flominator.
replace: '',
generalRule: true
},
- // Space, underscore, tab, NBSP and other unusual spaces
- {
- pattern: rWhitespace,
- replace: ' ',
- generalRule: true
- },
- // unicode bidi override characters: Implicit, Embeds, Overrides
- {
- pattern: /[\u200E\u200F\u202A-\u202E]/g,
- replace: '',
- generalRule: true
- },
// control characters
{
pattern: /[\x00-\x1f\x7f]/g,
namespace = defaultNamespace === undefined ? NS_MAIN : defaultNamespace;
title = title
+ // Strip Unicode bidi override characters
+ .replace( rUnicodeBidi, '' )
// Normalise whitespace to underscores and remove duplicates
- .replace( /[ _\s]+/g, '_' )
+ .replace( rWhitespace, '_' )
// Trim underscores
.replace( rUnderscoreTrim, '' );
namespace = defaultNamespace === undefined ? NS_MAIN : defaultNamespace;
- // Normalise whitespace and remove duplicates
- title = $.trim( title.replace( rWhitespace, ' ' ) );
+ // Normalise every whitespace character (including tab and newline) to a plain space, then trim
+ title = $.trim( title.replace( /\s/g, ' ' ) );
// Process initial colon
if ( title !== '' && title[ 0 ] === ':' ) {
[ 'A < B', 'title-invalid-characters' ],
[ 'A > B', 'title-invalid-characters' ],
[ 'A | B', 'title-invalid-characters' ],
+ [ "A \t B", 'title-invalid-characters' ],
+ [ "A \n B", 'title-invalid-characters' ],
// URL encoding
[ 'A%20B', 'title-invalid-characters' ],
[ 'A%23B', 'title-invalid-characters' ],
'A < B',
'A > B',
'A | B',
+ 'A \t B',
+ 'A \n B',
// URL encoding
'A%20B',
'A%23B',
assert.equal( title.getPrefixedText(), '.foo' );
} );
- QUnit.test( 'Transformation', 11, function ( assert ) {
+ QUnit.test( 'Transformation', 12, function ( assert ) {
var title;
title = new mw.Title( 'File:quux pif.jpg' );
assert.equal( title.toText(), 'User:HAshAr' );
assert.equal( title.getNamespaceId(), 2, 'Case-insensitive namespace prefix' );
- // Don't ask why, it's the way the backend works. One space is kept of each set.
- title = new mw.Title( 'Foo __ \t __ bar' );
+ title = new mw.Title( 'Foo \u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000 bar' );
assert.equal( title.getMain(), 'Foo_bar', 'Merge multiple types of whitespace/underscores into a single underscore' );
+ title = new mw.Title( 'Foo\u200E\u200F\u202A\u202B\u202C\u202D\u202Ebar' );
+ assert.equal( title.getMain(), 'Foobar', 'Strip Unicode bidi override characters' );
+
// Regression test: Previously it would only detect an extension if there is no space after it
title = new mw.Title( 'Example.js ' );
assert.equal( title.getExtension(), 'js', 'Space after an extension is stripped' );