From 722ff7b1fc203f1f4b65446d697a860c24b4840f Mon Sep 17 00:00:00 2001 From: Jforrester Date: Thu, 31 May 2018 17:33:46 +0000 Subject: [PATCH] Revert "Strip Unicode 6.3.0 directional formatting characters from title" This reverts commit 7564624d1ca80a4d2f1cc2b4d3d32d5d2e0bca38. Change-Id: I5d596f8f3c784920829de6ae50b270b0396369e0 --- RELEASE-NOTES-1.32 | 2 -- includes/title/MediaWikiTitleCodec.php | 10 +++------- resources/src/mediawiki.Title/Title.js | 4 ++-- tests/parser/parserTests.txt | 2 +- .../includes/title/MediaWikiTitleCodecTest.php | 12 +++++------- .../resources/mediawiki/mediawiki.Title.test.js | 6 +++--- 6 files changed, 14 insertions(+), 22 deletions(-) diff --git a/RELEASE-NOTES-1.32 b/RELEASE-NOTES-1.32 index 46efbdfc56..59a53107ed 100644 --- a/RELEASE-NOTES-1.32 +++ b/RELEASE-NOTES-1.32 @@ -163,8 +163,6 @@ because of Phabricator reports. === Other changes in 1.32 === * Soft hyphens (U+00AD) are now automatically removed from titles; these characters can accidentally end up in copy-and-pasted titles. -* Strip Unicode 6.3.0 directional formatting characters (U+061C, U+2066, - U+2067, U+2068, U+2069) from the title. * … == Compatibility == diff --git a/includes/title/MediaWikiTitleCodec.php b/includes/title/MediaWikiTitleCodec.php index 7c2d393516..655884b99a 100644 --- a/includes/title/MediaWikiTitleCodec.php +++ b/includes/title/MediaWikiTitleCodec.php @@ -275,15 +275,11 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser { 'user_case_dbkey' => $dbkey, ]; - # Strip soft hyphens (U+00AD) and Unicode directional formatting characters (U+061C, U+200E, - # U+200F, U+202A. U+202B, U+202C, U+202D, U+202E, U+2066, U+2067, U+2068, U+2069). + # Strip soft hyphens (U+00AD) and Unicode bidi override characters + # (U+200E, U+200F, U+202A. U+202B, U+202C, U+202D, U+202E). # Sometimes they slip into cut-n-pasted page titles, where the # soft hyphens or override chars get included in list displays. - $dbkey = preg_replace( - '/\xC2\xAD|\xD8\x9C|\xE2\x80[\x8E\x8F\xAA-\xAE]|\xE2\x81[\xA6-\xA9]/S', - '', - $dbkey - ); + $dbkey = preg_replace( '/\xC2\xAD|\xE2\x80[\x8E\x8F\xAA-\xAE]/S', '', $dbkey ); # Clean up whitespace # Note: use of the /u option on preg_replace here will cause diff --git a/resources/src/mediawiki.Title/Title.js b/resources/src/mediawiki.Title/Title.js index dcaae3e3de..b3542cd569 100644 --- a/resources/src/mediawiki.Title/Title.js +++ b/resources/src/mediawiki.Title/Title.js @@ -149,7 +149,7 @@ rWhitespace = /[ _\u00A0\u1680\u180E\u2000-\u200A\u2028\u2029\u202F\u205F\u3000]+/g, // From MediaWikiTitleCodec::splitTitleString() in PHP - rStripCharacters = /[\u00AD\u061C\u200E\u200F\u202A-\u202E\u2066-\u2069]/g, + rStripCharacters = /[\u00AD\u200E\u200F\u202A-\u202E]/g, /** * Slightly modified from Flinfo. Credit goes to Lupo and Flominator. @@ -233,7 +233,7 @@ namespace = defaultNamespace === undefined ? NS_MAIN : defaultNamespace; title = title - // Strip soft hyphens and Unicode directional formatting characters + // Strip soft hyphens and Unicode bidi override characters .replace( rStripCharacters, '' ) // Normalise whitespace to underscores and remove duplicates .replace( rWhitespace, '_' ) diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 216d7e5d3c..bc0c43da5e 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -28869,7 +28869,7 @@ foo {{echo|bar [[Category:baz]]}} bar # of the categories in wikitext # Do not remove these characters in edits. # -# As part of the serialization, these Unicode directional formatting characters will get stripped. +# As part of the serialization, these bidi characters will get stripped. !! test RTL (\u200f) and LTR (\u200e) markers around category tags should be stripped !! options diff --git a/tests/phpunit/includes/title/MediaWikiTitleCodecTest.php b/tests/phpunit/includes/title/MediaWikiTitleCodecTest.php index c2725516b6..70aa0710d6 100644 --- a/tests/phpunit/includes/title/MediaWikiTitleCodecTest.php +++ b/tests/phpunit/includes/title/MediaWikiTitleCodecTest.php @@ -104,13 +104,11 @@ class MediaWikiTitleCodecTest extends MediaWikiTestCase { // names ending in "a" to be female. [ NS_USER, 'Lisa_Müller', '', '', 'de', 'Benutzerin:Lisa Müller' ], [ NS_MAIN, 'FooBar', '', 'remotetestiw', 'en', 'remotetestiw:FooBar' ], - // Strip soft hyphen and Unicode directional formatting characters - [ NS_MAIN, "Foo\xC2\xAD\xD8\x9C\xE2\x80\x8E\xE2\x80\x8F\xE2\x80\xAA\xE2\x80\xAB" . - "\xE2\x80\xAC\xE2\x80\xAD\xE2\x80\xAE\xE2\x81\xA6\xE2\x81\xA7" . - "\xE2\x81\xA8\xE2\x81\xA9bar", '', '', 'en', - "Foo\xC2\xAD\xD8\x9C\xE2\x80\x8E\xE2\x80\x8F\xE2\x80\xAA\xE2\x80\xAB" . - "\xE2\x80\xAC\xE2\x80\xAD\xE2\x80\xAE\xE2\x81\xA6\xE2\x81\xA7" . - "\xE2\x81\xA8\xE2\x81\xA9bar", 'Foobar' ], + // Strip soft hyphen and Unicode bidi override characters + [ NS_MAIN, "Foo\xC2\xAD\xE2\x80\x8E\xE2\x80\x8F\xE2\x80\xAA\xE2\x80\xAB" . + "\xE2\x80\xAC\xE2\x80\xAD\xE2\x80\xAEbar", '', '', 'en', + "Foo\xC2\xAD\xE2\x80\x8E\xE2\x80\x8F\xE2\x80\xAA\xE2\x80\xAB" . + "\xE2\x80\xAC\xE2\x80\xAD\xE2\x80\xAEbar", 'Foobar' ], ]; } diff --git a/tests/qunit/suites/resources/mediawiki/mediawiki.Title.test.js b/tests/qunit/suites/resources/mediawiki/mediawiki.Title.test.js index e8db4e1d22..a775029709 100644 --- a/tests/qunit/suites/resources/mediawiki/mediawiki.Title.test.js +++ b/tests/qunit/suites/resources/mediawiki/mediawiki.Title.test.js @@ -245,8 +245,8 @@ title = new mw.Title( 'Foo \u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000 bar' ); assert.equal( title.getMain(), 'Foo_bar', 'Merge multiple types of whitespace/underscores into a single underscore' ); - title = new mw.Title( 'Foo\u00AD\u061C\u200E\u200F\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069bar' ); - assert.equal( title.getMain(), 'Foobar', 'Strip soft hyphen and Unicode directional formatting characters' ); + title = new mw.Title( 'Foo\u00AD\u200E\u200F\u202A\u202B\u202C\u202D\u202Ebar' ); + assert.equal( title.getMain(), 'Foobar', 'Strip soft hyphen and Unicode bidi override characters' ); // Regression test: Previously it would only detect an extension if there is no space after it title = new mw.Title( 'Example.js ' ); @@ -668,7 +668,7 @@ }, { fileName: 'BI\u200EDI.jpg', - typeOfName: 'Name containing Unicode directional formatting characters', + typeOfName: 'Name containing BIDI overrides', nameText: 'BIDI', prefixedText: 'File:BIDI.jpg' }, -- 2.20.1