From 9befbd38dce32d24deeb20fddc17e21dfd052f71 Mon Sep 17 00:00:00 2001 From: Jforrester Date: Thu, 31 May 2018 17:33:31 +0000 Subject: [PATCH] Revert "Strip soft hyphens (U+00AD) from title" This reverts commit 6b8a5a137d3f449b4056e3de82fa6747b45f1f9a. Change-Id: Ica5abe69c316792aa2f7eafad9b1d63183b282a8 --- RELEASE-NOTES-1.32 | 2 -- includes/title/MediaWikiTitleCodec.php | 7 +++--- resources/src/mediawiki.Title/Title.js | 6 ++--- tests/parser/parserTests.txt | 24 ------------------- .../title/MediaWikiTitleCodecTest.php | 5 ---- .../mediawiki/mediawiki.Title.test.js | 4 ++-- 6 files changed, 8 insertions(+), 40 deletions(-) diff --git a/RELEASE-NOTES-1.32 b/RELEASE-NOTES-1.32 index 59a53107ed..dca8311d78 100644 --- a/RELEASE-NOTES-1.32 +++ b/RELEASE-NOTES-1.32 @@ -161,8 +161,6 @@ because of Phabricator reports. * The ApiQueryContributions class has been renamed to ApiQueryUserContribs. === Other changes in 1.32 === -* Soft hyphens (U+00AD) are now automatically removed from titles; these - characters can accidentally end up in copy-and-pasted titles. * … == Compatibility == diff --git a/includes/title/MediaWikiTitleCodec.php b/includes/title/MediaWikiTitleCodec.php index 655884b99a..890a870a2a 100644 --- a/includes/title/MediaWikiTitleCodec.php +++ b/includes/title/MediaWikiTitleCodec.php @@ -275,11 +275,10 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser { 'user_case_dbkey' => $dbkey, ]; - # Strip soft hyphens (U+00AD) and Unicode bidi override characters - # (U+200E, U+200F, U+202A. U+202B, U+202C, U+202D, U+202E). + # Strip Unicode bidi override characters. # Sometimes they slip into cut-n-pasted page titles, where the - # soft hyphens or override chars get included in list displays. - $dbkey = preg_replace( '/\xC2\xAD|\xE2\x80[\x8E\x8F\xAA-\xAE]/S', '', $dbkey ); + # override chars get included in list displays. + $dbkey = preg_replace( '/\xE2\x80[\x8E\x8F\xAA-\xAE]/S', '', $dbkey ); # Clean up whitespace # Note: use of the /u option on preg_replace here will cause diff --git a/resources/src/mediawiki.Title/Title.js b/resources/src/mediawiki.Title/Title.js index b3542cd569..2b76187359 100644 --- a/resources/src/mediawiki.Title/Title.js +++ b/resources/src/mediawiki.Title/Title.js @@ -149,7 +149,7 @@ rWhitespace = /[ _\u00A0\u1680\u180E\u2000-\u200A\u2028\u2029\u202F\u205F\u3000]+/g, // From MediaWikiTitleCodec::splitTitleString() in PHP - rStripCharacters = /[\u00AD\u200E\u200F\u202A-\u202E]/g, + rUnicodeBidi = /[\u200E\u200F\u202A-\u202E]/g, /** * Slightly modified from Flinfo. Credit goes to Lupo and Flominator. @@ -233,8 +233,8 @@ namespace = defaultNamespace === undefined ? NS_MAIN : defaultNamespace; title = title - // Strip soft hyphens and Unicode bidi override characters - .replace( rStripCharacters, '' ) + // Strip Unicode bidi override characters + .replace( rUnicodeBidi, '' ) // Normalise whitespace to underscores and remove duplicates .replace( rWhitespace, '_' ) // Trim underscores diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index bc0c43da5e..b109e3967d 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -30875,27 +30875,3 @@ header *foo footer !! end - -!! test -Check soft hyphens as entities (­) in displaytitle (T66528) -!! options -showtitle -title=[[Lopadotemachoselachogaleokranioleipsanodrimhypotrimmatosilphioparaomelitokatakechymenokichlepikossyphophattoperisteralektryonoptekephalliokigklopeleiolagoiosiraiobaphetraganopterygon]] -!! wikitext -{{DISPLAYTITLE:Lopado­temacho­selacho­galeo­kranio­leipsano­drim­hypo­trimmato­silphio­parao­melito­katakechy­meno­kichl­epi­kossypho­phatto­perister­alektryon­opte­kephallio­kigklo­peleio­lagoio­siraio­baphe­tragano­pterygon}} -!! html/php -Lopado­temacho­selacho­galeo­kranio­leipsano­drim­hypo­trimmato­silphio­parao­melito­katakechy­meno­kichl­epi­kossypho­phatto­perister­alektryon­opte­kephallio­kigklo­peleio­lagoio­siraio­baphe­tragano­pterygon - -!! end - -!! test -Check soft hyphens as Unicode characters (U+00AD) in displaytitle (T66528) -!! options -showtitle -title=[[Lopadotemachoselachogaleokranioleipsanodrimhypotrimmatosilphioparaomelitokatakechymenokichlepikossyphophattoperisteralektryonoptekephalliokigklopeleiolagoiosiraiobaphetraganopterygon]] -!! wikitext -{{DISPLAYTITLE:Lopado­temacho­selacho­galeo­kranio­leipsano­drim­hypo­trimmato­silphio­parao­melito­katakechy­meno­kichl­epi­kossypho­phatto­perister­alektryon­opte­kephallio­kigklo­peleio­lagoio­siraio­baphe­tragano­pterygon}} -!! html/php -Lopado­temacho­selacho­galeo­kranio­leipsano­drim­hypo­trimmato­silphio­parao­melito­katakechy­meno­kichl­epi­kossypho­phatto­perister­alektryon­opte­kephallio­kigklo­peleio­lagoio­siraio­baphe­tragano­pterygon - -!! end diff --git a/tests/phpunit/includes/title/MediaWikiTitleCodecTest.php b/tests/phpunit/includes/title/MediaWikiTitleCodecTest.php index 70aa0710d6..e1b98ec341 100644 --- a/tests/phpunit/includes/title/MediaWikiTitleCodecTest.php +++ b/tests/phpunit/includes/title/MediaWikiTitleCodecTest.php @@ -104,11 +104,6 @@ class MediaWikiTitleCodecTest extends MediaWikiTestCase { // names ending in "a" to be female. [ NS_USER, 'Lisa_Müller', '', '', 'de', 'Benutzerin:Lisa Müller' ], [ NS_MAIN, 'FooBar', '', 'remotetestiw', 'en', 'remotetestiw:FooBar' ], - // Strip soft hyphen and Unicode bidi override characters - [ NS_MAIN, "Foo\xC2\xAD\xE2\x80\x8E\xE2\x80\x8F\xE2\x80\xAA\xE2\x80\xAB" . - "\xE2\x80\xAC\xE2\x80\xAD\xE2\x80\xAEbar", '', '', 'en', - "Foo\xC2\xAD\xE2\x80\x8E\xE2\x80\x8F\xE2\x80\xAA\xE2\x80\xAB" . - "\xE2\x80\xAC\xE2\x80\xAD\xE2\x80\xAEbar", 'Foobar' ], ]; } diff --git a/tests/qunit/suites/resources/mediawiki/mediawiki.Title.test.js b/tests/qunit/suites/resources/mediawiki/mediawiki.Title.test.js index a775029709..d6fe744fc0 100644 --- a/tests/qunit/suites/resources/mediawiki/mediawiki.Title.test.js +++ b/tests/qunit/suites/resources/mediawiki/mediawiki.Title.test.js @@ -245,8 +245,8 @@ title = new mw.Title( 'Foo \u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000 bar' ); assert.equal( title.getMain(), 'Foo_bar', 'Merge multiple types of whitespace/underscores into a single underscore' ); - title = new mw.Title( 'Foo\u00AD\u200E\u200F\u202A\u202B\u202C\u202D\u202Ebar' ); - assert.equal( title.getMain(), 'Foobar', 'Strip soft hyphen and Unicode bidi override characters' ); + title = new mw.Title( 'Foo\u200E\u200F\u202A\u202B\u202C\u202D\u202Ebar' ); + assert.equal( title.getMain(), 'Foobar', 'Strip Unicode bidi override characters' ); // Regression test: Previously it would only detect an extension if there is no space after it title = new mw.Title( 'Example.js ' ); -- 2.20.1