From ce16f36e954fd228ad1d605bc3650deb67e6183e Mon Sep 17 00:00:00 2001
From: Ilmari Karonen
Date: Thu, 27 Nov 2008 20:11:38 +0000
Subject: [PATCH] (bug 6100) Strip Unicode BiDi embedding/override characters (U+202A - U+202E) from titles. NOTE: run maintenance/cleanupImages.php and cleanupTitles.php ASAP after deploying this!

---
 RELEASE-NOTES      | 4 ++++
 includes/Title.php | 3 +--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index f496598899..ef0d8bdc33 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -369,6 +369,10 @@ The following extensions are migrated into MediaWiki 1.14:
 * Honour unchecked "Leave a redirect behind" for moved subpages
 * (bug 16440) Broken 0-byte math renderings are now deleted and re-rendered when
   page is re-parsed.
+* (bug 6100) Unicode BiDi embedding/override characters (U+202A - U+202E) are
+  now automatically removed from titles; these characters can accidentally end
+  up in copy-and-pasted titles, and, by overriding normal bidirectional text
+  handling, can lead to annoying behavior such as text rendering backwards
 
 === API changes in 1.14 ===
 
diff --git a/includes/Title.php b/includes/Title.php
index aa9a739bdf..88118fdefa 100644
--- a/includes/Title.php
+++ b/includes/Title.php
@@ -2047,8 +2047,7 @@ class Title {
 		# Strip Unicode bidi override characters.
 		# Sometimes they slip into cut-n-pasted page titles, where the
 		# override chars get included in list displays.
-		$dbkey = str_replace( "\xE2\x80\x8E", '', $dbkey ); // 200E LEFT-TO-RIGHT MARK
-		$dbkey = str_replace( "\xE2\x80\x8F", '', $dbkey ); // 200F RIGHT-TO-LEFT MARK
+		$dbkey = preg_replace( '/\xE2\x80[\x8E\x8F\xAA-\xAE]/S', '', $dbkey );
 
 		# Clean up whitespace
 		#
-- 
2.20.1