From c00c1f0b21ec4b07ab317e410945fb9cd336317a Mon Sep 17 00:00:00 2001
From: Thiemo Kreuz <thiemo.kreuz@wikimedia.de>
Date: Mon, 13 May 2019 11:28:30 +0200
Subject: [PATCH] title: Convert binary regexp to use Unicode code points

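The hex sequences are the raw UTF-8 bytes that encode the Unicode code
points U+200E, U+200F and U+202A to U+202E. Now that we have a more
modern PHP at hand, we can use \x{FFFF} together with the /u modifier
to match these code points directly. The added + removes a whole run of
such characters in a single match.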

I believe the /S modifier is not needed any more. It makes PCRE spend
extra time analyzing ("studying") the pattern up front to speed up
repeated matching. But this is a pretty trivial regular expression, so
the extra analysis most probably costs more than it saves.

Change-Id: I49435114b3bc31dcce8aa4e48091d509844a2a07
---
 includes/title/MediaWikiTitleCodec.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/includes/title/MediaWikiTitleCodec.php b/includes/title/MediaWikiTitleCodec.php
index 31a022255c..7af0c1e9c5 100644
--- a/includes/title/MediaWikiTitleCodec.php
+++ b/includes/title/MediaWikiTitleCodec.php
@@ -284,7 +284,7 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
 		# Strip Unicode bidi override characters.
 		# Sometimes they slip into cut-n-pasted page titles, where the
 		# override chars get included in list displays.
-		$dbkey = preg_replace( '/\xE2\x80[\x8E\x8F\xAA-\xAE]/S', '', $dbkey );
+		$dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );
 
 		# Clean up whitespace
 		# Note: use of the /u option on preg_replace here will cause
-- 
2.20.1