From b687f14131d1ffb7a038a25029f948383b08299e Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Thu, 2 Sep 2004 07:50:04 +0000 Subject: [PATCH] Normalize Unicode input to normalization form C. Most of the time, input is already in this form, and it shouldn't take very long to verify it. There is still optimization to be done, though. Partial fix for http://bugzilla.wikipedia.org/show_bug.cgi?id=240 We will also need to verify correct UTF-8 sequences and strip characters that are illegal in XML. Some input may not be going through this verification yet (e.g. the uploaded filenames). --- includes/WebRequest.php | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/includes/WebRequest.php b/includes/WebRequest.php index a37c257bb4..04742bb0a8 100644 --- a/includes/WebRequest.php +++ b/includes/WebRequest.php @@ -20,13 +20,15 @@ # http://www.gnu.org/copyleft/gpl.html # Hypothetically, we could use a WebRequest object to fake a -# self-contained request. - -## Enable this to debug total elimination of register_globals +# self-contained request (FauxRequest). class WebRequest { function WebRequest() { $this->checkMagicQuotes(); + global $wgUseLatin1; + if( !$wgUseLatin1 ) { + $this->normalizeUnicode(); + } } function &fix_magic_quotes( &$arr ) { @@ -51,6 +53,17 @@ class WebRequest { } } + function normalizeUnicode() { + wfProfileIn( 'WebRequest:normalizeUnicode-include' ); + require_once( 'normal/UtfNormal.php' ); + wfProfileOut( 'WebRequest:normalizeUnicode-include' ); + wfProfileIn( 'WebRequest:normalizeUnicode-fix' ); + foreach( $_REQUEST as $key => $val ) { + $_REQUEST[$key] = UtfNormal::toNFC( $val ); + } + wfProfileOut( 'WebRequest:normalizeUnicode-fix' ); + } + function getGPCVal( &$arr, $name, $default ) { if( isset( $arr[$name] ) ) { return $arr[$name]; -- 2.20.1