From: Brion Vibber Date: Thu, 2 Sep 2004 07:50:04 +0000 (+0000) Subject: Normalize Unicode input to normalization form C. Most of the time input X-Git-Tag: 1.5.0alpha1~2159 X-Git-Url: https://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/banques/ajouter.php?a=commitdiff_plain;h=b687f14131d1ffb7a038a25029f948383b08299e;p=lhc%2Fweb%2Fwiklou.git Normalize Unicode input to normalization form C. Most of the time input is already in this form and it shouldn't take very long to verify it. There is still optimization to be done though. Partial fix for http://bugzilla.wikipedia.org/show_bug.cgi?id=240 Will also need to verify correct UTF-8 sequences and strip characters that are illegal in XML. Some input may not be going through this verification yet (e.g. the uploaded filenames) --- diff --git a/includes/WebRequest.php b/includes/WebRequest.php index a37c257bb4..04742bb0a8 100644 --- a/includes/WebRequest.php +++ b/includes/WebRequest.php @@ -20,13 +20,15 @@ # http://www.gnu.org/copyleft/gpl.html # Hypothetically, we could use a WebRequest object to fake a -# self-contained request. - -## Enable this to debug total elimination of register_globals +# self-contained request (FauxRequest). class WebRequest { function WebRequest() { $this->checkMagicQuotes(); + global $wgUseLatin1; + if( !$wgUseLatin1 ) { + $this->normalizeUnicode(); + } } function &fix_magic_quotes( &$arr ) { @@ -51,6 +53,17 @@ class WebRequest { } } + function normalizeUnicode() { + wfProfileIn( 'WebRequest:normalizeUnicode-include' ); + require_once( 'normal/UtfNormal.php' ); + wfProfileOut( 'WebRequest:normalizeUnicode-include' ); + wfProfileIn( 'WebRequest:normalizeUnicode-fix' ); + foreach( $_REQUEST as $key => $val ) { + $_REQUEST[$key] = UtfNormal::toNFC( $val ); + } + wfProfileOut( 'WebRequest:normalizeUnicode-fix' ); + } + function getGPCVal( &$arr, $name, $default ) { if( isset( $arr[$name] ) ) { return $arr[$name];