Move the check for legacy/UTF character conversion on incoming URLs from
author Brion Vibber <brion@users.mediawiki.org>
Mon, 6 Sep 2004 03:01:33 +0000 (03:01 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Mon, 6 Sep 2004 03:01:33 +0000 (03:01 +0000)
Title::newFromURL into WebRequest itself. It should now work on all GET
parameters, so essentially the full URL will be converted.

(The check is not done if the referer matches the canonical server, as before.)

Fixes a problem with Latin-1 typed URLs introduced by the stricter UTF-8
validation checks running before the conversion check.

One minor glitch: the canonical URL redirection no longer picks up on a
charset conversion. However, it's broken anyway and doesn't pick up, e.g.,
non-canonical URLs using ?title= instead of / or other such things. Needs
to be improved...

includes/Title.php
includes/WebRequest.php

index bb89312..f0fb00b 100644 (file)
@@ -106,17 +106,6 @@ class Title {
                # from some external search tools.
                $s = str_replace( '+', ' ', $url );
                
-               # For links that came from outside, check for alternate/legacy
-               # character encoding.
-               wfDebug( "Servr: $wgServer\n" );
-               if( empty( $_SERVER['HTTP_REFERER'] ) ||
-                       strncmp($wgServer, $_SERVER['HTTP_REFERER'], strlen( $wgServer ) ) ) 
-               {
-                       $s = $wgLang->checkTitleEncoding( $s );
-               } else {
-                       wfDebug( "Refer: {$_SERVER['HTTP_REFERER']}\n" );
-               }
-               
                $t->mDbkeyform = str_replace( ' ', '_', $s );
                if( $t->secureAndSplit() ) {
                        # check that length of title is < cur_title size
index 24d20de..c4a139d 100644 (file)
@@ -40,14 +40,8 @@ class WebRequest {
                global $wgUsePathInfo;
                if( isset( $_SERVER['PATH_INFO'] ) && $wgUsePathInfo ) {
                        # Stuff it!
-                       $_REQUEST['title'] = substr( $_SERVER['PATH_INFO'], 1 );
-               }
-               global $wgUseLatin1;
-               if( !$wgUseLatin1 ) {
-                       require_once( 'normal/UtfNormal.php' );
-                       wfProfileIn( 'WebRequest:normalizeUnicode-fix' );
-                       $this->normalizeUnicode( $_REQUEST );
-                       wfProfileOut( 'WebRequest:normalizeUnicode-fix' );
+                       $_GET['title'] = $_REQUEST['title'] =
+                               substr( $_SERVER['PATH_INFO'], 1 );
                }
        }
 
@@ -89,17 +83,19 @@ class WebRequest {
        
        /**
         * Recursively normalizes UTF-8 strings in the given array.
-        * @param array &$arr will be modified
+        * @param array $data string or array
+        * @return cleaned-up version of the given
         * @private
         */
-       function normalizeUnicode( &$arr ) {
-               foreach( $arr as $key => $val ) {
-                       if( is_array( $val ) ) {
-                               $this->normalizeUnicode( $arr[$key ] );
-                       } else {
-                               $arr[$key] = UtfNormal::cleanUp( $val );
+       function normalizeUnicode( $data ) {
+               if( is_array( $data ) ) {
+                       foreach( $data as $key => $val ) {
+                               $data[$key] = $this->normalizeUnicode( $val );
                        }
+               } else {
+                       $data = UtfNormal::cleanUp( $data );
                }
+               return $data;
        }
        
        /**
@@ -112,7 +108,20 @@ class WebRequest {
         */
        function getGPCVal( &$arr, $name, $default ) {
                if( isset( $arr[$name] ) ) {
-                       return $arr[$name];
+                       global $wgUseLatin1, $wgServer, $wgLang;
+                       $data = $arr[$name];
+                       if( isset( $_GET[$name] ) &&
+                               ( empty( $_SERVER['HTTP_REFERER'] ) ||
+                               strncmp($wgServer, $_SERVER['HTTP_REFERER'], strlen( $wgServer ) ) ) ) {
+                               # For links that came from outside, check for alternate/legacy
+                               # character encoding.
+                               $data = $wgLang->checkTitleEncoding( $data );
+                       }
+                       if( !$wgUseLatin1 ) {
+                               require_once( 'normal/UtfNormal.php' );
+                               $data = $this->normalizeUnicode( $data );
+                       }
+                       return $data;
                } else {
                        return $default;
                }