From: Brion Vibber Date: Mon, 6 Sep 2004 03:01:33 +0000 (+0000) Subject: Move the check for legacy/UTF character conversion on incoming URLs from X-Git-Tag: 1.5.0alpha1~2097 X-Git-Url: http://git.cyclocoop.org/%24href?a=commitdiff_plain;h=e199fe8803b022a7588abdecc39ba45fb0b15ac2;p=lhc%2Fweb%2Fwiklou.git Move the check for legacy/UTF character conversion on incoming URLs from Title::newFromURL into WebRequest itself. Should now work on all GET params, so essentially the full URL will be converted. (The check is not done if the referer matches the canonical server, as before.) Fixes a problem with latin-1 typed URLs introduced by the stricter UTF-8 validation checks running before the conversion check. One minor glitch: the canonical URL redirection no longer picks up on a charset conversion. However, it's broken anyway and doesn't pick up e.g. non-canonical URLs using ?title= instead of / or other such things. Needs to be improved... --- diff --git a/includes/Title.php b/includes/Title.php index bb8931223b..f0fb00bd30 100644 --- a/includes/Title.php +++ b/includes/Title.php @@ -106,17 +106,6 @@ class Title { # from some external search tools. $s = str_replace( '+', ' ', $url ); - # For links that came from outside, check for alternate/legacy - # character encoding. - wfDebug( "Servr: $wgServer\n" ); - if( empty( $_SERVER['HTTP_REFERER'] ) || - strncmp($wgServer, $_SERVER['HTTP_REFERER'], strlen( $wgServer ) ) ) - { - $s = $wgLang->checkTitleEncoding( $s ); - } else { - wfDebug( "Refer: {$_SERVER['HTTP_REFERER']}\n" ); - } - $t->mDbkeyform = str_replace( ' ', '_', $s ); if( $t->secureAndSplit() ) { # check that length of title is < cur_title size diff --git a/includes/WebRequest.php b/includes/WebRequest.php index 24d20dea77..c4a139d6b8 100644 --- a/includes/WebRequest.php +++ b/includes/WebRequest.php @@ -40,14 +40,8 @@ class WebRequest { global $wgUsePathInfo; if( isset( $_SERVER['PATH_INFO'] ) && $wgUsePathInfo ) { # Stuff it! 
- $_REQUEST['title'] = substr( $_SERVER['PATH_INFO'], 1 ); - } - global $wgUseLatin1; - if( !$wgUseLatin1 ) { - require_once( 'normal/UtfNormal.php' ); - wfProfileIn( 'WebRequest:normalizeUnicode-fix' ); - $this->normalizeUnicode( $_REQUEST ); - wfProfileOut( 'WebRequest:normalizeUnicode-fix' ); + $_GET['title'] = $_REQUEST['title'] = + substr( $_SERVER['PATH_INFO'], 1 ); } } @@ -89,17 +83,19 @@ class WebRequest { /** * Recursively normalizes UTF-8 strings in the given array. - * @param array &$arr will be modified + * @param array $data string or array + * @return cleaned-up version of the given * @private */ - function normalizeUnicode( &$arr ) { - foreach( $arr as $key => $val ) { - if( is_array( $val ) ) { - $this->normalizeUnicode( $arr[$key ] ); - } else { - $arr[$key] = UtfNormal::cleanUp( $val ); + function normalizeUnicode( $data ) { + if( is_array( $data ) ) { + foreach( $data as $key => $val ) { + $data[$key] = $this->normalizeUnicode( $val ); } + } else { + $data = UtfNormal::cleanUp( $data ); } + return $data; } /** @@ -112,7 +108,20 @@ class WebRequest { */ function getGPCVal( &$arr, $name, $default ) { if( isset( $arr[$name] ) ) { - return $arr[$name]; + global $wgUseLatin1, $wgServer, $wgLang; + $data = $arr[$name]; + if( isset( $_GET[$name] ) && + ( empty( $_SERVER['HTTP_REFERER'] ) || + strncmp($wgServer, $_SERVER['HTTP_REFERER'], strlen( $wgServer ) ) ) ) { + # For links that came from outside, check for alternate/legacy + # character encoding. + $data = $wgLang->checkTitleEncoding( $data ); + } + if( !$wgUseLatin1 ) { + require_once( 'normal/UtfNormal.php' ); + $data = $this->normalizeUnicode( $data ); + } + return $data; } else { return $default; }