* (bug 17146) Fix for UTF-8 and short word search for some possible MySQL configs
author: Brion Vibber <brion@users.mediawiki.org>
Sat, 31 Jan 2009 01:19:18 +0000 (01:19 +0000)
committer: Brion Vibber <brion@users.mediawiki.org>
Sat, 31 Jan 2009 01:19:18 +0000 (01:19 +0000)
Language::stripForSearch() was lowercasing input, but producing 'U8' for its unicode and short-word normalizations... but SearchUpdate::doUpdate() was running *that* through an additional strtolower() before actually saving to the database.
It's possible that some folks' configurations were applying a case-sensitive search on the search table (?), which would make these not match up when actually searching. Going ahead and producing 'u8' right off will ensure these stay in sync.

RELEASE-NOTES
languages/Language.php

index b83ba4d..81f9615 100644 (file)
@@ -119,6 +119,7 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * Categoryfinder utility class no longer fails on invalid input or gives wrong
   results for category names that include pseudo-namespaces
 * (bug 17252) Galician numbering format
+* (bug 17146) Fix for UTF-8 and short word search for some possible MySQL configs
 
 == API changes in 1.15 ==
 * (bug 16858) Revamped list=deletedrevs to make listing deleted contributions
index c1bccd9..0d3c76e 100644 (file)
@@ -1564,7 +1564,7 @@ class Language {
                        $n = $minLength-1;
                        $out = preg_replace(
                                "/\b(\w{1,$n})\b/",
-                               "$1U800",
+                               "$1u800",
                                $out );
                }
                
@@ -1576,7 +1576,7 @@ class Language {
                // "example.wikipedia.com" and "192.168.83.1" as well.
                $out = preg_replace(
                        "/(\w)\.(\w|\*)/u",
-                       "$1U82e$2",
+                       "$1u82e$2",
                        $out );
                
                wfProfileOut( __METHOD__ );
@@ -1589,7 +1589,7 @@ class Language {
         * settings or anything else of the sort.
         */
        protected function stripForSearchCallback( $matches ) {
-               return 'U8' . bin2hex( $matches[1] );
+               return 'u8' . bin2hex( $matches[1] );
        }
        
        /**