From 920eb84f1fe03f4614d4ff998dc9321432822cd9 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 19 Dec 2008 01:50:07 +0000 Subject: [PATCH] * (bug 15027) Internet domain names and IP addresses can now be indexed and searched sensibly with the default MySQL search backend. Previously things like "192.168.1.1" couldn't be searched very cleanly in the MySQL backend for two reasons: * First, the periods were stripped out. This resulted in it being broken into multiple short words: "192 168 1 1", leading at best to false positives and general weirdness. * Second, for IP addresses these were shorter than the default minimum word length of 4 and thus didn't even get indexed! The addition of padding for short words let them at least get indexed, but they still didn't turn up cleanly due to the word split. Now allowing periods through to the indexed text, and encoding periods that appear within a compound word so they get caught more cleanly. Also made a tweak so highlighting works a bit better on word boundaries -- eg "192.168.1.1" no longer hits a highlight match for "192.168.1.100". However it's still not 100% handling some cases with the periods. Sigh. --- RELEASE-NOTES | 2 ++ includes/SearchEngine.php | 2 +- includes/SearchMySQL.php | 6 +++++- languages/Language.php | 11 +++++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index edb8d8c87a..6870151016 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -428,6 +428,8 @@ The following extensions are migrated into MediaWiki 1.14: DB environments when $wgDBserver isn't set. * (bug 3691) Aspect ratio from viewBox attribute is now preserved for SVG images which do not specify width and height attributes. +* (bug 15027) Internet domain names and IP addresses can now be indexed and + searched sensibly with the default MySQL search backend. === API changes in 1.14 === diff --git a/includes/SearchEngine.php b/includes/SearchEngine.php index 66e8d9bb8a..9aee994538 100644 --- a/includes/SearchEngine.php +++ b/includes/SearchEngine.php @@ -150,7 +150,7 @@ class SearchEngine { } public static function legalSearchChars() { - return "A-Za-z_'0-9\\x80-\\xFF\\-"; + return "A-Za-z_'.0-9\\x80-\\xFF\\-"; } /** diff --git a/includes/SearchMySQL.php b/includes/SearchMySQL.php index f9b71c8ecd..984fcd892a 100644 --- a/includes/SearchMySQL.php +++ b/includes/SearchMySQL.php @@ -54,7 +54,11 @@ class SearchMySQL extends SearchEngine { if( !empty( $terms[3] ) ) { // Match individual terms in result highlighting... $regexp = preg_quote( $terms[3], '/' ); - if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+"; + if( $terms[4] ) { + $regexp = "\b$regexp"; // foo* + } else { + $regexp = "\b$regexp\b"; + } } else { // Match the quoted term in result highlighting... $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); diff --git a/languages/Language.php b/languages/Language.php index 5aa1650da6..b238c5fb9b 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -1549,6 +1549,17 @@ class Language { $out ); } + // Periods within things like hostnames and IP addresses + // are also important -- we want a search for "example.com" + // or "192.168.1.1" to work sanely. + // + // MySQL's search seems to ignore them, so you'd match on + // "example.wikipedia.com" and "192.168.83.1" as well. + $out = preg_replace( + "/(\w)\.(\w|\*)/u", + "$1U82e$2", + $out ); + wfProfileOut( __METHOD__ ); return $out; } -- 2.20.1