I think the bug was introduced during a cleanup in Iaabc10c.
I don't think the double quote (") should be part of legalSearchChars at
query time; it seems to break the regex.
The strategy here is to distinguish the legalSearchChars used at query
time from the ones used at index time by introducing:
SearchEngine::legalSearchCharsForUpdate()
Bug: T167798
Change-Id: I61dc53665e26d3c6c48caed78dd3bbde9a33def7
# Language-specific strip/conversion
$text = $wgContLang->normalizeForSearch( $text );
$se = $se ?: MediaWikiServices::getInstance()->newSearchEngine();
- $lc = $se->legalSearchChars() . '&#;';
+ $lc = $se->legalSearchCharsForUpdate() . '&#;';
$text = preg_replace( "/<\\/?\\s*[A-Za-z][^>]*?>/",
' ', $wgContLang->lc( " " . $text . " " ) ); # Strip HTML markup
$ns = $this->title->getNamespace();
$title = $this->title->getText();
- $lc = $search->legalSearchChars() . '&#;';
+ $lc = $search->legalSearchCharsForUpdate() . '&#;';
$t = $wgContLang->normalizeForSearch( $title );
$t = preg_replace( "/[^{$lc}]+/", ' ', $t );
$t = $wgContLang->lc( $t );
}
/**
- * Get chars legal for search.
+ * Get chars legal for search (at query time).
* NOTE: usage as static is deprecated and preserved only as BC measure
* @return string
*/
return "A-Za-z_'.0-9\\x80-\\xFF\\-";
}
+ /**
+ * Get chars legal for search (at index time).
+ *
+ * This base implementation returns exactly the same set as
+ * legalSearchChars(), resolved through late static binding. Override
+ * this method to accept additional characters while indexing without
+ * changing the set used at query time.
+ *
+ * The result is interpolated into a regex character class by callers
+ * (e.g. "/[^{$lc}]+/"), so it must be a valid character-class fragment.
+ *
+ * @since 1.30
+ * @return string
+ */
+ public function legalSearchCharsForUpdate() {
+ return static::legalSearchChars();
+ }
+
/**
* Set the maximum number of results to return
* and how many to skip before returning the first.
return $regex;
}
- public static function legalSearchChars() {
- return "\"*" . parent::legalSearchChars();
+ public function legalSearchCharsForUpdate() {
+ return "\"*" . parent::legalSearchCharsForUpdate();
}
/**
[] );
}
- public static function legalSearchChars() {
- return "\"" . parent::legalSearchChars();
+ public function legalSearchCharsForUpdate() {
+ return "\"" . parent::legalSearchCharsForUpdate();
}
}
return $regex;
}
- public static function legalSearchChars() {
- return "\"*" . parent::legalSearchChars();
+ public function legalSearchCharsForUpdate() {
+ return "\"*" . parent::legalSearchCharsForUpdate();
}
/**
"Plain search failed" );
}
+ public function testPhraseSearch() {
+ $res = $this->search->searchText( '"smithee is one who smiths"' );
+ $this->assertEquals(
+ [ 'Smithee' ],
+ $this->fetchIds( $res ),
+ "Phrase search failed" );
+ $res = $this->search->searchText( '"smithee is one who smiths"' );
+ $match = $res->next();
+ $terms = [ 'smithee', 'is', 'one', 'who', 'smiths' ];
+ $snippet = "";
+ foreach ( $terms as $term ) {
+ $snippet .= " <span class='searchmatch'>" . $term . "</span>";
+ }
+ $this->assertRegexp( '/' . preg_quote( $snippet, '/' ) . '/',
+ $match->getTextSnippet( $res->termMatches() ),
+ "Phrase search failed to highlight" );
+ }
+
public function testTextPowerSearch() {
$this->search->setNamespaces( [ 0, 1, 4 ] );
$this->assertEquals(