/** @var Content|bool Content of the page (not text) */
private $content;
+ /** @var WikiPage|null */
+ private $page;
+
/**
* Constructor
*
return;
}
- $page = WikiPage::newFromID( $this->id, WikiPage::READ_LATEST );
-
foreach ( SearchEngine::getSearchTypes() as $type ) {
$search = SearchEngine::create( $type );
- $indexTitle = $this->indexTitle( $search );
if ( !$search->supports( 'search-update' ) ) {
continue;
}
- $normalTitle = $search->normalizeText( $indexTitle );
+ $normalTitle = $this->getNormalizedTitle( $search );
- if ( $page === null ) {
+ if ( $this->getLatestPage() === null ) {
$search->delete( $this->id, $normalTitle );
continue;
} elseif ( $this->content === false ) {
# Strip all remaining non-search characters
$text = preg_replace( "/[^{$lc}]+/", " ", $text );
- # Handle 's, s'
- #
- # $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text );
- # $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text );
- #
- # These tail-anchored regexps are insanely slow. The worst case comes
- # when Japanese or Chinese text (ie, no word spacing) is written on
- # a wiki configured for Western UTF-8 mode. The Unicode characters are
- # expanded to hex codes and the "words" are very long paragraph-length
- # monstrosities. On a large page the above regexps may take over 20
- # seconds *each* on a 1GHz-level processor.
- #
- # Following are reversed versions which are consistently fast
- # (about 3 milliseconds on 1GHz-level processor).
- #
+ /*
+ * Handle 's, s'
+ *
+ * $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text );
+ * $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text );
+ *
+ * These tail-anchored regexps are insanely slow. The worst case comes
+ * when Japanese or Chinese text (i.e., no word spacing) is written on
+ * a wiki configured for Western UTF-8 mode. The Unicode characters are
+ * expanded to hex codes and the "words" are very long paragraph-length
+ * monstrosities. On a large page the above regexps may take over 20
+ * seconds *each* on a 1GHz-level processor.
+ *
+ * Following are reversed versions which are consistently fast
+ * (about 3 milliseconds on 1GHz-level processor).
+ */
$text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) );
$text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) );
}
/**
- * Get a string representation of a title suitable for
+ * Get the WikiPage for this SearchUpdate's $id, fetched with
+ * WikiPage::READ_LATEST and reused across multiple SearchEngine
+ * types so each engine sees the same WikiPage object.
+ *
+ * Returns null if the page has been deleted or cannot be found.
+ *
+ * @return WikiPage|null
+ */
+ private function getLatestPage() {
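+ # isset() is false while $this->page is still null, so a null from
+ # WikiPage::newFromID() (page deleted or missing) is not memoized
+ # and will be looked up again for each SearchEngine type; only a
+ # found WikiPage is cached.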
+ if ( !isset( $this->page ) ) {
+ $this->page = WikiPage::newFromID( $this->id, WikiPage::READ_LATEST );
+ }
+
+ return $this->page;
+ }
+
+ /**
+ * Get a normalized string representation of a title suitable for
* including in a search index
*
* @param SearchEngine $search
* @return string A stripped-down title string ready for the search index
*/
- private function indexTitle( SearchEngine $search ) {
+ private function getNormalizedTitle( SearchEngine $search ) {
global $wgContLang;
$ns = $this->title->getNamespace();
if ( $ns == NS_FILE ) {
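+ # The leading space in this pattern relies on earlier normalization
+ # having (presumably) turned punctuation such as the "." in
+ # "Example.jpg" into spaces, leaving the extension as a trailing
+ # space-separated word that can be stripped.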
$t = preg_replace( "/ (png|gif|jpg|jpeg|ogg)$/", "", $t );
}
- return trim( $t );
+
+ return $search->normalizeText( trim( $t ) );
}
}