X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2Fdeferred%2FSearchUpdate.php;h=2abf02875e6f64641d7201c780aa2cf7fc532071;hb=f0a28890a000b2c3259438a6b733ed46749c6e2a;hp=ba14f09947a21b6119a5229582356a60946da066;hpb=96366c1cf1966ca07e8f0b4361bbca28bfaf43a0;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/deferred/SearchUpdate.php b/includes/deferred/SearchUpdate.php index ba14f09947..2abf02875e 100644 --- a/includes/deferred/SearchUpdate.php +++ b/includes/deferred/SearchUpdate.php @@ -38,6 +38,9 @@ class SearchUpdate implements DeferrableUpdate { /** @var Content|bool Content of the page (not text) */ private $content; + /** @var WikiPage **/ + private $page; + /** * Constructor * @@ -78,18 +81,15 @@ class SearchUpdate implements DeferrableUpdate { return; } - $page = WikiPage::newFromID( $this->id, WikiPage::READ_LATEST ); - foreach ( SearchEngine::getSearchTypes() as $type ) { $search = SearchEngine::create( $type ); - $indexTitle = $this->indexTitle( $search ); if ( !$search->supports( 'search-update' ) ) { continue; } - $normalTitle = $search->normalizeText( $indexTitle ); + $normalTitle = $this->getNormalizedTitle( $search ); - if ( $page === null ) { + if ( $this->getLatestPage() === null ) { $search->delete( $this->id, $normalTitle ); continue; } elseif ( $this->content === false ) { @@ -148,21 +148,22 @@ class SearchUpdate implements DeferrableUpdate { # Strip all remaining non-search characters $text = preg_replace( "/[^{$lc}]+/", " ", $text ); - # Handle 's, s' - # - # $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text ); - # $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text ); - # - # These tail-anchored regexps are insanely slow. The worst case comes - # when Japanese or Chinese text (ie, no word spacing) is written on - # a wiki configured for Western UTF-8 mode. The Unicode characters are - # expanded to hex codes and the "words" are very long paragraph-length - # monstrosities. On a large page the above regexps may take over 20 - # seconds *each* on a 1GHz-level processor. - # - # Following are reversed versions which are consistently fast - # (about 3 milliseconds on 1GHz-level processor). - # + /** + * Handle 's, s' + * + * $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text ); + * $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text ); + * + * These tail-anchored regexps are insanely slow. The worst case comes + * when Japanese or Chinese text (ie, no word spacing) is written on + * a wiki configured for Western UTF-8 mode. The Unicode characters are + * expanded to hex codes and the "words" are very long paragraph-length + * monstrosities. On a large page the above regexps may take over 20 + * seconds *each* on a 1GHz-level processor. + * + * Following are reversed versions which are consistently fast + * (about 3 milliseconds on 1GHz-level processor). + */ $text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) ); $text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) ); @@ -173,13 +174,30 @@ class SearchUpdate implements DeferrableUpdate { } /** - * Get a string representation of a title suitable for + * Get WikiPage for the SearchUpdate $id using WikiPage::READ_LATEST + * and ensure using the same WikiPage object if there are multiple + * SearchEngine types. + * + * Returns null if a page has been deleted or is not found. + * + * @return WikiPage|null + */ + private function getLatestPage() { + if ( !isset( $this->page ) ) { + $this->page = WikiPage::newFromID( $this->id, WikiPage::READ_LATEST ); + } + + return $this->page; + } + + /** + * Get a normalized string representation of a title suitable for * including in a search index * * @param SearchEngine $search * @return string A stripped-down title string ready for the search index */ - private function indexTitle( SearchEngine $search ) { + private function getNormalizedTitle( SearchEngine $search ) { global $wgContLang; $ns = $this->title->getNamespace(); @@ -199,6 +217,7 @@ class SearchUpdate implements DeferrableUpdate { if ( $ns == NS_FILE ) { $t = preg_replace( "/ (png|gif|jpg|jpeg|ogg)$/", "", $t ); } - return trim( $t ); + + return $search->normalizeText( trim( $t ) ); } }