Allow SearchUpdate hook to abort core update call

author Chad Horohoe <chadh@wikimedia.org>

Thu, 20 Jun 2013 22:50:31 +0000 (18:50 -0400)

committer Chad Horohoe <chadh@wikimedia.org>

Fri, 21 Jun 2013 18:00:45 +0000 (14:00 -0400)
author Chad Horohoe <chadh@wikimedia.org>
Thu, 20 Jun 2013 22:50:31 +0000 (18:50 -0400)
committer Chad Horohoe <chadh@wikimedia.org>
Fri, 21 Jun 2013 18:00:45 +0000 (14:00 -0400)
diff --git a/docs/hooks.txt b/docs/hooks.txt

index e5444ce..f2c10ce 100644 (file)
--- a/docs/hooks.txt
+++ b/docs/hooks.txt
@@ -1949,11 +1949,12 @@ $data: the data stored in old_text.  The meaning depends on $flags: if external
  $flags: a comma-delimited list of strings representing the options used.  May
    include: utf8 (this will always be set for new revisions); gzip; external.
  
  $flags: a comma-delimited list of strings representing the options used.  May
    include: utf8 (this will always be set for new revisions); gzip; external.
  
-'SearchUpdate': Prior to search update completion.
+'SearchUpdate': Prior to search update completion. Return false to stop any
+further text/content processing
  $id : Page id
  $id : Page id
-$namespace : Page namespace
-$title : Page title
+$title : Title object
  $text : Current text being indexed
  $text : Current text being indexed
+$content : Content object for text being indexed.
  
  'SearchGetNearMatchBefore': Perform exact-title-matches in "go" searches before
  the normal operations.
  
  'SearchGetNearMatchBefore': Perform exact-title-matches in "go" searches before
  the normal operations.
diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php

index bbd8886..4f4e31d 100644 (file)
--- a/includes/search/SearchEngine.php
+++ b/includes/search/SearchEngine.php
@@ -523,6 +523,20 @@ class SearchEngine {
                         return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
                 }
         }
                         return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
                 }
         }
+
+       /**
+        * Get the raw text for updating the index from a content object
+        * Nicer search backends could possibly do something cooler than
+        * just returning raw text
+        *
+        * @todo This isn't ideal, we'd really like to have content-specific handling here
+        * @param Title $t Title we're indexing
+        * @param Content $c Content of the page to index
+        * @return string
+        */
+       public function getTextFromContent( Title $t, Content $c = null ) {
+               return $c ? $c->getTextForSearchIndex() : '';
+       }
  }
  
  /**
  }
  
  /**
@@ -815,11 +829,8 @@ class SearchResult {
         protected function initText() {
                 if ( !isset( $this->mText ) ) {
                         if ( $this->mRevision != null ) {
         protected function initText() {
                 if ( !isset( $this->mText ) ) {
                         if ( $this->mRevision != null ) {
-                               //TODO: if we could plug in some code that knows about special content models *and* about
-                               //      special features of the search engine, the search could benefit. See similar
-                               //      comment in SearchUpdate's constructor
-                               $content = $this->mRevision->getContent();
-                               $this->mText = $content ? $content->getTextForSearchIndex() : '';
+                               $this->mText = SearchEngine::create()
+                                       ->getTextFromContent( $this->mTitle, $this->mRevision->getContent() );
                         } else { // TODO: can we fetch raw wikitext for commons images?
                                 $this->mText = '';
                         }
                         } else { // TODO: can we fetch raw wikitext for commons images?
                                 $this->mText = '';
                         }
diff --git a/includes/search/SearchUpdate.php b/includes/search/SearchUpdate.php

index 10dc0a0..7146917 100644 (file)
--- a/includes/search/SearchUpdate.php
+++ b/includes/search/SearchUpdate.php
@@ -36,32 +36,27 @@ class SearchUpdate implements DeferrableUpdate {
         private $id = 0;
  
         /**
         private $id = 0;
  
         /**
-        * Namespace of page being updated
-        * @var int
-        */
-       private $namespace;
-
-       /**
-        * Title we're updating (without namespace)
-        * @var string
+        * Title we're updating
+        * @var Title
          */
         private $title;
  
         /**
          */
         private $title;
  
         /**
-        * Raw text to put into the index
+        * Content of the page (not text)
+        * @var Content|false
          */
          */
-       private $text;
+       private $content;
  
         /**
          * Constructor
          *
          * @param int $id Page id to update
          * @param Title|string $title Title of page to update
  
         /**
          * Constructor
          *
          * @param int $id Page id to update
          * @param Title|string $title Title of page to update
-        * @param Content|string|false $content Content of the page to update.
+        * @param Content|string|false $c Content of the page to update.
          *  If a Content object, text will be gotten from it. String is for back-compat.
          *  Passing false tells the backend to just update the title, not the content
          */
          *  If a Content object, text will be gotten from it. String is for back-compat.
          *  Passing false tells the backend to just update the title, not the content
          */
-       public function __construct( $id, $title, $content = false ) {
+       public function __construct( $id, $title, $c = false ) {
                 if ( is_string( $title ) ) {
                         $nt = Title::newFromText( $title );
                 } else {
                 if ( is_string( $title ) ) {
                         $nt = Title::newFromText( $title );
                 } else {
@@ -70,17 +65,13 @@ class SearchUpdate implements DeferrableUpdate {
  
                 if ( $nt ) {
                         $this->id = $id;
  
                 if ( $nt ) {
                         $this->id = $id;
-                       // @todo This isn't ideal, we'd really like to have content-specific
-                       // handling here. See similar content in SearchEngine::initText().
-                       if( is_string( $content ) ) {
-                               // b/c for ApprovedRevs
-                               $this->text = $content;
+                       // is_string() check is back-compat for ApprovedRevs
+                       if( is_string( $c ) ) {
+                               $this->content = new TextContent( $c );
                         } else {
                         } else {
-                               $this->text = $content ? $content->getTextForSearchIndex() : false;
+                               $this->content = $c ?: false;
                         }
                         }
-
-                       $this->namespace = $nt->getNamespace();
-                       $this->title = $nt->getText(); # Discard namespace
+                       $this->title = $nt;
                 } else {
                         wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
                 }
                 } else {
                         wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
                 }
@@ -99,21 +90,23 @@ class SearchUpdate implements DeferrableUpdate {
                 wfProfileIn( __METHOD__ );
  
                 $search = SearchEngine::create();
                 wfProfileIn( __METHOD__ );
  
                 $search = SearchEngine::create();
-               $normalTitle = $search->normalizeText( Title::indexTitle( $this->namespace, $this->title ) );
+               $normalTitle = $search->normalizeText(
+                       Title::indexTitle( $this->title->getNamespace(), $this->title->getText() ) );
  
                 if ( WikiPage::newFromId( $this->id ) === null ) {
                         $search->delete( $this->id, $normalTitle );
                         wfProfileOut( __METHOD__ );
                         return;
  
                 if ( WikiPage::newFromId( $this->id ) === null ) {
                         $search->delete( $this->id, $normalTitle );
                         wfProfileOut( __METHOD__ );
                         return;
-               } elseif ( $this->text === false ) {
+               } elseif ( $this->content === false ) {
                         $search->updateTitle( $this->id, $normalTitle );
                         wfProfileOut( __METHOD__ );
                         return;
                 }
  
                         $search->updateTitle( $this->id, $normalTitle );
                         wfProfileOut( __METHOD__ );
                         return;
                 }
  
-               $text = self::updateText( $this->text );
-
-               wfRunHooks( 'SearchUpdate', array( $this->id, $this->namespace, $this->title, &$text ) );
+               $text = $search->getTextFromContent( $this->title, $this->content );
+               if( wfRunHooks( 'SearchUpdate', array( $this->id, $this->title, &$text, $this->content ) ) ) {
+                       $text = self::updateText( $text );
+               }
  
                 # Perform the actual update
                 $search->update( $this->id, $normalTitle, $search->normalizeText( $text ) );
  
                 # Perform the actual update
                 $search->update( $this->id, $normalTitle, $search->normalizeText( $text ) );
author	Chad Horohoe <chadh@wikimedia.org>
	Thu, 20 Jun 2013 22:50:31 +0000 (18:50 -0400)
committer	Chad Horohoe <chadh@wikimedia.org>
	Fri, 21 Jun 2013 18:00:45 +0000 (14:00 -0400)
docs/hooks.txt		patch \| blob \| history
includes/search/SearchEngine.php		patch \| blob \| history
includes/search/SearchUpdate.php		patch \| blob \| history