Pass content to SearchUpdate rather than raw text

author Chad Horohoe <chadh@wikimedia.org>

Wed, 19 Jun 2013 17:55:33 +0000 (13:55 -0400)

committer Chad Horohoe <chadh@wikimedia.org>

Wed, 19 Jun 2013 20:40:46 +0000 (16:40 -0400)
author Chad Horohoe <chadh@wikimedia.org>
Wed, 19 Jun 2013 17:55:33 +0000 (13:55 -0400)
committer Chad Horohoe <chadh@wikimedia.org>
Wed, 19 Jun 2013 20:40:46 +0000 (16:40 -0400)
diff --git a/includes/WikiPage.php b/includes/WikiPage.php

index 9e6a0c8..398a424 100644 (file)
--- a/includes/WikiPage.php
+++ b/includes/WikiPage.php
@@ -2109,8 +2109,7 @@ class WikiPage implements Page, IDBAccessObject {
                 }
  
                 DeferredUpdates::addUpdate( new SiteStatsUpdate( 0, 1, $good, $total ) );
-               DeferredUpdates::addUpdate( new SearchUpdate( $id, $title, $content->getTextForSearchIndex() ) );
-               // @todo let the search engine decide what to do with the content object
+               DeferredUpdates::addUpdate( new SearchUpdate( $id, $title, $content ) );
  
                 // If this is another user's talk page, update newtalk.
                 // Don't do this if $options['changed'] = false (null-edits) nor if
diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php

index 00f7923..bbd8886 100644 (file)
--- a/includes/search/SearchEngine.php
+++ b/includes/search/SearchEngine.php
@@ -816,7 +816,8 @@ class SearchResult {
                 if ( !isset( $this->mText ) ) {
                         if ( $this->mRevision != null ) {
                                 //TODO: if we could plug in some code that knows about special content models *and* about
-                               //      special features of the search engine, the search could benefit.
+                               //      special features of the search engine, the search could benefit. See similar
+                               //      comment in SearchUpdate's constructor
                                 $content = $this->mRevision->getContent();
                                 $this->mText = $content ? $content->getTextForSearchIndex() : '';
                         } else { // TODO: can we fetch raw wikitext for commons images?
diff --git a/includes/search/SearchUpdate.php b/includes/search/SearchUpdate.php

index 759c7b9..a0e1ccf 100644 (file)
--- a/includes/search/SearchUpdate.php
+++ b/includes/search/SearchUpdate.php
@@ -33,7 +33,16 @@ class SearchUpdate implements DeferrableUpdate {
         private $mId = 0, $mNamespace, $mTitle, $mText;
         private $mTitleWords;
  
-       public function __construct( $id, $title, $text = false ) {
+       /**
+        * Constructor
+        *
+        * @param int $id Page id to update
+        * @param Title|string $title Title of page to update
+        * @param Content|string|false $content Content of the page to update.
+        *  If a Content object, its text is obtained via getTextForSearchIndex(). A string is accepted for back-compat.
+        *  Passing false tells the backend to just update the title, not the content
+        */
+       public function __construct( $id, $title, $content = false ) {
                 if ( is_string( $title ) ) {
                         $nt = Title::newFromText( $title );
                 } else {
@@ -42,7 +51,14 @@ class SearchUpdate implements DeferrableUpdate {
  
                 if ( $nt ) {
                         $this->mId = $id;
-                       $this->mText = $text;
+                       // @todo This isn't ideal, we'd really like to have content-specific
+                       // handling here. See the similar comment in SearchResult::initText().
+                       if( is_string( $content ) ) {
+                               // b/c for ApprovedRevs
+                               $this->mText = $content;
+                       } else {
+                               $this->mText = $content ? $content->getTextForSearchIndex() : false;
+                       }
  
                         $this->mNamespace = $nt->getNamespace();
                         $this->mTitle = $nt->getText(); # Discard namespace
diff --git a/maintenance/Maintenance.php b/maintenance/Maintenance.php

index a2bbec5..e29ffd9 100644 (file)
--- a/maintenance/Maintenance.php
+++ b/maintenance/Maintenance.php
@@ -1155,8 +1155,7 @@ abstract class Maintenance {
                         $title = $titleObj->getPrefixedDBkey();
                         $this->output( "$title..." );
                         # Update searchindex
-                       # TODO: pass the Content object to SearchUpdate, let the search engine decide how to deal with it.
-                       $u = new SearchUpdate( $pageId, $titleObj->getText(), $rev->getContent()->getTextForSearchIndex() );
+                       $u = new SearchUpdate( $pageId, $titleObj->getText(), $rev->getContent() );
                         $u->doUpdate();
                         $this->output( "\n" );
                 }
diff --git a/maintenance/rebuildtextindex.php b/maintenance/rebuildtextindex.php

index a31aba2..c651f72 100644 (file)
--- a/maintenance/rebuildtextindex.php
+++ b/maintenance/rebuildtextindex.php
@@ -115,9 +115,8 @@ class RebuildTextIndex extends Maintenance {
  
                                         $rev = new Revision( $s );
                                         $content = $rev->getContent();
-                                       $text = $content->getTextForSearchIndex();
  
-                                       $u = new SearchUpdate( $s->page_id, $title, $text );
+                                       $u = new SearchUpdate( $s->page_id, $title, $content );
                                         $u->doUpdate();
                                 } catch ( MWContentSerializationException $ex ) {
                                         $this->output( "Failed to deserialize content of revision {$s->rev_id} of page "
author	Chad Horohoe <chadh@wikimedia.org>
	Wed, 19 Jun 2013 17:55:33 +0000 (13:55 -0400)
committer	Chad Horohoe <chadh@wikimedia.org>
	Wed, 19 Jun 2013 20:40:46 +0000 (16:40 -0400)
includes/WikiPage.php		patch \| blob \| history
includes/search/SearchEngine.php		patch \| blob \| history
includes/search/SearchUpdate.php		patch \| blob \| history
maintenance/Maintenance.php		patch \| blob \| history
maintenance/rebuildtextindex.php		patch \| blob \| history