make rebuildtextindex script aware of content models.
author daniel <daniel.kinzler@wikimedia.de>
Thu, 13 Dec 2012 20:48:36 +0000 (21:48 +0100)
committer daniel <daniel.kinzler@wikimedia.de>
Thu, 13 Dec 2012 20:48:36 +0000 (21:48 +0100)
Change-Id: Ia976ce975e3c64a54e1d4b27c52bccaa07754041

includes/search/SearchUpdate.php
maintenance/rebuildtextindex.php

index 40dd36c..389c317 100644 (file)
@@ -34,7 +34,12 @@ class SearchUpdate implements DeferrableUpdate {
        private $mTitleWords;
 
        function __construct( $id, $title, $text = false ) {
-               $nt = Title::newFromText( $title );
+               if ( is_string( $title ) ) {
+                       $nt = Title::newFromText( $title );
+               } else {
+                       $nt = $title;
+               }
+
                if( $nt ) {
                        $this->mId = $id;
                        $this->mText = $text;
index 41b245f..e5b08f8 100644 (file)
@@ -86,28 +86,45 @@ class RebuildTextIndex extends Maintenance {
         * Populates the search index with content from all pages
         */
        protected function populateSearchIndex() {
+               global $wgContentHandlerUseDB;
+
                $res = $this->db->select( 'page', 'MAX(page_id) AS count' );
                $s = $this->db->fetchObject( $res );
                $count = $s->count;
                $this->output( "Rebuilding index fields for {$count} pages...\n" );
                $n = 0;
 
+               $fields = array_merge(
+                       Revision::selectPageFields(),
+                       Revision::selectFields(),
+                       Revision::selectTextFields()
+               );
+
                while ( $n < $count ) {
                        if ( $n ) {
                                $this->output( $n . "\n" );
                        }
                        $end = $n + self::RTI_CHUNK_SIZE - 1;
 
-                       $res = $this->db->select( array( 'page', 'revision', 'text' ),
-                               array( 'page_id', 'page_namespace', 'page_title', 'old_flags', 'old_text' ),
+                       $res = $this->db->select( array( 'page', 'revision', 'text' ), $fields,
                                array( "page_id BETWEEN $n AND $end", 'page_latest = rev_id', 'rev_text_id = old_id' ),
                                __METHOD__
-                               );
+                       );
 
                        foreach ( $res as $s ) {
-                               $revtext = Revision::getRevisionText( $s );
-                               $u = new SearchUpdate( $s->page_id, $s->page_title, $revtext );
-                               $u->doUpdate();
+                               try {
+                                       $title = Title::makeTitle( $s->page_namespace, $s->page_title );
+
+                                       $rev = new Revision( $s );
+                                       $content = $rev->getContent();
+                                       $text = $content->getTextForSearchIndex();
+
+                                       $u = new SearchUpdate( $s->page_id, $title, $text );
+                                       $u->doUpdate();
+                               } catch ( MWContentSerializationException $ex ) {
+                                       $this->output( "Failed to deserialize content of revision {$s->rev_id} of page "
+                                               . "`" . $title->getPrefixedDBkey() . "`!\n" );
+                               }
                        }
                        $n += self::RTI_CHUNK_SIZE;
                }