Merge maintenance-work branch:
[lhc/web/wiklou.git] / maintenance / updateSearchIndex.php
1 <?php
2 /**
3 * Script for periodic off-peak updating of the search index
4 *
5 * Usage: php updateSearchIndex.php [-s START] [-e END] [-p POSFILE] [-l LOCKTIME] [-q]
6 * Where START is the starting timestamp
7 * END is the ending timestamp
8 * POSFILE is a file to load timestamps from and save them to, searchUpdate.WIKI_ID.pos by default
9 * LOCKTIME is how long the searchindex and revision tables will be locked for
10 * -q means quiet
11 *
12 * @ingroup Maintenance
13 */
14
15 require_once( "Maintenance.php" );
16
/**
 * Maintenance script that re-indexes pages changed within a time window.
 *
 * The window is taken from the -s / -e options or, when -s is absent, from a
 * position file that records where the previous run stopped.
 */
class UpdateSearchIndex extends Maintenance {

	/**
	 * Register the script description and its command-line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Script for periodic off-peak updating of the search index";
		$this->addParam( 's', 'starting timestamp', false, true );
		$this->addParam( 'e', 'Ending timestamp', false, true );
		$this->addParam( 'p', 'File for saving/loading timestamps, searchUpdate.WIKI_ID.pos by default', false, true );
		$this->addParam( 'l', 'How long the searchindex and revision tables will be locked for', false, true );
	}

	/**
	 * Work out the time window, run the index update and record the new position.
	 *
	 * Start timestamp resolution order: the -s option, then the legacy
	 * hardcoded 'searchUpdate.pos' file, then the per-wiki position file,
	 * and finally "24 hours ago" when nothing usable was found.
	 */
	public function execute() {
		$posFile = $this->getOption( 'p', 'searchUpdate.' . wfWikiId() . '.pos' );
		$end = $this->getOption( 'e', wfTimestampNow() );

		# B/c to the old position file name which was hardcoded
		$legacyPosFile = 'searchUpdate.pos';
		$removeLegacyPosFile = false;

		if ( $this->hasOption( 's' ) ) {
			$start = $this->getOption( 's' );
		} else {
			if ( is_readable( $legacyPosFile ) ) {
				$start = file_get_contents( $legacyPosFile );
				# Only delete it once the new position file has been written,
				# so a failed run does not lose the saved position.
				$removeLegacyPosFile = true;
			} elseif ( is_readable( $posFile ) ) {
				$start = file_get_contents( $posFile );
			} else {
				# First run: no position has been saved yet
				$start = false;
			}
			# Tolerate stray whitespace/newlines in a hand-edited position file
			$start = ( $start === false ) ? '' : trim( $start );
			if ( !$start ) {
				$start = wfTimestamp( TS_MW, time() - 86400 );
			}
		}

		# The value is compared against time(), i.e. it is a number of seconds;
		# 0 disables table locking altogether.
		$lockTime = intval( $this->getOption( 'l', 20 ) );

		$this->updateSearchIndex( $start, $end, $lockTime );

		if ( file_put_contents( $posFile, $end ) === false ) {
			# Keep the legacy file (if any) so the next run can still find a start point
			fwrite( STDERR, "Unable to write position file $posFile\n" );
			return;
		}
		if ( $removeLegacyPosFile ) {
			unlink( $legacyPosFile );
		}
	}

	/**
	 * Update the search index for every page whose latest revision has a
	 * recentchanges entry between the two timestamps.
	 *
	 * @param $start String: timestamp at which to start
	 * @param $end String: timestamp at which to stop
	 * @param $maxLockTime Integer: seconds to hold the table locks before
	 *   releasing and re-acquiring them; a false-ish value disables locking
	 */
	private function updateSearchIndex( $start, $end, $maxLockTime ) {
		global $wgDisableSearchUpdate;

		# Make sure search updates are enabled for the duration of this script
		$wgDisableSearchUpdate = false;

		$dbw = wfGetDB( DB_MASTER );
		$recentchanges = $dbw->tableName( 'recentchanges' );

		$this->output( "Updating searchindex between $start and $end\n" );

		# Select entries from recentchanges which are on top and between the specified times
		$start = $dbw->strencode( $start );
		$end = $dbw->strencode( $end );

		$page = $dbw->tableName( 'page' );
		$sql = "SELECT rc_cur_id,rc_type,rc_moved_to_ns,rc_moved_to_title FROM $recentchanges
			JOIN $page ON rc_cur_id=page_id AND rc_this_oldid=page_latest
			WHERE rc_timestamp BETWEEN '$start' AND '$end'
			";
		$res = $dbw->query( $sql, __METHOD__ );

		# Lock searchindex
		$lockTime = 0;
		if ( $maxLockTime ) {
			$this->output( "   --- Waiting for lock ---" );
			$this->lockSearchindex( $dbw );
			$lockTime = time();
			$this->output( "\n" );
		}

		# Loop through the results and do a search update
		while ( $row = $dbw->fetchObject( $res ) ) {
			# Allow reads to be processed
			if ( $maxLockTime && time() > $lockTime + $maxLockTime ) {
				$this->output( "    --- Relocking ---" );
				$this->relockSearchindex( $dbw );
				$lockTime = time();
				$this->output( "\n" );
			}
			if ( $row->rc_type == RC_LOG ) {
				# Log entries have no page content to index
				continue;
			} elseif ( $row->rc_type == RC_MOVE || $row->rc_type == RC_MOVE_OVER_REDIRECT ) {
				# Rename searchindex entry
				$titleObj = Title::makeTitle( $row->rc_moved_to_ns, $row->rc_moved_to_title );
				$title = $titleObj->getPrefixedDBkey();
				$this->output( "$title..." );
				$u = new SearchUpdate( $row->rc_cur_id, $title, false );
				# The update object does nothing until it is executed
				$u->doUpdate();
				$this->output( "\n" );
			} else {
				// Get current revision
				$rev = Revision::loadFromPageId( $dbw, $row->rc_cur_id );
				if ( $rev ) {
					$titleObj = $rev->getTitle();
					$title = $titleObj->getPrefixedDBkey();
					$this->output( $title );
					# Update searchindex
					$u = new SearchUpdate( $row->rc_cur_id, $titleObj->getText(), $rev->getText() );
					$u->doUpdate();
					$this->output( "\n" );
				}
			}
		}

		# Unlock searchindex
		if ( $maxLockTime ) {
			$this->output( "    --- Unlocking --" );
			$this->unlockSearchindex( $dbw );
			$this->output( "\n" );
		}
		$this->output( "Done\n" );
	}

	/**
	 * Lock the search index
	 *
	 * Takes a LOW_PRIORITY WRITE lock on searchindex and READ locks on the
	 * page, revision, text and interwiki tables in a single LOCK TABLES query.
	 *
	 * @param $db Database object
	 */
	private function lockSearchindex( $db ) {
		$write = array( 'searchindex' );
		$read = array( 'page', 'revision', 'text', 'interwiki' );
		$items = array();

		foreach ( $write as $table ) {
			$items[] = $db->tableName( $table ) . ' LOW_PRIORITY WRITE';
		}
		foreach ( $read as $table ) {
			$items[] = $db->tableName( $table ) . ' READ';
		}
		$sql = "LOCK TABLES " . implode( ',', $items );
		$db->query( $sql, 'updateSearchIndex.php ' . __METHOD__ );
	}

	/**
	 * Unlock the tables
	 * @param $db Database object
	 */
	private function unlockSearchindex( $db ) {
		$db->query( "UNLOCK TABLES", 'updateSearchIndex.php ' . __METHOD__ );
	}

	/**
	 * Unlock and lock again
	 * Since the lock is low-priority, queued reads will be able to complete
	 * @param $db Database object
	 */
	private function relockSearchindex( $db ) {
		$this->unlockSearchindex( $db );
		$this->lockSearchindex( $db );
	}
}
161
162 $maintClass = "UpdateSearchIndex";
163 require_once( DO_MAINTENANCE );