No jquery.ui for now.
[lhc/web/wiklou.git] / maintenance / convertLinks.inc
1 <?php
2 /**
3 * @file
4 * @todo document
5 * @ingroup Maintenance
6 */
7
/**
 * Convert the links table from title-keyed l_from values to the ID-ID layout.
 *
 * Steps, as performed below:
 *  - return immediately on Postgres, or when l_from is already an int column;
 *  - load every row of the cur table into an in-memory map of
 *    prefixed title => cur_id;
 *  - read the links table in chunks of $linksConvInsertInterval rows and write
 *    each row whose l_from title is in the map to links_temp as (cur_id, l_to);
 *    rows whose title is not in the map are counted as bad links and dropped;
 *  - if $overwriteLinksTable is set, rename links to links_backup and
 *    links_temp to links.
 *
 * Sets the globals $noKeys, $logPerformance and $fh, which createTempTable()
 * and performanceLog() read.
 */
function convertLinks() {
	global $wgDBtype;
	if ( $wgDBtype == 'postgres' ) {
		wfOut( "Links table already ok on Postgres.\n" );
		return;
	}

	wfOut( "Converting links table to ID-ID...\n" );

	global $wgLang, $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
	global $noKeys, $logPerformance, $fh;

	$numBadLinks = $curRowsRead = 0; # counters
	$totalTuplesInserted = 0; # total tuples INSERTed into links_temp

	$reportCurReadProgress = true; # whether or not to give progress reports while reading IDs from cur table
	$curReadReportInterval = 1000; # number of rows between progress reports

	$reportLinksConvProgress = true; # whether or not to give progress reports during conversion
	$linksConvInsertInterval = 1000; # number of rows per INSERT

	$initialRowOffset = 0;

	# Overwrite the old links table with the new one. If this is set to false,
	# the new table will be left at links_temp.
	$overwriteLinksTable = true;

	# Don't create keys, and so allow duplicates in the new links table.
	# This gives a huge speed improvement for very large links tables which are MyISAM. (What about InnoDB?)
	$noKeys = false;

	$logPerformance = false; # output performance data to a file
	$perfLogFilename = "convLinksPerf.txt";
	#--------------------------------------------------------------------

	$dbw = wfGetDB( DB_MASTER );
	list( $cur, $links, $links_temp, $links_backup ) =
		$dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' );

	// Probe the type of l_from through a database-agnostic one-row query.
	$sql_limit = $dbw->limitResult( "SELECT l_from FROM $links", 1 );
	$res = $dbw->query( $sql_limit );
	$alreadyConverted = ( $dbw->fieldType( $res, 0 ) == "int" );
	// Release the probe result on every path; it is not needed again.
	$dbw->freeResult( $res );
	if ( $alreadyConverted ) {
		wfOut( "Schema already converted\n" );
		return;
	}

	$res = $dbw->query( "SELECT COUNT(*) AS count FROM $links" );
	$row = $dbw->fetchObject( $res );
	$numRows = $row->count;
	$dbw->freeResult( $res );

	if ( $numRows == 0 ) {
		wfOut( "Updating schema (no rows to convert)...\n" );
		// Stop if createTempTable() explicitly reports failure; a version that
		// returns nothing is treated as having succeeded.
		if ( createTempTable() === false ) {
			return;
		}
	} else {
		if ( $logPerformance ) {
			$fh = fopen( $perfLogFilename, "w" );
			if ( $fh === false ) {
				// Keep converting; just stop trying to write to a log that is not there.
				wfOut( "Could not open $perfLogFilename for writing; performance logging disabled.\n" );
				$logPerformance = false;
			}
		}
		$baseTime = $startTime = getMicroTime();

		# Create a title -> cur_id map
		wfOut( "Loading IDs from $cur table...\n" );
		# $numRows counts rows of the links table, so it is not reported here.
		performanceLog( "Reading rows from $cur table...\n" );
		performanceLog( "rows read vs seconds elapsed:\n" );

		$dbw->bufferResults( false );
		$res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" );
		$ids = array();

		while ( $row = $dbw->fetchObject( $res ) ) {
			$title = $row->cur_title;
			if ( $row->cur_namespace ) {
				// Non-zero namespaces are keyed as "Namespace:Title".
				$title = $wgLang->getNsText( $row->cur_namespace ) . ":$title";
			}
			$ids[$title] = $row->cur_id;
			$curRowsRead++;
			if ( $reportCurReadProgress && ( $curRowsRead % $curReadReportInterval ) == 0 ) {
				performanceLog( $curRowsRead . " " . ( getMicroTime() - $baseTime ) . "\n" );
				wfOut( "\t$curRowsRead rows of $cur table read.\n" );
			}
		}
		$dbw->freeResult( $res );
		$dbw->bufferResults( true );
		wfOut( "Finished loading IDs.\n\n" );
		performanceLog( "Took " . ( getMicroTime() - $baseTime ) . " seconds to load IDs.\n\n" );
		#--------------------------------------------------------------------

		# Now, step through the links table (in chunks of $linksConvInsertInterval rows),
		# convert, and write to the new table.
		if ( createTempTable() === false ) {
			if ( $logPerformance ) {
				fclose( $fh );
			}
			return;
		}
		performanceLog( "Resetting timer.\n\n" );
		$baseTime = getMicroTime();
		wfOut( "Processing $numRows rows from $links table...\n" );
		performanceLog( "Processing $numRows rows from $links table...\n" );
		performanceLog( "rows inserted vs seconds elapsed:\n" );

		# With keys on links_temp, duplicate pairs are skipped via INSERT IGNORE.
		$insertPrefix = $noKeys
			? "INSERT INTO $links_temp (l_from,l_to) VALUES "
			: "INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES ";

		# NOTE(review): the chunks are paged with LIMIT/OFFSET and no ORDER BY, so this
		# assumes the server returns rows in the same order on every query -- confirm.
		for ( $rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval ) {
			$sqlRead = $dbw->limitResult( "SELECT * FROM $links ", $linksConvInsertInterval, $rowOffset );
			$res = $dbw->query( $sqlRead );

			$tuples = array();
			while ( $row = $dbw->fetchObject( $res ) ) {
				$fromTitle = $row->l_from;
				if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
					$from = $ids[$fromTitle];
					$to = $row->l_to;
					$tuples[] = "($from,$to)";
				} else { # invalid title
					$numBadLinks++;
				}
			}
			$dbw->freeResult( $res );

			$tuplesAdded = count( $tuples );
			if ( $tuplesAdded != 0 ) {
				if ( $reportLinksConvProgress ) {
					wfOut( "Inserting $tuplesAdded tuples into $links_temp..." );
				}
				$dbw->query( $insertPrefix . implode( ",", $tuples ) );
				$totalTuplesInserted += $tuplesAdded;
				if ( $reportLinksConvProgress ) {
					wfOut( " done. Total $totalTuplesInserted tuples inserted.\n" );
				}
				performanceLog( $totalTuplesInserted . " " . ( getMicroTime() - $baseTime ) . "\n" );
			}
		}
		wfOut( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n" );
		performanceLog( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" );
		performanceLog( "Total execution time: " . ( getMicroTime() - $startTime ) . " seconds.\n" );
		if ( $logPerformance ) {
			fclose( $fh );
		}
	}
	#--------------------------------------------------------------------

	if ( $overwriteLinksTable ) {
		$dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
		if ( !$dbConn->isOpen() ) {
			wfOut( "Opening connection to database failed.\n" );
			return;
		}
		# Check for existing links_backup, and delete it if it exists.
		# The second argument of query() is the caller name used for logging.
		wfOut( "Dropping backup links table if it exists..." );
		$dbConn->query( "DROP TABLE IF EXISTS $links_backup", __FUNCTION__ );
		wfOut( " done.\n" );

		# Swap in the new table, and move old links table to links_backup.
		# Every name goes through tableNamesN() so a configured prefix is honoured.
		wfOut( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." );
		$dbConn->query( "RENAME TABLE $links TO $links_backup, $links_temp TO $links", __FUNCTION__ );
		wfOut( " done.\n\n" );

		$dbConn->close();
		wfOut( "Conversion complete. The old table remains at $links_backup;\n" );
		wfOut( "delete at your leisure.\n" );
	} else {
		wfOut( "Conversion complete. The converted table is at $links_temp;\n" );
		wfOut( "the original links table is unchanged.\n" );
	}
}
175
176 #--------------------------------------------------------------------
177
/**
 * Drop any existing links_temp table and create a fresh, empty one.
 *
 * Connects with the admin credentials from $wgDBadminuser / $wgDBadminpassword.
 * When the global $noKeys is set the table is created without indexes;
 * otherwise it gets a unique key on (l_from,l_to) and a key on l_to.
 *
 * @return bool False if the database connection could not be opened,
 *              true once the table has been created.
 */
function createTempTable() {
	global $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
	global $noKeys;

	$dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
	if ( !$dbConn->isOpen() ) {
		wfOut( "Opening connection to database failed.\n" );
		return false;
	}
	$links_temp = $dbConn->tableName( 'links_temp' );

	wfOut( "Dropping temporary links table if it exists..." );
	$dbConn->query( "DROP TABLE IF EXISTS $links_temp" );
	wfOut( " done.\n" );

	wfOut( "Creating temporary links table..." );
	# Column definitions are shared by both variants; keys are appended on demand.
	$definitions = array(
		"l_from int(8) unsigned NOT NULL default '0'",
		"l_to int(8) unsigned NOT NULL default '0'",
	);
	if ( !$noKeys ) {
		$definitions[] = "UNIQUE KEY l_from(l_from,l_to)";
		$definitions[] = "KEY (l_to)";
	}
	$dbConn->query( "CREATE TABLE $links_temp ( " . implode( ", ", $definitions ) . ")" );
	wfOut( " done.\n\n" );

	# This connection was opened only for the DDL above, so release it here.
	$dbConn->close();
	return true;
}
207
/**
 * Append a line of performance data to the performance log, if enabled.
 *
 * Does nothing unless the global $logPerformance is true and the global $fh
 * is an open stream, so a log file that failed to open never triggers a
 * write on an invalid handle.
 *
 * @param $text String: text to write, including any trailing newline
 */
function performanceLog( $text ) {
	global $logPerformance, $fh;
	if ( $logPerformance && is_resource( $fh ) ) {
		fwrite( $fh, $text );
	}
}
214
/**
 * Return the current Unix timestamp in seconds, with microsecond accuracy.
 *
 * @return float
 */
function getMicroTime() {
	# microtime( true ) yields the seconds and microseconds already summed as a float.
	return microtime( true );
}