3 // Alternate 1.4 -> 1.5 schema upgrade
4 // This does only the main tables + UTF-8
5 // and is designed to allow upgrades to interleave
6 // with other updates on the replication stream so
7 // that large wikis can be upgraded without disrupting
10 require_once( 'commandLine.inc' );
12 $upgrade = new FiveUpgrade();
16 function FiveUpgrade() {
18 $this->conversionTables
= $this->prepareWindows1252();
19 $this->dbw
=& $this->newConnection();
20 $this->dbr
=& $this->newConnection();
21 $this->dbr
->bufferResults( false );
30 * Open a second connection to the master server, with buffering off.
31 * This will let us stream large datasets in and write in chunks on the
36 function &newConnection() {
37 global $wgDBadminuser, $wgDBadminpassword;
38 global $wgDBserver, $wgDBname;
39 $db =& new Database( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
44 * Prepare a conversion array for converting Windows Code Page 1252 to
45 * UTF-8. This should provide proper conversion of text that was miscoded
46 * as Windows-1252 by naughty user-agents, and doesn't rely on an outside
52 function prepareWindows1252() {
54 # http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
55 static $cp1252 = array(
56 0x80 => 0x20AC, #EURO SIGN
57 0x81 => UNICODE_REPLACEMENT
,
58 0x82 => 0x201A, #SINGLE LOW-9 QUOTATION MARK
59 0x83 => 0x0192, #LATIN SMALL LETTER F WITH HOOK
60 0x84 => 0x201E, #DOUBLE LOW-9 QUOTATION MARK
61 0x85 => 0x2026, #HORIZONTAL ELLIPSIS
62 0x86 => 0x2020, #DAGGER
63 0x87 => 0x2021, #DOUBLE DAGGER
64 0x88 => 0x02C6, #MODIFIER LETTER CIRCUMFLEX ACCENT
65 0x89 => 0x2030, #PER MILLE SIGN
66 0x8A => 0x0160, #LATIN CAPITAL LETTER S WITH CARON
67 0x8B => 0x2039, #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
68 0x8C => 0x0152, #LATIN CAPITAL LIGATURE OE
69 0x8D => UNICODE_REPLACEMENT
,
70 0x8E => 0x017D, #LATIN CAPITAL LETTER Z WITH CARON
71 0x8F => UNICODE_REPLACEMENT
,
72 0x90 => UNICODE_REPLACEMENT
,
73 0x91 => 0x2018, #LEFT SINGLE QUOTATION MARK
74 0x92 => 0x2019, #RIGHT SINGLE QUOTATION MARK
75 0x93 => 0x201C, #LEFT DOUBLE QUOTATION MARK
76 0x94 => 0x201D, #RIGHT DOUBLE QUOTATION MARK
77 0x95 => 0x2022, #BULLET
78 0x96 => 0x2013, #EN DASH
79 0x97 => 0x2014, #EM DASH
80 0x98 => 0x02DC, #SMALL TILDE
81 0x99 => 0x2122, #TRADE MARK SIGN
82 0x9A => 0x0161, #LATIN SMALL LETTER S WITH CARON
83 0x9B => 0x203A, #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
84 0x9C => 0x0153, #LATIN SMALL LIGATURE OE
85 0x9D => UNICODE_REPLACEMENT
,
86 0x9E => 0x017E, #LATIN SMALL LETTER Z WITH CARON
87 0x9F => 0x0178, #LATIN CAPITAL LETTER Y WITH DIAERESIS
90 for( $i = 0; $i < 0x100; $i++
) {
91 $unicode = isset( $cp1252[$i] ) ?
$cp1252[$i] : $i;
92 $pairs[chr( $i )] = codepointToUtf8( $unicode );
98 * Convert from 8-bit Windows-1252 to UTF-8 if necessary.
103 function conv( $text ) {
106 return strtr( $text, $this->conversionTables
);
113 * Dump timestamp and message to output
114 * @param string $message
117 function log( $message ) {
118 echo wfTimestamp( TS_DB
) . ': ' . $message . "\n";
123 * Initialize the chunked-insert system.
124 * Rows will be inserted in chunks of the given number, rather
125 * than in a giant INSERT...SELECT query, to keep the serialized
126 * MySQL database replication from getting hung up. This way other
127 * things can be going on during conversion without waiting for
128 * slaves to catch up as badly.
130 * @param int $chunksize Number of rows to insert at once
131 * @param int $final Total expected number of rows / id of last row,
132 * used for progress reports.
135 function setChunkScale( $chunksize, $final ) {
136 $this->chunkSize
= $chunksize;
137 $this->chunkFinal
= $final;
138 $this->chunkCount
= 0;
139 $this->chunkStartTime
= wfTime();
143 * Chunked inserts: perform an insert if we've reached the chunk limit.
144 * Prints a progress report with estimated completion time.
145 * @param string $table
146 * @param array &$chunk -- This will be emptied if an insert is done.
147 * @param string $fname function name to report in SQL
148 * @param int $key A key identifier to use in progress estimation in
149 * place of the number of rows inserted. Use this if
150 * you provided a max key number instead of a count
151 * as the final chunk number in setChunkScale()
154 function addChunk( $table, &$chunk, $fname, $key = null ) {
155 if( count( $chunk ) >= $this->chunkSize
) {
156 $this->insertChunk( $table, $chunk, $fname );
158 $this->chunkCount +
= count( $chunk );
160 $delta = $now - $this->chunkStartTime
;
161 $rate = $this->chunkCount
/ $delta;
163 if( is_null( $key ) ) {
164 $completed = $this->chunkCount
;
168 $portion = $completed / $this->chunkFinal
;
170 $estimatedTotalTime = $delta / $portion;
171 $eta = $now +
$estimatedTotalTime;
173 printf( "%s: %3.2f%% done on %s; ETA %s [%d/%d] %.2f/sec\n",
174 wfTimestamp( TS_DB
, intval( $now ) ),
177 wfTimestamp( TS_DB
, intval( $eta ) ),
188 * Chunked inserts: perform an insert unconditionally, at the end, and log.
189 * @param string $table
190 * @param array &$chunk -- This will be emptied if an insert is done.
191 * @param string $fname function name to report in SQL
194 function lastChunk( $table, &$chunk, $fname ) {
195 $n = count( $chunk );
197 $this->insertChunk( $table, $chunk, $fname );
199 $this->log( "100% done on $table (last chunk $n rows)." );
203 * Chunked inserts: perform an insert.
204 * @param string $table
205 * @param array &$chunk -- This will be emptied if an insert is done.
206 * @param string $fname function name to report in SQL
209 function insertChunk( $table, &$chunk, $fname ) {
210 $this->dbw
->insert( $table, $chunk, $fname );
213 function upgradePage() {
214 $fname = "FiveUpgrade::upgradePage";
217 $this->log( "...converting from cur/old to page/revision/text DB structure." );
219 extract( $this->dbw
->tableNames( 'cur', 'old', 'page', 'revision', 'text' ) );
221 $this->log( "Creating page and revision tables..." );
222 $this->dbw
->query("CREATE TABLE $page (
223 page_id int(8) unsigned NOT NULL auto_increment,
224 page_namespace int NOT NULL,
225 page_title varchar(255) binary NOT NULL,
226 page_restrictions tinyblob NOT NULL default '',
227 page_counter bigint(20) unsigned NOT NULL default '0',
228 page_is_redirect tinyint(1) unsigned NOT NULL default '0',
229 page_is_new tinyint(1) unsigned NOT NULL default '0',
230 page_random real unsigned NOT NULL,
231 page_touched char(14) binary NOT NULL default '',
232 page_latest int(8) unsigned NOT NULL,
233 page_len int(8) unsigned NOT NULL,
235 PRIMARY KEY page_id (page_id),
236 UNIQUE INDEX name_title (page_namespace,page_title),
239 ) TYPE=InnoDB", $fname );
240 $this->dbw
->query("CREATE TABLE $revision (
241 rev_id int(8) unsigned NOT NULL auto_increment,
242 rev_page int(8) unsigned NOT NULL,
243 rev_comment tinyblob NOT NULL default '',
244 rev_user int(5) unsigned NOT NULL default '0',
245 rev_user_text varchar(255) binary NOT NULL default '',
246 rev_timestamp char(14) binary NOT NULL default '',
247 rev_minor_edit tinyint(1) unsigned NOT NULL default '0',
248 rev_deleted tinyint(1) unsigned NOT NULL default '0',
250 PRIMARY KEY rev_page_id (rev_page, rev_id),
251 UNIQUE INDEX rev_id (rev_id),
252 INDEX rev_timestamp (rev_timestamp),
253 INDEX page_timestamp (rev_page,rev_timestamp),
254 INDEX user_timestamp (rev_user,rev_timestamp),
255 INDEX usertext_timestamp (rev_user_text,rev_timestamp)
256 ) TYPE=InnoDB", $fname );
258 $maxold = $this->dbw
->selectField( 'old', 'max(old_id)', '', $fname );
259 $this->log( "Last old record is {$maxold}" );
261 global $wgLegacySchemaConversion;
262 if( $wgLegacySchemaConversion ) {
263 // Create HistoryBlobCurStub entries.
264 // Text will be pulled from the leftover 'cur' table at runtime.
265 echo "......Moving metadata from cur; using blob references to text in cur table.\n";
266 $cur_text = "concat('O:18:\"historyblobcurstub\":1:{s:6:\"mCurId\";i:',cur_id,';}')";
267 $cur_flags = "'object'";
269 // Copy all cur text in immediately: this may take longer but avoids
270 // having to keep an extra table around.
271 echo "......Moving text from cur.\n";
272 $cur_text = 'cur_text';
276 $maxcur = $this->dbw
->selectField( 'cur', 'max(cur_id)', '', $fname );
277 $this->log( "Last cur entry is $maxcur" );
280 * Copy placeholder records for each page's current version into old
281 * Don't do any conversion here; text records are converted at runtime
282 * based on the flags (and may be originally binary!) while the meta
283 * fields will be converted in the old -> rev and cur -> page steps.
285 $this->setChunkScale( $chunksize, $maxcur );
286 $result = $this->dbr
->query(
287 "SELECT cur_id, cur_namespace, cur_title, $cur_text AS text, cur_comment,
288 cur_user, cur_user_text, cur_timestamp, cur_minor_edit, $cur_flags AS flags
290 ORDER BY cur_id", $fname );
292 while( $row = $this->dbr
->fetchObject( $result ) ) {
294 'old_namespace' => $row->cur_namespace
,
295 'old_title' => $row->cur_title
,
296 'old_text' => $row->text
,
297 'old_comment' => $row->cur_comment
,
298 'old_user' => $row->cur_user
,
299 'old_user_text' => $row->cur_user_text
,
300 'old_timestamp' => $row->cur_timestamp
,
301 'old_minor_edit' => $row->cur_minor_edit
,
302 'old_flags' => $row->flags
);
303 $this->addChunk( 'old', $add, $fname, $row->cur_id
);
305 $this->lastChunk( 'old', $add, $fname );
306 $this->dbr
->freeResult( $result );
309 * Copy revision metadata from old into revision.
310 * We'll also do UTF-8 conversion of usernames and comments.
312 $newmaxold = $this->dbw
->selectField( 'old', 'max(old_id)', '', $fname );
313 $this->setChunkScale( $chunksize, $newmaxold );
315 $this->log( "......Setting up revision table." );
316 $result = $this->dbr
->query(
317 "SELECT old_id, cur_id, old_comment, old_user, old_user_text,
318 old_timestamp, old_minor_edit
319 FROM $old,$cur WHERE old_namespace=cur_namespace AND old_title=cur_title
320 ORDER BY old_id", $fname );
323 while( $row = $this->dbr
->fetchObject( $result ) ) {
325 'rev_id' => $row->old_id
,
326 'rev_page' => $row->cur_id
,
327 'rev_comment' => $this->conv( $row->old_comment
),
328 'rev_user' => $row->old_user
,
329 'rev_user_text' => $this->conv( $row->old_user_text
),
330 'rev_timestamp' => $row->old_timestamp
,
331 'rev_minor_edit' => $row->old_minor_edit
);
332 $this->addChunk( 'revision', $add, $fname, $row->old_id
);
334 $this->lastChunk( 'revision', $add, $fname );
335 $this->dbr
->freeResult( $result );
339 * Copy revision metadata from cur into page.
340 * We'll also do UTF-8 conversion of titles.
342 $this->log( "......Setting up page table." );
343 $this->setChunkScale( $chunksize, $maxcur );
344 $result = $this->dbr
->query( "
345 SELECT cur_id, cur_namespace, cur_title, cur_restrictions, cur_counter, cur_is_redirect, cur_is_new,
346 cur_random, cur_touched, rev_id, LENGTH(cur_text) AS len
348 WHERE cur_id=rev_page AND rev_timestamp=cur_timestamp AND rev_id > {$maxold}
349 ORDER BY cur_id", $fname );
351 while( $row = $this->dbr
->fetchObject( $result ) ) {
353 'page_id' => $row->cur_id
,
354 'page_namespace' => $row->cur_namespace
,
355 'page_title' => $this->conv( $row->cur_title
),
356 'page_restrictions' => $row->cur_restrictions
,
357 'page_counter' => $row->cur_counter
,
358 'page_is_redirect' => $row->cur_is_redirect
,
359 'page_is_new' => $row->cur_is_new
,
360 'page_random' => $row->cur_random
,
361 'page_touched' => $this->dbw
->timestamp(),
362 'page_latest' => $row->rev_id
,
363 'page_len' => $row->len
);
364 $this->addChunk( 'page', $add, $fname, $row->cur_id
);
366 $this->lastChunk( 'page', $add, $fname );
367 $this->dbr
->freeResult( $result );
369 $this->log( "......Renaming old." );
370 $this->dbw
->query( "ALTER TABLE $old RENAME TO $text", $fname );
372 $this->log( "...done." );