Require dbname and user, fix sequences, updates schema a bit, other minor updates.
[lhc/web/wiklou.git] / maintenance / postgres / wp_mysql2postgres.pl
1 #!/usr/bin/perl
2
3 ## Convert data from a MySQL mediawiki database into a Postgres mediawiki database
4 ## svn: $Id$
5
6 use strict;
7 use warnings;
8 use Data::Dumper;
9 use Getopt::Long;
10
11 use vars qw(%table %tz %special @torder $COM);
12 my $VERSION = "1.1";
13
14 ## The following options can be changed via command line arguments:
15 my $MYSQLDB = '';
16 my $MYSQLUSER = '';
17
18 ## If the following are zero-length, we omit their arguments entirely:
19 my $MYSQLHOST = '';
20 my $MYSQLPASSWORD = '';
21 my $MYSQLSOCKET = '';
22
23 ## Name of the dump file created
24 my $MYSQLDUMPFILE = "mediawiki_upgrade.pg";
25
26 ## How verbose should this script be (0, 1, or 2)
27 my $verbose = 0;
28
29 my $help = 0;
30
31 my $USAGE = "
32 Usage: $0 --db=<dbname> --user=<user> [OPTION]...
33 Example: $0 --db=wikidb --user=wikiuser --pass=sushi
34
35 Converts a MediaWiki schema from MySQL to Postgres
36 Options:
37 db Name of the MySQL database
38 user MySQL database username
39 pass MySQL database password
40 host MySQL database host
41 socket MySQL database socket
42 verbose Verbosity, increases with multiple uses
43 ";
44
45 GetOptions
46 (
47 "db=s" => \$MYSQLDB,
48 "user=s" => \$MYSQLUSER,
49 "pass=s" => \$MYSQLPASSWORD,
50 "host=s" => \$MYSQLHOST,
51 "socket=s" => \$MYSQLSOCKET,
52 "verbose+" => \$verbose,
53 "help" => \$help,
54 );
55
56 die $USAGE
57 if ! length $MYSQLDB
58 or ! length $MYSQLUSER
59 or $help;
60
61 ## The Postgres schema file: should not be changed
62 my $PG_SCHEMA = "tables.sql";
63
64 ## What version we default to when we can't parse the old schema
65 my $MW_DEFAULT_VERSION = '1.10';
66
67 ## Try and find a working version of mysqldump
68 $verbose and warn "Locating the mysqldump executable\n";
69 my @MYSQLDUMP = ("/usr/local/bin/mysqldump", "/usr/bin/mysqldump");
70 my $MYSQLDUMP;
71 for my $mytry (@MYSQLDUMP) {
72 next if ! -e $mytry;
73 -x $mytry or die qq{Not an executable file: "$mytry"\n};
74 my $version = qx{$mytry -V};
75 $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
76 $MYSQLDUMP = $mytry;
77 }
78 $MYSQLDUMP or die qq{Could not find the mysqldump program\n};
79
80 ## Flags we use for mysqldump
81 my @MYSQLDUMPARGS = qw(
82 --skip-lock-tables
83 --complete-insert
84 --skip-extended-insert
85 --skip-add-drop-table
86 --skip-add-locks
87 --skip-disable-keys
88 --skip-set-charset
89 --skip-comments
90 --skip-quote-names
91 );
92
93
94 $verbose and warn "Checking that mysqldump can handle our flags\n";
95 ## Make sure this version can handle all the flags we want.
96 ## Combine with user dump below
97 my $MYSQLDUMPARGS = join " " => @MYSQLDUMPARGS;
98 ## Argh. Any way to make this work on Win32?
99 my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
100 if ($version =~ /unknown option/) {
101 die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n};
102 }
103
104 push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
105 length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
106 length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
107
108 ## Open the dump file to hold the mysqldump output
109 open my $mdump, "+>", $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n};
110 print qq{Writing file "$MYSQLDUMPFILE"\n};
111
112 open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, "--no-data", $MYSQLDB;
113 my $oldselect = select $mdump;
114
115 print while <$mfork2>;
116
117 ## Slurp in the current schema
118 my $current_schema;
119 seek $mdump, 0, 0;
120 {
121 local $/;
122 $current_schema = <$mdump>;
123 }
124 seek $mdump, 0, 0;
125 truncate $mdump, 0;
126
127 warn qq{Trying to determine database version...\n} if $verbose;
128
129 my $current_version = 0;
130 if ($current_schema =~ /CREATE TABLE \S+cur /) {
131 $current_version = '1.3';
132 }
133 elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
134 $current_version = '1.4';
135 }
136 elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
137 $current_version = '1.5';
138 }
139 elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
140 $current_version = '1.6';
141 }
142 elsif ($current_schema !~ /ipb_auto tinyint/) {
143 $current_version = '1.7';
144 }
145 elsif ($current_schema !~ /CREATE TABLE \S+profiling /) {
146 $current_version = '1.8';
147 }
148 elsif ($current_schema !~ /CREATE TABLE \S+querycachetwo /) {
149 $current_version = '1.9';
150 }
151 else {
152 $current_version = '$MW_DEFAULT_VERSION';
153 }
154
155 if (!$current_version) {
156 warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n};
157 $current_version = $MW_DEFAULT_VERSION;
158 }
159
160 ## Check for a table prefix:
161 my $table_prefix = '';
162 if ($current_version =~ /CREATE TABLE (\S+)archive /) {
163 $table_prefix = $1;
164 }
165
166 warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose;
167 warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix;
168
169 $verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
170 my $now = scalar localtime();
171 my $conninfo = '';
172 $MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
173 $MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";
174
175 print qq{
176 -- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema
177 -- Performed by the program: $0
178 -- Version: $VERSION (subversion }.q{$LastChangedRevision$}.qq{)
179 -- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
180 --
181 -- This file was created: $now
182 -- Executable used: $MYSQLDUMP
183 -- Connection information:
184 -- database: $MYSQLDB
185 -- user: $MYSQLUSER$conninfo
186
187 -- This file can be imported manually with psql like so:
188 -- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
189 -- This will overwrite any existing MediaWiki information, so be careful
190
191 };
192
193 ## psql specific stuff
194 print qq{
195 \\set ON_ERROR_STOP
196 SET client_min_messages = 'WARNING';
197 };
198
199 warn qq{Reading in the Postgres schema information\n} if $verbose;
200 open my $schema, "<", $PG_SCHEMA
201 or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
202 my $t;
203 while (<$schema>) {
204 if (/CREATE TABLE\s+(\S+)/) {
205 $t = $1;
206 $table{$t}={};
207 }
208 elsif (/^ +(\w+)\s+TIMESTAMP/) {
209 $tz{$t}{$1}++;
210 }
211 elsif (/REFERENCES\s*([^( ]+)/) {
212 my $ref = $1;
213 exists $table{$ref} or die qq{No parent table $ref found for $t\n};
214 $table{$t}{$ref}++;
215 }
216 }
217 close $schema;
218
219 ## Read in special cases and table/version information
220 $verbose and warn qq{Reading in schema exception information\n};
221 my %version_tables;
222 while (<DATA>) {
223 if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
224 my $list = join '|' => split /\s+/ => $2;
225 $version_tables{$1} = qr{\b$list\b};
226 next;
227 }
228 next unless /^(\w+)\s*(.*)/;
229 $special{$1} = $2||'';
230 $special{$2} = $1 if length $2;
231 }
232
233 ## Determine the order of tables based on foreign key constraints
234 $verbose and warn qq{Figuring out order of tables to dump\n};
235 my %dumped;
236 my $bail = 0;
237 {
238 my $found=0;
239 T: for my $t (sort keys %table) {
240 next if exists $dumped{$t} and $dumped{$t} >= 1;
241 $found=1;
242 for my $dep (sort keys %{$table{$t}}) {
243 next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
244 }
245 $dumped{$t} = -1 if ! exists $dumped{$t};
246 ## Skip certain tables that are not imported
247 next if exists $special{$t} and !$special{$t};
248 push @torder, $special{$t} || $t;
249 }
250 last if !$found;
251 push @torder, "---";
252 for (values %dumped) { $_+=2; }
253 die "Too many loops!\n" if $bail++ > 1000;
254 redo;
255 }
256
257 ## Prepare the Postgres database for the move
258 $verbose and warn qq{Writing Postgres transformation information\n};
259
260 print "\n-- Empty out all existing tables\n";
261 $verbose and warn qq{Writing truncates to empty existing tables\n};
262 for my $t (@torder "objectcache", "querycache") {
263 next if $t eq '---';
264 my $tname = $special{$t}||$t;
265 printf qq{TRUNCATE TABLE %-20s CASCADE;\n}, qq{"$tname"};
266 }
267 print "\n\n";
268
269 print qq{-- Temporarily rename pagecontent to "text"\n};
270 print qq{ALTER TABLE pagecontent RENAME TO "text";\n\n};
271
272 print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
273 print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
274
275 print "-- Changing all timestamp fields to handle raw integers\n";
276 for my $t (sort keys %tz) {
277 next if $t eq "archive2";
278 for my $c (sort keys %{$tz{$t}}) {
279 printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
280 }
281 }
282 print "\n";
283
284 print qq{
285 INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
286 };
287
288 ## If we have a table _prefix, we need to temporarily rename all of our Postgres
289 ## tables temporarily for the import. Perhaps consider making this an auto-schema
290 ## thing in the future.
291 if (length $table_prefix) {
292 print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n};
293 for my $t (@torder) {
294 next if $t eq '---';
295 my $tname = $special{$t}||$t;
296 printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname"\n}, qq{"$tname"};
297 }
298 }
299
300
301 ## Try and dump the ill-named "user" table:
302 ## We do this table alone because "user" is a reserved word.
303 print qq{
304
305 SET escape_string_warning TO 'off';
306 \\o /dev/null
307
308 -- Postgres uses a table name of "mwuser" instead of "user"
309
310 -- Create a dummy user to satisfy fk contraints especially with revisions
311 SELECT setval('user_user_id_seq',0,'false');
312 INSERT INTO mwuser
313 VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
314
315 };
316
317 push @MYSQLDUMPARGS, "--no-create-info";
318
319 $verbose and warn qq{Dumping "user" table\n};
320 $verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
321 my $usertable = "${table_prefix}user";
322 open my $mfork, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
323 ## Unfortunately, there is no easy way to catch errors
324 my $numusers = 0;
325 while (<$mfork>) {
326 ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
327 }
328 close $mfork;
329 if ($numusers < 1) {
330 warn qq{No users found, probably a connection error.\n};
331 print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
332 close $mdump;
333 exit;
334 }
335 print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
336
337 warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
338
339 ## Dump the rest of the tables, in chunks based on constraints
340 ## We do not need the user table:
341 my @dumplist = grep { $_ ne 'user'} @torder;
342 my @alist;
343 {
344 undef @alist;
345 PICKATABLE: {
346 my $tname = shift @dumplist;
347 ## XXX Make this dynamic below
348 for my $ver (sort {$b <=> $a } keys %version_tables) {
349 redo PICKATABLE if $tname =~ $version_tables{$ver};
350 }
351 $tname = "${table_prefix}$tname" if length $table_prefix;
352 next if $tname !~ /^\w/;
353 push @alist, $tname;
354 $verbose and warn " $tname...\n";
355 pop @alist and last if index($alist[-1],'---') >= 0;
356 redo if @dumplist;
357 }
358
359 ## Dump everything else
360 open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
361 print while <$mfork2>;
362 close $mfork2;
363 warn qq{Finished dumping from MySQL\n} if $verbose;
364
365 redo if @dumplist;
366 }
367
368 warn qq{Writing information to return Postgres database to normal\n} if $verbose;
369 print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n};
370 print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
371 print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
372
373 ## Return tables to their original names if a table prefix was used.
374 if (length $table_prefix) {
375 print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
376 my $maxsize = 18;
377 for (@torder) {
378 $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
379 }
380 for my $t (@torder) {
381 next if $t eq '---' or $t eq 'text';
382 my $tname = $special{$t}||$t;
383 printf qq{ALTER TABLE %*s RENAME TO "$tname"\n}, $maxsize+1, qq{"${table_prefix}$tname"};
384 }
385 }
386
387 print qq{\n\n--Returning timestamps to normal\n};
388 for my $t (sort keys %tz) {
389 next if $t eq "archive2";
390 for my $c (sort keys %{$tz{$t}}) {
391 printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
392 " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
393 }
394 }
395
396 ## Reset sequences
397 print qq{
398 SELECT setval('filearchive_fa_id_seq', 1+coalesce(max(fa_id) ,0),false) FROM filearchive;
399 SELECT setval('ipblocks_ipb_id_val', 1+coalesce(max(ipb_id) ,0),false) FROM ipblocks;
400 SELECT setval('job_job_id_seq', 1+coalesce(max(job_id) ,0),false) FROM job;
401 SELECT setval('log_log_id_seq', 1+coalesce(max(log_id) ,0),false) FROM logging;
402 SELECT setval('page_page_id_seq', 1+coalesce(max(page_id),0),false) FROM page;
403 SELECT setval('pr_id_val', 1+coalesce(max(pr_id) ,0),false) FROM page_restrictions;
404 SELECT setval('rc_rc_id_seq', 1+coalesce(max(rc_id) ,0),false) FROM recentchanges;
405 SELECT setval('rev_rev_id_val', 1+coalesce(max(rev_id) ,0),false) FROM revision;
406 SELECT setval('text_old_id_val', 1+coalesce(max(old_id) ,0),false) FROM pagecontent;
407 SELECT setval('trackbacks_tb_id_seq', 1+coalesce(max(tb_id) ,0),false) FROM trackbacks;
408 SELECT setval('user_user_id_seq', 1+coalesce(max(user_id),0),false) FROM mwuser;
409 };
410
411 ## Finally, make a record in the mediawiki_version table about this import
412 print qq{
413 INSERT INTO mediawiki_version (type,mw_version,notes) VALUES ('MySQL import','??',
414 'Imported from file created on $now. Old version: $current_version');
415 };
416
417 print "\\o\n\n-- End of dump\n\n";
418 select $oldselect;
419 close $mdump;
420 exit;
421
422
423 __DATA__
424 ## Known remappings: either indicate the MySQL name,
425 ## or leave blank if it should be skipped
426 pagecontent text
427 mwuser user
428 mediawiki_version
429 archive2
430 profiling
431 objectcache
432
433 ## Which tables to ignore depending on the version
434 VERSION 1.5: trackback
435 VERSION 1.6: externallinks job templatelinks transcache
436 VERSION 1.7: filearchive langlinks querycache_info
437 VERSION 1.9: querycachetwo page_restrictions redirect