Bump default version, put in check for 1.8 schema.
[lhc/web/wiklou.git] / maintenance / postgres / wp_mysql2postgres.pl
1 #!/usr/bin/perl
2
3 ## Convert data from a MySQL mediawiki database into a Postgres mediawiki database
4 ## svn: $Id$
5
6 use strict;
7 use warnings;
8 use Data::Dumper;
9 use Getopt::Long;
10
11 use vars qw(%table %tz %special @torder $COM);
12 my $VERSION = "1.0";
13
14 ## The following options can be changed via command line arguments:
15 my $MYSQLDB = 'wikidb';
16 my $MYSQLUSER = 'wikiuser';
17
18 ## If the following are zero-length, we omit their arguments entirely:
19 my $MYSQLHOST = '';
20 my $MYSQLPASSWORD = '';
21 my $MYSQLSOCKET = '';
22
23 ## Name of the dump file created
24 my $MYSQLDUMPFILE = "mediawiki_upgrade.pg";
25
26 ## How verbose should this script be (0, 1, or 2)
27 my $verbose = 0;
28
29 my $USAGE = "
30 Usage: $0 [OPTION]...
31 Convert a MediaWiki schema from MySQL to Postgres
32 Example: $0 --db=wikidb --user=wikiuser --pass=sushi
33 Options:
34 db Name of the MySQL database
35 user MySQL database username
36 pass MySQL database password
37 host MySQL database host
38 socket MySQL database socket
39 verbose Verbosity, increases with multiple uses
40 ";
41
42 GetOptions
43 (
44 "db=s" => \$MYSQLDB,
45 "user=s" => \$MYSQLUSER,
46 "pass=s" => \$MYSQLPASSWORD,
47 "host=s" => \$MYSQLHOST,
48 "socket=s" => \$MYSQLSOCKET,
49 "verbose+" => \$verbose
50 );
51
52 ## The Postgres schema file: should not be changed
53 my $PG_SCHEMA = "tables.sql";
54
55 ## What version we default to when we can't parse the old schema
56 my $MW_DEFAULT_VERSION = '1.9';
57
58 ## Try and find a working version of mysqldump
59 $verbose and warn "Locating the mysqldump executable\n";
60 my @MYSQLDUMP = ("/usr/local/bin/mysqldump", "/usr/bin/mysqldump");
61 my $MYSQLDUMP;
62 for my $mytry (@MYSQLDUMP) {
63 next if ! -e $mytry;
64 -x $mytry or die qq{Not an executable file: "$mytry"\n};
65 my $version = qx{$mytry -V};
66 $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
67 $MYSQLDUMP = $mytry;
68 }
69 $MYSQLDUMP or die qq{Could not find the mysqldump program\n};
70
71 ## Flags we use for mysqldump
72 my @MYSQLDUMPARGS = qw(
73 --skip-lock-tables
74 --complete-insert
75 --skip-extended-insert
76 --skip-add-drop-table
77 --skip-add-locks
78 --skip-disable-keys
79 --skip-set-charset
80 --skip-comments
81 --skip-quote-names
82 );
83
84
85 $verbose and warn "Checking that mysqldump can handle our flags\n";
86 ## Make sure this version can handle all the flags we want.
87 ## Combine with user dump below
88 my $MYSQLDUMPARGS = join " " => @MYSQLDUMPARGS;
89 ## Argh. Any way to make this work on Win32?
90 my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
91 if ($version =~ /unknown option/) {
92 die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n};
93 }
94
95 push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
96 length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
97 length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
98
99 ## Open the dump file to hold the mysqldump output
100 open my $mdump, "+>", $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n};
101 $verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
102
103 open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, "--no-data", $MYSQLDB;
104 my $oldselect = select $mdump;
105
106 print while <$mfork2>;
107
108 ## Slurp in the current schema
109 my $current_schema;
110 seek $mdump, 0, 0;
111 {
112 local $/;
113 $current_schema = <$mdump>;
114 }
115 seek $mdump, 0, 0;
116 truncate $mdump, 0;
117
118 warn qq{Trying to determine database version...\n} if $verbose;
119
120 my $current_version = 0;
121 if ($current_schema =~ /CREATE TABLE \S+cur /) {
122 $current_version = '1.3';
123 }
124 elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
125 $current_version = '1.4';
126 }
127 elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
128 $current_version = '1.5';
129 }
130 elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
131 $current_version = '1.6';
132 }
133 elsif ($current_schema !~ /ipb_auto tinyint/) {
134 $current_version = '1.7';
135 }
136 elsif ($current_schema !~ /CREATE TABLE \S+profiling /) {
137 $current_version = '1.8';
138 }
139 else {
140 $current_version = '1.9';
141 }
142
143 if (!$current_version) {
144 warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n};
145 $current_version = $MW_DEFAULT_VERSION;
146 }
147
148 ## Check for a table prefix:
149 my $table_prefix = '';
150 if ($current_version =~ /CREATE TABLE (\S+)archive /) {
151 $table_prefix = $1;
152 }
153
154 warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose;
155 warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix;
156
157 $verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
158 my $now = scalar localtime();
159 my $conninfo = '';
160 $MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
161 $MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";
162
163 print qq{
164 -- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema
165 -- Performed by the program: $0
166 -- Version: $VERSION (subversion }.q{$LastChangedRevision$}.qq{)
167 -- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
168 --
169 -- This file was created: $now
170 -- Executable used: $MYSQLDUMP
171 -- Connection information:
172 -- database: $MYSQLDB
173 -- user: $MYSQLUSER$conninfo
174
175 -- This file can be imported manually with psql like so:
176 -- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
177 -- This will overwrite any existing MediaWiki information, so be careful
178
179
180 };
181
182 warn qq{Reading in the Postgres schema information\n} if $verbose;
183 open my $schema, "<", $PG_SCHEMA
184 or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
185 my $t;
186 while (<$schema>) {
187 if (/CREATE TABLE\s+(\S+)/) {
188 $t = $1;
189 $table{$t}={};
190 }
191 elsif (/^ +(\w+)\s+TIMESTAMP/) {
192 $tz{$t}{$1}++;
193 }
194 elsif (/REFERENCES\s*([^( ]+)/) {
195 my $ref = $1;
196 exists $table{$ref} or die qq{No parent table $ref found for $t\n};
197 $table{$t}{$ref}++;
198 }
199 }
200 close $schema;
201
202 ## Read in special cases and table/version information
203 $verbose and warn qq{Reading in schema exception information\n};
204 my %version_tables;
205 while (<DATA>) {
206 if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
207 my $list = join '|' => split /\s+/ => $2;
208 $version_tables{$1} = qr{\b$list\b};
209 next;
210 }
211 next unless /^(\w+)\s*(.*)/;
212 $special{$1} = $2||'';
213 $special{$2} = $1 if length $2;
214 }
215
216 ## Determine the order of tables based on foreign key constraints
217 $verbose and warn qq{Figuring out order of tables to dump\n};
218 my %dumped;
219 my $bail = 0;
220 {
221 my $found=0;
222 T: for my $t (sort keys %table) {
223 next if exists $dumped{$t} and $dumped{$t} >= 1;
224 $found=1;
225 for my $dep (sort keys %{$table{$t}}) {
226 next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
227 }
228 $dumped{$t} = -1 if ! exists $dumped{$t};
229 ## Skip certain tables that are not imported
230 next if exists $special{$t} and !$special{$t};
231 push @torder, $special{$t} || $t;
232 }
233 last if !$found;
234 push @torder, "---";
235 for (values %dumped) { $_+=2; }
236 die "Too many loops!\n" if $bail++ > 1000;
237 redo;
238 }
239
240 ## Prepare the Postgres database for the move
241 $verbose and warn qq{Writing Postgres transformation information\n};
242
243 print "\n-- Empty out all existing tables\n";
244 $verbose and warn qq{Writing truncates to empty existing tables\n};
245 for my $t (@torder) {
246 next if $t eq '---';
247 my $tname = $special{$t}||$t;
248 printf qq{TRUNCATE TABLE %-18s CASCADE;\n}, qq{"$tname"};
249 }
250 print "\n\n";
251
252 print qq{-- Rename the "text" table\n};
253 print qq{ALTER TABLE pagecontent RENAME TO "text";\n\n};
254
255 print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
256 print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
257
258 print "-- Changing all timestamp fields to handle raw integers\n";
259 for my $t (sort keys %tz) {
260 next if $t eq "archive2";
261 for my $c (sort keys %{$tz{$t}}) {
262 printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
263 }
264 }
265 print "\n";
266
267 print qq{
268 INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
269 };
270
271 ## If we have a table _prefix, we need to temporarily rename all of our Postgres
272 ## tables temporarily for the import. Perhaps consider making this an auto-schema
273 ## thing in the future.
274 if (length $table_prefix) {
275 print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n};
276 for my $t (@torder) {
277 next if $t eq '---';
278 my $tname = $special{$t}||$t;
279 printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname"\n}, qq{"$tname"};
280 }
281 }
282
283
284 ## Try and dump the ill-named "user" table:
285 ## We do this table alone because "user" is a reserved word.
286 print qq{
287
288 SET escape_string_warning TO 'off';
289 \\o /dev/null
290
291 -- Postgres uses a table name of "mwuser" instead of "user"
292
293 -- Create a dummy user to satisfy fk contraints especially with revisions
294 SELECT setval('user_user_id_seq',0,'false');
295 INSERT INTO mwuser
296 VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
297
298 };
299
300 push @MYSQLDUMPARGS, "--no-create-info";
301
302 $verbose and warn qq{Dumping "user" table\n};
303 $verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
304 my $usertable = "${table_prefix}user";
305 open my $mfork, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
306 ## Unfortunately, there is no easy way to catch errors
307 my $numusers = 0;
308 while (<$mfork>) {
309 ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
310 }
311 close $mfork;
312 if ($numusers < 1) {
313 warn qq{No users found, probably a connection error.\n};
314 print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
315 close $mdump;
316 exit;
317 }
318 print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
319
320 warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
321
322 ## Dump the rest of the tables, in chunks based on constraints
323 ## We do not need the user table:
324 my @dumplist = grep { $_ ne 'user'} @torder;
325 my @alist;
326 {
327 undef @alist;
328 PICKATABLE: {
329 my $tname = shift @dumplist;
330 ## XXX Make this dynamic below
331 for my $ver (sort {$b <=> $a } keys %version_tables) {
332 redo PICKATABLE if $tname =~ $version_tables{$ver};
333 }
334 $tname = "${table_prefix}$tname" if length $table_prefix;
335 push @alist, $tname;
336 pop @alist and last if index($alist[-1],'---') >= 0;
337 redo if @dumplist;
338 }
339
340 ## Dump everything else
341 open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
342 print while <$mfork2>;
343 close $mfork2;
344 warn qq{Finished dumping from MySQL\n} if $verbose;
345
346 redo if @dumplist;
347 }
348
349 warn qq{Writing information to return Postgres database to normal\n} if $verbose;
350 print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n};
351 print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
352 print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
353
354 ## Return tables to their original names if a table prefix was used.
355 if (length $table_prefix) {
356 print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
357 my $maxsize = 18;
358 for (@torder) {
359 $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
360 }
361 for my $t (@torder) {
362 next if $t eq '---' or $t eq 'text';
363 my $tname = $special{$t}||$t;
364 printf qq{ALTER TABLE %*s RENAME TO "$tname"\n}, $maxsize+1, qq{"${table_prefix}$tname"};
365 }
366 }
367
368 print qq{\n\n--Returning timestamps to normal\n};
369 for my $t (sort keys %tz) {
370 next if $t eq "archive2";
371 for my $c (sort keys %{$tz{$t}}) {
372 printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
373 " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
374 }
375 }
376
377 ## Finally, make a record in the mediawiki_version table about this import
378 print qq{
379 INSERT INTO mediawiki_version (type,mw_version,notes) VALUES ('MySQL import','??',
380 'Imported from file created on $now. Old version: $current_version');
381 };
382
383
384 print "\\o\n\n-- End of dump\n\n";
385 select $oldselect;
386 close $mdump;
387 exit;
388
389
390 __DATA__
391 ## Known remappings: either indicate the MySQL name,
392 ## or leave blank if it should be skipped
393 pagecontent text
394 mwuser user
395 mediawiki_version
396 archive2
397 profiling
398 objectcache
399
400 ## Which tables to ignore depending on the version
401 VERSION 1.5: trackback
402 VERSION 1.6: externallinks job templatelinks transcache
403 VERSION 1.7: filearchive langlinks querycache_info