3 ## Convert data from a MySQL mediawiki database into a Postgres mediawiki database
11 use vars
qw(%table %tz %special @torder $COM);
14 ## The following options can be changed via command line arguments:
15 my $MYSQLDB = 'wikidb';
16 my $MYSQLUSER = 'wikiuser';
18 ## If the following are zero-length, we omit their arguments entirely:
20 my $MYSQLPASSWORD = '';
23 ## Name of the dump file created
24 my $MYSQLDUMPFILE = "mediawiki_upgrade.pg";
26 ## How verbose should this script be (0, 1, or 2)
31 Convert a MediaWiki schema from MySQL to Postgres
32 Example: $0 --db=wikidb --user=wikiuser --pass=sushi
34 db Name of the MySQL database
35 user MySQL database username
36 pass MySQL database password
37 host MySQL database host
38 socket MySQL database socket
39 verbose Verbosity, increases with multiple uses
45 "user=s" => \$MYSQLUSER,
46 "pass=s" => \$MYSQLPASSWORD,
47 "host=s" => \$MYSQLHOST,
48 "socket=s" => \$MYSQLSOCKET,
49 "verbose+" => \$verbose
52 ## The Postgres schema file: should not be changed
53 my $PG_SCHEMA = "tables.sql";
55 ## What version we default to when we can't parse the old schema
56 my $MW_DEFAULT_VERSION = '1.9';
58 ## Try and find a working version of mysqldump
59 $verbose and warn "Locating the mysqldump executable\n";
60 my @MYSQLDUMP = ("/usr/local/bin/mysqldump", "/usr/bin/mysqldump");
62 for my $mytry (@MYSQLDUMP) {
64 -x $mytry or die qq{Not an executable file: "$mytry"\n};
65 my $version = qx{$mytry -V};
66 $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
69 $MYSQLDUMP or die qq{Could not find the mysqldump program\n};
71 ## Flags we use for mysqldump
72 my @MYSQLDUMPARGS = qw(
75 --skip-extended-insert
85 $verbose and warn "Checking that mysqldump can handle our flags\n";
86 ## Make sure this version can handle all the flags we want.
87 ## Combine with user dump below
88 my $MYSQLDUMPARGS = join " " => @MYSQLDUMPARGS;
89 ## Argh. Any way to make this work on Win32?
90 my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
91 if ($version =~ /unknown option/) {
92 die qq{Sorry
, you need to
use a newer version of the mysqldump program than the one at
"$MYSQLDUMP"\n};
95 push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
96 length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
97 length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
99 ## Open the dump file to hold the mysqldump output
100 open my $mdump, "+>", $MYSQLDUMPFILE or die qq{Could
not open "$MYSQLDUMPFILE": $!\n};
101 $verbose and warn qq{Writing file
"$MYSQLDUMPFILE"\n};
103 open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, "--no-data", $MYSQLDB;
104 my $oldselect = select $mdump;
106 print while <$mfork2>;
108 ## Slurp in the current schema
113 $current_schema = <$mdump>;
118 warn qq{Trying to determine database version
...\n} if $verbose;
120 my $current_version = 0;
121 if ($current_schema =~ /CREATE TABLE \S+cur /) {
122 $current_version = '1.3';
124 elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
125 $current_version = '1.4';
127 elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
128 $current_version = '1.5';
130 elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
131 $current_version = '1.6';
133 elsif ($current_schema !~ /ipb_auto tinyint/) {
134 $current_version = '1.7';
136 elsif ($current_schema !~ /CREATE TABLE \S+profiling /) {
137 $current_version = '1.8';
140 $current_version = '1.9';
143 if (!$current_version) {
144 warn qq{WARNING
! Could
not figure out the old version
, assuming MediaWiki
$MW_DEFAULT_VERSION\n};
145 $current_version = $MW_DEFAULT_VERSION;
148 ## Check for a table prefix:
149 my $table_prefix = '';
150 if ($current_version =~ /CREATE TABLE (\S+)archive /) {
154 warn qq{Old schema is from MediaWiki version
$current_version\n} if $verbose;
155 warn qq{Table prefix is
"$table_prefix"\n} if $verbose and length $table_prefix;
157 $verbose and warn qq{Writing file
"$MYSQLDUMPFILE"\n};
158 my $now = scalar localtime();
160 $MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
161 $MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";
164 -- Dump of MySQL Mediawiki tables
for import into a Postgres Mediawiki schema
165 -- Performed by the program
: $0
166 -- Version
: $VERSION (subversion
}.q{$LastChangedRevision$}.qq{)
167 -- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
169 -- This file was created: $now
170 -- Executable used: $MYSQLDUMP
171 -- Connection information:
172 -- database: $MYSQLDB
173 -- user: $MYSQLUSER$conninfo
175 -- This file can be imported manually with psql like so:
176 -- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
177 -- This will overwrite any existing MediaWiki information, so be careful
182 warn qq{Reading in the Postgres schema information\n} if $verbose;
183 open my $schema, "<", $PG_SCHEMA
184 or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
187 if (/CREATE TABLE\s+(\S+)/) {
191 elsif (/^ +(\w+)\s+TIMESTAMP/) {
194 elsif (/REFERENCES\s*([^( ]+)/) {
196 exists $table{$ref} or die qq{No parent table $ref found for $t\n};
202 ## Read in special cases and table/version information
203 $verbose and warn qq{Reading in schema exception information\n};
206 if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
207 my $list = join '|' => split /\s+/ => $2;
208 $version_tables{$1} = qr{\b$list\b};
211 next unless /^(\w+)\s*(.*)/;
212 $special{$1} = $2||'';
213 $special{$2} = $1 if length $2;
216 ## Determine the order of tables based on foreign key constraints
217 $verbose and warn qq{Figuring out order of tables to dump\n};
222 T: for my $t (sort keys %table) {
223 next if exists $dumped{$t} and $dumped{$t} >= 1;
225 for my $dep (sort keys %{$table{$t}}) {
226 next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
228 $dumped{$t} = -1 if ! exists $dumped{$t};
229 ## Skip certain tables that are not imported
230 next if exists $special{$t} and !$special{$t};
231 push @torder, $special{$t} || $t;
235 for (values %dumped) { $_+=2; }
236 die "Too many loops!\n" if $bail++ > 1000;
240 ## Prepare the Postgres database for the move
241 $verbose and warn qq{Writing Postgres transformation information\n};
243 print "\n-- Empty out all existing tables\n";
244 $verbose and warn qq{Writing truncates to empty existing tables\n};
245 for my $t (@torder) {
247 my $tname = $special{$t}||$t;
248 printf qq{TRUNCATE TABLE %-18s CASCADE;\n}, qq{"$tname"};
252 print qq{-- Rename the "text" table\n};
253 print qq{ALTER TABLE pagecontent RENAME TO "text";\n\n};
255 print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
256 print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
258 print "-- Changing all timestamp fields to handle raw integers\n";
259 for my $t (sort keys %tz) {
260 next if $t eq "archive2";
261 for my $c (sort keys %{$tz{$t}}) {
262 printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
268 INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
271 ## If we have a table _prefix, we need to temporarily rename all of our Postgres
272 ## tables temporarily for the import. Perhaps consider making this an auto-schema
273 ## thing in the future.
274 if (length $table_prefix) {
275 print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n};
276 for my $t (@torder) {
278 my $tname = $special{$t}||$t;
279 printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname"\n}, qq{"$tname"};
284 ## Try and dump the ill-named "user" table:
285 ## We do this table alone because "user" is a reserved word.
288 SET escape_string_warning TO 'off';
291 -- Postgres uses a table name of "mwuser" instead of "user"
293 -- Create a dummy user to satisfy fk contraints especially with revisions
294 SELECT setval('user_user_id_seq',0,'false');
296 VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
300 push @MYSQLDUMPARGS, "--no-create-info";
302 $verbose and warn qq{Dumping "user" table\n};
303 $verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
304 my $usertable = "${table_prefix}user";
305 open my $mfork, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
306 ## Unfortunately, there is no easy way to catch errors
309 ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
313 warn qq{No users found, probably a connection error.\n};
314 print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
318 print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
320 warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
322 ## Dump the rest of the tables, in chunks based on constraints
323 ## We do not need the user table:
324 my @dumplist = grep { $_ ne 'user'} @torder;
329 my $tname = shift @dumplist;
330 ## XXX Make this dynamic below
331 for my $ver (sort {$b <=> $a } keys %version_tables) {
332 redo PICKATABLE if $tname =~ $version_tables{$ver};
334 $tname = "${table_prefix}$tname" if length $table_prefix;
336 pop @alist and last if index($alist[-1],'---') >= 0;
340 ## Dump everything else
341 open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
342 print while <$mfork2>;
344 warn qq{Finished dumping from MySQL\n} if $verbose;
349 warn qq{Writing information to return Postgres database to normal\n} if $verbose;
350 print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n};
351 print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
352 print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
354 ## Return tables to their original names if a table prefix was used.
355 if (length $table_prefix) {
356 print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
359 $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
361 for my $t (@torder) {
362 next if $t eq '---' or $t eq 'text';
363 my $tname = $special{$t}||$t;
364 printf qq{ALTER TABLE %*s RENAME TO "$tname"\n}, $maxsize+1, qq{"${table_prefix}$tname"};
368 print qq{\n\n--Returning timestamps to normal\n};
369 for my $t (sort keys %tz) {
370 next if $t eq "archive2";
371 for my $c (sort keys %{$tz{$t}}) {
372 printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
373 " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
377 ## Finally, make a record in the mediawiki_version table about this import
379 INSERT INTO mediawiki_version (type,mw_version,notes) VALUES ('MySQL import','??',
380 'Imported from file created on $now. Old version: $current_version');
384 print "\\o\n\n-- End of dump\n\n";
391 ## Known remappings: either indicate the MySQL name,
392 ## or leave blank if it should be skipped
400 ## Which tables to ignore depending on the version
401 VERSION 1.5: trackback
402 VERSION 1.6: externallinks job templatelinks transcache
403 VERSION 1.7: filearchive langlinks querycache_info