}
elsif (/^ (\w+) $datatype$typeval$typeval2{0,3},?$/) {
$info{$table}{column}{$1} = $2;
+ my $extra = $3 || '';
+ $info{$table}{columnfull}{$1} = "$2$extra";
}
elsif (/^ ($indextype)(?: (\w+))? \(([\w, \(\)]+)\),?$/) {
$info{$table}{lc $1.'_name'} = $2 ? $2 : '';
}
}
+## Which column types are okay to map from mysql to postgres?
+my $COLMAP = q{
+## INTS:
+tinyint SMALLINT
+int INTEGER SERIAL
+bigint BIGINT
+real NUMERIC
+float NUMERIC
+
+## TEXT:
+varchar(32) TEXT
+varchar(70) TEXT
+varchar(255) TEXT
+varchar TEXT
+text TEXT
+tinytext TEXT
+ENUM TEXT
+
+## TIMESTAMPS:
+varbinary(14) TIMESTAMPTZ
+binary(14) TIMESTAMPTZ
+datetime TIMESTAMPTZ
+timestamp TIMESTAMPTZ
+
+## BYTEA:
+mediumblob BYTEA
+
+## OTHER:
+bool CHAR # Sigh
+
+};
+## Allow specific exceptions to the above
+my $COLMAPOK = q{
+## User inputted text strings:
+ar_comment tinyblob TEXT
+fa_description tinyblob TEXT
+img_description tinyblob TEXT
+ipb_reason tinyblob TEXT
+log_action varbinary(10) TEXT
+oi_description tinyblob TEXT
+rev_comment tinyblob TEXT
+rc_log_action varbinary(255) TEXT
+rc_log_type varbinary(255) TEXT
+
+## Simple text-only strings:
+ar_flags tinyblob TEXT
+fa_minor_mime varbinary(32) TEXT
+fa_storage_group varbinary(16) TEXT # Just 'deleted' for now, should stay plain text
+fa_storage_key varbinary(64) TEXT # sha1 plus text extension
+ipb_address tinyblob TEXT # IP address or username
+ipb_range_end tinyblob TEXT # hexadecimal
+ipb_range_start tinyblob TEXT # hexadecimal
+img_minor_mime varbinary(32) TEXT
+img_sha1 varbinary(32) TEXT
+job_cmd varbinary(60) TEXT # Should we limit to 60 as well?
+keyname varbinary(255) TEXT # No tablename prefix (objectcache)
+ll_lang varbinary(20) TEXT # Language code
+log_params blob TEXT # LF separated list of args
+log_type varbinary(10) TEXT
+oi_minor_mime varbinary(32) TEXT
+oi_sha1 varbinary(32) TEXT
+old_flags tinyblob TEXT
+old_text mediumblob TEXT
+page_restrictions tinyblob TEXT # CSV string
+pf_server varchar(30) TEXT
+pr_level varbinary(60) TEXT
+pr_type varbinary(60) TEXT
+qc_type varbinary(32) TEXT
+qcc_type varbinary(32) TEXT
+qci_type varbinary(32) TEXT
+rc_params blob TEXT
+ug_group varbinary(16) TEXT
+user_email_token binary(32) TEXT
+user_ip varbinary(40) TEXT
+user_newpassword tinyblob TEXT
+user_options blob TEXT
+user_password tinyblob TEXT
+user_token binary(32) TEXT
+
+## Text URLs:
+el_index blob TEXT
+el_to blob TEXT
+iw_url blob TEXT
+tb_url blob TEXT
+tc_url varbinary(255) TEXT
+
+## Deprecated or not yet used:
+ar_text mediumblob TEXT
+job_params blob TEXT
+log_deleted tinyint INTEGER # Not used yet, but keep it INTEGER for safety
+rc_type tinyint CHAR
+
+## Number tweaking:
+fa_bits int SMALLINT # bits per pixel
+fa_height int SMALLINT
+fa_width int SMALLINT # Hope we don't see an image this wide...
+hc_id int BIGINT # Odd that site_stats is all bigint...
+img_bits int SMALLINT # bits per image should stay sane
+oi_bits int SMALLINT
+
+## True binary fields, usually due to gzdeflate and/or serialize:
+math_inputhash varbinary(16) BYTEA
+math_outputhash varbinary(16) BYTEA
+
+## Namespaces: not need for such a high range
+ar_namespace int SMALLINT
+job_namespace int SMALLINT
+log_namespace int SMALLINT
+page_namespace int SMALLINT
+pl_namespace int SMALLINT
+qc_namespace int SMALLINT
+rc_namespace int SMALLINT
+rd_namespace int SMALLINT
+tl_namespace int SMALLINT
+wl_namespace int SMALLINT
+
+## "Bools"
+ar_minor_edit tinyint CHAR
+iw_trans tinyint CHAR
+page_is_new tinyint CHAR
+page_is_redirect tinyint CHAR
+rc_bot tinyint CHAR
+rc_deleted tinyint CHAR
+rc_minor tinyint CHAR
+rc_new tinyint CHAR
+rc_patrolled tinyint CHAR
+rev_deleted tinyint CHAR
+rev_minor_edit tinyint CHAR
+
+## Easy enough to change if a wiki ever does grow this big:
+ss_good_articles bigint INTEGER
+ss_total_edits bigint INTEGER
+ss_total_pages bigint INTEGER
+ss_total_views bigint INTEGER
+ss_users bigint INTEGER
+
+## True IP - keep an eye on these, coders tend to make textual assumptions
+rc_ip varbinary(40) CIDR # Want to keep an eye on this
+
+## Others:
+tc_time int TIMESTAMPTZ
+
+
+};
+
+my %colmap;
+for (split /\n/ => $COLMAP) {
+ next unless /^\w/;
+ s/(.*?)#.*/$1/;
+ my ($col,@maps) = split / +/, $_;
+ for (@maps) {
+ $colmap{$col}{$_} = 1;
+ }
+}
+
+my %colmapok;
+for (split /\n/ => $COLMAPOK) {
+ next unless /^\w/;
+ my ($col,$old,$new) = split / +/, $_;
+ $colmapok{$col}{$old}{$new} = 1;
+}
+
## Old but not new
for my $t (sort keys %{$old{$oldfile}}) {
if (!exists $new{$t} and !exists $ok{OLD}{$t}) {
next if exists $ok{OLD}{$t} and !$ok{OLD}{$t};
my $newt = exists $ok{OLD}{$t} ? $ok{OLD}{$t} : $t;
my $oldcol = $old{$oldfile}{$t}{column};
+ my $oldcolfull = $old{$oldfile}{$t}{columnfull};
my $newcol = $new{$newt}{column};
for my $c (keys %$oldcol) {
if (!exists $newcol->{$c}) {
next;
}
}
- for my $c (keys %$newcol) {
+ for my $c (sort keys %$newcol) {
if (!exists $oldcol->{$c}) {
print "Column $t.$c not in $oldfile\n";
next;
}
+ ## Column types (roughly) match up?
+ my $new = $newcol->{$c};
+ my $old = $oldcolfull->{$c};
+
+ ## Known exceptions:
+ next if exists $colmapok{$c}{$old}{$new};
+
+ $old =~ s/ENUM.*/ENUM/;
+ if (! exists $colmap{$old}{$new}) {
+ print "Column types for $t.$c do not match: $old does not map to $new\n";
+ }
}
}
## New but not old: