From: Greg Sabino Mullane Date: Mon, 24 Sep 2007 20:09:02 +0000 (+0000) Subject: Add column type brains, map out all non-standard stuff X-Git-Tag: 1.31.0-rc.0~51304 X-Git-Url: http://git.cyclocoop.org/%22.%28%24lien.?a=commitdiff_plain;h=974188412ff11c63b5f56d7f2e7e5c696f846b4c;p=lhc%2Fweb%2Fwiklou.git Add column type brains, map out all non-standard stuff --- diff --git a/maintenance/postgres/compare_schemas.pl b/maintenance/postgres/compare_schemas.pl index 297e3af2da..1945589f39 100644 --- a/maintenance/postgres/compare_schemas.pl +++ b/maintenance/postgres/compare_schemas.pl @@ -112,6 +112,8 @@ sub parse_sql { } elsif (/^ (\w+) $datatype$typeval$typeval2{0,3},?$/) { $info{$table}{column}{$1} = $2; + my $extra = $3 || ''; + $info{$table}{columnfull}{$1} = "$2$extra"; } elsif (/^ ($indextype)(?: (\w+))? \(([\w, \(\)]+)\),?$/) { $info{$table}{lc $1.'_name'} = $2 ? $2 : ''; @@ -209,6 +211,168 @@ while (<$newfh>) { } } +## Which column types are okay to map from mysql to postgres? +my $COLMAP = q{ +## INTS: +tinyint SMALLINT +int INTEGER SERIAL +bigint BIGINT +real NUMERIC +float NUMERIC + +## TEXT: +varchar(32) TEXT +varchar(70) TEXT +varchar(255) TEXT +varchar TEXT +text TEXT +tinytext TEXT +ENUM TEXT + +## TIMESTAMPS: +varbinary(14) TIMESTAMPTZ +binary(14) TIMESTAMPTZ +datetime TIMESTAMPTZ +timestamp TIMESTAMPTZ + +## BYTEA: +mediumblob BYTEA + +## OTHER: +bool CHAR # Sigh + +}; +## Allow specific exceptions to the above +my $COLMAPOK = q{ +## User inputted text strings: +ar_comment tinyblob TEXT +fa_description tinyblob TEXT +img_description tinyblob TEXT +ipb_reason tinyblob TEXT +log_action varbinary(10) TEXT +oi_description tinyblob TEXT +rev_comment tinyblob TEXT +rc_log_action varbinary(255) TEXT +rc_log_type varbinary(255) TEXT + +## Simple text-only strings: +ar_flags tinyblob TEXT +fa_minor_mime varbinary(32) TEXT +fa_storage_group varbinary(16) TEXT # Just 'deleted' for now, should stay plain text +fa_storage_key varbinary(64) TEXT # sha1 plus text extension +ipb_address tinyblob TEXT # IP address or username +ipb_range_end tinyblob TEXT # hexadecimal +ipb_range_start tinyblob TEXT # hexadecimal +img_minor_mime varbinary(32) TEXT +img_sha1 varbinary(32) TEXT +job_cmd varbinary(60) TEXT # Should we limit to 60 as well? +keyname varbinary(255) TEXT # No tablename prefix (objectcache) +ll_lang varbinary(20) TEXT # Language code +log_params blob TEXT # LF separated list of args +log_type varbinary(10) TEXT +oi_minor_mime varbinary(32) TEXT +oi_sha1 varbinary(32) TEXT +old_flags tinyblob TEXT +old_text mediumblob TEXT +page_restrictions tinyblob TEXT # CSV string +pf_server varchar(30) TEXT +pr_level varbinary(60) TEXT +pr_type varbinary(60) TEXT +qc_type varbinary(32) TEXT +qcc_type varbinary(32) TEXT +qci_type varbinary(32) TEXT +rc_params blob TEXT +ug_group varbinary(16) TEXT +user_email_token binary(32) TEXT +user_ip varbinary(40) TEXT +user_newpassword tinyblob TEXT +user_options blob TEXT +user_password tinyblob TEXT +user_token binary(32) TEXT + +## Text URLs: +el_index blob TEXT +el_to blob TEXT +iw_url blob TEXT +tb_url blob TEXT +tc_url varbinary(255) TEXT + +## Deprecated or not yet used: +ar_text mediumblob TEXT +job_params blob TEXT +log_deleted tinyint INTEGER # Not used yet, but keep it INTEGER for safety +rc_type tinyint CHAR + +## Number tweaking: +fa_bits int SMALLINT # bits per pixel +fa_height int SMALLINT +fa_width int SMALLINT # Hope we don't see an image this wide... +hc_id int BIGINT # Odd that site_stats is all bigint... +img_bits int SMALLINT # bits per image should stay sane +oi_bits int SMALLINT + +## True binary fields, usually due to gzdeflate and/or serialize: +math_inputhash varbinary(16) BYTEA +math_outputhash varbinary(16) BYTEA + +## Namespaces: not need for such a high range +ar_namespace int SMALLINT +job_namespace int SMALLINT +log_namespace int SMALLINT +page_namespace int SMALLINT +pl_namespace int SMALLINT +qc_namespace int SMALLINT +rc_namespace int SMALLINT +rd_namespace int SMALLINT +tl_namespace int SMALLINT +wl_namespace int SMALLINT + +## "Bools" +ar_minor_edit tinyint CHAR +iw_trans tinyint CHAR +page_is_new tinyint CHAR +page_is_redirect tinyint CHAR +rc_bot tinyint CHAR +rc_deleted tinyint CHAR +rc_minor tinyint CHAR +rc_new tinyint CHAR +rc_patrolled tinyint CHAR +rev_deleted tinyint CHAR +rev_minor_edit tinyint CHAR + +## Easy enough to change if a wiki ever does grow this big: +ss_good_articles bigint INTEGER +ss_total_edits bigint INTEGER +ss_total_pages bigint INTEGER +ss_total_views bigint INTEGER +ss_users bigint INTEGER + +## True IP - keep an eye on these, coders tend to make textual assumptions +rc_ip varbinary(40) CIDR # Want to keep an eye on this + +## Others: +tc_time int TIMESTAMPTZ + + +}; + +my %colmap; +for (split /\n/ => $COLMAP) { + next unless /^\w/; + s/(.*?)#.*/$1/; + my ($col,@maps) = split / +/, $_; + for (@maps) { + $colmap{$col}{$_} = 1; + } +} + +my %colmapok; +for (split /\n/ => $COLMAPOK) { + next unless /^\w/; + my ($col,$old,$new) = split / +/, $_; + $colmapok{$col}{$old}{$new} = 1; +} + ## Old but not new for my $t (sort keys %{$old{$oldfile}}) { if (!exists $new{$t} and !exists $ok{OLD}{$t}) { @@ -218,6 +382,7 @@ for my $t (sort keys %{$old{$oldfile}}) { next if exists $ok{OLD}{$t} and !$ok{OLD}{$t}; my $newt = exists $ok{OLD}{$t} ? $ok{OLD}{$t} : $t; my $oldcol = $old{$oldfile}{$t}{column}; + my $oldcolfull = $old{$oldfile}{$t}{columnfull}; my $newcol = $new{$newt}{column}; for my $c (keys %$oldcol) { if (!exists $newcol->{$c}) { @@ -225,11 +390,22 @@ for my $t (sort keys %{$old{$oldfile}}) { next; } } - for my $c (keys %$newcol) { + for my $c (sort keys %$newcol) { if (!exists $oldcol->{$c}) { print "Column $t.$c not in $oldfile\n"; next; } + ## Column types (roughly) match up? + my $new = $newcol->{$c}; + my $old = $oldcolfull->{$c}; + + ## Known exceptions: + next if exists $colmapok{$c}{$old}{$new}; + + $old =~ s/ENUM.*/ENUM/; + if (! exists $colmap{$old}{$new}) { + print "Column types for $t.$c do not match: $old does not map to $new\n"; + } } } ## New but not old: