3 die("This file is not complete; it's checked in so I don't forget it.");
6 UTF-8 conversion of DOOOOOOOM
9 2. Make a convertlist of all pages
10 3. Enable CONVERTLOCK mode and switch to UTF-8
11 4. As quickly as possible, convert the cur, images, *links, user, etc. tables. Clear cache tables.
12 5. Unlock the wiki. Attempts to access pages on the convertlist will be trapped to read-only.
13 6. Go through the list, fixing up old revisions. Remove pages from the convertlist.
# Define toUtf8() once, choosing the converter that is available at load time.
# Either variant takes a raw string read from the database and returns the
# UTF-8 version already passed through wfStrencode(), so the result can be
# placed directly between quotes in an SQL statement.
if( function_exists( "iconv" ) ) {
	# There are likely to be Windows code page 1252 chars in there.
	# Convert them to the proper UTF-8 chars if possible.
	function toUtf8( $string ) {
		$converted = iconv( "CP1252", "UTF-8", $string );
		if( $converted === false ) {
			# iconv() returns false when the conversion fails (for
			# instance on a byte it cannot map). Writing that back would
			# replace the stored value with an empty string, so fall back
			# to the plain ISO 8859-1 conversion instead.
			$converted = utf8_encode( $string );
		}
		return wfStrencode( $converted );
	}
} else {
	# Will work from plain iso 8859-1 and may corrupt these chars
	function toUtf8( $string ) {
		return wfStrencode( utf8_encode( $string ) );
	}
}
# user table: re-encode the name, real name and options of every account,
# writing back only the rows whose converted text differs from the stored
# text, and dropping the matching memcached entry for each rewritten row.
$sql = "SELECT user_id,user_name,user_real_name,user_options FROM user";
$res = wfQuery( $sql, DB_WRITE );
print "Converting " . wfNumResults( $res ) . " user accounts:\n";

$n = 0; # rows examined
$u = 0; # rows that had to be rewritten
while( $s = wfFetchObject( $res ) ) {
	$uname = toUtf8( $s->user_name );
	$ureal = toUtf8( $s->user_real_name );
	$uoptions = toUtf8( $s->user_options );

	# toUtf8() output is already SQL-escaped, so compare against the
	# escaped form of the original to detect a real change.
	if( $uname != wfStrencode( $s->user_name ) ||
		$ureal != wfStrencode( $s->user_real_name ) ||
		$uoptions != wfStrencode( $s->user_options ) ) {
		$now = wfTimestampNow();
		$sql = "UPDATE user
			SET user_name='$uname',user_real_name='$ureal',
			user_options='$uoptions',user_touched='$now'
			WHERE user_id={$s->user_id}";
		wfQuery( $sql, DB_WRITE );
		$wgMemc->delete( "$wgDBname:user:id:{$s->user_id}" );
		$u++;
	}

	# Progress marker every 100 rows.
	if( ++$n % 100 == 0 ) print "$n\n";
}
wfFreeResult( $res );

# Report the share of rows rewritten. The format needs an explicit 'f'
# conversion and the ratio has to be scaled to a percentage; skip the
# report entirely when no rows were read to avoid dividing by zero.
if( $n > 0 ) {
	printf( "%2.02f%% required conversion.\n\n", 100 * $u / $n );
}
# ipblocks table: re-encode each distinct block reason and rewrite every row
# that carries the old text.
$sql = "SELECT DISTINCT ipb_reason FROM ipblocks";
$res = wfQuery( $sql, DB_WRITE );
print "Converting " . wfNumResults( $res ) . " IP block comments:\n";

$n = 0; # distinct reasons examined
$u = 0; # distinct reasons that had to be rewritten
while( $s = wfFetchObject( $res ) ) {
	$ucomment = toUtf8( $s->ipb_reason );
	$ocomment = wfStrencode( $s->ipb_reason );

	# Both values are SQL-escaped; only touch the table when they differ.
	if( $ucomment != $ocomment ) {
		$sql = "UPDATE ipblocks SET ipb_reason='$ucomment' WHERE ipb_reason='$ocomment'";
		wfQuery( $sql, DB_WRITE );
		$u++;
	}

	# Progress marker every 100 rows.
	if( ++$n % 100 == 0 ) print "$n\n";
}
wfFreeResult( $res );

# The format needs an explicit 'f' conversion and the ratio has to be
# scaled to a percentage; skip the report when nothing was read so the
# division cannot hit zero.
if( $n > 0 ) {
	printf( "%2.02f%% required conversion.\n\n", 100 * $u / $n );
}
# image table: only the SELECT is prepared here. NOTE(review): no wfQuery()
# call using it is visible before $sql is reassigned, so this conversion
# appears to be unimplemented - confirm before relying on it.
85 $sql = "SELECT img_name,img_description,img_user_text FROM image";
86 img_name
--> also need to rename files
92 oi_archive_name
--> also need to rename files
# Empty the index and cache tables instead of converting them. Each entry
# maps a table name to the notice printed just before it is cleared.
$clearNotices = array(
	"searchindex" => "Clearing searchindex... don't forget to rebuild it.\n",
	"linkscc" => "Clearing linkscc...\n",
	# querycache: just rebuild these
	"querycache" => "Clearing querycache...\n",
	"objectcache" => "Clearing objectcache...\n",
);

foreach( $clearNotices as $cacheTable => $notice ) {
	print $notice;
	$sql = "DELETE FROM $cacheTable";
	wfQuery( $sql, DB_WRITE );
}
/**
 * Convert one text column of a table to UTF-8.
 *
 * Selects every distinct value of $field that contains at least one byte in
 * the range 0x80-0xff, converts it with toUtf8(), and rewrites all rows that
 * hold the old value. Prints the number of values found and a progress
 * marker every 100 values.
 *
 * $table and $field are interpolated into the SQL unescaped, so they must be
 * trusted identifiers supplied by this script, never user input.
 *
 * @param string $table name of the table to update
 * @param string $field name of the column to convert
 */
function unicodeLinks( $table, $field ) {
	$sql = "SELECT DISTINCT $field FROM $table WHERE $field RLIKE '[\x80-\xff]'";
	$res = wfQuery( $sql, DB_WRITE );
	print "Converting " . wfNumResults( $res ) . " from $table:\n";

	# Local progress counter; it must start at zero here because function
	# scope does not see any counter used by the surrounding script.
	$n = 0;
	while( $s = wfFetchObject( $res ) ) {
		$ulink = toUtf8( $s->$field );
		$olink = wfStrencode( $s->$field );
		$sql = "UPDATE $table SET $field='$ulink' WHERE $field='$olink'";
		wfQuery( $sql, DB_WRITE );
		if( ++$n % 100 == 0 ) print "$n\n";
	}
	wfFreeResult( $res );
}
# Run the link conversion over each link table, keyed by table name with the
# column to convert as the value. Order is the same as listed.
$linkColumns = array(
	"brokenlinks" => "bl_to",
	"imagelinks" => "il_to",
	"categorylinks" => "cl_to",
);
foreach( $linkColumns as $linkTable => $linkField ) {
	unicodeLinks( $linkTable, $linkField );
}
# cur table: re-encode title, user text, comment and text of every current
# page revision that contains a high-bit byte in any of those columns, and
# carry title changes over to the old and watchlist tables.
#
# cur_comment must be part of the SELECT list: the loop converts it and
# writes it back, so leaving it out would overwrite every comment with the
# conversion of a missing value.
$sql = "SELECT cur_id,cur_namespace,cur_title,cur_text,cur_user_text,cur_comment FROM cur
	WHERE cur_title rlike '[\x80-\xff]' OR cur_comment rlike '[\x80-\xff]'
	OR cur_user_text rlike '[\x80-\xff]' OR cur_text rlike '[\x80-\xff]'";
$res = wfQuery( $sql, DB_WRITE );
print "Converting " . wfNumResults( $res ) . " cur pages:\n";

$n = 0; # pages examined
$u = 0; # pages whose title changed (old/watchlist rows rewritten)
while( $s = wfFetchObject( $res ) ) {
	$utitle = toUtf8( $s->cur_title );
	$uuser = toUtf8( $s->cur_user_text );
	$ucomment = toUtf8( $s->cur_comment );
	$utext = toUtf8( $s->cur_text );

	$sql = "UPDATE cur
		SET cur_title='$utitle',cur_user_text='$uuser',
		cur_comment='$ucomment',cur_text='$utext'
		WHERE cur_id={$s->cur_id}";
	wfQuery( $sql, DB_WRITE );
	# Disabled cache purge carried over from the user-table section; no
	# user_id is selected from cur, so it stays commented out.
	#$wgMemc->delete( "$wgDBname:user:id:{$s->user_id}" );

	$otitle = wfStrencode( $s->cur_title );
	if( $otitle != $utitle ) {
		# Also update titles in watchlist and old
		$sql = "UPDATE old SET old_title='$utitle'
			WHERE old_namespace={$s->cur_namespace} AND old_title='$otitle'";
		wfQuery( $sql, DB_WRITE );

		# Clear the lowest bit of the namespace number before matching
		# watchlist rows. NOTE(review): presumably watchlist entries are
		# stored under the even (non-talk) namespace - confirm.
		$ns = IntVal( $s->cur_namespace ) & ~1;
		$sql = "UPDATE watchlist SET wl_title='$utitle'
			WHERE wl_namespace=$ns AND wl_title='$otitle'";
		wfQuery( $sql, DB_WRITE );
		$u++;
	}

	# Progress marker every 100 rows.
	if( ++$n % 100 == 0 ) print "$n\n";
}
wfFreeResult( $res );

# $u > 0 implies $n > 0, so the division is safe. The format needs an
# explicit 'f' conversion and the ratio has to be scaled to a percentage.
if( $u > 0 ) {
	printf( "Updated old/watchlist titles on %2.02f%%.\n\n", 100 * $u / $n );
} else {
	print "Didn't update any old/watchlist titles.\n\n";
}
188 old_text -> may be gzipped
194 ar_text -> may be gzipped