Recentchanges optimization: avoid a lot of senseless parsing of link text coming...
[lhc/web/wiklou.git] / maintenance / convertUtf8.php
1 <?php
2 /**
3 * @package MediaWiki
4 * @subpackage Maintenance
5 */
6
# Safety guard: abort immediately, because the conversion steps below are unfinished.
exit( "This file is not complete; it's checked in so I don't forget it." );
8
9 /*
10 UTF-8 conversion of DOOOOOOOM
11
12 1. Lock the wiki
13 2. Make a convertlist of all pages
14 3. Enable CONVERTLOCK mode and switch to UTF-8
15 4. As quick as possible, convert the cur, images, *links, user, etc tables. Clear cache tables.
16 5. Unlock the wiki. Attempts to access pages on the convertlist will be trapped to read-only.
17 6. Go through the list, fixing up old revisions. Remove pages from the convertlist.
18 */
19
20
if( function_exists( "iconv" ) ) {
	/**
	 * Convert a legacy-encoded string to UTF-8 and escape it for use inside
	 * a single-quoted SQL literal (via wfStrencode).
	 *
	 * There are likely to be Windows code page 1252 chars in there, so the
	 * input is treated as CP1252 and converted to the proper UTF-8 chars
	 * where possible.
	 *
	 * @param string $string Raw bytes as read from the database.
	 * @return string UTF-8 text, SQL-escaped.
	 */
	function toUtf8( $string ) {
		# iconv() returns false (and raises a notice) when the input holds a
		# byte that CP1252 leaves undefined. The failure is handled explicitly
		# below, so the notice is silenced rather than flooding the output.
		$converted = @iconv( "CP1252", "UTF-8", $string );
		if( $converted === false ) {
			# Never hand false to wfStrencode(): it would become an empty
			# string and the caller's UPDATE would erase the field.
			# Fall back to a plain ISO 8859-1 conversion, which cannot fail.
			$converted = utf8_encode( $string );
		}
		return wfStrencode( $converted );
	}
} else {
	/**
	 * Convert a legacy-encoded string to UTF-8 and escape it for use inside
	 * a single-quoted SQL literal (via wfStrencode).
	 *
	 * Without iconv this works from plain ISO 8859-1 and may corrupt
	 * characters that are specific to Windows code page 1252.
	 *
	 * @param string $string Raw bytes as read from the database.
	 * @return string UTF-8 text, SQL-escaped.
	 */
	function toUtf8( $string ) {
		return wfStrencode( utf8_encode( $string ) );
	}
}
33
34
35
# user table
# Re-encode the name, real name and options of every account. Only rows whose
# converted values differ from the stored ones are rewritten.
$sql = "SELECT user_id,user_name,user_real_name,user_options FROM user";
$res = wfQuery( $sql, DB_WRITE );
print "Converting " . wfNumResults( $res ) . " user accounts:\n";
$n = 0; # rows examined
$u = 0; # rows that actually needed conversion
while( $s = wfFetchObject( $res ) ) {
	$uname = toUtf8( $s->user_name );
	$ureal = toUtf8( $s->user_real_name );
	$uoptions = toUtf8( $s->user_options );
	# toUtf8() returns SQL-escaped text, so compare against the escaped
	# originals to detect a real change.
	if( $uname !== wfStrencode( $s->user_name ) ||
		$ureal !== wfStrencode( $s->user_real_name ) ||
		$uoptions !== wfStrencode( $s->user_options ) ) {
		$now = wfTimestampNow();
		$sql = "UPDATE user
			SET user_name='$uname',user_real_name='$ureal',
			user_options='$uoptions',user_touched='$now'
			WHERE user_id={$s->user_id}";
		wfQuery( $sql, DB_WRITE );
		# Drop the cached copy of this user so the stale encoding is not served.
		# NOTE(review): $wgMemc and $wgDBname are not set up in this file;
		# presumably the maintenance bootstrap provides them - confirm.
		$wgMemc->delete( "$wgDBname:user:id:{$s->user_id}" );
		$u++;
	}
	if( ++$n % 100 == 0 ) print "$n\n";
}
wfFreeResult( $res );
if( $n ) {
	# Report the share of rows converted as a real percentage.
	printf( "%2.02f%% required conversion.\n\n", 100.0 * $u / $n );
} else {
	print "None?\n\n";
}
65
# ipblocks
# Re-encode block reasons. Each distinct reason is handled once and every row
# carrying that reason is updated in a single statement.
$sql = "SELECT DISTINCT ipb_reason FROM ipblocks";
$res = wfQuery( $sql, DB_WRITE );
print "Converting " . wfNumResults( $res ) . " IP block comments:\n";
$n = 0; # distinct reasons examined
$u = 0; # distinct reasons that actually needed conversion
while( $s = wfFetchObject( $res ) ) {
	$ucomment = toUtf8( $s->ipb_reason );
	$ocomment = wfStrencode( $s->ipb_reason );
	# Compare the converted text with the escaped original, not the counters.
	if( $ucomment !== $ocomment ) {
		$sql = "UPDATE ipblocks SET ipb_reason='$ucomment' WHERE ipb_reason='$ocomment'";
		wfQuery( $sql, DB_WRITE );
		$u++;
	}
	if( ++$n % 100 == 0 ) print "$n\n";
}
wfFreeResult( $res );
if( $n ) {
	# Report the share of reasons converted as a real percentage.
	printf( "%2.02f%% required conversion.\n\n", 100.0 * $u / $n );
} else {
	print "None?\n\n";
}
87
# image
# TODO: conversion of the image, oldimage and recentchanges tables is not
# implemented yet. The query below is only a starting point and is never run;
# the column notes are kept in a comment so the file still parses.
$sql = "SELECT img_name,img_description,img_user_text FROM image";
/*
image
	img_name --> also need to rename files
	img_description
	img_user_text

oldimage
	oi_name
	oi_archive_name --> also need to rename files
	oi_user_text

recentchanges
	rc_user_text
	rc_title
	rc_comment
*/
103
# Derived/cache tables: nothing here is converted, the rows are simply thrown
# away. Each entry maps a table name to the progress line printed before its
# contents are deleted.
$cacheTables = array(
	# searchindex
	"searchindex" => "Clearing searchindex... don't forget to rebuild it.\n",
	# linkscc
	"linkscc" => "Clearing linkscc...\n",
	# querycache: just rebuild these
	"querycache" => "Clearing querycache...\n",
	# objectcache
	"objectcache" => "Clearing objectcache...\n",
);
foreach( $cacheTables as $cacheTable => $notice ) {
	print $notice;
	$sql = "DELETE FROM $cacheTable";
	wfQuery( $sql, DB_WRITE );
}
123
124
/**
 * Re-encode one text column of a links table to UTF-8.
 *
 * Selects every distinct value of $field that contains a byte in the
 * 0x80-0xff range, converts it with toUtf8(), and rewrites all rows holding
 * the old value. Progress is printed every 100 values.
 *
 * @param string $table Name of the table to update.
 * @param string $field Name of the column holding the link target.
 */
function unicodeLinks( $table, $field ) {
	$highBytes = "[\x80-\xff]";
	$res = wfQuery(
		"SELECT DISTINCT $field FROM $table WHERE $field RLIKE '" . $highBytes . "'",
		DB_WRITE
	);
	print "Converting " . wfNumResults( $res ) . " from $table:\n";

	$done = 0;
	while( $row = wfFetchObject( $res ) ) {
		$raw = $row->$field;
		$converted = toUtf8( $raw );
		$escaped = wfStrencode( $raw );
		wfQuery(
			"UPDATE $table SET $field='$converted' WHERE $field='$escaped'",
			DB_WRITE
		);

		$done++;
		if( $done % 100 == 0 ) {
			print "$done\n";
		}
	}

	wfFreeResult( $res );
	print "Done.\n\n";
}
# Link tables whose target column stores a page title: table name => column.
$linkTargets = array(
	"brokenlinks" => "bl_to",
	"imagelinks" => "il_to",
	"categorylinks" => "cl_to",
);
foreach( $linkTargets as $linkTable => $linkField ) {
	unicodeLinks( $linkTable, $linkField );
}
143
144
# The big guys...
# Convert every current page revision that has a high byte in its title,
# comment, user text or body. cur_comment must be part of the SELECT: it is
# converted and written back below, and a missing column would blank it.
$sql = "SELECT cur_id,cur_namespace,cur_title,cur_text,cur_comment,cur_user_text FROM cur
	WHERE cur_title rlike '[\x80-\xff]' OR cur_comment rlike '[\x80-\xff]'
	OR cur_user_text rlike '[\x80-\xff]' OR cur_text rlike '[\x80-\xff]'";
$res = wfQuery( $sql, DB_WRITE );
print "Converting " . wfNumResults( $res ) . " cur pages:\n";
$n = 0; # pages converted
$u = 0; # pages whose title changed, so old/watchlist were updated too
while( $s = wfFetchObject( $res ) ) {
	$utitle = toUtf8( $s->cur_title );
	$uuser = toUtf8( $s->cur_user_text );
	$ucomment = toUtf8( $s->cur_comment );
	$utext = toUtf8( $s->cur_text );

	$sql = "UPDATE cur
		SET cur_title='$utitle',cur_user_text='$uuser',
		cur_comment='$ucomment',cur_text='$utext'
		WHERE cur_id={$s->cur_id}";
	wfQuery( $sql, DB_WRITE );

	$otitle = wfStrencode( $s->cur_title );
	if( $otitle !== $utitle ) {
		# Also update titles in watchlist and old
		$sql = "UPDATE old SET old_title='$utitle'
			WHERE old_namespace={$s->cur_namespace} AND old_title='$otitle'";
		wfQuery( $sql, DB_WRITE );

		# Clear the low bit of the namespace number before matching.
		# NOTE(review): presumably this folds a talk namespace onto its
		# subject namespace because watchlist rows are keyed that way - confirm.
		$ns = intval( $s->cur_namespace ) & ~1;
		$sql = "UPDATE watchlist SET wl_title='$utitle'
			WHERE wl_namespace=$ns AND wl_title='$otitle'";
		wfQuery( $sql, DB_WRITE );
		$u++;
	}

	if( ++$n % 100 == 0 ) print "$n\n";
}
wfFreeResult( $res );
if( $n ) {
	# Report the share of pages with a changed title as a real percentage.
	printf( "Updated old/watchlist titles on %2.02f%%.\n\n", 100.0 * $u / $n );
} else {
	print "Didn't update any old/watchlist titles.\n\n";
}
188
189 /*
190 old
191 old_title
192 old_text -> may be gzipped
193 old_comment
194 old_user_text
195
196 archive
197 ar_title
198 ar_text -> may be gzipped
199 ar_comment
200 ar_user_text
201 */
202
203 ?>