Only start new session for anon users on submit, not edit
[lhc/web/wiklou.git] / maintenance / convertUtf8.php
1 <?php
2 /**
3 * @package MediaWiki
4 * @subpackage Maintenance
5 */
6
7 die("This file is not complete; it's checked in so I don't forget it."); # Deliberate guard: the script is unfinished, so execution halts here and nothing below this line runs.
8
9 /*
10 UTF-8 conversion of DOOOOOOOM
11
12 1. Lock the wiki
13 2. Make a convertlist of all pages
14 3. Enable CONVERTLOCK mode and switch to UTF-8
15 4. As quickly as possible, convert the cur, images, *links, user, etc. tables. Clear cache tables.
16 5. Unlock the wiki. Attempts to access pages on the convertlist will be trapped to read-only.
17 6. Go through the list, fixing up old revisions. Remove pages from the convertlist.
18 */
19
20
/**
 * Converts the text columns of a wiki database from a legacy 8-bit
 * encoding to UTF-8.
 *
 * Cache tables are truncated, and rows of the remaining tables that contain
 * high-bit bytes are rewritten in place. All work happens while the affected
 * tables are write-locked (see updateAll()).
 */
class UtfUpdater {
	/**
	 * Database handle obtained from wfGetDB( DB_MASTER ); every query in
	 * this class goes through it.
	 */
	var $db;

	/**
	 * Constructor: grabs the master database connection.
	 */
	function UtfUpdater() {
		$this->db =& wfGetDB( DB_MASTER );
	}

	/**
	 * Convert a legacy-encoded string to UTF-8.
	 *
	 * @param string $string Text in the old 8-bit encoding
	 * @return string The UTF-8 encoded text
	 */
	function toUtf8( $string ) {
		if( function_exists( 'iconv' ) ) {
			# There are likely to be Windows code page 1252 chars in there.
			# Convert them to the proper UTF-8 chars if possible.
			$converted = iconv( 'CP1252', 'UTF-8', $string );
			if( $converted !== false ) {
				return $converted;
			}
			# iconv() returns false when it cannot convert the input. Never
			# hand that false back to the caller (it would be written into
			# the row); fall through to the plain Latin-1 conversion instead.
		}
		# Will work from plain iso 8859-1 and may corrupt these chars
		return utf8_encode( $string );
	}

	/**
	 * Remove every row from a table.
	 *
	 * @param string $table Unprefixed table name
	 */
	function clearTable( $table ) {
		print "Clearing $table...\n";
		$tableName = $this->db->tableName( $table );
		# Must be a double-quoted string so that $tableName is interpolated.
		$this->db->query( "TRUNCATE $tableName" );
	}

	/**
	 * Convert every row of $table that contains high-bit bytes in one of
	 * the given fields.
	 *
	 * @param string $table Table to be converted
	 * @param string $key Primary key, to identify fields in the UPDATE. If NULL, all fields will be used to match.
	 * @param array $fields List of all fields to grab and convert. If null, will assume you want the $key, and will ask for DISTINCT.
	 * @param string $timestamp A field which should be updated to the current timestamp on changed records.
	 * @access private
	 */
	function convertTable( $table, $key, $fields = null, $timestamp = null ) {
		$fname = 'UtfUpdater::convertTable';
		if( $fields ) {
			$distinct = '';
		} else {
			# If working on one key only, there will be multiple rows.
			# Use DISTINCT to return only one and save us some trouble.
			$fields = array( $key );
			$distinct = 'DISTINCT';
		}

		# Only rows with at least one non-ASCII byte in a converted field
		# need to be touched.
		$condition = '';
		foreach( $fields as $field ) {
			if( $condition ) $condition .= ' OR ';
			$condition .= "$field RLIKE '[\x80-\xff]'";
		}

		# Select the fields to convert plus the key. Skip the key when there
		# is none (a null entry would corrupt the column list) or when it is
		# already one of the fields.
		$columns = $fields;
		if( $key && !in_array( $key, $columns ) ) {
			$columns[] = $key;
		}

		$res = $this->db->selectArray(
			$table,
			$columns,
			$condition,
			$fname,
			$distinct );
		print "Converting " . $this->db->numResults( $res ) . " rows from $table:\n";
		$n = 0;
		while( $s = $this->db->fetchObject( $res ) ) {
			# New values, keyed by field name as the UPDATE needs them.
			$set = array();
			foreach( $fields as $field ) {
				$set[$field] = $this->toUtf8( $s->$field );
			}
			if( $timestamp ) {
				$set[$timestamp] = $this->db->timestamp();
			}

			# Identify the row by its key, or by the old (unconverted) values
			# of all fields when the table has no usable key.
			if( $key ) {
				$keyCond = array( $key => $s->$key );
			} else {
				$keyCond = array();
				foreach( $fields as $field ) {
					$keyCond[$field] = $s->$field;
				}
			}
			$this->db->updateArray(
				$table,
				$set,
				$keyCond,
				$fname );
			if( ++$n % 100 == 0 ) echo "$n\n";
		}
		echo "$n done.\n";
		$this->db->freeResult( $res );
	}

	/**
	 * Take a WRITE lock on each of the given tables in a single
	 * LOCK TABLES statement.
	 *
	 * @param array $tables Unprefixed table names
	 */
	function lockTables( $tables ) {
		$query = '';
		foreach( $tables as $table ) {
			$tableName = $this->db->tableName( $table );
			if( $query ) $query .= ', ';
			# Double quotes so the real table name ends up in the query.
			$query .= "$tableName WRITE";
		}
		$this->db->query( 'LOCK TABLES ' . $query );
	}

	/**
	 * Run the whole conversion: lock the tables, clear the cache tables,
	 * convert the text fields of the remaining tables, then unlock.
	 */
	function updateAll() {
		$this->lockTables( array(
			'linkscc', 'objectcache', 'searchindex', 'querycache',
			'ipblocks', 'user', 'page', 'revision', 'recentchanges',
			'brokenlinks', 'categorylinks', 'imagelinks', 'watchlist',
			'image', 'oldimage', 'archive' ) );

		# These are safe to clear out:
		$this->clearTable( 'linkscc' );
		$this->clearTable( 'objectcache' );

		# These need to be rebuilt if used:
		$this->clearTable( 'searchindex' );
		$this->clearTable( 'querycache' );

		# And convert the rest...
		$this->convertTable( 'ipblocks', 'ipb_id', array( 'ipb_reason' ) );
		$this->convertTable( 'user', 'user_id',
			array( 'user_name', 'user_real_name', 'user_options' ),
			'user_touched' );
		$this->convertTable( 'page', 'page_id',
			array( 'page_title' ), 'page_touched' );
		$this->convertTable( 'revision', 'rev_id',
			array( 'rev_user_text', 'rev_comment' ) );

		$this->convertTable( 'recentchanges', 'rc_id',
			array( 'rc_user_text', 'rc_title', 'rc_comment' ) );

		$this->convertTable( 'brokenlinks', 'bl_to' );
		$this->convertTable( 'categorylinks', 'cl_to' );
		$this->convertTable( 'imagelinks', 'il_to' );
		$this->convertTable( 'watchlist', 'wl_title' );

		# FIXME: We'll also need to change the files.
		$this->convertTable( 'image', 'img_name',
			array( 'img_name', 'img_description', 'img_user_text' ) );
		# The key must be one of the selected oi_* columns, hence
		# oi_archive_name.
		$this->convertTable( 'oldimage', 'oi_archive_name',
			array( 'oi_name', 'oi_archive_name', 'oi_description', 'oi_user_text' ) );

		# Don't change the ar_text entries; use $wgLegacyEncoding to read them at runtime
		$this->convertTable( 'archive', null,
			array( 'ar_title', 'ar_comment', 'ar_user_text' ) );
		echo "Not converting text table: be sure to set \$wgLegacyEncoding!\n";

		$this->db->query( 'UNLOCK TABLES' );
	}

}
158
159 ?>