From b2411deec4b1732e73754f69eeaf19c0bd7c78f7 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 27 Sep 2004 06:23:33 +0000 Subject: [PATCH] Far from finished in-place UTF-8 wiki converter. I had this code lying around for a while and wanted to check it in before I forget about it. --- maintenance/convertUtf8.php | 199 ++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 maintenance/convertUtf8.php diff --git a/maintenance/convertUtf8.php b/maintenance/convertUtf8.php new file mode 100644 index 0000000000..3281d6b6ff --- /dev/null +++ b/maintenance/convertUtf8.php @@ -0,0 +1,199 @@ +user_name );
+	# NOTE(review): everything before this point (script header, toUtf8(),
+	# the user SELECT and the head of this loop) is missing from this copy
+	# of the patch. toUtf8() presumably returns an SQL-escaped UTF-8 string,
+	# since its result is compared against wfStrencode() output and placed
+	# straight into SQL below -- confirm against the full file.
+	$ureal = toUtf8( $s->user_real_name );
+	$uoptions = toUtf8( $s->user_options );
+	# Only touch rows where at least one field actually changes.
+	if( $uname != wfStrencode( $s->user_name ) ||
+		$ureal != wfStrencode( $s->user_real_name ) ||
+		$uoptions != wfStrencode( $s->user_options ) ) {
+		$now = wfTimestampNow();
+		$sql = "UPDATE user SET user_name='$uname',user_real_name='$ureal', " .
+			"user_options='$uoptions',user_touched='$now' " .
+			"WHERE user_id={$s->user_id}";
+		wfQuery( $sql, DB_WRITE );
+		# Drop the cached copy of this user so the old data is not served.
+		$wgMemc->delete( "$wgDBname:user:id:{$s->user_id}" );
+		$u++;
+	}
+	if( ++$n % 100 == 0 ) print "$n\n";
+}
+wfFreeResult( $res );
+if( $n ) {
+	# $u / $n is a fraction; scale it to a percentage. The original format
+	# string lacked the 'f' conversion, so no number was printed at all.
+	printf( "%2.02f%% required conversion.\n\n", 100.0 * $u / $n );
+} else {
+	print "None?\n\n";
+}
+
+# ipblocks
+# Convert each distinct block reason once and rewrite all rows sharing it.
+$sql = "SELECT DISTINCT ipb_reason FROM ipblocks";
+$res = wfQuery( $sql, DB_WRITE );
+print "Converting " . wfNumResults( $res ) . " IP block comments:\n";
+$n = 0;
+# Reset the "rows changed" counter; it still holds the user section's count.
+$u = 0;
+while( $s = wfFetchObject( $res ) ) {
+	$ucomment = toUtf8($s->ipb_reason);
+	$ocomment = wfStrencode( $s->ipb_reason );
+	# Compare converted vs. original text (the original compared the counter
+	# $u against an undefined $o).
+	if( $ucomment != $ocomment ) {
+		$sql = "UPDATE ipblocks SET ipb_reason='$ucomment' WHERE ipb_reason='$ocomment'";
+		wfQuery( $sql, DB_WRITE );
+		$u++;
+	}
+	if( ++$n % 100 == 0 ) print "$n\n";
+}
+wfFreeResult( $res );
+if( $n ) {
+	printf( "%2.02f%% required conversion.\n\n", 100.0 * $u / $n );
+} else {
+	print "None?\n\n";
+}
+
+# image
+# TODO: not converted yet -- the query below is only built, never run.
+$sql = "SELECT img_name,img_description,img_user_text FROM image";
+# Unfinished notes on the tables still to do. They were bare text in the
+# script (a parse error), so they are kept here as comments:
+#
+#	img_name --> also need to rename files
+#	img_description
+#	img_user_text
+#
+# oldimage
+#	oi_name
+#	oi_archive_name --> also need to rename files
+#	oi_user_text
+#
+# recentchanges
+#	rc_user_text
+#	rc_title
+#	rc_comment
+
+# searchindex
+print "Clearing searchindex... don't forget to rebuild it.\n";
+$sql = "DELETE FROM searchindex";
+wfQuery( $sql, DB_WRITE );
+
+# linkscc
+print "Clearing linkscc...\n";
+$sql = "DELETE FROM linkscc";
+wfQuery( $sql, DB_WRITE );
+
+# querycache: just rebuild these
+print "Clearing querycache...\n";
+$sql = "DELETE FROM querycache";
+wfQuery( $sql, DB_WRITE );
+
+# objectcache
+print "Clearing objectcache...\n";
+$sql = "DELETE FROM objectcache";
+wfQuery( $sql, DB_WRITE );
+
+
+/**
+ * Convert one link-target column of a links table to UTF-8 in place.
+ *
+ * Selects every distinct value of $field containing a byte in the range
+ * 0x80-0xff, then rewrites all rows holding that value with the result
+ * of toUtf8(). Progress is printed every 100 values.
+ *
+ * @param string $table Table name; interpolated into the SQL unescaped.
+ * @param string $field Column name; interpolated into the SQL unescaped.
+ */
+function unicodeLinks( $table, $field ) {
+	$sql = "SELECT DISTINCT $field FROM $table WHERE $field RLIKE '[\x80-\xff]'";
+	$res = wfQuery( $sql, DB_WRITE );
+	print "Converting " . wfNumResults( $res ) . " from $table:\n";
+	$n = 0;
+	while( $s = wfFetchObject( $res ) ) {
+		$ulink = toUtf8( $s->$field );
+		$olink = wfStrencode( $s->$field );
+		$sql = "UPDATE $table SET $field='$ulink' WHERE $field='$olink'";
+		wfQuery( $sql, DB_WRITE );
+		if( ++$n % 100 == 0 ) print "$n\n";
+	}
+	wfFreeResult( $res );
+	print "Done.\n\n";
+}
+unicodeLinks( "brokenlinks", "bl_to" );
+unicodeLinks( "imagelinks", "il_to" );
+unicodeLinks( "categorylinks", "cl_to" );
+
+
+# The big guys...
+# cur: convert title, user text, comment and text of every current page
+# revision that contains at least one byte in the range 0x80-0xff.
+# cur_comment must be selected too: the loop reads it and writes it back,
+# so leaving it out of the SELECT blanked every converted row's comment.
+$sql = "SELECT cur_id,cur_namespace,cur_title,cur_text,cur_comment,cur_user_text FROM cur " .
+	"WHERE cur_title rlike '[\x80-\xff]' OR cur_comment rlike '[\x80-\xff]' " .
+	"OR cur_user_text rlike '[\x80-\xff]' OR cur_text rlike '[\x80-\xff]'";
+$res = wfQuery( $sql, DB_WRITE );
+print "Converting " . wfNumResults( $res ) . " cur pages:\n";
+$n = 0;
+# Count of pages whose title changed; reset so that earlier sections of the
+# script do not leak into the percentage printed below.
+$u = 0;
+while( $s = wfFetchObject( $res ) ) {
+	$utitle = toUtf8( $s->cur_title );
+	$uuser = toUtf8( $s->cur_user_text );
+	$ucomment = toUtf8( $s->cur_comment );
+	$utext = toUtf8( $s->cur_text );
+	# NOTE(review): the original computed $now = wfTimestampNow() here and
+	# never used it. Possibly a "touched" timestamp was meant to be bumped,
+	# as the user conversion does with user_touched -- confirm.
+
+	$sql = "UPDATE cur SET cur_title='$utitle',cur_user_text='$uuser', " .
+		"cur_comment='$ucomment',cur_text='$utext' " .
+		"WHERE cur_id={$s->cur_id}";
+	wfQuery( $sql, DB_WRITE );
+	# TODO: no cache invalidation is done for pages. The original carried a
+	# commented-out user-cache delete copied from the user loop, which
+	# referenced $s->user_id, a column this query does not select.
+
+	$otitle = wfStrencode( $s->cur_title );
+	if( $otitle != $utitle ) {
+		# Also update titles in watchlist and old
+		$sql = "UPDATE old SET old_title='$utitle' " .
+			"WHERE old_namespace={$s->cur_namespace} AND old_title='$otitle'";
+		wfQuery( $sql, DB_WRITE );
+
+		# Clear the low bit of the namespace number before matching
+		# watchlist rows.
+		$ns = intval( $s->cur_namespace ) & ~1;
+		$sql = "UPDATE watchlist SET wl_title='$utitle' " .
+			"WHERE wl_namespace=$ns AND wl_title='$otitle'";
+		wfQuery( $sql, DB_WRITE );
+		$u++;
+	}
+
+	if( ++$n % 100 == 0 ) print "$n\n";
+}
+wfFreeResult( $res );
+if( $n ) {
+	# $u / $n is a fraction; scale it to a percentage. The original format
+	# string lacked the 'f' conversion, so no number was printed at all.
+	printf( "Updated old/watchlist titles on %2.02f%%.\n\n", 100.0 * $u / $n );
+} else {
+	print "Didn't update any old/watchlist titles.\n\n";
+}
+
+/*
+old
+	old_title
+	old_text -> may be gzipped
+	old_comment
+	old_user_text
+
+archive
+	ar_title
+	ar_text -> may be gzipped
+	ar_comment
+	ar_user_text
+*/
+
+?>
\ No newline at end of file
-- 
2.20.1