From 8de403f3a33e8f2bf0a0bac86d0433a233557576 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 10 Oct 2003 04:53:36 +0000 Subject: [PATCH] minimal stripped-down version --- maintenance/importUseModWiki.php | 305 +++++++++++++++++++++++++++++++ 1 file changed, 305 insertions(+) create mode 100644 maintenance/importUseModWiki.php diff --git a/maintenance/importUseModWiki.php b/maintenance/importUseModWiki.php new file mode 100644 index 0000000000..ee6f3717a6 --- /dev/null +++ b/maintenance/importUseModWiki.php @@ -0,0 +1,305 @@ + + Based loosely on Magnus's code from 2001-2002 + + Updated limited version to get something working temporarily + 2003-10-09 + Be sure to run the link & index rebuilding scripts! + + */ + +/* globals */ +$wgRootDirectory = "/Users/brion/src/wiki/convert/wiki-fy/lib-http/db/wiki"; +$wgFieldSeparator = "\xb3"; # Some wikis may use different char + $FS = $wgFieldSeparator ; + $FS1 = $FS."1" ; + $FS2 = $FS."2" ; + $FS3 = $FS."3" ; + +$conversiontime = wfTimestampNow(); # Conversions will be marked with this timestamp +$usercache = array(); + +wfSeedRandom(); +importPages(); + +# ------------------------------------------------------------------------------ + +function importPages() +{ + global $wgRootDirectory; + + $letters = array( + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', + 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', + 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'other' ); + foreach( $letters as $letter ) { + $dir = "$wgRootDirectory/page/$letter"; + if( is_dir( $dir ) ) + importPageDirectory( $dir ); + } +} + +function importPageDirectory( $dir, $prefix = "" ) +{ + echo "\n-- Checking page directory $dir\n"; + $mydir = opendir( $dir ); + while( $entry = readdir( $mydir ) ) { + if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) { + echo importPage( $prefix . $m[1] ); + } else { + if( is_dir( "$dir/$entry" ) ) { + if( $entry != '.' && $entry != '..' ) { + importPageDirectory( "$dir/$entry", "$entry/" ); + } + } else { + echo "-- File '$entry' doesn't seem to contain an article. Skipping.\n"; + } + } + } +} + + +# ------------------------------------------------------------------------------ + +/* fetch_ functions + Grab a given item from the database + */ +function fetchUser( $uid ) +{ + die ("fetchUser not implemented" ); + + global $FS,$FS2,$FS3, $wgRootDirectory; + + $fname = $wgRootDirectory . "/page/" . $title; + if( !file_exists( $fname ) ) return false; + + $data = splitHash( implode( "", file( $fname ) ) ); + # enough? + + return $data; +} + +function useModFilename( $title ) { + $c = substr( $title, 0, 1 ); + if(preg_match( '/[A-Z]/', $c ) ) { + return "$c/$title"; + } + return "other/$title"; +} + +function fetchPage( $title ) +{ + global $FS,$FS1,$FS2,$FS3, $wgRootDirectory; + + $fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db"; + if( !file_exists( $fname ) ) { + die( "Couldn't open file '$fname' for page '$title'.\n" ); + } + + $page = splitHash( $FS1, file_get_contents( $fname ) ); + $section = splitHash( $FS2, $page["text_default"] ); + $text = splitHash( $FS3, $section["data"] ); + + return array2object( array( "text" => $text["text"] , "summary" => $text["summary"] , + "minor" => $text["minor"] , "ts" => $section["ts"] , + "username" => $section["username"] , "host" => $section["host"] ) ); +} + +function fetchKeptPages( $title ) +{ + global $FS,$FS1,$FS2,$FS3, $wgRootDirectory, $wgTimezoneCorrection; + + $fname = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp"; + if( !file_exists( $fname ) ) return array(); + + $keptlist = explode( $FS1, file_get_contents( $fname ) ); + array_shift( $keptlist ); # Drop the junk at beginning of file + + $revisions = array(); + foreach( $keptlist as $rev ) { + $section = splitHash( $FS2, $rev ); + $text = splitHash( $FS3, $section["data"] ); + if ( $text["text"] && $text["minor"] != "" && ( $section["ts"]*1 > 0 ) ) { + array_push( $revisions, array2object( array ( "text" => $text["text"] , "summary" => $text["summary"] , + "minor" => $text["minor"] , "ts" => $section["ts"] , + "username" => $section["username"] , "host" => $section["host"] ) ) ); + } else { + echo "-- skipped a bad old revision\n"; + } + } + return $revisions; +} + +function splitHash ( $sep , $str ) { + $temp = explode ( $sep , $str ) ; + $ret = array () ; + for ( $i = 0; $i+1 < count ( $temp ) ; $i++ ) { + $ret[$temp[$i]] = $temp[++$i] ; + } + return $ret ; + } + + +/* import_ functions + Take a fetched item and produce SQL + */ + +/* importUser + $uid is the UseMod user id number. + The new ones will be assigned arbitrarily and are for internal use only. + + THIS IS DELAYED SINCE PUBLIC DUMPS DONT INCLUDE USER DIR + */ +function importUser( $uid ) +{ + global $last_uid, $user_list, $wgTimestampCorrection; + die("importUser NYI"); + return ""; + + $stuff = fetchUser( $uid ); + $last_uid++; + + $name = wfStrencode( $stuff->username ); + $hash = md5hash( $stuff->password ); # Doable? + $tzoffset = $stuff['tzoffset'] - ($wgTimestampCorrection / 3600); # -8 to 0; +9 to +1 + $hideminor = ($stuff['rcall'] ? 0 : 1); + $options = "cols={$stuff['editcols']} +rows={$stuff['editrows']} +rcdays={$stuff['rcdays']} +timecorrection={$tzoffset} +hideminor={$hideminor} + "; + + $sql = "INSERT + INTO user (user_id,user_name,user_password,user_options) + VALUES ({$last_uid},'{$name}','{$hash}','{$options}');\n"; + return $sql; +} + +function checkUserCache( $name, $host ) +{ + global $usercache; + + if( $name ) { + if( in_array( $name, $usercache ) ) { + $userid = $usercache[$name]; + } else { + # If we haven't imported user accounts + $userid = 0; + } + $username = wfStrencode( $name ); + } else { + $userid = 0; + $username = wfStrencode( $host ); + } + return array( $userid, $username ); +} + +function importPage( $title ) +{ + global $usercache; + global $conversiontime; + + echo "\n-- Importing page $title\n"; + $page = fetchPage( $title ); + + $newtitle = wfStrencode( recodeText( $title ) ); + $namespace = 0; + + # Current revision: + $text = wfStrencode( recodeText( $page->text ) ); + $minor = ($page->minor ? 1 : 0); + list( $userid, $username ) = checkUserCache( $page->username, $page->host ); + $timestamp = wfUnix2Timestamp( $page->ts ); + $redirect = ( preg_match( '/^#REDIRECT/', $page->text ) ? 1 : 0 ); + $random = mt_rand() / mt_getrandmax(); + $inverse = wfInvertTimestamp( $timestamp ); + $sql = " +INSERT + INTO cur (cur_namespace,cur_title,cur_text,cur_comment,cur_user,cur_user_text,cur_timestamp,inverse_timestamp,cur_touched,cur_minor_edit,cur_is_redirect,cur_random) VALUES + ($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$inverse','$conversiontime',$minor,$redirect,$random);\n"; + + # History + $revisions = fetchKeptPages( $title ); + if(count( $revisions ) == 0 ) { + return $sql; + } + + $any = false; + $sql .= "INSERT + INTO old (old_namespace,old_title,old_text,old_comment,old_user,old_user_text,old_timestamp,inverse_timestamp,old_minor_edit) VALUES\n"; + foreach( $revisions as $rev ) { + $text = wfStrencode( recodeText( $rev->text ) ); + $minor = ($rev->minor ? 1 : 0); + list( $userid, $username ) = checkUserCache( $rev->username, $rev->host ); + $username = wfStrencode( recodeText( $username ) ); + $timestamp = wfUnix2Timestamp( $rev->ts ); + $inverse = wfInvertTimestamp( $timestamp ); + $comment = wfStrencode( recodeText( $rev->text ) ); + + if($any) $sql .= ","; + $sql .= "\n\t($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$inverse',$minor)"; + $any = true; + } + $sql .= ";\n\n"; + return $sql; +} + +# Whee! +function recodeText( $string ) { + # For currently latin-1 wikis + $string = str_replace( "\r\n", "\n", $string ); + # return iconv( "CP1252", "UTF-8", $string ); + return utf8_encode( $string ); +} + + +function wfStrencode( $string ) { + return mysql_escape_string( $string ); +} + +function wfUnix2Timestamp( $unixtime ) { + return gmdate( "YmdHis", $unixtime ); +} + +function wfTimestamp2Unix( $ts ) +{ + return gmmktime( ( (int)substr( $ts, 8, 2) ), + (int)substr( $ts, 10, 2 ), (int)substr( $ts, 12, 2 ), + (int)substr( $ts, 4, 2 ), (int)substr( $ts, 6, 2 ), + (int)substr( $ts, 0, 4 ) ); +} + +function wfTimestampNow() { + # return NOW + return gmdate( "YmdHis" ); +} + +# Sorting hack for MySQL 3, which doesn't use index sorts for DESC +function wfInvertTimestamp( $ts ) { + return strtr( + $ts, + "0123456789", + "9876543210" + ); +} + +function wfSeedRandom() +{ + $seed = hexdec(substr(md5(microtime()),-8)) & 0x7fffffff; + mt_srand( $seed ); + $wgRandomSeeded = true; +} + +function array2object( $arr ) { + $o = (object)0; + foreach( $arr as $x => $y ) { + $o->$x = $y; + } + return $o; +} + +?> -- 2.20.1