From: Brion Vibber Date: Mon, 14 Mar 2005 12:08:13 +0000 (+0000) Subject: Rewrite output to create Special:Export format for import X-Git-Tag: 1.5.0alpha1~608 X-Git-Url: http://git.cyclocoop.org/wiki/Target_page?a=commitdiff_plain;h=b6ce94f228184bc3690efc760978e4c32350ed06;p=lhc%2Fweb%2Fwiklou.git Rewrite output to create Special:Export format for import instead of raw SQL. Should be 'future-proof' against future schema changes. --- diff --git a/maintenance/importUseModWiki.php b/maintenance/importUseModWiki.php index 8a23f8bc77..7011bfbb27 100644 --- a/maintenance/importUseModWiki.php +++ b/maintenance/importUseModWiki.php @@ -1,7 +1,7 @@ * Based loosely on Magnus's code from 2001-2002 * @@ -16,6 +16,11 @@ * and CamelCase and /Subpage link conversion * 2004-11-17 * + * Rewrite output to create Special:Export format for import + * instead of raw SQL. Should be 'future-proof' against future + * schema changes. + * 2005-03-14 + * * @todo document * @package MediaWiki * @subpackage Maintenance @@ -39,10 +44,11 @@ $wgFieldSeparator = "\xb3"; # Some wikis may use different char $FS2 = $FS."2" ; $FS3 = $FS."3" ; -$conversiontime = wfTimestampNow(); # Conversions will be marked with this timestamp +# Unicode sanitization tools +require_once( '../includes/normal/UtfNormal.php' ); + $usercache = array(); -wfSeedRandom(); importPages(); # ------------------------------------------------------------------------------ @@ -51,6 +57,17 @@ function importPages() { global $wgRootDirectory; + $gt = '>'; + echo << + + +END; $letters = array( 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', @@ -60,11 +77,15 @@ function importPages() if( is_dir( $dir ) ) importPageDirectory( $dir ); } + echo << + +END; } function importPageDirectory( $dir, $prefix = "" ) { - echo "\n-- Checking page directory $dir\n"; + echo "\n\n"; $mydir = opendir( $dir ); while( $entry = readdir( $mydir ) ) { if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) { @@ -75,7 +96,7 @@ function importPageDirectory( $dir, $prefix = "" ) importPageDirectory( "$dir/$entry", "$entry/" ); } } else { - echo "-- File '$entry' doesn't seem to contain an article. Skipping.\n"; + echo "\n"; } } } @@ -87,20 +108,6 @@ function importPageDirectory( $dir, $prefix = "" ) /* fetch_ functions Grab a given item from the database */ -function fetchUser( $uid ) -{ - die ("fetchUser not implemented" ); - - global $FS,$FS2,$FS3, $wgRootDirectory; - - $fname = $wgRootDirectory . "/page/" . $title; - if( !file_exists( $fname ) ) return false; - - $data = splitHash( implode( "", file( $fname ) ) ); - # enough? - - return $data; -} function useModFilename( $title ) { $c = substr( $title, 0, 1 ); @@ -167,38 +174,6 @@ function splitHash ( $sep , $str ) { Take a fetched item and produce SQL */ -/* importUser - $uid is the UseMod user id number. - The new ones will be assigned arbitrarily and are for internal use only. - - THIS IS DELAYED SINCE PUBLIC DUMPS DONT INCLUDE USER DIR - */ -function importUser( $uid ) -{ - global $last_uid, $user_list, $wgTimestampCorrection; - die("importUser NYI"); - return ""; - - $stuff = fetchUser( $uid ); - $last_uid++; - - $name = wfStrencode( $stuff->username ); - $hash = md5hash( $stuff->password ); # Doable? - $tzoffset = $stuff['tzoffset'] - ($wgTimestampCorrection / 3600); # -8 to 0; +9 to +1 - $hideminor = ($stuff['rcall'] ? 0 : 1); - $options = "cols={$stuff['editcols']} -rows={$stuff['editrows']} -rcdays={$stuff['rcdays']} -timecorrection={$tzoffset} -hideminor={$hideminor} - "; - - $sql = "INSERT - INTO user (user_id,user_name,user_password,user_options) - VALUES ({$last_uid},'{$name}','{$hash}','{$options}');\n"; - return $sql; -} - function checkUserCache( $name, $host ) { global $usercache; @@ -210,10 +185,10 @@ function checkUserCache( $name, $host ) # If we haven't imported user accounts $userid = 0; } - $username = wfStrencode( $name ); + $username = str_replace( '_', ' ', $name ); } else { $userid = 0; - $username = wfStrencode( $host ); + $username = $host; } return array( $userid, $username ); } @@ -221,13 +196,11 @@ function checkUserCache( $name, $host ) function importPage( $title ) { global $usercache; - global $conversiontime; - echo "\n-- Importing page $title\n"; + echo "\n\n"; $page = fetchPage( $title ); - $newtitle = wfStrencode( recodeText( $title ) ); - $namespace = 0; + $newtitle = xmlsafe( str_replace( '_', ' ', recodeText( $title ) ) ); $munged = mungeFormat( $page->text ); if( $munged != $page->text ) { @@ -235,35 +208,26 @@ function importPage( $title ) * Save a *new* revision with the conversion, and put the * previous last version into the history. */ - $text = wfStrencode( recodeText( $munged ) ); - $comment = "link fix"; - $minor = 1; - $userid = 0; - $username = "Conversion script"; - $timestamp = wfUnix2Timestamp( time() ); - $redirect = ( preg_match( '/^#REDIRECT/', $page->text ) ? 1 : 0 ); - $random = mt_rand() / mt_getrandmax(); - - $revisions = array( $page ); + $next = array2object( array( + 'text' => $munged, + 'minor' => 1, + 'username' => 'Conversion script', + 'host' => '127.0.0.1', + 'ts' => time(), + 'summary' => 'link fix', + ) ); + $revisions = array( $page, $next ); } else { /** * Current revision: */ - $text = wfStrencode( recodeText( $page->text ) ); - $comment = wfStrencode( recodeText( $page->summary ) ); - $minor = ($page->minor ? 1 : 0); - list( $userid, $username ) = checkUserCache( $page->username, $page->host ); - $username = wfStrencode( recodeText( $username ) ); - $timestamp = wfUnix2Timestamp( $page->ts ); - $redirect = ( preg_match( '/^#REDIRECT/', $page->text ) ? 1 : 0 ); - $random = mt_rand() / mt_getrandmax(); - - $revisions = array(); + $revisions = array( $page ); } - $sql = " -INSERT - INTO cur (cur_namespace,cur_title,cur_text,cur_comment,cur_user,cur_user_text,cur_timestamp,cur_touched,cur_minor_edit,cur_is_redirect,cur_random) VALUES - ($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$conversiontime',$minor,$redirect,$random);\n"; + $xml = << + $newtitle + +END; # History $revisions = array_merge( $revisions, fetchKeptPages( $title ) ); @@ -271,23 +235,27 @@ INSERT return $sql; } - $any = false; - $sql .= "INSERT - INTO old (old_namespace,old_title,old_text,old_comment,old_user,old_user_text,old_timestamp,old_minor_edit) VALUES\n"; foreach( $revisions as $rev ) { - $text = wfStrencode( recodeText( $rev->text ) ); - $minor = ($rev->minor ? 1 : 0); + $text = xmlsafe( recodeText( $rev->text ) ); + $minor = ($rev->minor ? '' : ''); list( $userid, $username ) = checkUserCache( $rev->username, $rev->host ); - $username = wfStrencode( recodeText( $username ) ); - $timestamp = wfUnix2Timestamp( $rev->ts ); - $comment = wfStrencode( recodeText( $rev->summary ) ); + $username = xmlsafe( recodeText( $username ) ); + $timestamp = xmlsafe( timestamp2ISO8601( $rev->ts ) ); + $comment = xmlsafe( recodeText( $rev->summary ) ); - if($any) $sql .= ","; - $sql .= "\n\t($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp',$minor)"; - $any = true; + $xml .= << + $timestamp + $username + $comment + $minor + $text + + +END; } - $sql .= ";\n\n"; - return $sql; + $xml .= "\n\n"; + return $xml; } # Whee! @@ -322,42 +290,27 @@ function wfMungeToUtf8($string) { return $string; } -function wfStrencode( $string ) { - return mysql_escape_string( $string ); -} - -function wfUnix2Timestamp( $unixtime ) { - return gmdate( "YmdHis", $unixtime ); +function timestamp2ISO8601( $ts ) { + #2003-08-05T18:30:02Z + return gmdate( 'Y-m-d', $ts ) . 'T' . gmdate( 'H:i:s', $ts ) . 'Z'; } -function wfTimestamp2Unix( $ts ) -{ - return gmmktime( ( (int)substr( $ts, 8, 2) ), - (int)substr( $ts, 10, 2 ), (int)substr( $ts, 12, 2 ), - (int)substr( $ts, 4, 2 ), (int)substr( $ts, 6, 2 ), - (int)substr( $ts, 0, 4 ) ); -} - -function wfTimestampNow() { - # return NOW - return gmdate( "YmdHis" ); +function xmlsafe( $string ) { + /** + * The page may contain old data which has not been properly normalized. + * Invalid UTF-8 sequences or forbidden control characters will make our + * XML output invalid, so be sure to strip them out. + */ + $string = UtfNormal::cleanUp( $string ); + + $string = htmlspecialchars( $string ); + return $string; } -# Sorting hack for MySQL 3, which doesn't use index sorts for DESC -function wfInvertTimestamp( $ts ) { - return strtr( - $ts, - "0123456789", - "9876543210" - ); +function xmlCommentSafe( $text ) { + return str_replace( '--', '\\-\\-', xmlsafe( $text ) ); } -function wfSeedRandom() -{ - $seed = hexdec(substr(md5(microtime()),-8)) & 0x7fffffff; - mt_srand( $seed ); - $wgRandomSeeded = true; -} function array2object( $arr ) { $o = (object)0;