<?php
/**
- * Import data from a UseModWiki into a PediaWiki wiki
+ * Import data from a UseModWiki into a MediaWiki wiki
* 2003-02-09 Brion VIBBER <brion@pobox.com>
* Based loosely on Magnus's code from 2001-2002
*
* and CamelCase and /Subpage link conversion
* 2004-11-17
*
+ * Rewrite output to create Special:Export format for import
+ * instead of raw SQL. Should be 'future-proof' against future
+ * schema changes.
+ * 2005-03-14
+ *
* @todo document
* @package MediaWiki
* @subpackage Maintenance
$FS2 = $FS."2" ;
$FS3 = $FS."3" ;
-$conversiontime = wfTimestampNow(); # Conversions will be marked with this timestamp
+# Unicode sanitization tools
+require_once( '../includes/normal/UtfNormal.php' );
+
$usercache = array();
-wfSeedRandom();
importPages();
# ------------------------------------------------------------------------------
{
global $wgRootDirectory;
+ $gt = '>';
+ echo <<<END
+<?xml version="1.0" encoding="UTF-8" ?$gt
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.1/"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.1/
+ http://www.mediawiki.org/xml/export-0.1.xsd"
+ version="0.1">
+<!-- generated by importUseModWiki.php -->
+
+END;
$letters = array(
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
if( is_dir( $dir ) )
importPageDirectory( $dir );
}
+ echo <<<END
+</mediawiki>
+
+END;
}
function importPageDirectory( $dir, $prefix = "" )
{
- echo "\n-- Checking page directory $dir\n";
+ echo "\n<!-- Checking page directory $dir -->\n";
$mydir = opendir( $dir );
while( $entry = readdir( $mydir ) ) {
if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
importPageDirectory( "$dir/$entry", "$entry/" );
}
} else {
- echo "-- File '$entry' doesn't seem to contain an article. Skipping.\n";
+ echo "<!-- File '" . xmlCommentSafe( $entry ) . "' doesn't seem to contain an article. Skipping. -->\n";
}
}
}
/* fetch_ functions
Grab a given item from the database
*/
-function fetchUser( $uid )
-{
- die ("fetchUser not implemented" );
-
- global $FS,$FS2,$FS3, $wgRootDirectory;
-
- $fname = $wgRootDirectory . "/page/" . $title;
- if( !file_exists( $fname ) ) return false;
-
- $data = splitHash( implode( "", file( $fname ) ) );
- # enough?
-
- return $data;
-}
function useModFilename( $title ) {
$c = substr( $title, 0, 1 );
Take a fetched item and produce SQL
*/
-/* importUser
- $uid is the UseMod user id number.
- The new ones will be assigned arbitrarily and are for internal use only.
-
- THIS IS DELAYED SINCE PUBLIC DUMPS DONT INCLUDE USER DIR
- */
-function importUser( $uid )
-{
- global $last_uid, $user_list, $wgTimestampCorrection;
- die("importUser NYI");
- return "";
-
- $stuff = fetchUser( $uid );
- $last_uid++;
-
- $name = wfStrencode( $stuff->username );
- $hash = md5hash( $stuff->password ); # Doable?
- $tzoffset = $stuff['tzoffset'] - ($wgTimestampCorrection / 3600); # -8 to 0; +9 to +1
- $hideminor = ($stuff['rcall'] ? 0 : 1);
- $options = "cols={$stuff['editcols']}
-rows={$stuff['editrows']}
-rcdays={$stuff['rcdays']}
-timecorrection={$tzoffset}
-hideminor={$hideminor}
- ";
-
- $sql = "INSERT
- INTO user (user_id,user_name,user_password,user_options)
- VALUES ({$last_uid},'{$name}','{$hash}','{$options}');\n";
- return $sql;
-}
-
function checkUserCache( $name, $host )
{
global $usercache;
# If we haven't imported user accounts
$userid = 0;
}
- $username = wfStrencode( $name );
+ $username = str_replace( '_', ' ', $name );
} else {
$userid = 0;
- $username = wfStrencode( $host );
+ $username = $host;
}
return array( $userid, $username );
}
function importPage( $title )
{
global $usercache;
- global $conversiontime;
- echo "\n-- Importing page $title\n";
+ echo "\n<!-- Importing page " . xmlCommentSafe( $title ) . " -->\n";
$page = fetchPage( $title );
- $newtitle = wfStrencode( recodeText( $title ) );
- $namespace = 0;
+ $newtitle = xmlsafe( str_replace( '_', ' ', recodeText( $title ) ) );
$munged = mungeFormat( $page->text );
if( $munged != $page->text ) {
* Save a *new* revision with the conversion, and put the
* previous last version into the history.
*/
- $text = wfStrencode( recodeText( $munged ) );
- $comment = "link fix";
- $minor = 1;
- $userid = 0;
- $username = "Conversion script";
- $timestamp = wfUnix2Timestamp( time() );
- $redirect = ( preg_match( '/^#REDIRECT/', $page->text ) ? 1 : 0 );
- $random = mt_rand() / mt_getrandmax();
-
- $revisions = array( $page );
+ $next = array2object( array(
+ 'text' => $munged,
+ 'minor' => 1,
+ 'username' => 'Conversion script',
+ 'host' => '127.0.0.1',
+ 'ts' => time(),
+ 'summary' => 'link fix',
+ ) );
+ $revisions = array( $page, $next );
} else {
/**
* Current revision:
*/
- $text = wfStrencode( recodeText( $page->text ) );
- $comment = wfStrencode( recodeText( $page->summary ) );
- $minor = ($page->minor ? 1 : 0);
- list( $userid, $username ) = checkUserCache( $page->username, $page->host );
- $username = wfStrencode( recodeText( $username ) );
- $timestamp = wfUnix2Timestamp( $page->ts );
- $redirect = ( preg_match( '/^#REDIRECT/', $page->text ) ? 1 : 0 );
- $random = mt_rand() / mt_getrandmax();
-
- $revisions = array();
+ $revisions = array( $page );
}
- $sql = "
-INSERT
- INTO cur (cur_namespace,cur_title,cur_text,cur_comment,cur_user,cur_user_text,cur_timestamp,cur_touched,cur_minor_edit,cur_is_redirect,cur_random) VALUES
- ($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$conversiontime',$minor,$redirect,$random);\n";
+ $xml = <<<END
+ <page>
+ <title>$newtitle</title>
+
+END;
# History
$revisions = array_merge( $revisions, fetchKeptPages( $title ) );
return $sql;
}
- $any = false;
- $sql .= "INSERT
- INTO old (old_namespace,old_title,old_text,old_comment,old_user,old_user_text,old_timestamp,old_minor_edit) VALUES\n";
foreach( $revisions as $rev ) {
- $text = wfStrencode( recodeText( $rev->text ) );
- $minor = ($rev->minor ? 1 : 0);
+ $text = xmlsafe( recodeText( $rev->text ) );
+ $minor = ($rev->minor ? '<minor/>' : '');
list( $userid, $username ) = checkUserCache( $rev->username, $rev->host );
- $username = wfStrencode( recodeText( $username ) );
- $timestamp = wfUnix2Timestamp( $rev->ts );
- $comment = wfStrencode( recodeText( $rev->summary ) );
+ $username = xmlsafe( recodeText( $username ) );
+ $timestamp = xmlsafe( timestamp2ISO8601( $rev->ts ) );
+ $comment = xmlsafe( recodeText( $rev->summary ) );
- if($any) $sql .= ",";
- $sql .= "\n\t($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp',$minor)";
- $any = true;
+ $xml .= <<<END
+ <revision>
+ <timestamp>$timestamp</timestamp>
+ <contributor><username>$username</username></contributor>
+ <comment>$comment</comment>
+ $minor
+ <text>$text</text>
+ </revision>
+
+END;
}
- $sql .= ";\n\n";
- return $sql;
+ $xml .= "</page>\n\n";
+ return $xml;
}
# Whee!
return $string;
}
-function wfStrencode( $string ) {
- return mysql_escape_string( $string );
-}
-
-function wfUnix2Timestamp( $unixtime ) {
- return gmdate( "YmdHis", $unixtime );
+/**
+ * Format a Unix timestamp as an ISO 8601 date-time string in UTC,
+ * for example 2003-08-05T18:30:02Z.
+ *
+ * @param int $ts Seconds since the Unix epoch
+ * @return string
+ */
+function timestamp2ISO8601( $ts ) {
+	// The backslash-escaped T and Z are emitted literally by gmdate().
+	$iso = gmdate( 'Y-m-d\TH:i:s\Z', $ts );
+	return $iso;
}
-function wfTimestamp2Unix( $ts )
-{
- return gmmktime( ( (int)substr( $ts, 8, 2) ),
- (int)substr( $ts, 10, 2 ), (int)substr( $ts, 12, 2 ),
- (int)substr( $ts, 4, 2 ), (int)substr( $ts, 6, 2 ),
- (int)substr( $ts, 0, 4 ) );
-}
-
-function wfTimestampNow() {
- # return NOW
- return gmdate( "YmdHis" );
+/**
+ * Escape a string for use as character data in the XML output.
+ *
+ * Old page data may not have been properly normalized. Invalid UTF-8
+ * sequences or forbidden control characters would make the XML output
+ * invalid, so the text is passed through UtfNormal::cleanUp() first and
+ * only then run through htmlspecialchars().
+ *
+ * @param string $string Raw text
+ * @return string Cleaned and escaped text
+ */
+function xmlsafe( $string ) {
+	$cleaned = UtfNormal::cleanUp( $string );
+	return htmlspecialchars( $cleaned );
}
-# Sorting hack for MySQL 3, which doesn't use index sorts for DESC
-function wfInvertTimestamp( $ts ) {
- return strtr(
- $ts,
- "0123456789",
- "9876543210"
- );
+/**
+ * Escape a string so it can be embedded inside an XML comment.
+ *
+ * The text is first passed through xmlsafe(). After that, every hyphen
+ * that sits next to another hyphen is prefixed with a backslash, so the
+ * result never contains the sequence "--", which is not permitted inside
+ * an XML comment. Escaping hyphen by hyphen (rather than pair by pair)
+ * matters for odd-length runs: "---" must become "\-\-\-", not "\-\--".
+ * Isolated hyphens are left untouched.
+ *
+ * @param string $text Raw text
+ * @return string Text safe to place between comment delimiters
+ */
+function xmlCommentSafe( $text ) {
+	$safe = xmlsafe( $text );
+	// Match a hyphen followed by a hyphen, or a hyphen preceded by one.
+	// The replacement '\\\\-' is a literal backslash plus the hyphen.
+	return preg_replace( '/-(?=-)|(?<=-)-/', '\\\\-', $safe );
}
-function wfSeedRandom()
-{
- $seed = hexdec(substr(md5(microtime()),-8)) & 0x7fffffff;
- mt_srand( $seed );
- $wgRandomSeeded = true;
-}
function array2object( $arr ) {
$o = (object)0;