minimal stripped-down version
[lhc/web/wiklou.git] / maintenance / importUseModWiki.php
1 <?php
2
3 /*
4 Import data from a UseModWiki into a PediaWiki wiki
5 2003-02-09 Brion VIBBER <brion@pobox.com>
6 Based loosely on Magnus's code from 2001-2002
7
8 Updated limited version to get something working temporarily
9 2003-10-09
10 Be sure to run the link & index rebuilding scripts!
11
12 */
13
/* globals */

# Root of the UseModWiki data; the page/ and keep/ trees are read beneath it
$wgRootDirectory = "/Users/brion/src/wiki/convert/wiki-fy/lib-http/db/wiki";

# Some wikis may use a different separator char
$wgFieldSeparator = "\xb3";
$FS = $wgFieldSeparator;

# The three separators used when splitting records: the base char plus a digit
$FS1 = "{$FS}1";
$FS2 = "{$FS}2";
$FS3 = "{$FS}3";

# Conversions will be marked with this timestamp
$conversiontime = wfTimestampNow();

# Consulted by checkUserCache(); nothing in this script fills it
$usercache = array();

wfSeedRandom();
importPages();
27
28 # ------------------------------------------------------------------------------
29
/*
	importPages
	Visit each per-letter bucket ("A".."Z" and "other") beneath
	$wgRootDirectory/page and import every bucket directory that exists.
*/
function importPages()
{
	global $wgRootDirectory;

	$buckets = array_merge( range( 'A', 'Z' ), array( 'other' ) );
	foreach( $buckets as $bucket ) {
		$path = "$wgRootDirectory/page/$bucket";
		if( !is_dir( $path ) ) {
			# No pages were filed under this bucket
			continue;
		}
		importPageDirectory( $path );
	}
}
44
/*
	importPageDirectory
	Echo the import SQL for every "*.db" file in $dir, descending into
	subdirectories.

	$dir    - directory to scan
	$prefix - string prepended to each page name found in $dir; nested
	          directories are scanned with "<directory name>/" as prefix
*/
function importPageDirectory( $dir, $prefix = "" )
{
	echo "\n-- Checking page directory $dir\n";
	$handle = opendir( $dir );
	if( $handle === false ) {
		echo "-- Couldn't open directory '$dir'. Skipping.\n";
		return;
	}

	# Compare against false explicitly: an entry literally named "0" is
	# falsy and would otherwise end the scan early.
	while( ( $entry = readdir( $handle ) ) !== false ) {
		if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
			echo importPage( $prefix . $m[1] );
		} elseif( is_dir( "$dir/$entry" ) ) {
			if( $entry != '.' && $entry != '..' ) {
				importPageDirectory( "$dir/$entry", "$entry/" );
			}
		} else {
			echo "-- File '$entry' doesn't seem to contain an article. Skipping.\n";
		}
	}
	closedir( $handle );
}
63
64
65 # ------------------------------------------------------------------------------
66
67 /* fetch_ functions
68 Grab a given item from the database
69 */
/*
	fetchUser
	$uid is the UseMod user id.

	Not implemented: calling this terminates the script.

	TODO: a real implementation has to locate the user's record from $uid
	and parse it with splitHash( $separator, $contents ); where the user
	records live is not known from this script.
*/
function fetchUser( $uid )
{
	die ("fetchUser not implemented" );
}
84
/*
	useModFilename
	Map a page title to its path relative to the page/ or keep/ tree:
	"<first char>/<title>" when the title starts with A-Z, otherwise
	"other/<title>". No file extension is added.
*/
function useModFilename( $title ) {
	$initial = substr( $title, 0, 1 );
	$bucket = preg_match( '/[A-Z]/', $initial ) ? $initial : 'other';
	return $bucket . '/' . $title;
}
92
/*
	fetchPage
	Read the current revision of $title from its ".db" file under
	$wgRootDirectory/page.

	Returns an object with the properties text, summary, minor, ts,
	username and host. Terminates the script if the file is missing or
	cannot be read.
*/
function fetchPage( $title )
{
	global $FS1, $FS2, $FS3, $wgRootDirectory;

	$fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db";
	if( !file_exists( $fname ) ) {
		die( "Couldn't open file '$fname' for page '$title'.\n" );
	}

	# An existing but unreadable file must not be imported as an empty page
	$contents = file_get_contents( $fname );
	if( $contents === false ) {
		die( "Couldn't read file '$fname' for page '$title'.\n" );
	}

	# Three nested key/value levels: page -> default text section -> text data
	$page = splitHash( $FS1, $contents );
	$section = splitHash( $FS2, $page["text_default"] );
	$text = splitHash( $FS3, $section["data"] );

	return array2object( array(
		"text" => $text["text"],
		"summary" => $text["summary"],
		"minor" => $text["minor"],
		"ts" => $section["ts"],
		"username" => $section["username"],
		"host" => $section["host"] ) );
}
110
/*
	fetchKeptPages
	Read the old revisions of $title from its ".kp" file under
	$wgRootDirectory/keep.

	Returns an array of objects with the properties text, summary, minor,
	ts, username and host; an empty array if there is no ".kp" file.
	Revisions with empty text, an empty minor flag or a non-positive
	timestamp are reported and left out.
*/
function fetchKeptPages( $title )
{
	global $FS,$FS1,$FS2,$FS3, $wgRootDirectory, $wgTimezoneCorrection;

	$keepFile = "$wgRootDirectory/keep/" . useModFilename( $title ) . ".kp";
	if( !file_exists( $keepFile ) ) {
		return array();
	}

	$chunks = explode( $FS1, file_get_contents( $keepFile ) );
	# Whatever precedes the first separator is junk, not a revision
	array_shift( $chunks );

	$revisions = array();
	foreach( $chunks as $chunk ) {
		$section = splitHash( $FS2, $chunk );
		$text = splitHash( $FS3, $section["data"] );

		$usable = $text["text"] && $text["minor"] != "" && ( $section["ts"]*1 > 0 );
		if( !$usable ) {
			echo "-- skipped a bad old revision\n";
			continue;
		}

		$revisions[] = array2object( array(
			"text" => $text["text"],
			"summary" => $text["summary"],
			"minor" => $text["minor"],
			"ts" => $section["ts"],
			"username" => $section["username"],
			"host" => $section["host"] ) );
	}
	return $revisions;
}
135
/*
	splitHash
	Split $str on $sep and read the pieces as alternating key, value,
	key, value, ... Returns the resulting associative array; a trailing
	key with no value is dropped.
*/
function splitHash ( $sep , $str ) {
	$pieces = explode( $sep, $str );
	$total = count( $pieces );
	$hash = array();
	for( $k = 0; $k + 1 < $total; $k += 2 ) {
		$hash[$pieces[$k]] = $pieces[$k + 1];
	}
	return $hash;
}
144
145
146 /* import_ functions
147 Take a fetched item and produce SQL
148 */
149
/* importUser
	$uid is the UseMod user id number.
	The new ones will be assigned arbitrarily and are for internal use only.

	THIS IS DELAYED SINCE PUBLIC DUMPS DON'T INCLUDE USER DIR

	Not yet implemented: calling this terminates the script.

	TODO: the planned result is an
	  INSERT INTO user (user_id,user_name,user_password,user_options)
	statement, with user_options built from the UseMod fields editcols
	(cols), editrows (rows), rcdays, tzoffset (timecorrection) and rcall
	(hideminor is 0 when rcall is set, else 1). How to carry over the
	password hash is still an open question.
*/
function importUser( $uid )
{
	die("importUser NYI");
}
181
/*
	checkUserCache
	Resolve the author of a revision.

	$name - user name recorded for the revision (may be empty)
	$host - host recorded for the revision, used when there is no name

	Returns array( user id, SQL-escaped user text ). The id is looked up
	in $usercache by name and is 0 when the name is not cached or the
	edit has no user name.
*/
function checkUserCache( $name, $host )
{
	global $usercache;

	if( !$name ) {
		# Anonymous edit: credit the host instead
		return array( 0, wfStrencode( $host ) );
	}

	# $usercache is keyed by user name, so test the key; in_array() would
	# search the cached ids instead. 0 covers not-yet-imported accounts.
	$userid = isset( $usercache[$name] ) ? $usercache[$name] : 0;
	return array( $userid, wfStrencode( $name ) );
}
200
/*
	importPage
	Build the SQL that imports one page: an INSERT into cur for the
	current revision and, when kept revisions exist, one multi-row
	INSERT into old for the history.

	$title is the UseMod page title.
	Returns the SQL text; progress is echoed as an SQL comment.
*/
function importPage( $title )
{
	global $conversiontime;

	echo "\n-- Importing page $title\n";
	$page = fetchPage( $title );

	$newtitle = wfStrencode( recodeText( $title ) );
	$namespace = 0;

	# Current revision:
	$text = wfStrencode( recodeText( $page->text ) );
	$comment = wfStrencode( recodeText( $page->summary ) );
	$minor = ($page->minor ? 1 : 0);
	list( $userid, $username ) = checkUserCache( $page->username, $page->host );
	# checkUserCache() has already SQL-escaped the name, so only recode it;
	# escaping again would double the backslashes.
	$username = recodeText( $username );
	$timestamp = wfUnix2Timestamp( $page->ts );
	$redirect = ( preg_match( '/^#REDIRECT/', $page->text ) ? 1 : 0 );
	$random = mt_rand() / mt_getrandmax();
	$inverse = wfInvertTimestamp( $timestamp );
	$sql = "
INSERT
	INTO cur (cur_namespace,cur_title,cur_text,cur_comment,cur_user,cur_user_text,cur_timestamp,inverse_timestamp,cur_touched,cur_minor_edit,cur_is_redirect,cur_random) VALUES
	($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$inverse','$conversiontime',$minor,$redirect,$random);\n";

	# History
	$revisions = fetchKeptPages( $title );
	if(count( $revisions ) == 0 ) {
		return $sql;
	}

	$rows = array();
	foreach( $revisions as $rev ) {
		$text = wfStrencode( recodeText( $rev->text ) );
		$comment = wfStrencode( recodeText( $rev->summary ) );
		$minor = ($rev->minor ? 1 : 0);
		list( $userid, $username ) = checkUserCache( $rev->username, $rev->host );
		# Already escaped by checkUserCache(); see the note above
		$username = recodeText( $username );
		$timestamp = wfUnix2Timestamp( $rev->ts );
		$inverse = wfInvertTimestamp( $timestamp );

		$rows[] = "\n\t($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$inverse',$minor)";
	}

	$sql .= "INSERT
	INTO old (old_namespace,old_title,old_text,old_comment,old_user,old_user_text,old_timestamp,inverse_timestamp,old_minor_edit) VALUES\n";
	$sql .= implode( ",", $rows );
	$sql .= ";\n\n";
	return $sql;
}
250
251 # Whee!
/*
	recodeText
	Normalise CRLF line endings to LF, then pass the result through
	utf8_encode(). For currently latin-1 wikis; a CP1252 source would
	need iconv( "CP1252", "UTF-8", ... ) instead.
*/
function recodeText( $string ) {
	$unixNewlines = str_replace( "\r\n", "\n", $string );
	return utf8_encode( $unixNewlines );
}
258
259
/*
	wfStrencode
	Escape $string for use inside a quoted MySQL string literal.

	Escapes the same bytes mysql_escape_string() did (NUL, newline,
	carriage return, backslash, single quote, double quote, Ctrl-Z)
	without calling it, since that function no longer exists as of PHP 7.
	Works byte-wise and needs no database connection.
*/
function wfStrencode( $string ) {
	$escapes = array(
		"\0" => "\\0",
		"\n" => "\\n",
		"\r" => "\\r",
		"\\" => "\\\\",
		"'" => "\\'",
		"\"" => "\\\"",
		"\x1a" => "\\Z"
	);
	# strtr() with an array never rescans its own output, so the
	# backslashes it inserts are not escaped a second time.
	return strtr( (string)$string, $escapes );
}
263
/*
	wfUnix2Timestamp
	Format a Unix time as a 14-digit "YYYYMMDDHHMMSS" string in GMT.
*/
function wfUnix2Timestamp( $unixtime ) {
	$layout = "YmdHis";
	return gmdate( $layout, $unixtime );
}
267
/*
	wfTimestamp2Unix
	Convert a 14-digit "YYYYMMDDHHMMSS" GMT timestamp to Unix time.
*/
function wfTimestamp2Unix( $ts )
{
	$year   = (int)substr( $ts, 0, 4 );
	$month  = (int)substr( $ts, 4, 2 );
	$day    = (int)substr( $ts, 6, 2 );
	$hour   = (int)substr( $ts, 8, 2 );
	$minute = (int)substr( $ts, 10, 2 );
	$second = (int)substr( $ts, 12, 2 );

	return gmmktime( $hour, $minute, $second, $month, $day, $year );
}
275
/*
	wfTimestampNow
	The current time as a 14-digit "YYYYMMDDHHMMSS" string in GMT.
*/
function wfTimestampNow() {
	$now = time();
	return gmdate( "YmdHis", $now );
}
280
# Sorting hack for MySQL 3, which doesn't use index sorts for DESC.
# Every digit d of $ts is replaced by 9 - d; other characters are kept.
function wfInvertTimestamp( $ts ) {
	$digits = "0123456789";
	$mirrored = strrev( $digits );
	return strtr( $ts, $digits, $mirrored );
}
289
/*
	wfSeedRandom
	Seed mt_rand() from the current microtime and record that fact in
	the global $wgRandomSeeded.
*/
function wfSeedRandom()
{
	# Without this declaration the flag below would be a throwaway local
	global $wgRandomSeeded;

	# Last 8 hex digits of the hash, masked to a non-negative 31-bit value
	$seed = hexdec(substr(md5(microtime()),-8)) & 0x7fffffff;
	mt_srand( $seed );
	$wgRandomSeeded = true;
}
296
/*
	array2object
	Copy the key/value pairs of $arr onto a new stdClass object, one
	property per key, and return it.
*/
function array2object( $arr ) {
	# Start from an empty object; casting a scalar such as (object)0
	# would add an unwanted "scalar" property.
	$o = new stdClass;
	foreach( $arr as $x => $y ) {
		$o->$x = $y;
	}
	return $o;
}
304
305 ?>