Spam regex feature
[lhc/web/wiklou.git] / includes / SpecialImport.php
1 <?php
2 # Copyright (C) 2003 Brion Vibber <brion@pobox.com>
3 # http://www.mediawiki.org/
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 # http://www.gnu.org/copyleft/gpl.html
19
# Entry point for the Special:Import page.
#
# When the request is a POST with action=submit, an import is attempted
# from the selected source ("upload" = uploaded XML file, "interwiki" =
# Special:Export on one of the configured source wikis) and every revision
# found is handed to wfImportOldRevision(). The outcome is reported on the
# output page. The two import forms are then rendered; the interwiki form
# is shown only when $wgImportSources is non-empty.
#
# $page is accepted for the special-page calling convention; it is not
# used by this function.
function wfSpecialImport( $page = "" ) {
	global $wgOut, $wgRequest, $wgTitle;
	global $wgImportSources;

	if( $wgRequest->wasPosted() && $wgRequest->getVal( 'action' ) == 'submit') {
		$importer = new WikiImporter();

		switch( $wgRequest->getVal( "source" ) ) {
		case "upload":
			$ok = $importer->setupFromUpload( "xmlimport" );
			break;
		case "interwiki":
			$ok = $importer->setupFromInterwiki(
				$wgRequest->getVal( "interwiki" ),
				$wgRequest->getText( "frompage" ) );
			break;
		default:
			# Missing or unrecognised source: nothing to import from.
			$ok = false;
		}

		if( $ok ) {
			$importer->setRevisionHandler( "wfImportOldRevision" );
			if( $importer->doImport() ) {
				# Success!
				$wgOut->addHTML( "<p>" . wfMsg( "importsuccess" ) . "</p>" );
			} else {
				$wgOut->addHTML( "<p>" . wfMsg( "importfailed",
					htmlspecialchars( $importer->getError() ) ) . "</p>" );
			}
		} else {
			# Report setup failures through the same HTML path as parse
			# failures. The escaped text used to be fed to the wikitext
			# parser, and an empty error string produced no output at all.
			$wgOut->addHTML( "<p>" . wfMsg( "importfailed",
				htmlspecialchars( $importer->getError() ) ) . "</p>" );
		}
	}

	$wgOut->addWikiText( "<p>" . wfMsg( "importtext" ) . "</p>" );
	$action = $wgTitle->escapeLocalUrl();
	$wgOut->addHTML( "
<fieldset>
	<legend>Upload XML</legend>
	<form enctype='multipart/form-data' method='post' action=\"$action\">
		<input type='hidden' name='action' value='submit' />
		<input type='hidden' name='source' value='upload' />
		<input type='hidden' name='MAX_FILE_SIZE' value='200000' />
		<input type='file' name='xmlimport' value='' size='30' />
		<input type='submit' value='" . htmlspecialchars( wfMsg( "uploadbtn" ) ) . "'/>
	</form>
</fieldset>
" );

	if( !empty( $wgImportSources ) ) {
		$wgOut->addHTML( "
<fieldset>
	<legend>Interwiki import</legend>
	<form method='post' action=\"$action\">
		<input type='hidden' name='action' value='submit' />
		<input type='hidden' name='source' value='interwiki' />
		<select name='interwiki'>
" );
		# One <option> per configured source wiki prefix.
		foreach( $wgImportSources as $interwiki ) {
			$iw = htmlspecialchars( $interwiki );
			$wgOut->addHTML( "<option value=\"$iw\">$iw</option>\n" );
		}
		$wgOut->addHTML( "
		</select>
		<input name='frompage' />
		<input type='submit' />
	</form>
</fieldset>
" );
	}
}
91
# Revision handler used by Special:Import: stores one imported revision
# as a row in the `old` table.
#
# $revision is a WikiRevision filled in by WikiImporter.
#
# Returns a message string for display in the import log:
#  - the "importfailed" message when the revision has no usable title,
#  - the "importhistoryconflict" message when a row with the same
#    namespace, title and timestamp already exists,
#  - the "ok" message after the row has been inserted.
function wfImportOldRevision( &$revision ) {
	$fname = "wfImportOldRevision";

	# WikiRevision::$title stays NULL when no valid title was parsed;
	# calling methods on it would be a fatal error, so skip the revision.
	if( !is_object( $revision->title ) ) {
		return wfMsg( "importfailed", "invalid title" );
	}

	# Sneak a single revision into place
	$ns = IntVal( $revision->title->getNamespace() );
	$t = wfStrencode( $revision->title->getDBkey() );
	$text = wfStrencode( $revision->getText() );
	$ts = wfStrencode( $revision->getTimestamp() );
	$its = wfStrencode( wfInvertTimestamp( $revision->getTimestamp() ) );
	$comment = wfStrencode( $revision->getComment() );

	$user = User::newFromName( $revision->getUser() );
	$user_id = IntVal( $user->getId() );
	$user_text = wfStrencode( $user->getName() );

	$minor = 0; # ??
	$flags = "";

	# Make sure it doesn't already exist
	$sql = "SELECT 1 FROM old WHERE old_namespace=$ns AND old_title='$t' AND old_timestamp='$ts'";
	$res = wfQuery( $sql, DB_WRITE, $fname );
	$numrows = wfNumRows( $res );
	wfFreeResult( $res );
	if( $numrows > 0 ) {
		return wfMsg( "importhistoryconflict" );
	}

	wfQuery( "INSERT INTO old " .
		"(old_namespace,old_title,old_text,old_comment,old_user,old_user_text," .
		"old_timestamp,inverse_timestamp,old_minor_edit,old_flags) " .
		"VALUES ($ns,'$t','$text','$comment',$user_id,'$user_text','$ts','$its',$minor,'$flags')",
		DB_WRITE, $fname );

	return wfMsg( "ok" );
}
128
# Value holder for a single revision read from an export dump.
#
# String setters run their input through fixEncoding(), which converts
# from UTF-8 to $wgInputEncoding unless that encoding is already UTF-8.
class WikiRevision {
	var $title = null;                  # Title object, or null until setTitle() succeeds
	var $timestamp = "20010115000000";  # 14-digit timestamp; default used until setTimestamp()
	var $user = 0;
	var $user_text = "";                # user name or IP address of the contributor
	var $text = "";
	var $comment = "";

	# --- title -----------------------------------------------------------

	# Build the Title object from a page title string.
	function setTitle( $text ) {
		$converted = $this->fixEncoding( $text );
		$this->title = Title::newFromText( $converted );
	}

	function getTitle() {
		return $this->title;
	}

	# --- timestamp -------------------------------------------------------

	# Accepts the dump format, e.g. 2003-08-05T18:30:02Z, and stores the
	# digits only. Input that does not match the pattern is stored as given.
	function setTimestamp( $ts ) {
		$dumpFormat = '/^(....)-(..)-(..)T(..):(..):(..)Z$/';
		$digitsOnly = '$1$2$3$4$5$6';
		$this->timestamp = preg_replace( $dumpFormat, $digitsOnly, $ts );
	}

	function getTimestamp() {
		return $this->timestamp;
	}

	# --- contributor -----------------------------------------------------

	# Registered and anonymous contributors share the user_text field.
	function setUsername( $user ) {
		$this->user_text = $this->fixEncoding( $user );
	}

	function setUserIP( $ip ) {
		$this->user_text = $this->fixEncoding( $ip );
	}

	function getUser() {
		return $this->user_text;
	}

	# --- text and comment ------------------------------------------------

	function setText( $text ) {
		$this->text = $this->fixEncoding( $text );
	}

	function getText() {
		return $this->text;
	}

	function setComment( $text ) {
		$this->comment = $this->fixEncoding( $text );
	}

	function getComment() {
		return $this->comment;
	}

	# --- helpers ---------------------------------------------------------

	# Returns $data unchanged when $wgInputEncoding is UTF-8 (compared
	# case-insensitively); otherwise returns it converted from UTF-8 to
	# $wgInputEncoding via $wgLang->iconv().
	function fixEncoding( $data ) {
		global $wgLang, $wgInputEncoding;

		$alreadyUtf8 = ( strcasecmp( $wgInputEncoding, "utf-8" ) == 0 );
		return $alreadyUtf8
			? $data
			: $wgLang->iconv( "utf-8", $wgInputEncoding, $data );
	}
}
193
# Reads a MediaWiki XML export and feeds each revision to a callback.
#
# Usage: call one of the setupFrom*() methods to load the XML, optionally
# setRevisionHandler(), then doImport(). On failure getError() returns a
# description.
#
# Parsing is done with the expat-style xml_* functions. The element
# handlers are swapped as the parser descends through
# <mediawiki> / <page> / <revision> / <contributor>, so each in_*/out_*
# pair below only sees the elements of its own nesting level.
class WikiImporter {
	var $mSource = NULL;               # XML text to import
	var $mError = "";                  # last error set through setError()
	var $mXmlError = XML_ERROR_NONE;   # XML parser error code from doImport()
	var $mRevisionHandler = NULL;      # callback invoked for each finished revision
	var $lastfield;

	# Parser working state, declared so it does not have to spring into
	# existence as dynamic properties during parsing.
	var $workTitle = NULL;             # title text of the page being read
	var $workRevision = NULL;          # WikiRevision being filled in
	var $appendfield = "";             # element whose character data is being collected
	var $appenddata = "";              # character data collected so far
	var $parenttag = "";               # level to return to when the collected element closes

	function WikiImporter() {
		$this->setRevisionHandler( array( &$this, "defaultRevisionHandler" ) );
	}

	# Records an error message. Always returns false so callers can write
	# "return $this->setError( ... );".
	function setError( $err ) {
		$this->mError = $err;
		return false;
	}

	# Returns the XML parser's error description if parsing failed,
	# otherwise the message recorded by setError() (possibly empty).
	function getError() {
		if( $this->mXmlError == XML_ERROR_NONE ) {
			return $this->mError;
		} else {
			return xml_error_string( $this->mXmlError );
		}
	}

	# Called for structural problems in the dump (unexpected elements).
	# It only emits a debug line; parsing carries on and the import is not
	# marked as failed.
	function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
	}

	# Loads the XML text from a file (or URL, see setupFromURL()).
	# Always returns true; an unreadable source leaves mSource empty and
	# is reported later by doImport().
	function setupFromFile( $filename ) {
		$this->mSource = file_get_contents( $filename );
		return true;
	}

	# Loads the XML text from the uploaded file in form field $fieldname.
	# Returns true on success; on failure records an error and returns false.
	function setupFromUpload( $fieldname = "xmlimport" ) {
		$upload =& $_FILES[$fieldname];

		if( !isset( $upload ) ) {
			return $this->setError( wfMsg( "importnofile" ) );
		}
		if( !empty( $upload['error'] ) ) {
			return $this->setError( wfMsg( "importuploaderror", $upload['error'] ) );
		}
		$fname = $upload['tmp_name'];
		if( is_uploaded_file( $fname ) ) {
			return $this->setupFromFile( $fname );
		} else {
			return $this->setError( wfMsg( "importnofile" ) );
		}
	}

	# Loads the XML text from a URL.
	function setupFromURL( $url ) {
		# fopen-wrappers are normally turned off for security.
		# ini_set() returns the old value, or false if the setting could not
		# be changed; put back whatever was there rather than forcing "off".
		$previous = ini_set( "allow_url_fopen", true );
		$ret = $this->setupFromFile( $url );
		if( $previous !== false ) {
			ini_set( "allow_url_fopen", $previous );
		}
		return $ret;
	}

	# Loads the XML text from Special:Export/$page on the wiki with the
	# given interwiki prefix.
	# NOTE(review): an unknown prefix returns false without recording an
	# error message, so getError() is empty in that case.
	function setupFromInterwiki( $interwiki, $page ) {
		$base = Title::getInterwikiLink( $interwiki );
		if( empty( $base ) ) {
			return false;
		} else {
			$import = wfUrlencode( "Special:Export/$page" );
			$url = str_replace( "$1", $import, $base );
			$this->notice( "Importing from $url" );
			return $this->setupFromURL( $url );
		}
	}

	# --------------

	# Parses the loaded XML, invoking the revision handler for each
	# <revision>. Returns true on success. Returns false when there is no
	# source text or the XML is not well-formed; see getError().
	function doImport() {
		if( empty( $this->mSource ) ) {
			return $this->setError( wfMsg( "importnotext" ) );
		}

		$parser = xml_parser_create( "UTF-8" );

		# case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		# xml_set_object() takes its object by reference itself; the
		# call-time "&" that used to be here is a parse error on PHP >= 5.4.
		xml_set_object( $parser, $this );
		xml_set_element_handler( $parser, "in_start", "" );

		if( !xml_parse( $parser, $this->mSource, true ) ) {
			# return error message
			$this->mXmlError = xml_get_error_code( $parser );
			xml_parser_free( $parser );
			return false;
		}
		xml_parser_free( $parser );

		return true;
	}

	# Debug hook; output is currently disabled.
	function debug( $data ) {
		# $this->notice( "DEBUG: $data\n" );
	}

	# Shows a progress line: printed in command-line mode, otherwise added
	# to the output page as a list item. $data is emitted as-is.
	function notice( $data ) {
		global $wgCommandLineMode;
		if( $wgCommandLineMode ) {
			print "$data\n";
		} else {
			global $wgOut;
			$wgOut->addHTML( "<li>$data</li>\n" );
		}
	}

	# Sets the callback run for every completed revision. It is called with
	# the WikiRevision and this importer; a non-empty return value is shown
	# as a list item on the output page.
	function setRevisionHandler( $functionref ) {
		$this->mRevisionHandler = $functionref;
	}

	# Handler installed by the constructor: dumps the revision through debug().
	function defaultRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}



	# XML parser callbacks from here out -- beware!
	function donothing( $parser, $x, $y="" ) {
		#$this->debug( "donothing" );
	}

	# Document level: expects the root <mediawiki> element.
	function in_start( $parser, $name, $attribs ) {
		$this->debug( "in_start $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXmlError( "Expected <mediawiki>, got <$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
	}

	# Inside <mediawiki>: expects <page> elements.
	function in_mediawiki( $parser, $name, $attribs ) {
		$this->debug( "in_mediawiki $name" );
		if( $name != "page" ) {
			return $this->throwXmlError( "Expected <page>, got <$name>" );
		}
		xml_set_element_handler( $parser, "in_page", "out_page" );
	}
	function out_mediawiki( $parser, $name ) {
		$this->debug( "out_mediawiki $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXmlError( "Expected </mediawiki>, got </$name>" );
		}
		# Anything after the root element is ignored.
		xml_set_element_handler( $parser, "donothing", "donothing" );
	}

	# Inside <page>: simple text fields are collected through the append
	# handlers; <revision> starts a new WikiRevision for the current title.
	function in_page( $parser, $name, $attribs ) {
		$this->debug( "in_page $name" );
		switch( $name ) {
		case "id":
		case "title":
		case "restrictions":
			$this->appendfield = $name;
			$this->appenddata = "";
			$this->parenttag = "page";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "revision":
			$this->workRevision = new WikiRevision;
			$this->workRevision->setTitle( $this->workTitle );
			xml_set_element_handler( $parser, "in_revision", "out_revision" );
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in a <page>." );
		}
	}

	function out_page( $parser, $name ) {
		$this->debug( "out_page $name" );
		if( $name != "page" ) {
			return $this->throwXmlError( "Expected </page>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

		$this->workTitle = NULL;
		$this->workRevision = NULL;
	}

	# Start handler while a text field is being collected: text fields
	# may not contain child elements.
	function in_nothing( $parser, $name, $attribs ) {
		$this->debug( "in_nothing $name" );
		return $this->throwXmlError( "No child elements allowed here; got <$name>" );
	}
	# Character data may arrive in several pieces; accumulate them.
	function char_append( $parser, $data ) {
		$this->debug( "char_append '$data'" );
		$this->appenddata .= $data;
	}
	# End of a collected text field: hand the handlers back to the parent
	# level and store the collected text where it belongs. Fields with no
	# case below (e.g. "id", "restrictions") are collected but discarded.
	function out_append( $parser, $name ) {
		$this->debug( "out_append $name" );
		if( $name != $this->appendfield ) {
			return $this->throwXmlError( "Expected </{$this->appendfield}>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_$this->parenttag", "out_$this->parenttag" );
		xml_set_character_data_handler( $parser, "donothing" );
		switch( $this->appendfield ) {
		case "title":
			$this->workTitle = $this->appenddata;
			break;
		case "text":
			$this->workRevision->setText( $this->appenddata );
			break;
		case "username":
			$this->workRevision->setUsername( $this->appenddata );
			break;
		case "ip":
			$this->workRevision->setUserIP( $this->appenddata );
			break;
		case "timestamp":
			$this->workRevision->setTimestamp( $this->appenddata );
			break;
		case "comment":
			$this->workRevision->setComment( $this->appenddata );
			break;
		default:
			$this->debug( "Bad append: {$this->appendfield}" );
		}
		$this->appendfield = "";
		$this->appenddata = "";
	}

	# Inside <revision>.
	function in_revision( $parser, $name, $attribs ) {
		$this->debug( "in_revision $name" );
		switch( $name ) {
		case "id":
		case "timestamp":
		case "comment":
		case "text":
			$this->parenttag = "revision";
			$this->appendfield = $name;
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "contributor":
			xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in a <revision>." );
		}
	}

	# End of <revision>: pass the finished revision to the handler.
	function out_revision( $parser, $name ) {
		$this->debug( "out_revision $name" );
		if( $name != "revision" ) {
			return $this->throwXmlError( "Expected </revision>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_page", "out_page" );

		# The handler takes the revision by reference. An array of references
		# passed to call_user_func_array() achieves that without call-time
		# pass-by-reference, which is a parse error on PHP >= 5.4.
		$out = call_user_func_array( $this->mRevisionHandler,
			array( &$this->workRevision, &$this ) );
		if( !empty( $out ) ) {
			global $wgOut;
			$wgOut->addHTML( "<li>" . $out . "</li>\n" );
		}
	}

	# Inside <contributor>.
	function in_contributor( $parser, $name, $attribs ) {
		$this->debug( "in_contributor $name" );
		switch( $name ) {
		case "username":
		case "ip":
			$this->parenttag = "contributor";
			$this->appendfield = $name;
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		default:
			$this->throwXmlError( "Invalid tag <$name> in <contributor>" );
		}
	}

	function out_contributor( $parser, $name ) {
		$this->debug( "out_contributor $name" );
		if( $name != "contributor" ) {
			return $this->throwXmlError( "Expected </contributor>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_revision", "out_revision" );
	}
}
485
486
487 ?>