Merge "Use HTMLForm for Special:Export"
[lhc/web/wiklou.git] / includes / specials / SpecialExport.php
1 <?php
2 /**
3 * Implements Special:Export
4 *
5 * Copyright © 2003-2008 Brion Vibber <brion@pobox.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
21 *
22 * @file
23 * @ingroup SpecialPage
24 */
25
26 /**
27 * A special page that allows users to export pages in an XML file
28 *
29 * @ingroup SpecialPage
30 */
31 class SpecialExport extends SpecialPage {
32 private $curonly, $doExport, $pageLinkDepth, $templates;
33 private $images;
34
/**
 * Register this special page under the name 'Export'.
 */
public function __construct() {
	$specialPageName = 'Export';
	parent::__construct( $specialPageName );
}
38
/**
 * Entry point: either stream the XML dump or show the export form.
 *
 * The request is handled in one of five mutually exclusive modes, tested
 * in this order:
 *  - 'addcat': append the members of the named category to the page list
 *    and redisplay the form
 *  - 'addns' (only if ExportFromNamespaces is enabled): append the pages of
 *    the selected namespace and redisplay the form
 *  - 'exportall' (only if ExportAllowAll is enabled): export everything
 *  - POST without a subpage: export the listed pages using the history
 *    options from the form
 *  - anything else: export the listed pages (defaulting to the subpage),
 *    current revisions only unless 'history' is set
 *
 * @param string|null $par Subpage; used as the default page list for
 *  non-POST requests
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();
	$request = $this->getRequest();

	// Set some variables
	$this->curonly = true;
	$this->doExport = false;
	$this->templates = $request->getCheck( 'templates' );
	$this->images = $request->getCheck( 'images' ); // Doesn't do anything yet
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;
	// Defined up front so the branches that only redisplay the form
	// (addcat/addns) never leave it unset.
	$history = WikiExporter::CURRENT;

	if ( $request->getCheck( 'addcat' ) ) {
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' && $catname !== null && $catname !== false ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				/**
				 * @todo FIXME: This can lead to hitting memory limit for very large
				 * categories. Ideally we would do the lookup synchronously
				 * during the export in a single query.
				 */
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					$page .= "\n" . implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( strval( $nsindex ) !== '' ) {
			/**
			 * Same implementation as above, so same @todo
			 */
			$nspages = $this->getPagesFromNamespace( $nsindex );
			if ( $nspages ) {
				$page .= "\n" . implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
		$this->doExport = true;
		$exportall = true;

		/* Although $page and $history are not used later on, we
		   nevertheless set them to avoid that PHP notices about using
		   undefined variables foul up our XML output (see call to
		   doExport(...) further down) */
		$page = '';
		$history = '';
	} elseif ( $request->wasPosted() && $par == '' ) {
		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$history = $this->getPostedHistoryOptions();

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par );
		$history = $request->getCheck( 'history' )
			? WikiExporter::FULL
			: WikiExporter::CURRENT;

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( 'ExportAllowHistory' ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// The author list is only offered for current-revision exports
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( 'ExportAllowListContributors' ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( "Content-type: application/xml; charset=utf-8" );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sane filename suggestion
			$filename = urlencode( $config->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );
	if ( !$config->get( 'ExportAllowHistory' ) ) {
		// No 'curonly' checkbox is offered in this case; explain why
		$out->addWikiMsg( 'exportnohistory' );
	}

	// The category box is only pre-filled while the page list is still empty
	$categoryName = $page == '' ? $request->getText( 'catname' ) : '';

	$htmlForm = HTMLForm::factory(
		'div',
		$this->buildExportFormDescriptor( $page, $nsindex, $categoryName ),
		$this->getContext()
	);
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}

/**
 * Work out the history selection for a posted form.
 *
 * Reads $this->curonly, so that must be set from the request beforehand.
 *
 * @return int|array WikiExporter::CURRENT when only current revisions are
 *  wanted, otherwise an array with the keys 'dir', 'offset' and 'limit'.
 *  The array keeps its defaults (ascending, no offset, limit of
 *  ExportMaxHistory) when 'history' is checked; otherwise the 'limit',
 *  'offset' and 'dir' request parameters refine it.
 */
private function getPostedHistoryOptions() {
	$request = $this->getRequest();

	$rawOffset = $request->getVal( 'offset' );
	$offset = $rawOffset ? wfTimestamp( TS_MW, $rawOffset ) : null;

	$maxHistory = $this->getConfig()->get( 'ExportMaxHistory' );
	$limit = $request->getInt( 'limit' );
	$dir = $request->getVal( 'dir' );
	$historyCheck = $request->getCheck( 'history' );

	if ( $this->curonly ) {
		return WikiExporter::CURRENT;
	}

	$history = array(
		'dir' => 'asc',
		'offset' => false,
		'limit' => $maxHistory,
	);

	if ( !$historyCheck ) {
		// A requested limit is honoured only if it is positive and below
		// the configured maximum (a maximum of 0 accepts any limit)
		if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
			$history['limit'] = $limit;
		}

		if ( !is_null( $offset ) ) {
			$history['offset'] = $offset;
		}

		if ( strtolower( $dir ) == 'desc' ) {
			$history['dir'] = 'desc';
		}
	}

	return $history;
}

/**
 * Build the HTMLForm descriptor for the export form.
 *
 * Field order is significant (it is the display order): category box,
 * namespace selector, "export all" checkbox, page list, then the option
 * checkboxes. Optional fields appear only when the matching configuration
 * setting (or, for the link depth, the user right) allows them.
 *
 * @param string $page Current contents of the page list textarea
 * @param string $nsindex Preselected value of the namespace selector
 * @param string $categoryName Prefilled value of the category box
 * @return array
 */
private function buildExportFormDescriptor( $page, $nsindex, $categoryName ) {
	$config = $this->getConfig();
	$request = $this->getRequest();
	// Checkbox state is echoed back only for posted forms; otherwise the
	// static default of each field applies
	$posted = $request->wasPosted();

	$descriptor = array(
		'catname' => array(
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
		),
	);

	if ( $config->get( 'ExportFromNamespaces' ) ) {
		$descriptor['nsindex'] = array(
			'type' => 'namespaceselectwithbutton',
			'default' => $nsindex,
			'label-message' => 'export-addnstext',
			'horizontal-label' => true,
			'name' => 'nsindex',
			'id' => 'namespace',
			'cssclass' => 'namespaceselector',
			'buttontype' => 'submit',
			'buttonname' => 'addns',
			'buttondefault' => $this->msg( 'export-addns' )->text(),
		);
	}

	if ( $config->get( 'ExportAllowAll' ) ) {
		$descriptor['exportall'] = array(
			'type' => 'check',
			'label-message' => 'exportall',
			'name' => 'exportall',
			'id' => 'exportall',
			'default' => $posted ? $request->getCheck( 'exportall' ) : false,
		);
	}

	$descriptor['textarea'] = array(
		'class' => 'HTMLTextAreaField',
		'name' => 'pages',
		'nodata' => true,
		'cols' => 40,
		'rows' => 10,
		'default' => $page,
	);

	if ( $config->get( 'ExportAllowHistory' ) ) {
		$descriptor['curonly'] = array(
			'type' => 'check',
			'label-message' => 'exportcuronly',
			'name' => 'curonly',
			'id' => 'curonly',
			'default' => $posted ? $request->getCheck( 'curonly' ) : true,
		);
	}

	$descriptor['templates'] = array(
		'type' => 'check',
		'label-message' => 'export-templates',
		'name' => 'templates',
		'id' => 'wpExportTemplates',
		'default' => $posted ? $request->getCheck( 'templates' ) : false,
	);

	if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
		$descriptor['pagelink-depth'] = array(
			'type' => 'text',
			'name' => 'pagelink-depth',
			'id' => 'pagelink-depth',
			'label-message' => 'export-pagelinks',
			'default' => '0',
			'size' => 20,
		);
	}

	/* Enable this when we can do something useful exporting/importing image information.
	$descriptor['images'] = array(
		'type' => 'check',
		'name' => 'images',
		'id' => 'wpExportImages',
		'default' => false,
	);*/

	$descriptor['wpDownload'] = array(
		'type' => 'check',
		'name' => 'wpDownload',
		'id' => 'wpDownload',
		'default' => $posted ? $request->getCheck( 'wpDownload' ) : true,
		'label-message' => 'export-download',
	);

	if ( $config->get( 'ExportAllowListContributors' ) ) {
		$descriptor['listauthors'] = array(
			'type' => 'check',
			'label-message' => 'exportlistauthors',
			'default' => $posted ? $request->getCheck( 'listauthors' ) : false,
			'name' => 'listauthors',
			'id' => 'listauthors',
		);
	}

	return $descriptor;
}
315
/**
 * Whether the current user holds the 'override-export-depth' right.
 *
 * @return bool
 */
private function userCanOverrideExportDepth() {
	$currentUser = $this->getUser();

	return $currentUser->isAllowed( 'override-export-depth' );
}
322
/**
 * Stream the XML export to the client.
 *
 * Unless everything is exported, the newline-separated page list is
 * normalised, optionally expanded with used templates and linked pages
 * (per $this->templates and $this->pageLinkDepth), and each resulting
 * page is written only if the current user may read it.
 *
 * @param string $page User input on what page(s) to export
 * @param int|array $history One of the WikiExporter history export
 *  constants, or the dir/offset/limit array built by execute() for
 *  posted forms
 * @param bool $list_authors Whether to add distinct author list (when
 *  not returning full history)
 * @param bool $exportall Whether to export everything
 */
private function doExport( $page, $history, $list_authors, $exportall ) {
	if ( $exportall ) {
		// If we are grabbing everything, enable full history and ignore the rest
		$history = WikiExporter::FULL;
	} else {
		// Inverted index (prefixed title => true), so each page is recorded once
		$pageSet = array();

		// Split up and normalize input
		foreach ( explode( "\n", $page ) as $line ) {
			$candidate = Title::newFromText( trim( $line ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			$pageSet[$candidate->getPrefixedText()] = true;
		}

		// The pages the user actually asked for; seeds for the expansions below
		$inputPages = array_keys( $pageSet );

		// Look up any linked pages if asked...
		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		if ( $this->pageLinkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
		}

		// Enable this when we can do something useful exporting/importing image information.
		// if( $this->images ) ) {
		// 	$pageSet = $this->getImages( $inputPages, $pageSet );
		// }

		// Normalize titles to the same format and remove dupes, see bug 17374
		$pages = array_unique( str_replace( ' ', '_', array_keys( $pageSet ) ) );
	}

	/* Ok, let's get to it... */
	if ( $history != WikiExporter::CURRENT ) {
		// Use an unbuffered query; histories may be very long!
		$lb = wfGetLBFactory()->newMainLB();
		$db = $lb->getConnection( DB_SLAVE );
		$buffer = WikiExporter::STREAM;

		// This might take a while... :D
		MediaWiki\suppressWarnings();
		set_time_limit( 0 );
		MediaWiki\restoreWarnings();
	} else {
		$lb = false;
		$db = wfGetDB( DB_SLAVE );
		$buffer = WikiExporter::BUFFER;
	}

	$exporter = new WikiExporter( $db, $history, $buffer );
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $pages as $pageName ) {
			$title = Title::newFromText( $pageName );

			// Bug 8824: Only export pages the user can read.
			// Unparsable names are skipped silently as well.
			// @todo Perhaps output an <error> tag or something.
			if ( $title !== null && $title->userCan( 'read', $this->getUser() ) ) {
				$exporter->pageByTitle( $title );
			}
		}
	}

	$exporter->closeStream();

	// Only the streaming path created its own load balancer
	if ( $lb ) {
		$lb->closeAll();
	}
}
425
/**
 * List the pages that are members of a category.
 *
 * At most 5000 rows are fetched. Each entry is the page's database title,
 * prefixed with the content-language namespace name and a colon when the
 * page is outside namespace 0.
 *
 * @param Title $title Title whose DB key is matched against cl_to
 * @return array List of page names
 */
private function getPagesFromCategory( $title ) {
	global $wgContLang;

	$categoryKey = $title->getDBkey();

	$rows = wfGetDB( DB_SLAVE )->select(
		array( 'page', 'categorylinks' ),
		array( 'page_namespace', 'page_title' ),
		array( 'cl_from=page_id', 'cl_to' => $categoryKey ),
		__METHOD__,
		array( 'LIMIT' => '5000' )
	);

	$names = array();
	foreach ( $rows as $row ) {
		$prefix = $row->page_namespace
			? $wgContLang->getNsText( $row->page_namespace ) . ':'
			: '';
		$names[] = $prefix . $row->page_title;
	}

	return $names;
}
458
/**
 * List the pages of one namespace.
 *
 * At most 5000 rows are fetched. Each entry is the page's database title,
 * prefixed with the content-language namespace name and a colon when the
 * page is outside namespace 0.
 *
 * @param int $nsindex Namespace index to match against page_namespace
 * @return array List of page names
 */
private function getPagesFromNamespace( $nsindex ) {
	global $wgContLang;

	$rows = wfGetDB( DB_SLAVE )->select(
		'page',
		array( 'page_namespace', 'page_title' ),
		array( 'page_namespace' => $nsindex ),
		__METHOD__,
		array( 'LIMIT' => '5000' )
	);

	$names = array();
	foreach ( $rows as $row ) {
		$name = $row->page_title;
		if ( $row->page_namespace ) {
			$name = $wgContLang->getNsText( $row->page_namespace ) . ':' . $name;
		}
		$names[] = $name;
	}

	return $names;
}
490
/**
 * Add the templates used by the given pages to the page set.
 *
 * @param array $inputPages List of titles to look up
 * @param array $pageSet Associative array indexed by titles for output
 * @return array Associative array indexed by titles
 */
private function getTemplates( $inputPages, $pageSet ) {
	// Alias the templatelinks columns to the generic names getLinks() reads
	$fields = array( 'namespace' => 'tl_namespace', 'title' => 'tl_title' );
	$join = array( 'page_id=tl_from' );

	return $this->getLinks( $inputPages, $pageSet, 'templatelinks', $fields, $join );
}
504
/**
 * Validate link depth setting, if available.
 *
 * The result is clamped in three steps: negative or missing values
 * become 0; users without the 'override-export-depth' right are limited
 * to ExportMaxLinkDepth; and everybody is limited to 5.
 *
 * @param int|null $depth Requested depth; execute() passes the result of
 *  getIntOrNull(), so null is possible
 * @return int
 */
private function validateLinkDepth( $depth ) {
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		// Apply the configured ceiling, but keep going so that the hard
		// limit below still applies when the configured value exceeds it
		$depth = min( $depth, $this->getConfig()->get( 'ExportMaxLinkDepth' ) );
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */

	return intval( min( $depth, 5 ) );
}
530
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * Links are followed $depth levels deep. Every level starts from all
 * titles collected so far, but titles whose links were already fetched
 * at an earlier level are not queried again: doing so could only re-add
 * entries that are already in the set.
 *
 * @param array $inputPages Titles to start from
 * @param array $pageSet Associative array indexed by titles for output
 * @param int $depth Number of link levels to follow
 * @return array Associative array indexed by titles
 */
private function getPageLinks( $inputPages, $pageSet, $depth ) {
	// Titles whose outgoing links have already been looked up
	$expanded = array();

	// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
	for ( ; $depth > 0; --$depth ) {
		// @codingStandardsIgnoreEnd
		$pending = array();
		foreach ( $inputPages as $name ) {
			if ( !isset( $expanded[$name] ) ) {
				$expanded[$name] = true;
				$pending[] = $name;
			}
		}

		if ( $pending ) {
			$pageSet = $this->getLinks(
				$pending, $pageSet, 'pagelinks',
				array( 'namespace' => 'pl_namespace', 'title' => 'pl_title' ),
				array( 'page_id=pl_from' )
			);
		}

		// The next level starts from everything known so far
		$inputPages = array_keys( $pageSet );
	}

	return $pageSet;
}
552
/**
 * Add the images used by the given pages to the page set.
 *
 * @param array $inputPages List of titles to look up
 * @param array $pageSet Associative array indexed by titles for output
 *
 * @return array Associative array indexed by titles
 */
private function getImages( $inputPages, $pageSet ) {
	// imagelinks has no namespace column: the constant NS_FILE is selected
	// under the 'namespace' alias instead
	$fields = array( 'namespace' => NS_FILE, 'title' => 'il_to' );
	$join = array( 'page_id=il_from' );

	return $this->getLinks( $inputPages, $pageSet, 'imagelinks', $fields, $join );
}
570
/**
 * Expand a list of pages to include items used in those pages.
 *
 * For every parsable input title, the title itself and each row found
 * in the link table for it are added to the page set. Unparsable input
 * titles are skipped.
 *
 * @param array $inputPages Array of page titles
 * @param array $pageSet Associative array indexed by titles for output
 * @param string $table Link table to join against 'page'
 * @param array $fields Fields to select; the result rows must expose
 *  'namespace' and 'title'
 * @param array $join Conditions joining $table to 'page'
 * @return array Associative array indexed by titles
 */
private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
	$dbr = wfGetDB( DB_SLAVE );

	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		/// @todo FIXME: May or may not be more efficient to batch these
		///        by namespace when given multiple input pages.
		$conds = array_merge(
			$join,
			array(
				'page_namespace' => $source->getNamespace(),
				'page_title' => $source->getDBkey()
			)
		);
		$rows = $dbr->select( array( 'page', $table ), $fields, $conds, __METHOD__ );

		foreach ( $rows as $row ) {
			$target = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
612
/**
 * Name of the group this special page is listed under.
 *
 * @return string
 */
protected function getGroupName() {
	$group = 'pagetools';

	return $group;
}
616 }