* Added filter options, compression piping, and multiple output streams for
[lhc/web/wiklou.git] / maintenance / dumpBackup.php
1 <?php
2 /**
3 * Copyright (C) 2005 Brion Vibber <brion@pobox.com>
4 * http://www.mediawiki.org/
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @package MediaWiki
 * @subpackage Maintenance
23 */
24
// Remember the directory the script was invoked from before the includes
// below run; it is restored temporarily later on so that a relative
// --pagelist path is resolved against it.
// NOTE(review): presumably commandLine.inc changes the working directory — confirm.
$originalDir = getcwd();

// Names of the options that take a value.
// NOTE(review): presumably read by commandLine.inc when it fills $options — confirm.
$optionsWithArgs = array( 'server', 'pagelist', 'start', 'end' );
28
29 require_once( 'commandLine.inc' );
30 require_once( 'SpecialExport.php' );
31
/**
 * Drives an XML export of wiki pages into one or more output sinks and
 * writes progress reports to stderr while doing so.
 *
 * The sink chain is built from the raw command-line arguments
 * (--output=<type>[:<param>] and --filter=<type>[:<param>]); every other
 * setting is assigned by the caller through the public properties below
 * before dump() is invoked.
 */
class BackupDumper {
	var $reportingInterval = 100; // pages between status lines; 0 turns periodic lines off
	var $reporting = true;        // when false, showReport() prints nothing
	var $pageCount = 0;           // pages written so far
	var $revCount = 0;            // revisions written so far
	var $server = null;           // use default
	var $pages = null;            // all pages
	var $skipHeader = false;      // don't output <mediawiki> and <siteinfo>
	var $skipFooter = false;      // don't output </mediawiki>
	var $startId = 0;
	var $endId = 0;
	var $sink = null;             // Output filters
	var $stderr = null;           // stream that progress() writes to
	var $startTime = 0;           // wfTime() value recorded when dump() starts
	var $maxCount = 0;            // MAX(page_id), denominator of the ETA estimate

	/**
	 * @param array $args raw command-line arguments (typically $argv)
	 */
	function BackupDumper( $args ) {
		// stderr must be open first: processArgs() may emit a warning
		// through progress().
		$this->stderr = fopen( "php://stderr", "wt" );
		$this->sink = $this->processArgs( $args );
	}

	/**
	 * Build the output sink chain described by the --output and --filter
	 * arguments.
	 *
	 * Every --output=<type>[:<param>] starts a new sink; every
	 * --filter=<type>[:<param>] wraps the sink currently being built. When
	 * neither is given a plain DumpOutput is used. Arguments that are not
	 * --output/--filter are ignored here; the calling script reads them
	 * from $options instead.
	 *
	 * Terminates the script via die() on an unknown sink or filter type.
	 *
	 * @param array $args raw command-line arguments
	 * @return object the single configured sink, or a DumpMultiWriter
	 *                wrapping all of them when more than one was configured
	 */
	function processArgs( $args ) {
		$outputTypes = array(
			'file'  => 'DumpFileOutput',
			'gzip'  => 'DumpGZipOutput',
			'bzip2' => 'DumpBZip2Output',
			'7zip'  => 'Dump7ZipOutput' );
		$filterTypes = array(
			'latest'    => 'DumpLatestFilter',
			'notalk'    => 'DumpNotalkFilter',
			'namespace' => 'DumpNamespaceFilter' );
		$sink = null;
		$sinks = array();
		foreach( $args as $arg ) {
			if( !preg_match( '/^--(.+?)(?:=(.+?)(?::(.+?))?)?$/', $arg, $matches ) ) {
				continue;
			}
			// Unmatched optional groups are absent from $matches; read them
			// explicitly instead of silencing the notices with @.
			$opt = $matches[1];
			$val = isset( $matches[2] ) ? $matches[2] : null;
			$param = isset( $matches[3] ) ? $matches[3] : null;
			switch( $opt ) {
			case "output":
				// A new --output closes the sink built so far.
				if( !is_null( $sink ) ) {
					$sinks[] = $sink;
				}
				if( $val === null || !isset( $outputTypes[$val] ) ) {
					die( "Unrecognized output sink type '$val'\n" );
				}
				$type = $outputTypes[$val];
				$sink = new $type( $param );
				break;
			case "filter":
				if( is_null( $sink ) ) {
					$this->progress( "Warning: assuming stdout for filter output\n" );
					$sink = new DumpOutput();
				}
				if( $val === null || !isset( $filterTypes[$val] ) ) {
					die( "Unrecognized filter type '$val'\n" );
				}
				$type = $filterTypes[$val];
				$filter = new $type( $sink, $param );

				// references are lame in php...
				// Drop the old binding before rebinding so the filter's
				// inner sink is not overwritten through a shared reference.
				unset( $sink );
				$sink = $filter;

				break;
			default:
				// Anything else is left for the calling script to handle.
				//die( "Unrecognized dump option'$opt'\n" );
			}
		}

		if( is_null( $sink ) ) {
			$sink = new DumpOutput();
		}
		$sinks[] = $sink;

		if( count( $sinks ) > 1 ) {
			return new DumpMultiWriter( $sinks );
		} else {
			return $sink;
		}
	}

	/**
	 * Run the export and emit a closing progress report.
	 *
	 * @param mixed $history history mode handed to WikiExporter
	 *                       (the script passes MW_EXPORT_FULL or MW_EXPORT_CURRENT)
	 */
	function dump( $history ) {
		# This shouldn't happen if on console... ;)
		header( 'Content-type: text/html; charset=UTF-8' );

		# Notice messages will foul up your XML output even if they're
		# relatively harmless.
		ini_set( 'display_errors', false );

		$dbr =& wfGetDB( DB_SLAVE );
		$this->maxCount = $dbr->selectField( 'page', 'MAX(page_id)', '', 'BackupDumper::dump' );
		// Rates and ETA in showReport() are measured from this point.
		$this->startTime = wfTime();

		$db =& $this->backupDb();
		$exporter = new WikiExporter( $db, $history, MW_EXPORT_STREAM );

		// Wrap the sink so page/revision counters are updated as data flows.
		$wrapper = new ExportProgressFilter( $this->sink, $this );
		$exporter->setOutputSink( $wrapper );

		if( !$this->skipHeader ) {
			$exporter->openStream();
		}

		if( is_null( $this->pages ) ) {
			if( $this->startId || $this->endId ) {
				$exporter->pagesByRange( $this->startId, $this->endId );
			} else {
				$exporter->allPages();
			}
		} else {
			$exporter->pagesByName( $this->pages );
		}

		if( !$this->skipFooter ) {
			$exporter->closeStream();
		}

		$this->report( true );
	}

	/**
	 * Open a dedicated connection for the export, using the admin
	 * credentials and 24-hour network read/write timeouts.
	 *
	 * @return Database
	 */
	function &backupDb() {
		global $wgDBadminuser, $wgDBadminpassword;
		global $wgDBname;
		// Plain assignment: "=& new" is deprecated since PHP 5.3 and a
		// parse error from PHP 7.0 on.
		$db = new Database( $this->backupServer(), $wgDBadminuser, $wgDBadminpassword, $wgDBname );
		$timeout = 3600 * 24;
		$db->query( "SET net_read_timeout=$timeout" );
		$db->query( "SET net_write_timeout=$timeout" );
		return $db;
	}

	/**
	 * @return string the server set in $this->server, else $wgDBserver
	 */
	function backupServer() {
		global $wgDBserver;
		return $this->server
			? $this->server
			: $wgDBserver;
	}

	/**
	 * Count one finished page and print a status line if one is due.
	 */
	function reportPage() {
		$this->pageCount++;
		$this->report();
	}

	/**
	 * Count one finished revision.
	 */
	function revCount() {
		$this->revCount++;
	}

	/**
	 * Print a status line when the page count sits on a reporting interval,
	 * or, for the final call, when it does not (so the last line is never
	 * printed twice).
	 *
	 * @param bool $final true for the closing report at the end of dump()
	 */
	function report( $final = false ) {
		// An interval of 0 (e.g. --report=0 or a non-numeric value) must
		// not reach the modulo: that is a division by zero.
		$onInterval = ( $this->reportingInterval != 0 )
			&& ( $this->pageCount % $this->reportingInterval == 0 );
		if( $final xor $onInterval ) {
			$this->showReport();
		}
	}

	/**
	 * Write one status line (timestamp, database name, page count, ETA and
	 * throughput) to stderr, unless reporting is switched off.
	 */
	function showReport() {
		if( !$this->reporting ) {
			return;
		}
		$delta = wfTime() - $this->startTime;
		$now = wfTimestamp( TS_DB );
		$rate = '-';
		$revrate = '-';
		$etats = '-';
		if( $delta ) {
			$rate = $this->pageCount / $delta;
			$revrate = $this->revCount / $delta;
			// The ETA extrapolates from the fraction of page ids covered;
			// without both counts it would divide by zero.
			if( $this->pageCount && $this->maxCount ) {
				$portion = $this->pageCount / $this->maxCount;
				$eta = $this->startTime + $delta / $portion;
				$etats = wfTimestamp( TS_DB, intval( $eta ) );
			}
		}
		global $wgDBname;
		$this->progress( "$now: $wgDBname $this->pageCount, ETA $etats ($rate pages/sec $revrate revs/sec)" );
	}

	/**
	 * Write a line to stderr; a newline is appended.
	 *
	 * @param string $string
	 */
	function progress( $string ) {
		fwrite( $this->stderr, $string . "\n" );
	}
}
210
/**
 * Dump filter that forwards everything to the wrapped sink unchanged and
 * notifies a progress tracker after each closed page and each revision.
 */
class ExportProgressFilter extends DumpFilter {
	var $progress = null; // object receiving reportPage() and revCount() calls

	/**
	 * @param object $sink     sink handed on to the DumpFilter constructor
	 * @param object $progress tracker providing reportPage() and revCount()
	 */
	function ExportProgressFilter( &$sink, &$progress ) {
		parent::DumpFilter( $sink );
		// Bind by reference: a plain assignment copies the object on PHP 4,
		// so the counters would be bumped on a copy and the caller's own
		// totals would never change.
		$this->progress =& $progress;
	}

	/**
	 * Pass the page close through, then count the page.
	 */
	function writeClosePage( $string ) {
		parent::writeClosePage( $string );
		$this->progress->reportPage();
	}

	/**
	 * Pass the revision through, then count it.
	 */
	function writeRevision( $rev, $string ) {
		parent::writeRevision( $rev, $string );
		$this->progress->revCount();
	}
}
227
// --output / --filter are parsed from the raw argument vector inside the
// dumper; all remaining switches are read from $options below.
$dumper = new BackupDumper( $argv );

if( isset( $options['quiet'] ) ) {
	$dumper->reporting = false;
}
if( isset( $options['report'] ) ) {
	$dumper->reportingInterval = intval( $options['report'] );
}
if( isset( $options['server'] ) ) {
	$dumper->server = $options['server'];
}

if ( isset( $options['pagelist'] ) ) {
	// Read the list relative to the directory the script was started from,
	// then switch back to the current one.
	$olddir = getcwd();
	chdir( $originalDir );
	$pages = file( $options['pagelist'] );
	chdir( $olddir );
	if ( $pages === false ) {
		// stdout carries the XML dump, so the error goes to stderr and the
		// script ends with a failure status.
		$dumper->progress( "Unable to open file {$options['pagelist']}" );
		exit( 1 );
	}
	$pages = array_map( 'trim', $pages );
	// Drop empty lines; 'strlen' keeps every non-empty string (including "0")
	// and replaces create_function(), which is gone in PHP 8.
	$dumper->pages = array_filter( $pages, 'strlen' );
}

if( isset( $options['start'] ) ) {
	$dumper->startId = intval( $options['start'] );
}
if( isset( $options['end'] ) ) {
	$dumper->endId = intval( $options['end'] );
}
$dumper->skipHeader = isset( $options['skip-header'] );
$dumper->skipFooter = isset( $options['skip-footer'] );

if( isset( $options['full'] ) ) {
	$dumper->dump( MW_EXPORT_FULL );
} elseif( isset( $options['current'] ) ) {
	$dumper->dump( MW_EXPORT_CURRENT );
} else {
	// No action requested: print usage to stderr.
	$dumper->progress( <<<END
This script dumps the wiki page database into an XML interchange wrapper
format for export or backup.

XML output is sent to stdout by default; progress reports are sent to stderr.

Usage: php dumpBackup.php <action> [<options>]
Actions:
  --full      Dump complete history of every page.
  --current   Includes only the latest revision of each page.

Options:
  --quiet     Don't dump status reports to stderr.
  --report=n  Report position and speed after every n pages processed.
              (Default: 100)
  --server=h  Force reading from MySQL server h
  --start=n   Start from page_id n
  --end=n     Stop before page_id n (exclusive)
  --pagelist=file
              Dump only the pages whose titles are listed in file,
              one title per line
  --skip-header Don't output the <mediawiki> header
  --skip-footer Don't output the </mediawiki> footer

Output streams:
  --output=<type>:<file>
              Begin a new output stream;
              <type>s: file, gzip, bzip2, 7zip
  --filter=<type>[:<options>]
              Add a filter to the current output stream;
              <type>s: latest, notalk, namespace
END
);
}
290
291 ?>