/** */
require_once( 'Revision.php' );
+require_once( 'ExternalStoreDB.php' );
/** @todo document */
-function compressOldPages( $start = 0 ) {
+function compressOldPages( $start = 0, $extdb = '' ) {
$fname = 'compressOldPages';
$chunksize = 50;
print "Starting from old_id $start...\n";
$dbw =& wfGetDB( DB_MASTER );
- $old = $dbw->tableName( 'old' );
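+ # Fetch and compress text rows in batches of $chunksize, locking each batch with FOR UPDATE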
do {
$end = $start + $chunksize;
- $res = $dbw->select( 'old', array( 'old_id','old_flags','old_namespace','old_title','old_text' ),
+ $res = $dbw->select( 'text', array( 'old_id','old_flags','old_namespace','old_title','old_text' ),
"old_id>=$start", $fname, array( 'ORDER BY' => 'old_id', 'LIMIT' => $chunksize, 'FOR UPDATE' ) );
if( $dbw->numRows( $res ) == 0 ) {
break;
}
$last = $start;
while( $row = $dbw->fetchObject( $res ) ) {
# print " {$row->old_id} - {$row->old_namespace}:{$row->old_title}\n";
- compressPage( $row );
+ compressPage( $row, $extdb );
$last = $row->old_id;
}
$dbw->freeResult( $res );
}
/** @todo document */
-function compressPage( $row ) {
+function compressPage( $row, $extdb ) {
$fname = 'compressPage';
- if( false !== strpos( $row->old_flags, "gzip" ) ) {
- print "Already compressed row {$row->old_id}?\n";
+ if ( false !== strpos( $row->old_flags, 'gzip' ) || false !== strpos( $row->old_flags, 'object' ) ) {
+ #print "Already compressed row {$row->old_id}\n";
return false;
}
$dbw =& wfGetDB( DB_MASTER );
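+ # Add 'gzip' to old_flags so the deflated text is recognised and inflated when read back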
$flags = $row->old_flags ? "{$row->old_flags},gzip" : "gzip";
$compress = gzdeflate( $row->old_text );
- $dbw->update( 'old',
+
+ # Store in external storage if required
+ if ( $extdb !== '' ) {
+ $storeObj = new ExternalStoreDB;
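+ # On success store() should return a DB:// URL, which then replaces the compressed text in the text row below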
+ $compress = $storeObj->store( $extdb, $compress );
+ if ( $compress === false ) {
+ print "Unable to store object\n";
+ return false;
+ }
+ }
+
+ # Update text row
+ $dbw->update( 'text',
array( /* SET */
'old_flags' => $flags,
'old_text' => $compress
$maxPageId = $dbr->selectField( 'page', 'max(page_id)', '', $fname );
$pageConds = array();
+ /*
if ( $exclude_ns0 ) {
print "Excluding main namespace\n";
$pageConds[] = 'page_namespace<>0';
}
if ( $queryExtra ) {
$pageConds[] = $queryExtra;
}
+ */
# For each article, get a list of revisions which fit the criteria
# No recompression, use a condition on old_flags
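+ # Rows already moved to external storage carry a DB:// URL in old_text; skip those as well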
- $conds = array("old_flags NOT LIKE '%object%'");
+ $conds = array(
+ "old_flags NOT LIKE '%object%' " .
+ " AND (old_flags NOT LIKE '%external%' OR old_text NOT LIKE 'DB://%/%/%')");
if ( $beginDate ) {
$conds[] = "rev_timestamp>'" . $beginDate . "'";
$dbw->update( 'text',
array( /* SET */
'old_text' => serialize( $chunk ),
- 'old_flags' => 'object',
+ 'old_flags' => 'object,utf-8',
), array( /* WHERE */
'old_id' => $primaryOldid
)
$dbw->update( 'text',
array( /* SET */
'old_text' => $stubs[$j],
- 'old_flags' => 'object',
+ 'old_flags' => 'object,utf-8',
), array( /* WHERE */
'old_id' => $revs[$i + $j]->rev_text_id
)
<?php
/**
- * Compress the old table, old_flags=gzip
- *
+ * Compress the text of a wiki
+ *
* @package MediaWiki
* @subpackage Maintenance
*/
* Usage:
*
* Non-Wikimedia
- * php compressOld.php [-t <type>] [-c <chunk-size>] [-b <begin-date>] [-e <end-date>] [-s <start-id>]
+ * php compressOld.php [options...]
*
* Wikimedia
- * php compressOld.php <database> [-t <type>] [-c <chunk-size>] [-b <begin-date>] [-e <end-date>] [-s <start-id>]
- * [-f <max-factor>] [-h <factor-threshold>]
+ * php compressOld.php <database> [options...]
*
- * <type> is either:
- * gzip: compress revisions independently
- * concat: concatenate revisions and compress in chunks (default)
- *
- * <start-id> is the old_id to start from
- *
- * The following options apply only to the concat type:
- * <begin-date> is the earliest date to check for uncompressed revisions
- * <end-date> is the latest revision date to compress
- * <chunk-size> is the maximum number of revisions in a concat chunk
- * <max-factor> is the maximum ratio of compressed chunk bytes to uncompressed avg. revision bytes
- * <factor-threshold> is a minimum number of KB, where <max-factor> cuts in
+ * Options are:
+ * -t <type> set compression type to either:
+ * gzip: compress revisions independently
+ * concat: concatenate revisions and compress in chunks (default)
+ * -c <chunk-size> maximum number of revisions in a concat chunk
+ * -b <begin-date> earliest date to check for uncompressed revisions
+ * -e <end-date> latest revision date to compress
+ * -s <start-id> the old_id to start from
+ * -f <max-factor> the maximum ratio of compressed chunk bytes to uncompressed avg. revision bytes
+ * -h <threshold> the minimum size in KB at which <max-factor> takes effect
+ * --extdb <cluster> store specified revisions in an external cluster (untested)
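+ *
+ * Example (illustrative values; the cluster name is a placeholder):
+ *   php compressOld.php -t concat -c 20 -s 0 --extdb cluster1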
*
*/
-die( 'compressOld is known to be broken at the moment.' );
-
-$optionsWithArgs = array( 't', 'c', 's', 'f', 'h' );
+$optionsWithArgs = array( 't', 'c', 's', 'f', 'h', 'extdb' );
require_once( "../commandLine.inc" );
require_once( "compressOld.inc" );
'f' => 3,
'h' => 100,
'b' => '',
- 'e' => '',
+ 'e' => '',
+ 'extdb' => '',
);
-$args = $args + $defaults;
+$options = $options + $defaults;
-if ( $args['t'] != 'concat' && $args['t'] != 'gzip' ) {
- print "Type \"{$args['t']}\" not supported\n";
+if ( $options['t'] != 'concat' && $options['t'] != 'gzip' ) {
+ print "Type \"{$options['t']}\" not supported\n";
}
print "Depending on the size of your database this may take a while!\n";
#sleep(5);
$success = true;
-if ( $args['t'] == 'concat' ) {
- $success = compressWithConcat( $args['s'], $args['c'], $args['f'], $args['h'], $args['b'], $args['e'] );
+if ( $options['t'] == 'concat' ) {
+ $success = compressWithConcat( $options['s'], $options['c'], $options['f'], $options['h'], $options['b'],
+ $options['e'], $options['extdb'] );
} else {
- compressOldPages( $args['s'] );
+ compressOldPages( $options['s'], $options['extdb'] );
}
if ( $success ) {