X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=maintenance%2FupdateCollation.php;h=186feb222f001f45783fa11b4c6ade3527337193;hb=5fdf7d1918d6aee64fa3ece5c0d7690a10bea2c5;hp=37c9948b24738654acc530de166293fd079af610;hpb=7489189d7c13448114963520fb383cb2c7765b1e;p=lhc%2Fweb%2Fwiklou.git diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php index 37c9948b24..186feb222f 100644 --- a/maintenance/updateCollation.php +++ b/maintenance/updateCollation.php @@ -33,10 +33,10 @@ require_once __DIR__ . '/Maintenance.php'; * @ingroup Maintenance */ class UpdateCollation extends Maintenance { - const BATCH_SIZE = 10000; // Number of rows to process in one batch + const BATCH_SIZE = 100; // Number of rows to process in one batch const SYNC_INTERVAL = 20; // Wait for slaves after this many batches - public $sizeHistogram = array(); + public $sizeHistogram = []; public function __construct() { parent::__construct(); @@ -85,21 +85,29 @@ TEXT // but this will raise an exception, breaking all category pages $collation->getFirstLetter( 'MediaWiki' ); - $options = array( + // Locally at least, (my local is a rather old version of mysql) + // mysql seems to filesort if there is both an equality + // (but not for an inequality) condition on cl_collation in the + // WHERE and it is also the first item in the ORDER BY. + if ( $this->hasOption( 'previous-collation' ) ) { + $orderBy = 'cl_to, cl_type, cl_from'; + } else { + $orderBy = 'cl_collation, cl_to, cl_type, cl_from'; + } + $options = [ 'LIMIT' => self::BATCH_SIZE, - 'ORDER BY' => 'cl_from, cl_to', - 'STRAIGHT_JOIN', - ); + 'ORDER BY' => $orderBy, + ]; if ( $force || $dryRun ) { - $collationConds = array(); + $collationConds = []; } else { if ( $this->hasOption( 'previous-collation' ) ) { $collationConds['cl_collation'] = $this->getOption( 'previous-collation' ); } else { - $collationConds = array( 0 => + $collationConds = [ 0 => 'cl_collation != ' . $dbw->addQuotes( $collationName ) - ); + ]; } $count = $dbw->estimateRowCount( @@ -124,18 +132,26 @@ TEXT } $this->output( "Fixing collation for $count rows.\n" ); } - $count = 0; $batchCount = 0; - $batchConds = array(); + $batchConds = []; do { $this->output( "Selecting next " . self::BATCH_SIZE . " rows..." ); + + // cl_type must be selected as a number for proper paging because + // enums suck. + if ( $dbw->getType() === 'mysql' ) { + $clType = 'cl_type+0 AS "cl_type_numeric"'; + } else { + $clType = 'cl_type'; + } $res = $dbw->select( - array( 'categorylinks', 'page' ), - array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation', - 'cl_sortkey', 'page_namespace', 'page_title' - ), - array_merge( $collationConds, $batchConds, array( 'cl_from = page_id' ) ), + [ 'categorylinks', 'page' ], + [ 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation', + 'cl_sortkey', $clType, + 'page_namespace', 'page_title' + ], + array_merge( $collationConds, $batchConds, [ 'cl_from = page_id' ] ), __METHOD__, $options ); @@ -178,19 +194,19 @@ TEXT if ( !$dryRun ) { $dbw->update( 'categorylinks', - array( + [ 'cl_sortkey' => $newSortKey, 'cl_sortkey_prefix' => $prefix, 'cl_collation' => $collationName, 'cl_type' => $type, 'cl_timestamp = cl_timestamp', - ), - array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ), + ], + [ 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ], __METHOD__ ); } if ( $row ) { - $batchConds = array( $this->getBatchCondition( $row, $dbw ) ); + $batchConds = [ $this->getBatchCondition( $row, $dbw ) ]; } } if ( !$dryRun ) { @@ -217,18 +233,28 @@ TEXT /** * Return an SQL expression selecting rows which sort above the given row, - * assuming an ordering of cl_from, cl_to + * assuming an ordering of cl_collation, cl_to, cl_type, cl_from * @param stdClass $row * @param DatabaseBase $dbw * @return string */ function getBatchCondition( $row, $dbw ) { - $fields = array( 'cl_from', 'cl_to' ); + if ( $this->hasOption( 'previous-collation' ) ) { + $fields = [ 'cl_to', 'cl_type', 'cl_from' ]; + } else { + $fields = [ 'cl_collation', 'cl_to', 'cl_type', 'cl_from' ]; + } $first = true; $cond = false; $prefix = false; foreach ( $fields as $field ) { - $encValue = $dbw->addQuotes( $row->$field ); + if ( $dbw->getType() === 'mysql' && $field === 'cl_type' ) { + // Range conditions with enums are weird in mysql + // This must be a numeric literal, or it won't work. + $encValue = intval( $row->cl_type_numeric ); + } else { + $encValue = $dbw->addQuotes( $row->$field ); + } $inequality = "$field > $encValue"; $equality = "$field = $encValue"; if ( $first ) { @@ -259,7 +285,7 @@ TEXT } $numBins = 20; $coarseHistogram = array_fill( 0, $numBins, 0 ); - $coarseBoundaries = array(); + $coarseBoundaries = []; $boundary = 0; for ( $i = 0; $i < $numBins - 1; $i++ ) { $boundary += $maxLength / $numBins;