X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=maintenance%2FupdateCollation.php;h=186feb222f001f45783fa11b4c6ade3527337193;hb=5fdf7d1918d6aee64fa3ece5c0d7690a10bea2c5;hp=af666b06cf03ea27f00231683c5693f7872fd6bc;hpb=96906168cac0e9a7748511b48c369cf22c8406d5;p=lhc%2Fweb%2Fwiklou.git diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php index af666b06cf..186feb222f 100644 --- a/maintenance/updateCollation.php +++ b/maintenance/updateCollation.php @@ -33,7 +33,7 @@ require_once __DIR__ . '/Maintenance.php'; * @ingroup Maintenance */ class UpdateCollation extends Maintenance { - const BATCH_SIZE = 10000; // Number of rows to process in one batch + const BATCH_SIZE = 100; // Number of rows to process in one batch const SYNC_INTERVAL = 20; // Wait for slaves after this many batches public $sizeHistogram = []; @@ -85,10 +85,18 @@ TEXT // but this will raise an exception, breaking all category pages $collation->getFirstLetter( 'MediaWiki' ); + // Locally at least, (my local is a rather old version of mysql) + // mysql seems to filesort if there is both an equality + // (but not for an inequality) condition on cl_collation in the + // WHERE and it is also the first item in the ORDER BY. + if ( $this->hasOption( 'previous-collation' ) ) { + $orderBy = 'cl_to, cl_type, cl_from'; + } else { + $orderBy = 'cl_collation, cl_to, cl_type, cl_from'; + } $options = [ 'LIMIT' => self::BATCH_SIZE, - 'ORDER BY' => 'cl_from, cl_to', - 'STRAIGHT_JOIN', + 'ORDER BY' => $orderBy, ]; if ( $force || $dryRun ) { @@ -124,16 +132,24 @@ TEXT } $this->output( "Fixing collation for $count rows.\n" ); } - $count = 0; $batchCount = 0; $batchConds = []; do { $this->output( "Selecting next " . self::BATCH_SIZE . " rows..." ); + + // cl_type must be selected as a number for proper paging because + // enums suck. + if ( $dbw->getType() === 'mysql' ) { + $clType = 'cl_type+0 AS "cl_type_numeric"'; + } else { + $clType = 'cl_type'; + } $res = $dbw->select( [ 'categorylinks', 'page' ], [ 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation', - 'cl_sortkey', 'page_namespace', 'page_title' + 'cl_sortkey', $clType, + 'page_namespace', 'page_title' ], array_merge( $collationConds, $batchConds, [ 'cl_from = page_id' ] ), __METHOD__, @@ -217,18 +233,28 @@ TEXT /** * Return an SQL expression selecting rows which sort above the given row, - * assuming an ordering of cl_from, cl_to + * assuming an ordering of cl_collation, cl_to, cl_type, cl_from * @param stdClass $row * @param DatabaseBase $dbw * @return string */ function getBatchCondition( $row, $dbw ) { - $fields = [ 'cl_from', 'cl_to' ]; + if ( $this->hasOption( 'previous-collation' ) ) { + $fields = [ 'cl_to', 'cl_type', 'cl_from' ]; + } else { + $fields = [ 'cl_collation', 'cl_to', 'cl_type', 'cl_from' ]; + } $first = true; $cond = false; $prefix = false; foreach ( $fields as $field ) { - $encValue = $dbw->addQuotes( $row->$field ); + if ( $dbw->getType() === 'mysql' && $field === 'cl_type' ) { + // Range conditions with enums are weird in mysql + // This must be a numeric literal, or it won't work. + $encValue = intval( $row->cl_type_numeric ); + } else { + $encValue = $dbw->addQuotes( $row->$field ); + } $inequality = "$field > $encValue"; $equality = "$field = $encValue"; if ( $first ) {