Avoid creating lots and lots of cat_id gaps
author Kevin Israel <pleasestand@live.com>
Fri, 28 Aug 2015 07:57:01 +0000 (03:57 -0400)
committer Kevin Israel <pleasestand@live.com>
Sat, 17 Oct 2015 03:31:08 +0000 (23:31 -0400)
Currently, INSERT...ON DUPLICATE KEY UPDATE is used to update the page
counts in the category table. However, MySQL 5.1.22 and newer, by default
(innodb_autoinc_lock_mode = 1), increment the AUTO_INCREMENT counter for
cat_id before checking for duplicate key errors. This creates many gaps in
the cat_id sequence.

To avoid this, check for existing category rows, and instead UPDATE any
that were found. It is hoped that the extra queries will not significantly
harm performance.

Change-Id: Ic2ab9ff14f04a0c7ea90a5b6756cade0c78e2885

includes/Category.php
includes/page/WikiPage.php

index c3e8a4e..1b05f33 100644 (file)
@@ -304,11 +304,11 @@ class Category {
                        return false;
                }
 
-               # Note, we must use names for this, since categorylinks does.
-               if ( $this->mName === null ) {
-                       if ( !$this->initialize() ) {
-                               return false;
-                       }
+               # If we have just a category name, find out whether there is an
+               # existing row. Or if we have just an ID, get the name, because
+               # that's what categorylinks uses.
+               if ( !$this->initialize() ) {
+                       return false;
                }
 
                $dbw = wfGetDB( DB_MASTER );
@@ -327,25 +327,38 @@ class Category {
                        array( 'LOCK IN SHARE MODE' )
                );
 
-               # TODO: This will cause lots and lots of gaps on MySQL unless
-               # innodb_autoinc_lock_mode is 0 (and also on some non-MySQL
-               # DBMSes) if you run populateCategory.php repeatedly.
-               $dbw->upsert(
-                       'category',
-                       array(
-                               'cat_title' => $this->mName,
-                               'cat_pages' => $result->pages,
-                               'cat_subcats' => $result->subcats,
-                               'cat_files' => $result->files
-                       ),
-                       array( 'cat_title' ),
-                       array(
-                               'cat_pages' => $result->pages,
-                               'cat_subcats' => $result->subcats,
-                               'cat_files' => $result->files
-                       ),
-                       __METHOD__
-               );
+               if ( $this->mId ) {
+                       # The category row already exists, so do a plain UPDATE instead
+                       # of INSERT...ON DUPLICATE KEY UPDATE to avoid creating a gap
+                       # in the cat_id sequence. The row may or may not be "affected".
+                       $dbw->update(
+                               'category',
+                               array(
+                                       'cat_pages' => $result->pages,
+                                       'cat_subcats' => $result->subcats,
+                                       'cat_files' => $result->files
+                               ),
+                               array( 'cat_title' => $this->mName ),
+                               __METHOD__
+                       );
+               } else {
+                       $dbw->upsert(
+                               'category',
+                               array(
+                                       'cat_title' => $this->mName,
+                                       'cat_pages' => $result->pages,
+                                       'cat_subcats' => $result->subcats,
+                                       'cat_files' => $result->files
+                               ),
+                               array( 'cat_title' ),
+                               array(
+                                       'cat_pages' => $result->pages,
+                                       'cat_subcats' => $result->subcats,
+                                       'cat_files' => $result->files
+                               ),
+                               __METHOD__
+                       );
+               }
 
                $dbw->endAtomic( __METHOD__ );
 
index 3f6a4db..3ec3e89 100644 (file)
@@ -3395,22 +3395,44 @@ class WikiPage implements Page, IDBAccessObject {
                                }
 
                                if ( count( $added ) ) {
-                                       $insertRows = array();
-                                       foreach ( $added as $cat ) {
-                                               $insertRows[] = array(
-                                                       'cat_title'   => $cat,
-                                                       'cat_pages'   => 1,
-                                                       'cat_subcats' => ( $ns == NS_CATEGORY ) ? 1 : 0,
-                                                       'cat_files'   => ( $ns == NS_FILE ) ? 1 : 0,
-                                               );
-                                       }
-                                       $dbw->upsert(
+                                       $existingAdded = $dbw->selectFieldValues(
                                                'category',
-                                               $insertRows,
-                                               array( 'cat_title' ),
-                                               $addFields,
-                                               $method
+                                               'cat_title',
+                                               array( 'cat_title' => $added ),
+                                               __METHOD__
                                        );
+
+                                       // For category rows that already exist, do a plain
+                                       // UPDATE instead of INSERT...ON DUPLICATE KEY UPDATE
+                                       // to avoid creating gaps in the cat_id sequence.
+                                       if ( count( $existingAdded ) ) {
+                                               $dbw->update(
+                                                       'category',
+                                                       $addFields,
+                                                       array( 'cat_title' => $existingAdded ),
+                                                       __METHOD__
+                                               );
+                                       }
+
+                                       $missingAdded = array_diff( $added, $existingAdded );
+                                       if ( count( $missingAdded ) ) {
+                                               $insertRows = array();
+                                               foreach ( $missingAdded as $cat ) {
+                                                       $insertRows[] = array(
+                                                               'cat_title'   => $cat,
+                                                               'cat_pages'   => 1,
+                                                               'cat_subcats' => ( $ns == NS_CATEGORY ) ? 1 : 0,
+                                                               'cat_files'   => ( $ns == NS_FILE ) ? 1 : 0,
+                                                       );
+                                               }
+                                               $dbw->upsert(
+                                                       'category',
+                                                       $insertRows,
+                                                       array( 'cat_title' ),
+                                                       $addFields,
+                                                       $method
+                                               );
+                                       }
                                }
 
                                if ( count( $deleted ) ) {