'ClassCollector' => __DIR__ . '/includes/utils/AutoloadGenerator.php',
'CleanupAncientTables' => __DIR__ . '/maintenance/cleanupAncientTables.php',
'CleanupBlocks' => __DIR__ . '/maintenance/cleanupBlocks.php',
+ 'CleanupEmptyCategories' => __DIR__ . '/maintenance/cleanupEmptyCategories.php',
'CleanupPreferences' => __DIR__ . '/maintenance/cleanupPreferences.php',
'CleanupRemovedModules' => __DIR__ . '/maintenance/cleanupRemovedModules.php',
'CleanupSpam' => __DIR__ . '/maintenance/cleanupSpam.php',
$this->mSubcats = 0;
$this->mFiles = 0;
+ # If the title exists, call refreshCounts to add a row for it.
+ if ( $this->mTitle->exists() ) {
+ DeferredUpdates::addCallableUpdate( [ $this, 'refreshCounts' ] );
+ }
+
return true;
} else {
return false; # Fail
[ 'LOCK IN SHARE MODE' ]
);
+ $shouldExist = $result->pages > 0 || $this->getTitle()->exists();
+
if ( $this->mID ) {
- # The category row already exists, so do a plain UPDATE instead
- # of INSERT...ON DUPLICATE KEY UPDATE to avoid creating a gap
- # in the cat_id sequence. The row may or may not be "affected".
- $dbw->update(
- 'category',
- [
- 'cat_pages' => $result->pages,
- 'cat_subcats' => $result->subcats,
- 'cat_files' => $result->files
- ],
- [ 'cat_title' => $this->mName ],
- __METHOD__
- );
- } else {
+ if ( $shouldExist ) {
+ # The category row already exists, so do a plain UPDATE instead
+ # of INSERT...ON DUPLICATE KEY UPDATE to avoid creating a gap
+ # in the cat_id sequence. The row may or may not be "affected".
+ $dbw->update(
+ 'category',
+ [
+ 'cat_pages' => $result->pages,
+ 'cat_subcats' => $result->subcats,
+ 'cat_files' => $result->files
+ ],
+ [ 'cat_title' => $this->mName ],
+ __METHOD__
+ );
+ } else {
+ # The category is empty and has no description page, delete it
+ $dbw->delete(
+ 'category',
+ [ 'cat_title' => $this->mName ],
+ __METHOD__
+ );
+ $this->mID = false;
+ }
+ } elseif ( $shouldExist ) {
+ # The category row doesn't exist but should, so create it. Use
+ # upsert in case of races.
$dbw->upsert(
'category',
[
],
__METHOD__
);
+ // @todo: Should we update $this->mID here? Or not since Category
+ // objects tend to be short lived enough to not matter?
}
$dbw->endAtomic( __METHOD__ );
// This handles the case when updates have to batched into several COMMITs.
$scopedLock = LinksUpdate::acquirePageLock( $this->mDb, $id );
+ $title = $this->page->getTitle();
+
// Delete restrictions for it
$this->mDb->delete( 'page_restrictions', [ 'pr_page' => $id ], __METHOD__ );
}
}
+ // Refresh the category table entry if it seems to have no pages. Check
+ // master for the most up-to-date cat_pages count.
+ if ( $title->getNamespace() === NS_CATEGORY ) {
+ $row = $this->mDb->selectRow(
+ 'category',
+ [ 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats', 'cat_files' ],
+ [ 'cat_title' => $title->getDBkey(), 'cat_pages <= 0' ],
+ __METHOD__
+ );
+ if ( $row ) {
+ $cat = Category::newFromRow( $row, $title )->refreshCounts();
+ }
+ }
+
// If using cascading deletes, we can skip some explicit deletes
if ( !$this->mDb->cascadingDeletes() ) {
// Delete outgoing links
// If using cleanup triggers, we can skip some manual deletes
if ( !$this->mDb->cleanupTriggers() ) {
- $title = $this->page->getTitle();
// Find recentchanges entries to clean up...
$rcIdsForTitle = $this->mDb->selectFieldValues(
'recentchanges',
PopulateFilearchiveSha1::class,
PopulateBacklinkNamespace::class,
FixDefaultJsonContentPages::class,
+ CleanupEmptyCategories::class,
];
/**
$title->touchLinks();
$title->purgeSquid();
$title->deleteTitleProtection();
+
+ if ( $title->getNamespace() == NS_CATEGORY ) {
+ // Load the Category object, which will schedule a job to create
+ // the category table row if necessary. Checking a slave is ok
+ // here, in the worst case it'll run an unnecessary recount job on
+ // a category that probably doesn't have many members.
+ Category::newFromTitle( $title )->getID();
+ }
}
/**
$cat = Category::newFromName( $catName );
Hooks::run( 'CategoryAfterPageRemoved', [ $cat, $this, $id ] );
}
+
+ // Refresh counts on categories that should be empty now, to
+ // trigger possible deletion. Check master for the most
+ // up-to-date cat_pages.
+ if ( count( $deleted ) ) {
+ $rows = $dbw->select(
+ 'category',
+ [ 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats', 'cat_files' ],
+ [ 'cat_title' => $deleted, 'cat_pages <= 0' ],
+ $method
+ );
+ foreach ( $rows as $row ) {
+ $cat = Category::newFromRow( $row );
+ $cat->refreshCounts();
+ }
+ }
}
);
}
--- /dev/null
+<?php
+/**
+ * Clean up empty categories in the category table.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script to clean up empty categories in the category table.
+ *
+ * @ingroup Maintenance
+ * @since 1.28
+ */
+class CleanupEmptyCategories extends LoggedUpdateMaintenance {
+
+	/**
+	 * Register the script description and its command-line options
+	 * (--mode, --begin and --throttle).
+	 */
+	public function __construct() {
+		parent::__construct();
+		$this->addDescription(
+			<<<TEXT
+This script will clean up the category table by removing entries for empty
+categories without a description page and adding entries for empty categories
+with a description page. It will print out progress indicators every batch. The
+script is perfectly safe to run on large, live wikis, and running it multiple
+times is harmless. You may want to use the throttling options if it's causing
+too much load; they will not affect correctness.
+
+If the script is stopped and later resumed, you can use the --mode and --begin
+options with the last printed progress indicator to pick up where you left off.
+
+When the script has finished, it will make a note of this in the database, and
+will not run again without the --force option.
+TEXT
+		);
+
+		$this->addOption(
+			'mode',
+			'"add" empty categories with description pages, "remove" empty categories '
+			. 'without description pages, or "both"',
+			false,
+			true
+		);
+		$this->addOption(
+			'begin',
+			'Only do categories whose names are alphabetically after the provided name',
+			false,
+			true
+		);
+		$this->addOption(
+			'throttle',
+			'Wait this many milliseconds after each batch. Default: 0',
+			false,
+			true
+		);
+	}
+
+	/**
+	 * @return string Identifier for this update; presumably the key the parent
+	 *  class uses to note in the database that the script has already run.
+	 */
+	protected function getUpdateKey() {
+		return 'cleanup empty categories';
+	}
+
+	/**
+	 * Run the cleanup in the requested mode.
+	 *
+	 * The "add" phase calls Category::refreshCounts() for every page in the
+	 * category namespace that has no matching row in the category table. The
+	 * "remove" phase does the same for every category row whose page count is
+	 * not positive and that has no matching page. Both phases walk the titles
+	 * in ascending order, $this->mBatchSize rows at a time, and print a
+	 * "--mode=... --begin=..." line after each batch so that an interrupted
+	 * run can be resumed.
+	 *
+	 * @return bool False if --mode is not one of the accepted values, true
+	 *  once every requested phase has run to completion.
+	 */
+	protected function doDBUpdates() {
+		$mode = $this->getOption( 'mode', 'both' );
+		$begin = $this->getOption( 'begin', '' );
+		// A negative delay is meaningless and usleep() rejects it, so clamp to zero.
+		$throttle = max( 0, intval( $this->getOption( 'throttle', 0 ) ) );
+
+		if ( !in_array( $mode, [ 'add', 'remove', 'both' ], true ) ) {
+			$this->output( "--mode must be 'add', 'remove', or 'both'.\n" );
+			return false;
+		}
+
+		$dbw = $this->getDB( DB_MASTER );
+
+		if ( $mode === 'add' || $mode === 'both' ) {
+			$where = $begin !== ''
+				? [ 'page_title > ' . $dbw->addQuotes( $begin ) ]
+				: [];
+
+			$this->output( "Adding empty categories with description pages...\n" );
+			while ( true ) {
+				# Find category pages that have no category table row
+				$rows = $dbw->select(
+					[ 'page', 'category' ],
+					'page_title',
+					array_merge( $where, [
+						'page_namespace' => NS_CATEGORY,
+						'cat_title' => null,
+					] ),
+					__METHOD__,
+					[
+						'ORDER BY' => 'page_title',
+						'LIMIT' => $this->mBatchSize,
+					],
+					[
+						'category' => [ 'LEFT JOIN', 'page_title = cat_title' ],
+					]
+				);
+				if ( !$rows || $rows->numRows() <= 0 ) {
+					# Done, hopefully.
+					break;
+				}
+
+				$name = $this->refreshCategoriesInBatch( $rows, 'page_title' );
+				# Continue after the last title handled, whether or not a row was created
+				$where = [ 'page_title > ' . $dbw->addQuotes( $name ) ];
+				$this->output( "--mode=$mode --begin=$name\n" );
+
+				$this->waitAfterBatch( $throttle );
+			}
+
+			# --begin only applies to the first phase that runs
+			$begin = '';
+		}
+
+		if ( $mode === 'remove' || $mode === 'both' ) {
+			$where = $begin !== ''
+				? [ 'cat_title > ' . $dbw->addQuotes( $begin ) ]
+				: [];
+
+			$this->output( "Removing empty categories without description pages...\n" );
+			while ( true ) {
+				# Find category rows that look empty and have no description page.
+				# Use "<= 0" rather than "= 0" so that rows whose counter has
+				# drifted below zero are recounted too.
+				$rows = $dbw->select(
+					[ 'category', 'page' ],
+					'cat_title',
+					array_merge( $where, [
+						'page_title' => null,
+						'cat_pages <= 0',
+					] ),
+					__METHOD__,
+					[
+						'ORDER BY' => 'cat_title',
+						'LIMIT' => $this->mBatchSize,
+					],
+					[
+						'page' => [ 'LEFT JOIN', [
+							'page_namespace' => NS_CATEGORY, 'page_title = cat_title'
+						] ],
+					]
+				);
+				if ( !$rows || $rows->numRows() <= 0 ) {
+					# Done, hopefully.
+					break;
+				}
+
+				$name = $this->refreshCategoriesInBatch( $rows, 'cat_title' );
+				# Continue after the last title handled, whether or not the row was deleted
+				$where = [ 'cat_title > ' . $dbw->addQuotes( $name ) ];
+				$this->output( "--mode=remove --begin=$name\n" );
+
+				$this->waitAfterBatch( $throttle );
+			}
+		}
+
+		$this->output( "Category cleanup complete.\n" );
+
+		return true;
+	}
+
+	/**
+	 * Call Category::refreshCounts() for every category named in a batch.
+	 *
+	 * Names for which no Category object can be built are reported on the
+	 * script output and skipped.
+	 *
+	 * @param Traversable|array $rows Non-empty batch of result rows
+	 * @param string $field Name of the row field holding the category title
+	 * @return string|null Title taken from the last row, or null if $rows was empty
+	 */
+	private function refreshCategoriesInBatch( $rows, $field ) {
+		$name = null;
+		foreach ( $rows as $row ) {
+			$name = $row->$field;
+
+			$cat = Category::newFromName( $name );
+			if ( !is_object( $cat ) ) {
+				$this->output( "The category named $name is not valid?!\n" );
+			} else {
+				$cat->refreshCounts();
+			}
+		}
+
+		return $name;
+	}
+
+	/**
+	 * Pause between batches: call wfWaitForSlaves(), then sleep for the
+	 * configured throttle.
+	 *
+	 * @param int $throttle Non-negative delay in milliseconds
+	 */
+	private function waitAfterBatch( $throttle ) {
+		wfWaitForSlaves();
+		usleep( $throttle * 1000 );
+	}
+}
+
+$maintClass = 'CleanupEmptyCategories';
+require_once RUN_MAINTENANCE_IF_MAIN;
CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, cl_type, cl_from);
--
--- Track all existing categories. Something is a category if 1) it has an en-
--- try somewhere in categorylinks, or 2) it once did. Categories might not
--- have corresponding pages, so they need to be tracked separately.
+-- Track all existing categories. Something is a category if 1) it has an entry
+-- somewhere in categorylinks, or 2) it has a description page. Categories
+-- might not have corresponding pages, so they need to be tracked separately.
--
CREATE TABLE /*_*/category (
-- Primary key
CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, cl_type, cl_from);
--
--- Track all existing categories. Something is a category if 1) it has an en-
--- try somewhere in categorylinks, or 2) it once did. Categories might not
--- have corresponding pages, so they need to be tracked separately.
+-- Track all existing categories. Something is a category if 1) it has an entry
+-- somewhere in categorylinks, or 2) it has a description page. Categories
+-- might not have corresponding pages, so they need to be tracked separately.
--
CREATE TABLE /*_*/category (
-- Primary key