* Adds Category and CategoryList classes to represent categories themselves.
* Adds a category table, giving each category a name, ID, and counts of all members, subcats only, and files.
* Adds a maintenance script to populate the category table efficiently. This script is careful to wait for slaves and should be safe to run on a live database. The maintenance script's includes file is called by update.php.
* Until the category table is populated, the patch handles weird category table rows gracefully. It detects whether they're obviously impossible, and if so, it outputs appropriate messages.
link on diffs
* Magic word formatnum can now take raw suffix to undo formatting
* Add updatelog table to reliably permit updates that don't change the schema
+* Add category table to allow better tracking of category membership counts
+** (bug 1212) Give correct membership counts on the pages of large categories
=== Bug fixes in 1.13 ===
# Delete restrictions for it
$dbw->delete( 'page_restrictions', array ( 'pr_page' => $id ), __METHOD__ );
+ # Fix category table counts
+ $cats = array();
+ $res = $dbw->select( 'categorylinks', 'cl_to',
+ array( 'cl_from' => $id ), __METHOD__ );
+ foreach( $res as $row ) {
+ $cats []= $row->cl_to;
+ }
+ $this->updateCategoryCounts( array(), $cats, $dbw );
+
# Now that it's safely backed up, delete it
$dbw->delete( 'page', array( 'page_id' => $id ), __METHOD__);
# If using cascading deletes, we can skip some explicit deletes
if ( !$dbw->cascadingDeletes() ) {
-
$dbw->delete( 'revision', array( 'rev_page' => $id ), __METHOD__ );
if ($wgUseTrackbacks)
$wgOut->addParserOutput( $parserOutput );
}
+	/**
+	 * Update all the appropriate counts in the category table, given that
+	 * we've added the categories $added and deleted the categories $deleted.
+	 *
+	 * cat_pages is adjusted for every affected category.  When this page is
+	 * itself in the Category or Image namespace, cat_subcats or cat_files is
+	 * adjusted as well.
+	 *
+	 * @param $added array The names of categories that were added
+	 * @param $deleted array The names of categories that were deleted
+	 * @param $dbw Database Optional database connection to use
+	 * @return null
+	 */
+	public function updateCategoryCounts( $added, $deleted, $dbw = null ) {
+		# Nothing changed, so there is nothing to count.  This also avoids
+		# issuing an INSERT with no rows and UPDATEs with an empty title list.
+		if( !$added && !$deleted ) {
+			return;
+		}
+
+		$ns = $this->mTitle->getNamespace();
+		if( !$dbw ) {
+			$dbw = wfGetDB( DB_MASTER );
+		}
+
+		# First make sure the rows exist.  If one of the "deleted" ones didn't
+		# exist, we might legitimately not create it, but it's simpler to just
+		# create it and then give it a negative value, since the value is bogus
+		# anyway.
+		#
+		# Sometimes I wish we had INSERT ... ON DUPLICATE KEY UPDATE.
+		$insertRows = array();
+		foreach( array_merge( $added, $deleted ) as $cat ) {
+			$insertRows []= array( 'cat_title' => $cat );
+		}
+		$dbw->insert( 'category', $insertRows, __METHOD__, 'IGNORE' );
+
+		$addFields = array( 'cat_pages = cat_pages + 1' );
+		$removeFields = array( 'cat_pages = cat_pages - 1' );
+		if( $ns == NS_CATEGORY ) {
+			$addFields []= 'cat_subcats = cat_subcats + 1';
+			$removeFields []= 'cat_subcats = cat_subcats - 1';
+		} elseif( $ns == NS_IMAGE ) {
+			$addFields []= 'cat_files = cat_files + 1';
+			$removeFields []= 'cat_files = cat_files - 1';
+		}
+
+		# Only touch the side that actually has titles: an empty list cannot
+		# form a valid cat_title condition.
+		if( $added ) {
+			$dbw->update(
+				'category',
+				$addFields,
+				array( 'cat_title' => $added ),
+				__METHOD__
+			);
+		}
+		if( $deleted ) {
+			$dbw->update(
+				'category',
+				$removeFields,
+				array( 'cat_title' => $deleted ),
+				__METHOD__
+			);
+		}
+	}
}
'BagOStuff' => 'includes/BagOStuff.php',
'Block' => 'includes/Block.php',
'BrokenRedirectsPage' => 'includes/SpecialBrokenRedirects.php',
+ 'Category' => 'includes/Category.php',
'Categoryfinder' => 'includes/Categoryfinder.php',
+ 'CategoryList' => 'includes/Category.php',
'CategoryPage' => 'includes/CategoryPage.php',
'CategoryViewer' => 'includes/CategoryPage.php',
'ChangesList' => 'includes/ChangesList.php',
--- /dev/null
+<?php
+/**
+ * Two classes, Category and CategoryList, to deal with categories. To reduce
+ * code duplication, most of the logic is implemented for lists of categories,
+ * and then single categories are a special case. We use a separate class for
+ * CategoryList so as to discourage stupid slow memory-hogging stuff like manu-
+ * ally iterating through arrays of Titles and Articles, which we do way too
+ * much, when a smarter class can do stuff all in one query.
+ *
+ * Category(List) objects are immutable, strictly speaking. If you call me-
+ * thods that change the database, like to refresh link counts, the objects
+ * will be appropriately reinitialized. Member variables are lazy-initialized.
+ *
+ * TODO: Move some stuff from CategoryPage.php to here, and use that.
+ *
+ * @author Simetrical
+ */
+
+abstract class CategoryListBase {
+	# FIXME: Is storing all member variables as simple arrays a good idea?
+	# Should we use some kind of associative array instead?
+	/** Names of all member categories, normalized to DB-key form */
+	protected $mNames = null;
+	/** IDs of all member categories */
+	protected $mIDs = null;
+	/**
+	 * Counts of membership (cat_pages, cat_subcats, cat_files) for all member
+	 * categories
+	 */
+	protected $mPages = null, $mSubcats = null, $mFiles = null;
+
+	/** Not public: instances are created by the subclasses' factory functions. */
+	protected function __construct() {}
+
+	/**
+	 * Normalize the given names to DB-key form and store them, dropping any
+	 * name that Title cannot parse.  See CategoryList::newFromNames for
+	 * details.
+	 *
+	 * @param array $names Category names, not necessarily normalized
+	 * @throws MWException if $names is not an array
+	 */
+	protected function setNames( $names ) {
+		if( !is_array( $names ) ) {
+			throw new MWException( __METHOD__.' passed non-array' );
+		}
+		$normalized = array_map(
+			array( 'CategoryListBase', 'setNamesCallback' ),
+			$names
+		);
+		# array_diff() preserves keys, so reindex to keep a gap-free list even
+		# when an invalid name was dropped from the middle.
+		$this->mNames = array_values( array_diff( $normalized, array( false ) ) );
+	}
+
+	/**
+	 * @param string $name Name of a putative category
+	 * @return mixed Normalized name, or false if the name was invalid.
+	 */
+	private static function setNamesCallback( $name ) {
+		$title = Title::newFromText( $name );
+		if( !is_object( $title ) ) {
+			return false;
+		}
+		return $title->getDBKey();
+	}
+
+	/**
+	 * Set up all member variables using a database query.  The lookup is by
+	 * name when only names are known and by ID when only IDs are known; if
+	 * both are already set, nothing is queried.
+	 *
+	 * @return bool True on success, false on failure (no matching rows).
+	 * @throws MWException if neither names nor IDs have been set
+	 */
+	protected function initialize() {
+		if( $this->mNames === null && $this->mIDs === null ) {
+			throw new MWException( __METHOD__.' has both names and IDs null' );
+		}
+		if( $this->mIDs === null ) {
+			$lookup = $this->mNames;
+			$where = array( 'cat_title' => $lookup );
+		} elseif( $this->mNames === null ) {
+			$lookup = $this->mIDs;
+			$where = array( 'cat_id' => $lookup );
+		} else {
+			# Already initialized
+			return true;
+		}
+		if( !$lookup ) {
+			# Nothing to look up (e.g. every supplied name was invalid), and
+			# an empty list cannot form a valid condition.
+			return false;
+		}
+
+		$dbr = wfGetDB( DB_SLAVE );
+		$res = $dbr->select(
+			'category',
+			array( 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats',
+				'cat_files' ),
+			$where,
+			__METHOD__
+		);
+		if( !$res->fetchRow() ) {
+			# Okay, there were no contents.  Nothing to initialize.
+			$res->free();
+			return false;
+		}
+		# The probe above consumed the first row; start over.
+		$res->rewind();
+		$this->mIDs = $this->mNames = $this->mPages = $this->mSubcats =
+			$this->mFiles = array();
+		while( $row = $res->fetchRow() ) {
+			$this->mIDs []= $row['cat_id'];
+			$this->mNames []= $row['cat_title'];
+			$this->mPages []= $row['cat_pages'];
+			$this->mSubcats []= $row['cat_subcats'];
+			$this->mFiles []= $row['cat_files'];
+		}
+		$res->free();
+		return true;
+	}
+}
+
+/**
+ * A list of categories whose IDs and membership counts are loaded on first
+ * access.
+ *
+ * @todo make iterable.
+ */
+class CategoryList extends CategoryListBase {
+	/**
+	 * Factory function.  Any provided elements that don't correspond to a
+	 * category that actually exists will be silently dropped.  FIXME: Is this
+	 * sane error-handling?
+	 *
+	 * @param array $names An array of category names.  They need not be
+	 *   normalized, with spaces replaced by underscores.
+	 * @return CategoryList
+	 */
+	public static function newFromNames( $names ) {
+		$cat = new self();
+		$cat->setNames( $names );
+		return $cat;
+	}
+
+	/**
+	 * Factory function.  Any provided elements that don't correspond to a
+	 * category that actually exists will be silently dropped.  FIXME: Is this
+	 * sane error-handling?
+	 *
+	 * @param array $ids An array of category ids
+	 * @return CategoryList
+	 * @throws MWException if $ids is not an array
+	 */
+	public static function newFromIDs( $ids ) {
+		if( !is_array( $ids ) ) {
+			throw new MWException( __METHOD__.' passed non-array' );
+		}
+		$cat = new self();
+		# The property is mIDs; assigning to "mIds" would create an unrelated
+		# member and leave the list with neither names nor IDs.
+		$cat->mIDs = $ids;
+		return $cat;
+	}
+
+	/** @return array Simple array of DB key names */
+	public function getNames() {
+		$this->initialize();
+		# mNames stays null when the list was built from IDs and no row
+		# matched; report that as an empty list.
+		return $this->mNames === null ? array() : $this->mNames;
+	}
+	/**
+	 * FIXME: Is this a good return type?
+	 *
+	 * @return array Associative array of DB key name => ID
+	 */
+	public function getIDs() {
+		$this->initialize();
+		if( !$this->mNames || !$this->mIDs ) {
+			# Nothing was loaded from the category table.
+			return array();
+		}
+		# Pair each name with its own ID.  array_fill_keys() would instead
+		# map every name to the entire ID array.
+		return array_combine( $this->mNames, $this->mIDs );
+	}
+	/**
+	 * FIXME: Is this a good return type?
+	 *
+	 * @return array Associative array of DB key name => array(pages, subcats,
+	 *   files)
+	 */
+	public function getCounts() {
+		$this->initialize();
+		$ret = array();
+		if( $this->mNames === null || $this->mPages === null ) {
+			# Nothing was loaded from the category table.
+			return $ret;
+		}
+		foreach( array_keys( $this->mPages ) as $i ) {
+			$ret[$this->mNames[$i]] = array(
+				$this->mPages[$i],
+				$this->mSubcats[$i],
+				$this->mFiles[$i]
+			);
+		}
+		return $ret;
+	}
+}
+
+/**
+ * A single category: the one-element special case of CategoryListBase.
+ */
+class Category extends CategoryListBase {
+	/**
+	 * Factory function.
+	 *
+	 * @param string $name A category name (no "Category:" prefix).  It need
+	 *   not be normalized, with spaces replaced by underscores.
+	 * @return mixed Category, or false on a totally invalid name
+	 */
+	public static function newFromName( $name ) {
+		$cat = new self();
+		$cat->setNames( array( $name ) );
+		if( count( $cat->mNames ) !== 1 ) {
+			return false;
+		}
+		return $cat;
+	}
+
+	/**
+	 * Factory function.
+	 *
+	 * @param int $id A category id
+	 * @return Category
+	 */
+	public static function newFromID( $id ) {
+		$cat = new self();
+		$cat->mIDs = array( $id );
+		return $cat;
+	}
+
+	/**
+	 * Alias of newFromID(), kept under its original (plural) name so that
+	 * existing callers keep working.
+	 *
+	 * @param int $id A category id
+	 * @return Category
+	 */
+	public static function newFromIDs( $id ) {
+		return self::newFromID( $id );
+	}
+
+	/** @return mixed DB key name, or false on failure */
+	public function getName() { return $this->getX( 'mNames' ); }
+	/** @return mixed Category ID, or false on failure */
+	public function getID() { return $this->getX( 'mIDs' ); }
+	/** @return mixed Total number of member pages, or false on failure */
+	public function getPageCount() { return $this->getX( 'mPages' ); }
+	/** @return mixed Number of subcategories, or false on failure */
+	public function getSubcatCount() { return $this->getX( 'mSubcats' ); }
+	/** @return mixed Number of member files, or false on failure */
+	public function getFileCount() { return $this->getX( 'mFiles' ); }
+	/**
+	 * This is not implemented in the base class, because arrays of Titles are
+	 * evil.
+	 *
+	 * @return mixed The Title for this category, or false on failure.
+	 */
+	public function getTitle() {
+		if( !$this->initialize() ) {
+			return false;
+		}
+		# FIXME is there a better way to do this?
+		return Title::newFromText( "Category:{$this->mNames[0]}" );
+	}
+
+	/**
+	 * Generic accessor: the first (only) element of the named member array.
+	 *
+	 * @param string $key Member variable name, e.g. 'mPages'
+	 * @return mixed The value, or false if initialization failed
+	 */
+	private function getX( $key ) {
+		if( !$this->initialize() ) {
+			return false;
+		}
+		return $this->{$key}[0];
+	}
+
+	/**
+	 * Override the parent class so that we can return false if things muck
+	 * up, i.e., the name/ID we got was invalid.  Currently CategoryList
+	 * silently eats errors so as not to kill the whole array for one bad name.
+	 *
+	 * @return bool True on success, false on failure.
+	 */
+	protected function initialize() {
+		parent::initialize();
+		# Either member is still null when the lookup found no row, so check
+		# the type before counting.
+		return is_array( $this->mNames ) && is_array( $this->mIDs )
+			&& count( $this->mNames ) == 1 && count( $this->mIDs ) == 1;
+	}
+
+	/**
+	 * Refresh the counts for this category by recounting its members from
+	 * categorylinks joined to page, then storing the result in the category
+	 * table and in this object.
+	 *
+	 * FIXME: If there were some way to do this in MySQL 4 without an UPDATE
+	 * for every row, it would be nice to move this to the parent class.
+	 *
+	 * @return bool True on success, false on failure
+	 */
+	public function refreshCounts() {
+		if( wfReadOnly() ) {
+			return false;
+		}
+		# Note, we must use names for this, since categorylinks does.
+		# Resolve the name *before* opening the transaction so that a failed
+		# lookup cannot leave a transaction open.
+		$rowKnownToExist = false;
+		if( $this->mNames === null ) {
+			if( !$this->initialize() ) {
+				return false;
+			}
+			$rowKnownToExist = true;
+		}
+		$name = $this->mNames[0];
+
+		$dbw = wfGetDB( DB_MASTER );
+		$dbw->begin();
+		if( !$rowKnownToExist ) {
+			# Let's be sure that the row exists in the table.  We don't need
+			# to do this if we got the row from the table in initialization!
+			$dbw->insert(
+				'category',
+				array( 'cat_title' => $name ),
+				__METHOD__,
+				'IGNORE'
+			);
+		}
+
+		$result = $dbw->selectRow(
+			array( 'categorylinks', 'page' ),
+			array( 'COUNT(*) AS pages',
+				'COUNT(IF(page_namespace='.NS_CATEGORY.',1,NULL)) AS subcats',
+				'COUNT(IF(page_namespace='.NS_IMAGE.',1,NULL)) AS files'
+			),
+			array( 'cl_to' => $name, 'page_id = cl_from' ),
+			__METHOD__,
+			'LOCK IN SHARE MODE'
+		);
+		if( !$result ) {
+			# No counts to store; undo the transaction rather than
+			# dereferencing a non-object below.
+			$dbw->rollback();
+			return false;
+		}
+		$ret = $dbw->update(
+			'category',
+			array(
+				'cat_pages' => $result->pages,
+				'cat_subcats' => $result->subcats,
+				'cat_files' => $result->files
+			),
+			array( 'cat_title' => $name ),
+			__METHOD__
+		);
+		$dbw->commit();
+
+		# Now we should update our local counts.
+		$this->mPages = array( $result->pages );
+		$this->mSubcats = array( $result->subcats );
+		$this->mFiles = array( $result->files );
+
+		return $ret;
+	}
+}
$children, $children_start_char,
$showGallery, $gallery,
$skin;
+ /** Category object for this page */
+ private $cat;
function __construct( $title, $from = '', $until = '' ) {
global $wgCategoryPagingLimit;
$this->from = $from;
$this->until = $until;
$this->limit = $wgCategoryPagingLimit;
+ $this->cat = Category::newFromName( $title->getDBKey() );
}
/**
function getSubcategorySection() {
# Don't show subcategories section if there are none.
$r = '';
- $c = count( $this->children );
- if( $c > 0 ) {
+ $rescnt = count( $this->children );
+ $dbcnt = $this->cat->getSubcatCount();
+ $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'subcat' );
+ if( $rescnt > 0 ) {
# Showing subcategories
$r .= "<div id=\"mw-subcategories\">\n";
$r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";
- $r .= wfMsgExt( 'subcategorycount', array( 'parse' ), $c );
+ $r .= $countmsg;
$r .= $this->formatList( $this->children, $this->children_start_char );
$r .= "\n</div>";
}
$ti = htmlspecialchars( $this->title->getText() );
# Don't show articles section if there are none.
$r = '';
- $c = count( $this->articles );
- if( $c > 0 ) {
+
+ # FIXME, here and in the other two sections: we don't need to bother
+ # with this rigamarole if the entire category contents fit on one page
+ # and have already been retrieved. We can just use $rescnt in that
+ # case and save a query and some logic.
+ $dbcnt = $this->cat->getPageCount() - $this->cat->getSubcatCount()
+ - $this->cat->getFileCount();
+ $rescnt = count( $this->articles );
+ $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'article' );
+
+ if( $rescnt > 0 ) {
$r = "<div id=\"mw-pages\">\n";
$r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n";
- $r .= wfMsgExt( 'categoryarticlecount', array( 'parse' ), $c );
+ $r .= $countmsg;
$r .= $this->formatList( $this->articles, $this->articles_start_char );
$r .= "\n</div>";
}
function getImageSection() {
if( $this->showGallery && ! $this->gallery->isEmpty() ) {
+ $dbcnt = $this->cat->getFileCount();
+ $rescnt = $this->gallery->count();
+ $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'file' );
+
return "<div id=\"mw-category-media\">\n" .
'<h2>' . wfMsg( 'category-media-header', htmlspecialchars($this->title->getText()) ) . "</h2>\n" .
- wfMsgExt( 'category-media-count', array( 'parse' ), $this->gallery->count() ) .
- $this->gallery->toHTML() . "\n</div>";
+ $countmsg . $this->gallery->toHTML() . "\n</div>";
} else {
return '';
}
return "($prevLink) ($nextLink)";
}
+
+	/**
+	 * What to do if the category table conflicts with the number of results
+	 * returned?  This function says what.  It works the same whether the
+	 * things being counted are articles, subcategories, or files.
+	 *
+	 * Note for grepping: uses the messages category-article-count,
+	 * category-article-count-limited, category-subcat-count,
+	 * category-subcat-count-limited, category-file-count,
+	 * category-file-count-limited.
+	 *
+	 * @param int $rescnt The number of items returned by our database query.
+	 * @param int $dbcnt The number of items according to the category table.
+	 * @param string $type 'subcat', 'article', or 'file'
+	 * @return string A message giving the number of items, to output to HTML.
+	 */
+	private function getCountMessage( $rescnt, $dbcnt, $type ) {
+		# There are three cases:
+		#   1) The category table figure seems sane.  It might be wrong, but
+		#      we can't do anything about it if we don't recalculate it on
+		#      every category view.
+		#   2) The category table figure isn't sane, like it's smaller than
+		#      the number of actual results, *but* the number of results is
+		#      less than $this->limit and there's no offset.  In this case we
+		#      still know the right figure.
+		#   3) We have no idea.
+
+		# Only consult the gallery when galleries are enabled, the same way
+		# getImageSection() does; presumably it is not set up otherwise.
+		$gallerycnt = $this->showGallery ? $this->gallery->count() : 0;
+		$totalrescnt = count( $this->articles ) + count( $this->children ) +
+			$gallerycnt;
+
+		# The listing may be partial if it filled the page or has an offset.
+		$partial = $totalrescnt == $this->limit || $this->from || $this->until;
+
+		if( $dbcnt == $rescnt || ( $partial && $dbcnt > $rescnt ) ) {
+			# Case 1: seems sane.
+			$totalcnt = $dbcnt;
+		} elseif( $totalrescnt < $this->limit && !$this->from && !$this->until ) {
+			# Case 2: not sane, but salvageable.
+			$totalcnt = $rescnt;
+		} else {
+			# Case 3: hopeless.  Don't give a total count at all.
+			return wfMsgExt( "category-$type-count-limited", 'parse', $rescnt );
+		}
+		return wfMsgExt( "category-$type-count", 'parse', $rescnt, $totalcnt );
+	}
}
$this->getCategoryInsertions( $existing ) );
# Invalidate all categories which were added, deleted or changed (set symmetric difference)
- $categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing );
+ $categoryInserts = array_diff_assoc( $this->mCategories, $existing );
+ $categoryDeletes = array_diff_assoc( $existing, $this->mCategories );
+ $categoryUpdates = $categoryInserts + $categoryDeletes;
$this->invalidateCategories( $categoryUpdates );
+ $this->updateCategoryCounts( $categoryInserts, $categoryDeletes );
# Page properties
$existing = $this->getExistingProperties();
# Refresh category pages and image description pages
$existing = $this->getExistingCategories();
- $categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing );
+		# Split the change into categories gained and categories lost; their
+		# union is the set whose caches need invalidating.
+		$categoryInserts = array_diff_assoc( $this->mCategories, $existing );
+		$categoryDeletes = array_diff_assoc( $existing, $this->mCategories );
+		$categoryUpdates = $categoryInserts + $categoryDeletes;
$existing = $this->getExistingImages();
$imageUpdates = array_diff_key( $existing, $this->mImages ) + array_diff_key( $this->mImages, $existing );
$this->dumbTableUpdate( 'langlinks', $this->getInterlangInsertions(),'ll_from' );
$this->dumbTableUpdate( 'page_props', $this->getPropertyInsertions(), 'pp_page' );
- # Update the cache of all the category pages and image description pages which were changed
+ # Update the cache of all the category pages and image description
+ # pages which were changed, and fix the category table count
$this->invalidateCategories( $categoryUpdates );
+ $this->updateCategoryCounts( $categoryInserts, $categoryDeletes );
$this->invalidateImageDescriptions( $imageUpdates );
# Refresh links of all pages including this page
$this->invalidatePages( NS_CATEGORY, array_keys( $cats ) );
}
+	/**
+	 * Adjust the category table counts for this page's category changes.
+	 * Only the category names (the array keys) are passed on to Article.
+	 *
+	 * @param $added associative array of category name => sort key
+	 * @param $deleted associative array of category name => sort key
+	 */
+	function updateCategoryCounts( $added, $deleted ) {
+		$addedNames = array_keys( $added );
+		$deletedNames = array_keys( $deleted );
+
+		$article = new Article( $this->mTitle );
+		$article->updateCategoryCounts( $addedNames, $deletedNames, $this->mDb );
+	}
+
function invalidateImageDescriptions( $images ) {
$this->invalidatePages( NS_IMAGE, array_keys( $images ) );
}
function dumbTableUpdate( $table, $insertions, $fromField ) {
$this->mDb->delete( $table, array( $fromField => $this->mId ), __METHOD__ );
if ( count( $insertions ) ) {
- # The link array was constructed without FOR UPDATE, so there may be collisions
- # This may cause minor link table inconsistencies, which is better than
- # crippling the site with lock contention.
+ # The link array was constructed without FOR UPDATE, so there may
+ # be collisions. This may cause minor link table inconsistencies,
+ # which is better than crippling the site with lock contention.
$this->mDb->insert( $table, $insertions, __METHOD__, array( 'IGNORE' ) );
}
}
'nocredits' => 'There is no credits info available for this page.',
# Spam protection
-'spamprotectiontitle' => 'Spam protection filter',
-'spamprotectiontext' => 'The page you wanted to save was blocked by the spam filter. This is probably caused by a link to an external site.',
-'spamprotectionmatch' => 'The following text is what triggered our spam filter: $1',
-'subcategorycount' => 'There {{PLURAL:$1|is one subcategory|are $1 subcategories}} to this category.',
-'categoryarticlecount' => 'There {{PLURAL:$1|is one page|are $1 pages}} in this category.',
-'category-media-count' => 'There {{PLURAL:$1|is one file|are $1 files}} in this category.',
-'listingcontinuesabbrev' => 'cont.',
-'spambot_username' => 'MediaWiki spam cleanup',
-'spam_reverting' => 'Reverting to last version not containing links to $1',
-'spam_blanking' => 'All revisions contained links to $1, blanking',
+'spamprotectiontitle' => 'Spam protection filter',
+'spamprotectiontext' => 'The page you wanted to save was blocked by the spam filter. This is probably caused by a link to an external site.',
+'spamprotectionmatch' => 'The following text is what triggered our spam filter: $1',
+'subcategorycount' => 'There {{PLURAL:$1|is one subcategory|are $1 subcategories}} to this category.',
+'category-subcat-count' => '{{PLURAL:$2|This category has only the following subcategory.|This category has the following {{PLURAL:$1|subcategory|$1 subcategories}}, out of $2 total.}}',
+'category-subcat-count-limited' => 'This category has the following {{PLURAL:$1|subcategory|$1 subcategories}}.',
+'category-article-count' => '{{PLURAL:$2|This category contains only the following page.|The following {{PLURAL:$1|page is|$1 pages are}} in this category, out of $2 total.}}',
+'category-article-count-limited' => 'The following {{PLURAL:$1|page is|$1 pages are}} in the current category.',
+'category-file-count' => '{{PLURAL:$2|This category contains only the following file.|The following {{PLURAL:$1|file is|$1 files are}} in this category, out of $2 total.}}',
+'category-file-count-limited' => 'The following {{PLURAL:$1|file is|$1 files are}} in the current category.',
+'listingcontinuesabbrev' => 'cont.',
+'spambot_username' => 'MediaWiki spam cleanup',
+'spam_reverting' => 'Reverting to last version not containing links to $1',
+'spam_blanking' => 'All revisions contained links to $1, blanking',
# Info page
'infosubtitle' => 'Information for page',
--- /dev/null
+-- Track all existing categories, with cached counts of their members.
+CREATE TABLE /*$wgDBprefix*/category (
+ -- Primary key
+ cat_id int unsigned NOT NULL auto_increment,
+
+ -- Name of the category, in the same form as page_title (with underscores).
+ cat_title varchar(255) binary NOT NULL,
+
+ -- The numbers of member pages (including categories and media),
+ -- subcategories, and Image: namespace members, respectively. These are
+ -- signed to make underflow more obvious.
+ cat_pages int signed NOT NULL default 0,
+ cat_subcats int signed NOT NULL default 0,
+ cat_files int signed NOT NULL default 0,
+
+ -- Should the category be hidden from article views?
+ cat_hidden tinyint(1) unsigned NOT NULL default 0,
+
+ PRIMARY KEY (cat_id),
+ UNIQUE KEY (cat_title),
+
+ -- For Special:Mostlinkedcategories
+ KEY (cat_pages)
+) /*$wgDBTableOptions*/;
+
--- /dev/null
+<?php
+/**
+ * @addtogroup Maintenance
+ * @author Simetrical
+ */
+
+define( 'REPORTING_INTERVAL', 1000 );
+
+/**
+ * Populate the category table by refreshing the counts of every category
+ * named in categorylinks, in cl_to order, and record completion in the
+ * updatelog table.
+ *
+ * @param string $begin Only process categories sorting after this name;
+ *   '' to start from the beginning
+ * @param int $maxlag Passed to wfWaitForSlaves() at each progress marker
+ * @param int $throttle Milliseconds to sleep after each category
+ * @param bool $force Run even if updatelog says this was already done
+ * @return bool True if already populated or on completion, false if the
+ *   completion row could not be inserted
+ */
+function populateCategory( $begin, $maxlag, $throttle, $force ) {
+	# Normalize the arguments before any of them is used.
+	$maxlag = intval( $maxlag );
+	$throttle = intval( $throttle );
+	$force = (bool)$force;
+
+	$dbw = wfGetDB( DB_MASTER );
+
+	if( !$force ) {
+		$row = $dbw->selectRow(
+			'updatelog',
+			'1',
+			array( 'ul_key' => 'populate category' ),
+			__FUNCTION__
+		);
+		if( $row ) {
+			echo "Category table already populated.  Use php ".
+			"maintenance/populateCategory.php\n--force from the command line ".
+			"to override.\n";
+			return true;
+		}
+	}
+
+	if( $begin !== '' ) {
+		$where = 'cl_to > '.$dbw->addQuotes( $begin );
+	} else {
+		$where = null;
+	}
+	$i = 0;
+
+	while( true ) {
+		# Find which category to update
+		$row = $dbw->selectRow(
+			'categorylinks',
+			'cl_to',
+			$where,
+			__FUNCTION__,
+			array(
+				'ORDER BY' => 'cl_to'
+			)
+		);
+		if( !$row ) {
+			# Done, hopefully.
+			break;
+		}
+		$name = $row->cl_to;
+		# The next iteration resumes strictly after this category.
+		$where = 'cl_to > '.$dbw->addQuotes( $name );
+
+		# Use the row to update the category count
+		$cat = Category::newFromName( $name );
+		if( !is_object( $cat ) ) {
+			throw new MWException( "The category named $name is not valid?!" );
+		}
+		$cat->refreshCounts();
+
+		++$i;
+		if( !($i % REPORTING_INTERVAL) ) {
+			# Progress marker; its value can be fed back in as $begin.
+			echo "$name\n";
+			wfWaitForSlaves( $maxlag );
+		}
+		usleep( $throttle*1000 );
+	}
+
+	if( $dbw->insert(
+			'updatelog',
+			array( 'ul_key' => 'populate category' ),
+			__FUNCTION__,
+			'IGNORE'
+		)
+	) {
+		echo "Category population complete.\n";
+		return true;
+	} else {
+		echo "Could not insert category population row.\n";
+		return false;
+	}
+}
--- /dev/null
+<?php
+/**
+ * @addtogroup Maintenance
+ * @author Simetrical
+ */
+
+# Options that take a value on the command line.
+$optionsWithArgs = array( 'begin', 'max-slave-lag', 'throttle' );
+
+require_once "commandLine.inc";
+require_once "populateCategory.inc";
+
+if( isset( $options['help'] ) ) {
+	echo <<<TEXT
+This script will populate the category table, added in MediaWiki 1.13.  It will
+print out progress indicators every 1000 categories it adds to the table.  The
+script is perfectly safe to run on large, live wikis, and running it multiple
+times is harmless.  You may want to use the throttling options if it's causing
+too much load; they will not affect correctness.
+
+If the script is stopped and later resumed, you can use the --begin option with
+the last printed progress indicator to pick up where you left off.  This is
+safe, because any newly-added categories before this cutoff will have been
+added after the software update and so will be populated anyway.
+
+When the script has finished, it will make a note of this in the database, and
+will not run again without the --force option.
+
+Usage:
+ php populateCategory.php [--max-slave-lag <seconds>] [--begin <name>]
+[--throttle <milliseconds>] [--force]
+
+ --begin: Only do categories whose names are alphabetically after the pro-
+vided name.  Default: empty (start from beginning).
+ --max-slave-lag: If slave lag exceeds this many seconds, wait until it
+drops before continuing.  Default: 10.
+ --throttle: Wait this many milliseconds after each category.  Default: 0.
+ --force: Run regardless of whether the database says it's been run already.
+TEXT;
+	exit( 0 );
+}
+
+# The keys must match the command-line option names, or the user's values
+# are never picked up by the merge below.
+$defaults = array(
+	'begin' => '',
+	'max-slave-lag' => 10,
+	'throttle' => 0,
+	'force' => false
+);
+$options = array_merge( $defaults, $options );
+
+populateCategory( $options['begin'], $options['max-slave-lag'],
+	$options['throttle'], $options['force'] );
) /*$wgDBTableOptions*/;
+--
+-- Track all existing categories.  Something is a category if 1) it has an
+-- entry somewhere in categorylinks, or 2) it once did.  Categories might not
+-- have corresponding pages, so they need to be tracked separately.
+--
+CREATE TABLE /*$wgDBprefix*/category (
+ -- Primary key
+ cat_id int unsigned NOT NULL auto_increment,
+
+ -- Name of the category, in the same form as page_title (with underscores).
+ -- If there is a category page corresponding to this category, by definition,
+ -- it has this name (in the Category namespace).
+ cat_title varchar(255) binary NOT NULL,
+
+ -- The numbers of member pages (including categories and media),
+ -- subcategories, and Image: namespace members, respectively.  These are
+ -- signed to make underflow more obvious.  We make the first number include
+ -- the second two for better sorting: subtracting for display is easy, adding
+ -- for ordering is not.
+ cat_pages int signed NOT NULL default 0,
+ cat_subcats int signed NOT NULL default 0,
+ cat_files int signed NOT NULL default 0,
+
+ -- Should the category be hidden from article views?
+ cat_hidden tinyint(1) unsigned NOT NULL default 0,
+
+ PRIMARY KEY (cat_id),
+ UNIQUE KEY (cat_title),
+
+ -- For Special:Mostlinkedcategories
+ KEY (cat_pages)
+) /*$wgDBTableOptions*/;
+
--
-- Track links to external URLs
--
array( 'add_field', 'ipblocks', 'ipb_by_text', 'patch-ipb_by_text.sql' ),
array( 'add_table', 'page_props', 'patch-page_props.sql' ),
array( 'add_table', 'updatelog', 'patch-updatelog.sql' ),
+ array( 'add_table', 'category', 'patch-category.sql' ),
+ array( 'do_category_population' ),
);
}
}
+/**
+ * Updater step: populate the category table unless the 'populate category'
+ * update row already exists.
+ */
+function do_category_population() {
+	$alreadyDone = update_row_exists( 'populate category' );
+	if( !$alreadyDone ) {
+		require_once( 'populateCategory.inc' );
+		$notice = "Populating category table, printing progress markers.  "
+			. "For large databases, you\n"
+			. "may want to hit Ctrl-C and do this manually with maintenance/\n"
+			. "populateCategory.php.\n";
+		echo $notice;
+		# Start from the beginning, max lag 10, no throttling, forced run.
+		populateCategory( '', 10, 0, true );
+		echo "Done populating category table.\n";
+	} else {
+		echo "...category table already populated.\n";
+	}
+}
+
function
pg_describe_table($table)
{