From: Aryeh Gregor Date: Tue, 18 Mar 2008 00:17:28 +0000 (+0000) Subject: This is a schema change. It's only a table creation, but the table must be created... X-Git-Tag: 1.31.0-rc.0~49036 X-Git-Url: http://git.cyclocoop.org/%22%2C%20generer_url_ecrire%28?a=commitdiff_plain;h=80a5874828c2ea32c8b5cee19dac23685924bc13;p=lhc%2Fweb%2Fwiklou.git This is a schema change. It's only a table creation, but the table must be created on Wikimedia servers before this revision goes live. The maintenance script populateCategory.php should be run when convenient. If it's not run, there's only one substantial case where display will be harmed: the page of a category with more than 200 net pages added since the patch goes live will give an erroneously low count. In other cases category pages will just be better-worded, and it will recognize the count in the table is bogus. * Adds Category and CategoryList classes to represent categories themselves. * Adds a category table, giving each category a name, ID, and counts of all members, subcats only, and files. * Adds a maintenance script to populate the category table efficiently. This script is careful to wait for slaves and should be safe to run on a live database. The maintenance script's includes file is called by update.php. * Until the category table is populated, the patch handles weird category table rows gracefully. It detects whether they're obviously impossible, and if so, it outputs appropriate messages. 
--- diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 5e226435d5..86cc26cc9e 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -46,6 +46,8 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN link on diffs * Magic word formatnum can now take raw suffix to undo formatting * Add updatelog table to reliably permit updates that don't change the schema +* Add category table to allow better tracking of category membership counts +** (bug 1212) Give correct membership counts on the pages of large categories === Bug fixes in 1.13 === diff --git a/includes/Article.php b/includes/Article.php index 805a5cdaab..33e21ec5ec 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -2259,12 +2259,20 @@ class Article { # Delete restrictions for it $dbw->delete( 'page_restrictions', array ( 'pr_page' => $id ), __METHOD__ ); + # Fix category table counts + $cats = array(); + $res = $dbw->select( 'categorylinks', 'cl_to', + array( 'cl_from' => $id ), __METHOD__ ); + foreach( $res as $row ) { + $cats []= $row->cl_to; + } + $this->updateCategoryCounts( array(), $cats, $dbw ); + # Now that it's safely backed up, delete it $dbw->delete( 'page', array( 'page_id' => $id ), __METHOD__); # If using cascading deletes, we can skip some explicit deletes if ( !$dbw->cascadingDeletes() ) { - $dbw->delete( 'revision', array( 'rev_page' => $id ), __METHOD__ ); if ($wgUseTrackbacks) @@ -3340,4 +3348,55 @@ class Article { $wgOut->addParserOutput( $parserOutput ); } + /** + * Update all the appropriate counts in the category table, given that + * we've added the categories $added and deleted the categories $deleted. 
+ * + * @param $added array The names of categories that were added + * @param $deleted array The names of categories that were deleted + * @param $dbw Database Optional database connection to use + * @return null + */ + public function updateCategoryCounts( $added, $deleted, $dbw = null ) { + $ns = $this->mTitle->getNamespace(); + if( !$dbw ) { + $dbw = wfGetDB( DB_MASTER ); + } + + # First make sure the rows exist. If one of the "deleted" ones didn't + # exist, we might legitimately not create it, but it's simpler to just + # create it and then give it a negative value, since the value is bogus + # anyway. + # + # Sometimes I wish we had INSERT ... ON DUPLICATE KEY UPDATE. + $insertCats = array_merge( $added, $deleted ); + $insertRows = array(); + foreach( $insertCats as $cat ) { + $insertRows []= array( 'cat_title' => $cat ); + } + $dbw->insert( 'category', $insertRows, __METHOD__, 'IGNORE' ); + + $addFields = array( 'cat_pages = cat_pages + 1' ); + $removeFields = array( 'cat_pages = cat_pages - 1' ); + if( $ns == NS_CATEGORY ) { + $addFields []= 'cat_subcats = cat_subcats + 1'; + $removeFields []= 'cat_subcats = cat_subcats - 1'; + } elseif( $ns == NS_IMAGE ) { + $addFields []= 'cat_files = cat_files + 1'; + $removeFields []= 'cat_files = cat_files - 1'; + } + + $dbw->update( + 'category', + $addFields, + array( 'cat_title' => $added ), + __METHOD__ + ); + $dbw->update( + 'category', + $removeFields, + array( 'cat_title' => $deleted ), + __METHOD__ + ); + } } diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index ca583d004a..c702586430 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -25,7 +25,9 @@ function __autoload($className) { 'BagOStuff' => 'includes/BagOStuff.php', 'Block' => 'includes/Block.php', 'BrokenRedirectsPage' => 'includes/SpecialBrokenRedirects.php', + 'Category' => 'includes/Category.php', 'Categoryfinder' => 'includes/Categoryfinder.php', + 'CategoryList' => 'includes/Category.php', 'CategoryPage' => 
'includes/CategoryPage.php', 'CategoryViewer' => 'includes/CategoryPage.php', 'ChangesList' => 'includes/ChangesList.php', diff --git a/includes/Category.php b/includes/Category.php new file mode 100644 index 0000000000..98c65a1d58 --- /dev/null +++ b/includes/Category.php @@ -0,0 +1,305 @@ +mNames = array_diff( + array_map( + array( 'CategoryListBase', 'setNamesCallback' ), + $names + ), + array( false ) + ); + } + + /** + * @param string $name Name of a putative category + * @return mixed Normalized name, or false if the name was invalid. + */ + private static function setNamesCallback( $name ) { + $title = Title::newFromText( $name ); + if( !is_object( $title ) ) { + return false; + } + return $title->getDBKey(); + } + + /** + * Set up all member variables using a database query. + * @return bool True on success, false on failure. + */ + protected function initialize() { + if( $this->mNames === null && $this->mIDs === null ) { + throw new MWException( __METHOD__.' has both names and IDs null' ); + } + $dbr = wfGetDB( DB_SLAVE ); + if( $this->mIDs === null ) { + $where = array( 'cat_title' => $this->mNames ); + } elseif( $this->mNames === null ) { + $where = array( 'cat_id' => $this->mIDs ); + } else { + # Already initialized + return true; + } + $res = $dbr->select( + 'category', + array( 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats', + 'cat_files' ), + $where, + __METHOD__ + ); + if( !$res->fetchRow() ) { + # Okay, there were no contents. Nothing to initialize. + return false; + } + $res->rewind(); + $this->mIDs = $this->mNames = $this->mPages = $this->mSubcats = + $this->mFiles = array(); + while( $row = $res->fetchRow() ) { + $this->mIDs []= $row['cat_id']; + $this->mNames []= $row['cat_title']; + $this->mPages []= $row['cat_pages']; + $this->mSubcats []= $row['cat_subcats']; + $this->mFiles []= $row['cat_files']; + } + $res->free(); + } +} + +/** @todo make iterable. */ +class CategoryList extends CategoryListBase { + /** + * Factory function. 
Any provided elements that don't correspond to a cat-
+	 * egory that actually exists will be silently dropped. FIXME: Is this
+	 * sane error-handling?
+	 *
+	 * @param array $names An array of category names. They need not be norma-
+	 * lized, with spaces replaced by underscores.
+	 * @return CategoryList
+	 */
+	public static function newFromNames( $names ) {
+		$cat = new self();
+		$cat->setNames( $names );
+		return $cat;
+	}
+
+	/**
+	 * Factory function. Any provided elements that don't correspond to a cat-
+	 * egory that actually exists will be silently dropped. FIXME: Is this
+	 * sane error-handling?
+	 *
+	 * @param array $ids An array of category ids
+	 * @return CategoryList
+	 */
+	public static function newFromIDs( $ids ) {
+		if( !is_array( $ids ) ) {
+			throw new MWException( __METHOD__.' passed non-array' );
+		}
+		$cat = new self();
+		# The member is mIDs. Property names are case-sensitive, so writing
+		# to "mIds" would leave mIDs null and the IDs would never be used.
+		$cat->mIDs = $ids;
+		return $cat;
+	}
+
+	/** @return array Simple array of DB key names */
+	public function getNames() {
+		$this->initialize();
+		return $this->mNames;
+	}
+	/**
+	 * FIXME: Is this a good return type?
+	 *
+	 * @return array Associative array of DB key name => ID. Empty if no
+	 * names or no IDs are available after initialization.
+	 */
+	public function getIDs() {
+		$this->initialize();
+		if( !$this->mNames || !$this->mIDs
+			|| count( $this->mNames ) != count( $this->mIDs ) ) {
+			# Nothing usable to pair up.
+			return array();
+		}
+		# Pair the i-th name with the i-th ID. array_fill_keys() would
+		# instead map every name to the entire ID array.
+		return array_combine( $this->mNames, $this->mIDs );
+	}
+	/**
+	 * FIXME: Is this a good return type?
+	 *
+	 * @return array Associative array of DB key name => array(pages, subcats,
+	 * files)
+	 */
+	public function getCounts() {
+		$this->initialize();
+		$ret = array();
+		foreach( array_keys( $this->mNames ) as $i ) {
+			$ret[$this->mNames[$i]] = array(
+				$this->mPages[$i],
+				$this->mSubcats[$i],
+				$this->mFiles[$i]
+			);
+		}
+		return $ret;
+	}
+}
+
+class Category extends CategoryListBase {
+	/**
+	 * Factory function.
+	 *
+	 * @param string $name A category name (no "Category:" prefix). It need
+	 * not be normalized, with spaces replaced by underscores.
+ * @return mixed Category, or false on a totally invalid name + */ + public static function newFromName( $name ) { + $cat = new self(); + $cat->setNames( array( $name ) ); + if( count( $cat->mNames ) !== 1 ) { + return false; + } + return $cat; + } + + /** + * Factory function. + * + * @param array $id A category id + * @return Category + */ + public static function newFromIDs( $id ) { + $cat = new self(); + $cat->mIDs = array( $id ); + return $cat; + } + + /** @return mixed DB key name, or false on failure */ + public function getName() { return $this->getX( 'mNames' ); } + /** @return mixed Category ID, or false on failure */ + public function getID() { return $this->getX( 'mIDs' ); } + /** @return mixed Total number of member pages, or false on failure */ + public function getPageCount() { return $this->getX( 'mPages' ); } + /** @return mixed Number of subcategories, or false on failure */ + public function getSubcatCount() { return $this->getX( 'mSubcats' ); } + /** @return mixed Number of member files, or false on failure */ + public function getFileCount() { return $this->getX( 'mFiles' ); } + /** + * This is not implemented in the base class, because arrays of Titles are + * evil. + * + * @return mixed The Title for this category, or false on failure. + */ + public function getTitle() { + if( !$this->initialize() ) { + return false; + } + # FIXME is there a better way to do this? + return Title::newFromText( "Category:{$this->mNames[0]}" ); + } + + /** Generic accessor */ + private function getX( $key ) { + if( !$this->initialize() ) { + return false; + } + return $this->{$key}[0]; + } + + /** + * Override the parent class so that we can return false if things muck + * up, i.e., the name/ID we got was invalid. Currently CategoryList si- + * lently eats errors so as not to kill the whole array for one bad name. + * + * @return bool True on success, false on failure. 
+ */ + protected function initialize() { + parent::initialize(); + if( count( $this->mNames ) != 1 || count( $this->mIDs ) != 1 ) { + return false; + } + return true; + } + + /** + * Refresh the counts for this category. + * + * FIXME: If there were some way to do this in MySQL 4 without an UPDATE + * for every row, it would be nice to move this to the parent class. + * + * @return bool True on success, false on failure + */ + public function refreshCounts() { + if( wfReadOnly() ) { + return false; + } + $dbw = wfGetDB( DB_MASTER ); + $dbw->begin(); + # Note, we must use names for this, since categorylinks does. + if( $this->mNames === null ) { + if( !$this->initialize() ) { + return false; + } + } else { + # Let's be sure that the row exists in the table. We don't need to + # do this if we got the row from the table in initialization! + $dbw->insert( + 'category', + array( 'cat_title' => $this->mNames[0] ), + __METHOD__, + 'IGNORE' + ); + } + + $result = $dbw->selectRow( + array( 'categorylinks', 'page' ), + array( 'COUNT(*) AS pages', + 'COUNT(IF(page_namespace='.NS_CATEGORY.',1,NULL)) AS subcats', + 'COUNT(IF(page_namespace='.NS_IMAGE.',1,NULL)) AS files' + ), + array( 'cl_to' => $this->mNames[0], 'page_id = cl_from' ), + __METHOD__, + 'LOCK IN SHARE MODE' + ); + $ret = $dbw->update( + 'category', + array( + 'cat_pages' => $result->pages, + 'cat_subcats' => $result->subcats, + 'cat_files' => $result->files + ), + array( 'cat_title' => $this->mNames[0] ), + __METHOD__ + ); + $dbw->commit(); + + # Now we should update our local counts. 
+ $this->mPages = array( $result->pages ); + $this->mSubcats = array( $result->subcats ); + $this->mFiles = array( $result->files ); + + return $ret; + } +} diff --git a/includes/CategoryPage.php b/includes/CategoryPage.php index 6fbcd3c17c..315d435a75 100644 --- a/includes/CategoryPage.php +++ b/includes/CategoryPage.php @@ -70,6 +70,8 @@ class CategoryViewer { $children, $children_start_char, $showGallery, $gallery, $skin; + /** Category object for this page */ + private $cat; function __construct( $title, $from = '', $until = '' ) { global $wgCategoryPagingLimit; @@ -77,6 +79,7 @@ class CategoryViewer { $this->from = $from; $this->until = $until; $this->limit = $wgCategoryPagingLimit; + $this->cat = Category::newFromName( $title->getDBKey() ); } /** @@ -261,12 +264,14 @@ class CategoryViewer { function getSubcategorySection() { # Don't show subcategories section if there are none. $r = ''; - $c = count( $this->children ); - if( $c > 0 ) { + $rescnt = count( $this->children ); + $dbcnt = $this->cat->getSubcatCount(); + $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'subcat' ); + if( $rescnt > 0 ) { # Showing subcategories $r .= "
\n"; $r .= '

' . wfMsg( 'subcategories' ) . "

\n"; - $r .= wfMsgExt( 'subcategorycount', array( 'parse' ), $c ); + $r .= $countmsg; $r .= $this->formatList( $this->children, $this->children_start_char ); $r .= "\n
"; } @@ -277,11 +282,20 @@ class CategoryViewer { $ti = htmlspecialchars( $this->title->getText() ); # Don't show articles section if there are none. $r = ''; - $c = count( $this->articles ); - if( $c > 0 ) { + + # FIXME, here and in the other two sections: we don't need to bother + # with this rigamarole if the entire category contents fit on one page + # and have already been retrieved. We can just use $rescnt in that + # case and save a query and some logic. + $dbcnt = $this->cat->getPageCount() - $this->cat->getSubcatCount() + - $this->cat->getFileCount(); + $rescnt = count( $this->articles ); + $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'article' ); + + if( $rescnt > 0 ) { $r = "
\n"; $r .= '

' . wfMsg( 'category_header', $ti ) . "

\n"; - $r .= wfMsgExt( 'categoryarticlecount', array( 'parse' ), $c ); + $r .= $countmsg; $r .= $this->formatList( $this->articles, $this->articles_start_char ); $r .= "\n
"; } @@ -290,10 +304,13 @@ class CategoryViewer { function getImageSection() { if( $this->showGallery && ! $this->gallery->isEmpty() ) { + $dbcnt = $this->cat->getFileCount(); + $rescnt = $this->gallery->count(); + $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'file' ); + return "
\n" . '

' . wfMsg( 'category-media-header', htmlspecialchars($this->title->getText()) ) . "

\n" . - wfMsgExt( 'category-media-count', array( 'parse' ), $this->gallery->count() ) . - $this->gallery->toHTML() . "\n
"; + $countmsg . $this->gallery->toHTML() . "\n"; } else { return ''; } @@ -440,6 +457,47 @@ class CategoryViewer { return "($prevLink) ($nextLink)"; } + + /** + * What to do if the category table conflicts with the number of results + * returned? This function says what. It works the same whether the + * things being counted are articles, subcategories, or files. + * + * Note for grepping: uses the messages category-article-count, + * category-article-count-limited, category-subcat-count, + * category-subcat-count-limited, category-file-count, + * category-file-count-limited. + * + * @param int $rescnt The number of items returned by our database query. + * @param int $dbcnt The number of items according to the category table. + * @param string $type 'subcat', 'article', or 'file' + * @return string A message giving the number of items, to output to HTML. + */ + private function getCountMessage( $rescnt, $dbcnt, $type ) { + # There are three cases: + # 1) The category table figure seems sane. It might be wrong, but + # we can't do anything about it if we don't recalculate it on ev- + # ery category view. + # 2) The category table figure isn't sane, like it's smaller than the + # number of actual results, *but* the number of results is less + # than $this->limit and there's no offset. In this case we still + # know the right figure. + # 3) We have no idea. + $totalrescnt = count( $this->articles ) + count( $this->children ) + + $this->gallery->count(); + if($dbcnt == $rescnt || (($totalrescnt == $this->limit || $this->from + || $this->until) && $dbcnt > $rescnt)){ + # Case 1: seems sane. + $totalcnt = $dbcnt; + } elseif($totalrescnt < $this->limit && !$this->from && !$this->until){ + # Case 2: not sane, but salvageable. + $totalcnt = $rescnt; + } else { + # Case 3: hopeless. Don't give a total count at all. 
+			return wfMsgExt("category-$type-count-limited", 'parse', $rescnt);
+		}
+		return wfMsgExt( "category-$type-count", 'parse', $rescnt, $totalcnt );
+	}
 }
diff --git a/includes/LinksUpdate.php b/includes/LinksUpdate.php
index 28dbf5bfc5..efda9c46b3 100644
--- a/includes/LinksUpdate.php
+++ b/includes/LinksUpdate.php
@@ -124,8 +124,11 @@ class LinksUpdate {
 			$this->getCategoryInsertions( $existing ) );
 		# Invalidate all categories which were added, deleted or changed (set symmetric difference)
-		$categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing );
+		$categoryInserts = array_diff_assoc( $this->mCategories, $existing );
+		$categoryDeletes = array_diff_assoc( $existing, $this->mCategories );
+		$categoryUpdates = $categoryInserts + $categoryDeletes;
 		$this->invalidateCategories( $categoryUpdates );
+		$this->updateCategoryCounts( $categoryInserts, $categoryDeletes );
 		# Page properties
 		$existing = $this->getExistingProperties();
@@ -155,7 +158,9 @@ class LinksUpdate {
 		# Refresh category pages and image description pages
 		$existing = $this->getExistingCategories();
-		$categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing );
+		$categoryInserts = array_diff_assoc( $this->mCategories, $existing );
+		$categoryDeletes = array_diff_assoc( $existing, $this->mCategories );
+		$categoryUpdates = $categoryInserts + $categoryDeletes;
 		$existing = $this->getExistingImages();
 		$imageUpdates = array_diff_key( $existing, $this->mImages ) + array_diff_key( $this->mImages, $existing );
@@ -167,8 +172,10 @@ class LinksUpdate {
 		$this->dumbTableUpdate( 'langlinks', $this->getInterlangInsertions(),'ll_from' );
 		$this->dumbTableUpdate( 'page_props', $this->getPropertyInsertions(), 'pp_page' );
-		# Update the cache of all the category pages and image description pages which were changed
+		# Update the cache of all the category pages and image description
+		# pages which were
changed, and fix the category table count $this->invalidateCategories( $categoryUpdates ); + $this->updateCategoryCounts( $categoryInserts, $categoryDeletes ); $this->invalidateImageDescriptions( $imageUpdates ); # Refresh links of all pages including this page @@ -261,6 +268,18 @@ class LinksUpdate { $this->invalidatePages( NS_CATEGORY, array_keys( $cats ) ); } + /** + * Update all the appropriate counts in the category table. + * @param $added associative array of category name => sort key + * @param $deleted associative array of category name => sort key + */ + function updateCategoryCounts( $added, $deleted ) { + $a = new Article($this->mTitle); + $a->updateCategoryCounts( + array_keys( $added ), array_keys( $deleted ), $this->mDb + ); + } + function invalidateImageDescriptions( $images ) { $this->invalidatePages( NS_IMAGE, array_keys( $images ) ); } @@ -268,9 +287,9 @@ class LinksUpdate { function dumbTableUpdate( $table, $insertions, $fromField ) { $this->mDb->delete( $table, array( $fromField => $this->mId ), __METHOD__ ); if ( count( $insertions ) ) { - # The link array was constructed without FOR UPDATE, so there may be collisions - # This may cause minor link table inconsistencies, which is better than - # crippling the site with lock contention. + # The link array was constructed without FOR UPDATE, so there may + # be collisions. This may cause minor link table inconsistencies, + # which is better than crippling the site with lock contention. 
$this->mDb->insert( $table, $insertions, __METHOD__, array( 'IGNORE' ) ); } } diff --git a/languages/messages/MessagesEn.php b/languages/messages/MessagesEn.php index b651546cb1..4dae5498e0 100644 --- a/languages/messages/MessagesEn.php +++ b/languages/messages/MessagesEn.php @@ -2416,16 +2416,20 @@ All transwiki import actions are logged at the [[Special:Log/import|import log]] 'nocredits' => 'There is no credits info available for this page.', # Spam protection -'spamprotectiontitle' => 'Spam protection filter', -'spamprotectiontext' => 'The page you wanted to save was blocked by the spam filter. This is probably caused by a link to an external site.', -'spamprotectionmatch' => 'The following text is what triggered our spam filter: $1', -'subcategorycount' => 'There {{PLURAL:$1|is one subcategory|are $1 subcategories}} to this category.', -'categoryarticlecount' => 'There {{PLURAL:$1|is one page|are $1 pages}} in this category.', -'category-media-count' => 'There {{PLURAL:$1|is one file|are $1 files}} in this category.', -'listingcontinuesabbrev' => 'cont.', -'spambot_username' => 'MediaWiki spam cleanup', -'spam_reverting' => 'Reverting to last version not containing links to $1', -'spam_blanking' => 'All revisions contained links to $1, blanking', +'spamprotectiontitle' => 'Spam protection filter', +'spamprotectiontext' => 'The page you wanted to save was blocked by the spam filter. 
This is probably caused by a link to an external site.',
+'spamprotectionmatch' => 'The following text is what triggered our spam filter: $1',
+'subcategorycount' => 'There {{PLURAL:$1|is one subcategory|are $1 subcategories}} to this category.',
+'category-subcat-count' => '{{PLURAL:$2|This category has only the following subcategory.|This category has the following {{PLURAL:$1|subcategory|$1 subcategories}}, out of $2 total.}}',
+'category-subcat-count-limited' => 'This category has the following {{PLURAL:$1|subcategory|$1 subcategories}}.',
+'category-article-count' => '{{PLURAL:$2|This category contains only the following page.|The following {{PLURAL:$1|page is|$1 pages are}} in this category, out of $2 total.}}',
+'category-article-count-limited' => 'The following {{PLURAL:$1|page is|$1 pages are}} in the current category.',
+'category-file-count' => '{{PLURAL:$2|This category contains only the following file.|The following {{PLURAL:$1|file is|$1 files are}} in this category, out of $2 total.}}',
+'category-file-count-limited' => 'The following {{PLURAL:$1|file is|$1 files are}} in the current category.',
+'listingcontinuesabbrev' => 'cont.',
+'spambot_username' => 'MediaWiki spam cleanup',
+'spam_reverting' => 'Reverting to last version not containing links to $1',
+'spam_blanking' => 'All revisions contained links to $1, blanking',
 # Info page
 'infosubtitle' => 'Information for page',
diff --git a/maintenance/archives/patch-category.sql b/maintenance/archives/patch-category.sql
new file mode 100644
index 0000000000..416500c3cc
--- /dev/null
+++ b/maintenance/archives/patch-category.sql
@@ -0,0 +1,17 @@
+CREATE TABLE /*$wgDBprefix*/category (
+  cat_id int unsigned NOT NULL auto_increment,
+
+  cat_title varchar(255) binary NOT NULL,
+
+  cat_pages int signed NOT NULL default 0,
+  cat_subcats int signed NOT NULL default 0,
+  cat_files int signed NOT NULL default 0,
+
+  cat_hidden tinyint(1) unsigned NOT NULL default 0,
+
+  PRIMARY KEY (cat_id),
+  UNIQUE KEY
(cat_title), + + KEY (cat_pages) +) /*$wgDBTableOptions*/; + diff --git a/maintenance/populateCategory.inc b/maintenance/populateCategory.inc new file mode 100644 index 0000000000..f17e5e827f --- /dev/null +++ b/maintenance/populateCategory.inc @@ -0,0 +1,84 @@ +selectRow( + 'updatelog', + '1', + array( 'ul_key' => 'populate category' ), + __FUNCTION__ + ); + if( $row ) { + echo "Category table already populated. Use php ". + "maintenace/populateCategory.php\n--force from the command line ". + "to override.\n"; + return true; + } + } + + $maxlag = intval( $maxlag ); + $throttle = intval( $throttle ); + $force = (bool)$force; + if( $begin !== '' ) { + $where = 'cl_to > '.$dbw->addQuotes( $begin ); + } else { + $where = null; + } + $i = 0; + + while( true ) { + # Find which category to update + $row = $dbw->selectRow( + 'categorylinks', + 'cl_to', + $where, + __FUNCTION__, + array( + 'ORDER BY' => 'cl_to' + ) + ); + if( !$row ) { + # Done, hopefully. + break; + } + $name = $row->cl_to; + $where = 'cl_to > '.$dbw->addQuotes( $name ); + + # Use the row to update the category count + $cat = Category::newFromName( $name ); + if( !is_object( $cat ) ) { + var_dump( $cat ); + throw new MWException( "The category named $name is not valid?!" 
);
+		}
+		$cat->refreshCounts();
+
+		++$i;
+		if( !($i % REPORTING_INTERVAL) ) {
+			echo "$name\n";
+			wfWaitForSlaves( $maxlag );
+		}
+		usleep( $throttle*1000 );
+	}
+
+	if( $dbw->insert(
+			'updatelog',
+			array( 'ul_key' => 'populate category' ),
+			__FUNCTION__,
+			'IGNORE'
+		)
+	) {
+		echo "Category population complete.\n";
+		return true;
+	} else {
+		echo "Could not insert category population row.\n";
+		return false;
+	}
+}
diff --git a/maintenance/populateCategory.php b/maintenance/populateCategory.php
new file mode 100644
index 0000000000..2b007bde35
--- /dev/null
+++ b/maintenance/populateCategory.php
@@ -0,0 +1,51 @@
+] [--begin ]
+[--throttle ] [--force]
+
+    --begin: Only do categories whose names are alphabetically after the pro-
+vided name. Default: empty (start from beginning).
+    --max-slave-lag: If slave lag exceeds this many seconds, wait until it
+drops before continuing. Default: 10.
+    --throttle: Wait this many milliseconds after each category. Default: 0.
+    --force: Run regardless of whether the database says it's been run already.
+TEXT;
+	exit( 0 );
+}
+
+$defaults = array(
+	'begin' => '',
+	'max-slave-lag' => 10,
+	'throttle' => 0,
+	'force' => false
+);
+$options = array_merge( $defaults, $options );
+
+populateCategory( $options['begin'], $options['max-slave-lag'],
+	$options['throttle'], $options['force'] );
diff --git a/maintenance/tables.sql b/maintenance/tables.sql
index 135c28e27d..111e3975ce 100644
--- a/maintenance/tables.sql
+++ b/maintenance/tables.sql
@@ -486,6 +486,39 @@ CREATE TABLE /*$wgDBprefix*/categorylinks (
 ) /*$wgDBTableOptions*/;
+--
+-- Track all existing categories. Something is a category if 1) it has an en-
+-- try somewhere in categorylinks, or 2) it once did. Categories might not
+-- have corresponding pages, so they need to be tracked separately.
+-- +CREATE TABLE /*$wgDBprefix*/category ( + -- Primary key + cat_id int unsigned NOT NULL auto_increment, + + -- Name of the category, in the same form as page_title (with underscores). + -- If there is a category page corresponding to this category, by definition, + -- it has this name (in the Category namespace). + cat_title varchar(255) binary NOT NULL, + + -- The numbers of member pages (including categories and media), subcatego- + -- ries, and Image: namespace members, respectively. These are signed to + -- make underflow more obvious. We make the first number include the second + -- two for better sorting: subtracting for display is easy, adding for order- + -- ing is not. + cat_pages int signed NOT NULL default 0, + cat_subcats int signed NOT NULL default 0, + cat_files int signed NOT NULL default 0, + + -- Should the category be hidden from article views? + cat_hidden tinyint(1) unsigned NOT NULL default 0, + + PRIMARY KEY (cat_id), + UNIQUE KEY (cat_title), + + -- For Special:Mostlinkedcategories + KEY (cat_pages) +) /*$wgDBTableOptions*/; + -- -- Track links to external URLs -- diff --git a/maintenance/updaters.inc b/maintenance/updaters.inc index a1498e9145..dc88aa2bf6 100644 --- a/maintenance/updaters.inc +++ b/maintenance/updaters.inc @@ -133,6 +133,8 @@ $wgMysqlUpdates = array( array( 'add_field', 'ipblocks', 'ipb_by_text', 'patch-ipb_by_text.sql' ), array( 'add_table', 'page_props', 'patch-page_props.sql' ), array( 'add_table', 'updatelog', 'patch-updatelog.sql' ), + array( 'add_table', 'category', 'patch-category.sql' ), + array( 'do_category_population' ), ); @@ -1135,6 +1137,20 @@ function do_restrictions_update() { } } +function do_category_population() { + if( update_row_exists( 'populate category' ) ) { + echo "...category table already populated.\n"; + return; + } + require_once( 'populateCategory.inc' ); + echo "Populating category table, printing progress markers. ". +"For large databases, you\n". 
+"may want to hit Ctrl-C and do this manually with maintenance/\n". +"populateCategory.php.\n"; + populateCategory( '', 10, 0, true ); + echo "Done populating category table.\n"; +} + function pg_describe_table($table) {