This is a schema change. It's only a table creation, but the table must be created...
author: Aryeh Gregor <simetrical@users.mediawiki.org>
Tue, 18 Mar 2008 00:17:28 +0000 (00:17 +0000)
committer: Aryeh Gregor <simetrical@users.mediawiki.org>
Tue, 18 Mar 2008 00:17:28 +0000 (00:17 +0000)
* Adds Category and CategoryList classes to represent categories themselves.
* Adds a category table, giving each category a name, ID, and counts of all members, subcats only, and files.
* Adds a maintenance script to populate the category table efficiently.  This script is careful to wait for slaves and should be safe to run on a live database.  The maintenance script's includes file is called by update.php.
* Until the category table is populated, the patch handles weird category table rows gracefully.  It detects whether they're obviously impossible, and if so, it outputs appropriate messages.

12 files changed:
RELEASE-NOTES
includes/Article.php
includes/AutoLoader.php
includes/Category.php [new file with mode: 0644]
includes/CategoryPage.php
includes/LinksUpdate.php
languages/messages/MessagesEn.php
maintenance/archives/patch-category.sql [new file with mode: 0644]
maintenance/populateCategory.inc [new file with mode: 0644]
maintenance/populateCategory.php [new file with mode: 0644]
maintenance/tables.sql
maintenance/updaters.inc

index 5e22643..86cc26c 100644 (file)
@@ -46,6 +46,8 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
   link on diffs
 * Magic word formatnum can now take raw suffix to undo formatting
 * Add updatelog table to reliably permit updates that don't change the schema
+* Add category table to allow better tracking of category membership counts
+** (bug 1212) Give correct membership counts on the pages of large categories
 
 === Bug fixes in 1.13 ===
 
index 805a5cd..33e21ec 100644 (file)
@@ -2259,12 +2259,20 @@ class Article {
                # Delete restrictions for it
                $dbw->delete( 'page_restrictions', array ( 'pr_page' => $id ), __METHOD__ );
 
+               # Fix category table counts
+               $cats = array();
+               $res = $dbw->select( 'categorylinks', 'cl_to',
+                       array( 'cl_from' => $id ), __METHOD__ );
+               foreach( $res as $row ) {
+                       $cats []= $row->cl_to;
+               }
+               $this->updateCategoryCounts( array(), $cats, $dbw );
+
                # Now that it's safely backed up, delete it
                $dbw->delete( 'page', array( 'page_id' => $id ), __METHOD__);
 
                # If using cascading deletes, we can skip some explicit deletes
                if ( !$dbw->cascadingDeletes() ) {
-
                        $dbw->delete( 'revision', array( 'rev_page' => $id ), __METHOD__ );
 
                        if ($wgUseTrackbacks)
@@ -3340,4 +3348,55 @@ class Article {
                $wgOut->addParserOutput( $parserOutput );
        }
 
+       /**
+        * Update all the appropriate counts in the category table, given that
+        * we've added the categories $added and deleted the categories $deleted.
+        *
+        * cat_pages is adjusted for every affected category; cat_subcats or
+        * cat_files is adjusted as well when this page lives in NS_CATEGORY or
+        * NS_IMAGE respectively.
+        *
+        * @param $added array   The names of categories that were added
+        * @param $deleted array The names of categories that were deleted
+        * @param $dbw Database  Optional database connection to use
+        * @return null
+        */
+       public function updateCategoryCounts( $added, $deleted, $dbw = null ) {
+               # Nothing to do.  Bail out before touching the database at all.
+               if( !$added && !$deleted ) {
+                       return;
+               }
+
+               $ns = $this->mTitle->getNamespace();
+               if( !$dbw ) {
+                       $dbw = wfGetDB( DB_MASTER );
+               }
+
+               # First make sure the rows exist.  If one of the "deleted" ones didn't
+               # exist, we might legitimately not create it, but it's simpler to just
+               # create it and then give it a negative value, since the value is bogus
+               # anyway.
+               #
+               # Sometimes I wish we had INSERT ... ON DUPLICATE KEY UPDATE.
+               $insertCats = array_merge( $added, $deleted );
+               $insertRows = array();
+               foreach( $insertCats as $cat ) {
+                       $insertRows []= array( 'cat_title' => $cat );
+               }
+               $dbw->insert( 'category', $insertRows, __METHOD__, 'IGNORE' );
+
+               $addFields    = array( 'cat_pages = cat_pages + 1' );
+               $removeFields = array( 'cat_pages = cat_pages - 1' );
+               if( $ns == NS_CATEGORY ) {
+                       $addFields    []= 'cat_subcats = cat_subcats + 1';
+                       $removeFields []= 'cat_subcats = cat_subcats - 1';
+               } elseif( $ns == NS_IMAGE ) {
+                       $addFields    []= 'cat_files = cat_files + 1';
+                       $removeFields []= 'cat_files = cat_files - 1';
+               }
+
+               # Only run an UPDATE for a non-empty list: the condition is built
+               # from the array of names, and an empty array would leave the
+               # query with an empty "cat_title IN ()" list.
+               if( $added ) {
+                       $dbw->update(
+                               'category',
+                               $addFields,
+                               array( 'cat_title' => $added ),
+                               __METHOD__
+                       );
+               }
+               if( $deleted ) {
+                       $dbw->update(
+                               'category',
+                               $removeFields,
+                               array( 'cat_title' => $deleted ),
+                               __METHOD__
+                       );
+               }
+       }
 }
index ca583d0..c702586 100644 (file)
@@ -25,7 +25,9 @@ function __autoload($className) {
                'BagOStuff' => 'includes/BagOStuff.php',
                'Block' => 'includes/Block.php',
                'BrokenRedirectsPage' => 'includes/SpecialBrokenRedirects.php',
+               'Category' => 'includes/Category.php',
                'Categoryfinder' => 'includes/Categoryfinder.php',
+               'CategoryList' => 'includes/Category.php',
                'CategoryPage' => 'includes/CategoryPage.php',
                'CategoryViewer' => 'includes/CategoryPage.php',
                'ChangesList' => 'includes/ChangesList.php',
diff --git a/includes/Category.php b/includes/Category.php
new file mode 100644 (file)
index 0000000..98c65a1
--- /dev/null
@@ -0,0 +1,305 @@
+<?php
+/**
+ * Two classes, Category and CategoryList, to deal with categories.  To reduce
+ * code duplication, most of the logic is implemented for lists of categories,
+ * and then single categories are a special case.  We use a separate class for
+ * CategoryList so as to discourage stupid slow memory-hogging stuff like manu-
+ * ally iterating through arrays of Titles and Articles, which we do way too
+ * much, when a smarter class can do stuff all in one query.
+ *
+ * Category(List) objects are immutable, strictly speaking.  If you call me-
+ * thods that change the database, like to refresh link counts, the objects
+ * will be appropriately reinitialized.  Member variables are lazy-initialized.
+ *
+ * TODO: Move some stuff from CategoryPage.php to here, and use that.
+ *
+ * @author Simetrical
+ */
+
+/**
+ * Shared storage and lazy database loading for Category and CategoryList.
+ * Exactly one of $mNames / $mIDs is set by a factory function; initialize()
+ * then fills in the remaining members from the category table.
+ */
+abstract class CategoryListBase {
+       # FIXME: Is storing all member variables as simple arrays a good idea?
+       # Should we use some kind of associative array instead?
+       /** Names of all member categories, normalized to DB-key form */
+       protected $mNames = null;
+       /** IDs of all member categories */
+       protected $mIDs = null;
+       /**
+        * Counts of membership (cat_pages, cat_subcats, cat_files) for all member
+        * categories
+        */
+       protected $mPages = null, $mSubcats = null, $mFiles = null;
+
+       protected function __construct() {}
+
+       /**
+        * Store the given names in DB-key form, dropping any that do not make
+        * a valid title.  See CategoryList::newFromNames for details.
+        *
+        * @param array $names Category names, not necessarily normalized
+        */
+       protected function setNames( $names ) {
+               if( !is_array( $names ) ) {
+                       throw new MWException( __METHOD__.' passed non-array' );
+               }
+               # array_diff() preserves keys, so reindex to keep the list
+               # contiguous (callers read $this->mNames[0]).
+               $this->mNames = array_values( array_diff(
+                       array_map(
+                               array( 'CategoryListBase', 'setNamesCallback' ),
+                               $names
+                       ),
+                       array( false )
+               ) );
+       }
+
+       /**
+        * @param string $name Name of a putative category
+        * @return mixed Normalized name, or false if the name was invalid.
+        */
+       private static function setNamesCallback( $name ) {
+               $title = Title::newFromText( $name );
+               if( !is_object( $title ) ) {
+                       return false;
+               }
+               return $title->getDBKey();
+       }
+
+       /**
+        * Set up all member variables using a database query.
+        *
+        * @return bool True on success (including when everything was already
+        *   loaded), false if no matching row was found.
+        */
+       protected function initialize() {
+               if( $this->mNames === null && $this->mIDs === null ) {
+                       throw new MWException( __METHOD__.' has both names and IDs null' );
+               }
+               if( $this->mIDs === null ) {
+                       $field = 'cat_title';
+                       $values = $this->mNames;
+               } elseif( $this->mNames === null ) {
+                       $field = 'cat_id';
+                       $values = $this->mIDs;
+               } else {
+                       # Already initialized
+                       return true;
+               }
+               if( !$values ) {
+                       # An empty list cannot match any row, and would only yield an
+                       # empty "IN ()" condition.  Nothing to initialize.
+                       return false;
+               }
+
+               $dbr = wfGetDB( DB_SLAVE );
+               $res = $dbr->select(
+                       'category',
+                       array( 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats',
+                               'cat_files' ),
+                       array( $field => $values ),
+                       __METHOD__
+               );
+
+               # Collect into locals first so the members are only overwritten
+               # when at least one row came back.
+               $ids = $names = $pages = $subcats = $files = array();
+               while( $row = $res->fetchRow() ) {
+                       $ids     []= $row['cat_id'];
+                       $names   []= $row['cat_title'];
+                       $pages   []= $row['cat_pages'];
+                       $subcats []= $row['cat_subcats'];
+                       $files   []= $row['cat_files'];
+               }
+               $res->free();
+
+               if( !$ids ) {
+                       # Okay, there were no contents.  Nothing to initialize.
+                       return false;
+               }
+
+               $this->mIDs     = $ids;
+               $this->mNames   = $names;
+               $this->mPages   = $pages;
+               $this->mSubcats = $subcats;
+               $this->mFiles   = $files;
+               return true;
+       }
+}
+
+/**
+ * A list of categories whose IDs and member counts are loaded from the
+ * category table in a single query.
+ *
+ * @todo make iterable.
+ */
+class CategoryList extends CategoryListBase {
+       /**
+        * Factory function.  Any provided elements that don't correspond to a cat-
+        * egory that actually exists will be silently dropped.  FIXME: Is this
+        * sane error-handling?
+        *
+        * @param array $names An array of category names.  They need not be norma-
+        *   lized, with spaces replaced by underscores.
+        * @return CategoryList
+        */
+       public static function newFromNames( $names ) {
+               $cat = new self();
+               $cat->setNames( $names );
+               return $cat;
+       }
+
+       /**
+        * Factory function.  Any provided elements that don't correspond to a cat-
+        * egory that actually exists will be silently dropped.  FIXME: Is this
+        * sane error-handling?
+        *
+        * @param array $ids An array of category ids
+        * @return CategoryList
+        */
+       public static function newFromIDs( $ids ) {
+               if( !is_array( $ids ) ) {
+                       throw new MWException( __METHOD__.' passed non-array' );
+               }
+               $cat = new self();
+               # Property names are case-sensitive: this must be mIDs, otherwise
+               # initialize() sees both names and IDs as null and throws.
+               $cat->mIDs = $ids;
+               return $cat;
+       }
+
+       /** @return array Simple array of DB key names */
+       public function getNames() {
+               $this->initialize();
+               return is_array( $this->mNames ) ? $this->mNames : array();
+       }
+       /**
+        * FIXME: Is this a good return type?
+        *
+        * @return array Associative array of DB key name => ID
+        */
+       public function getIDs() {
+               $this->initialize();
+               if( !$this->mNames || !$this->mIDs ) {
+                       # Nothing was loaded from the database
+                       return array();
+               }
+               # Pair each name with its own ID.
+               return array_combine( $this->mNames, $this->mIDs );
+       }
+       /**
+        * FIXME: Is this a good return type?
+        *
+        * @return array Associative array of DB key name => array(pages, subcats,
+        *   files)
+        */
+       public function getCounts() {
+               $this->initialize();
+               $ret = array();
+               if( !$this->mNames || $this->mPages === null ) {
+                       # Nothing was loaded from the database
+                       return $ret;
+               }
+               foreach( array_keys( $this->mNames ) as $i ) {
+                       $ret[$this->mNames[$i]] = array(
+                               $this->mPages[$i],
+                               $this->mSubcats[$i],
+                               $this->mFiles[$i]
+                       );
+               }
+               return $ret;
+       }
+}
+
+/**
+ * A single category: a CategoryListBase that holds exactly one name/ID.
+ */
+class Category extends CategoryListBase {
+       /**
+        * Factory function.
+        *
+        * @param string $name A category name (no "Category:" prefix).  It need
+        *   not be normalized, with spaces replaced by underscores.
+        * @return mixed Category, or false on a totally invalid name
+        */
+       public static function newFromName( $name ) {
+               $cat = new self();
+               $cat->setNames( array( $name ) );
+               if( count( $cat->mNames ) !== 1 ) {
+                       return false;
+               }
+               return $cat;
+       }
+
+       /**
+        * Factory function.
+        *
+        * @param int $id A category id
+        * @return Category
+        */
+       public static function newFromID( $id ) {
+               $cat = new self();
+               $cat->mIDs = array( $id );
+               return $cat;
+       }
+
+       /**
+        * Factory function.  Despite the plural name this takes a single ID;
+        * kept for existing callers, prefer newFromID().
+        *
+        * @param int $id A category id
+        * @return Category
+        */
+       public static function newFromIDs( $id ) {
+               return self::newFromID( $id );
+       }
+
+       /** @return mixed DB key name, or false on failure */
+       public function getName() { return $this->getX( 'mNames' ); }
+       /** @return mixed Category ID, or false on failure */
+       public function getID() { return $this->getX( 'mIDs' ); }
+       /** @return mixed Total number of member pages, or false on failure */
+       public function getPageCount() { return $this->getX( 'mPages' ); }
+       /** @return mixed Number of subcategories, or false on failure */
+       public function getSubcatCount() { return $this->getX( 'mSubcats' ); }
+       /** @return mixed Number of member files, or false on failure */
+       public function getFileCount() { return $this->getX( 'mFiles' ); }
+       /**
+        * This is not implemented in the base class, because arrays of Titles are
+        * evil.
+        *
+        * @return mixed The Title for this category, or false on failure.
+        */
+       public function getTitle() {
+               if( !$this->initialize() ) {
+                       return false;
+               }
+               # Build the title from the namespace constant rather than from a
+               # hard-coded "Category:" text prefix.
+               $title = Title::makeTitleSafe( NS_CATEGORY, $this->mNames[0] );
+               return is_object( $title ) ? $title : false;
+       }
+
+       /**
+        * Generic accessor: first element of the named member array, or false
+        * if the category could not be loaded.
+        */
+       private function getX( $key ) {
+               if( !$this->initialize() ) {
+                       return false;
+               }
+               return $this->{$key}[0];
+       }
+
+       /**
+        * Override the parent class so that we can return false if things muck
+        * up, i.e., the name/ID we got was invalid.  Currently CategoryList si-
+        * lently eats errors so as not to kill the whole array for one bad name.
+        *
+        * @return bool True on success, false on failure.
+        */
+       protected function initialize() {
+               parent::initialize();
+               # Success means we ended up with exactly one name and one ID.
+               return is_array( $this->mNames ) && count( $this->mNames ) == 1
+                       && is_array( $this->mIDs ) && count( $this->mIDs ) == 1;
+       }
+
+       /**
+        * Refresh the counts for this category.
+        *
+        * FIXME: If there were some way to do this in MySQL 4 without an UPDATE
+        * for every row, it would be nice to move this to the parent class.
+        *
+        * @return bool True on success, false on failure
+        */
+       public function refreshCounts() {
+               if( wfReadOnly() ) {
+                       return false;
+               }
+
+               # Note, we must use names for this, since categorylinks does.
+               # Resolve the name *before* opening the transaction, so a failed
+               # lookup cannot leave a transaction dangling on the master.
+               $haveName = ( $this->mNames !== null );
+               if( !$haveName && !$this->initialize() ) {
+                       return false;
+               }
+
+               $dbw = wfGetDB( DB_MASTER );
+               $dbw->begin();
+               if( $haveName ) {
+                       # Let's be sure that the row exists in the table.  We don't need to
+                       # do this if we got the row from the table in initialization!
+                       $dbw->insert(
+                               'category',
+                               array( 'cat_title' => $this->mNames[0] ),
+                               __METHOD__,
+                               'IGNORE'
+                       );
+               }
+
+               # Count the actual members, split out by namespace.
+               $result = $dbw->selectRow(
+                       array( 'categorylinks', 'page' ),
+                       array( 'COUNT(*) AS pages',
+                               'COUNT(IF(page_namespace='.NS_CATEGORY.',1,NULL)) AS subcats',
+                               'COUNT(IF(page_namespace='.NS_IMAGE.',1,NULL)) AS files'
+                       ),
+                       array( 'cl_to' => $this->mNames[0], 'page_id = cl_from' ),
+                       __METHOD__,
+                       'LOCK IN SHARE MODE'
+               );
+               $ret = $dbw->update(
+                       'category',
+                       array(
+                               'cat_pages' => $result->pages,
+                               'cat_subcats' => $result->subcats,
+                               'cat_files' => $result->files
+                       ),
+                       array( 'cat_title' => $this->mNames[0] ),
+                       __METHOD__
+               );
+               $dbw->commit();
+
+               # Now we should update our local counts.
+               $this->mPages   = array( $result->pages );
+               $this->mSubcats = array( $result->subcats );
+               $this->mFiles   = array( $result->files );
+
+               return $ret;
+       }
+}
index 6fbcd3c..315d435 100644 (file)
@@ -70,6 +70,8 @@ class CategoryViewer {
                $children, $children_start_char,
                $showGallery, $gallery,
                $skin;
+       /** Category object for this page */
+       private $cat;
 
        function __construct( $title, $from = '', $until = '' ) {
                global $wgCategoryPagingLimit;
@@ -77,6 +79,7 @@ class CategoryViewer {
                $this->from = $from;
                $this->until = $until;
                $this->limit = $wgCategoryPagingLimit;
+               $this->cat = Category::newFromName( $title->getDBKey() );
        }
        
        /**
@@ -261,12 +264,14 @@ class CategoryViewer {
        function getSubcategorySection() {
                # Don't show subcategories section if there are none.
                $r = '';
-               $c = count( $this->children );
-               if( $c > 0 ) {
+               $rescnt = count( $this->children );
+               $dbcnt = $this->cat->getSubcatCount();
+               $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'subcat' );
+               if( $rescnt > 0 ) {
                        # Showing subcategories
                        $r .= "<div id=\"mw-subcategories\">\n";
                        $r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";
-                       $r .= wfMsgExt( 'subcategorycount', array( 'parse' ), $c );
+                       $r .= $countmsg;
                        $r .= $this->formatList( $this->children, $this->children_start_char );
                        $r .= "\n</div>";
                }
@@ -277,11 +282,20 @@ class CategoryViewer {
                $ti = htmlspecialchars( $this->title->getText() );
                # Don't show articles section if there are none.
                $r = '';
-               $c = count( $this->articles );
-               if( $c > 0 ) {
+
+               # FIXME, here and in the other two sections: we don't need to bother
+               # with this rigamarole if the entire category contents fit on one page
+               # and have already been retrieved.  We can just use $rescnt in that
+               # case and save a query and some logic.
+               $dbcnt = $this->cat->getPageCount() - $this->cat->getSubcatCount()
+                       - $this->cat->getFileCount();
+               $rescnt = count( $this->articles );
+               $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'article' );
+
+               if( $rescnt > 0 ) {
                        $r = "<div id=\"mw-pages\">\n";
                        $r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n";
-                       $r .= wfMsgExt( 'categoryarticlecount', array( 'parse' ), $c );
+                       $r .= $countmsg;
                        $r .= $this->formatList( $this->articles, $this->articles_start_char );
                        $r .= "\n</div>";
                }
@@ -290,10 +304,13 @@ class CategoryViewer {
 
        function getImageSection() {
                if( $this->showGallery && ! $this->gallery->isEmpty() ) {
+                       $dbcnt = $this->cat->getFileCount();
+                       $rescnt = $this->gallery->count();
+                       $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'file' );
+
                        return "<div id=\"mw-category-media\">\n" .
                        '<h2>' . wfMsg( 'category-media-header', htmlspecialchars($this->title->getText()) ) . "</h2>\n" .
-                       wfMsgExt( 'category-media-count', array( 'parse' ), $this->gallery->count() ) .
-                       $this->gallery->toHTML() . "\n</div>";
+                       $countmsg . $this->gallery->toHTML() . "\n</div>";
                } else {
                        return '';
                }
@@ -440,6 +457,47 @@ class CategoryViewer {
 
                return "($prevLink) ($nextLink)";
        }
+
+       /**
+        * What to do if the category table conflicts with the number of results
+        * returned?  This function says what.  It works the same whether the
+        * things being counted are articles, subcategories, or files.
+        *
+        * Note for grepping: uses the messages category-article-count,
+        * category-article-count-limited, category-subcat-count,
+        * category-subcat-count-limited, category-file-count,
+        * category-file-count-limited.
+        *
+        * @param int $rescnt The number of items returned by our database query.
+        * @param int $dbcnt The number of items according to the category table.
+        * @param string $type 'subcat', 'article', or 'file'
+        * @return string A message giving the number of items, to output to HTML.
+        */
+       private function getCountMessage( $rescnt, $dbcnt, $type ) {
+               # There are three cases:
+               #   1) The category table figure seems sane.  It might be wrong, but
+               #      we can't do anything about it if we don't recalculate it on ev-
+               #      ery category view.
+               #   2) The category table figure isn't sane, like it's smaller than the
+               #      number of actual results, *but* the number of results is less
+               #      than $this->limit and there's no offset.  In this case we still
+               #      know the right figure.
+               #   3) We have no idea.
+
+               # Only consult the gallery when galleries are shown, mirroring the
+               # check getImageSection() makes before touching $this->gallery.
+               $gallerycnt = $this->showGallery ? $this->gallery->count() : 0;
+               $totalrescnt = count( $this->articles ) + count( $this->children ) +
+                       $gallerycnt;
+
+               # True when this view may be only part of the category: either the
+               # page is full, or we were given a paging offset.
+               $partial = $totalrescnt == $this->limit || $this->from || $this->until;
+
+               if( $dbcnt == $rescnt || ( $partial && $dbcnt > $rescnt ) ) {
+                       # Case 1: seems sane.
+                       $totalcnt = $dbcnt;
+               } elseif( $totalrescnt < $this->limit && !$this->from && !$this->until ) {
+                       # Case 2: not sane, but salvageable.
+                       $totalcnt = $rescnt;
+               } else {
+                       # Case 3: hopeless.  Don't give a total count at all.
+                       return wfMsgExt("category-$type-count-limited", 'parse', $rescnt);
+               }
+               return wfMsgExt( "category-$type-count", 'parse', $rescnt, $totalcnt );
+       }
 }
 
 
index 28dbf5b..efda9c4 100644 (file)
@@ -124,8 +124,11 @@ class LinksUpdate {
                        $this->getCategoryInsertions( $existing ) );
 
                # Invalidate all categories which were added, deleted or changed (set symmetric difference)
-               $categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing );
+               $categoryInserts = array_diff_assoc( $this->mCategories, $existing );
+               $categoryDeletes = array_diff_assoc( $existing, $this->mCategories );
+               $categoryUpdates = $categoryInserts + $categoryDeletes;
                $this->invalidateCategories( $categoryUpdates );
+               $this->updateCategoryCounts( $categoryInserts, $categoryDeletes );
 
                # Page properties
                $existing = $this->getExistingProperties();
@@ -155,7 +158,9 @@ class LinksUpdate {
 
                # Refresh category pages and image description pages
                $existing = $this->getExistingCategories();
-               $categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing );
+               $categoryInserts = array_diff_assoc( $this->mCategories, $existing );
+               $categoryDeletes = array_diff_assoc( $existing, $this->mCategories );
+               $categoryUpdates = $categoryInserts + $categoryDeletes;
                $existing = $this->getExistingImages();
                $imageUpdates = array_diff_key( $existing, $this->mImages ) + array_diff_key( $this->mImages, $existing );
 
@@ -167,8 +172,10 @@ class LinksUpdate {
                $this->dumbTableUpdate( 'langlinks',     $this->getInterlangInsertions(),'ll_from' );
                $this->dumbTableUpdate( 'page_props',    $this->getPropertyInsertions(), 'pp_page' );
 
-               # Update the cache of all the category pages and image description pages which were changed
+               # Update the cache of all the category pages and image description
+               # pages which were changed, and fix the category table count
                $this->invalidateCategories( $categoryUpdates );
+               $this->updateCategoryCounts( $categoryInserts, $categoryDeletes );
                $this->invalidateImageDescriptions( $imageUpdates );
 
                # Refresh links of all pages including this page
@@ -261,6 +268,18 @@ class LinksUpdate {
                $this->invalidatePages( NS_CATEGORY, array_keys( $cats ) );
        }
 
+       /**
+        * Adjust the category table counts for this page's category changes by
+        * delegating to Article::updateCategoryCounts() on our own connection.
+        * @param $added associative array of category name => sort key
+        * @param $deleted associative array of category name => sort key
+        */
+       function updateCategoryCounts( $added, $deleted ) {
+               # Only the category names (the array keys) matter for counting.
+               $addedNames = array_keys( $added );
+               $deletedNames = array_keys( $deleted );
+
+               $article = new Article( $this->mTitle );
+               $article->updateCategoryCounts( $addedNames, $deletedNames, $this->mDb );
+       }
+
        function invalidateImageDescriptions( $images ) {
                $this->invalidatePages( NS_IMAGE, array_keys( $images ) );
        }
@@ -268,9 +287,9 @@ class LinksUpdate {
        function dumbTableUpdate( $table, $insertions, $fromField ) {
                $this->mDb->delete( $table, array( $fromField => $this->mId ), __METHOD__ );
                if ( count( $insertions ) ) {
-                       # The link array was constructed without FOR UPDATE, so there may be collisions
-                       # This may cause minor link table inconsistencies, which is better than
-                       # crippling the site with lock contention.
+                       # The link array was constructed without FOR UPDATE, so there may
+                       # be collisions.  This may cause minor link table inconsistencies,
+                       # which is better than crippling the site with lock contention.
                        $this->mDb->insert( $table, $insertions, __METHOD__, array( 'IGNORE' ) );
                }
        }
index b651546..4dae549 100644 (file)
@@ -2416,16 +2416,20 @@ All transwiki import actions are logged at the [[Special:Log/import|import log]]
 'nocredits'        => 'There is no credits info available for this page.',
 
 # Spam protection
-'spamprotectiontitle'    => 'Spam protection filter',
-'spamprotectiontext'     => 'The page you wanted to save was blocked by the spam filter. This is probably caused by a link to an external site.',
-'spamprotectionmatch'    => 'The following text is what triggered our spam filter: $1',
-'subcategorycount'       => 'There {{PLURAL:$1|is one subcategory|are $1 subcategories}} to this category.',
-'categoryarticlecount'   => 'There {{PLURAL:$1|is one page|are $1 pages}} in this category.',
-'category-media-count'   => 'There {{PLURAL:$1|is one file|are $1 files}} in this category.',
-'listingcontinuesabbrev' => 'cont.',
-'spambot_username'       => 'MediaWiki spam cleanup',
-'spam_reverting'         => 'Reverting to last version not containing links to $1',
-'spam_blanking'          => 'All revisions contained links to $1, blanking',
+'spamprotectiontitle'            => 'Spam protection filter',
+'spamprotectiontext'             => 'The page you wanted to save was blocked by the spam filter. This is probably caused by a link to an external site.',
+'spamprotectionmatch'            => 'The following text is what triggered our spam filter: $1',
+'subcategorycount'               => 'There {{PLURAL:$1|is one subcategory|are $1 subcategories}} to this category.',
+'category-subcat-count'          => '{{PLURAL:$2|This category has only the following subcategory.|This category has the following {{PLURAL:$1|subcategory|$1 subcategories}}, out of $2 total.}}',
+'category-subcat-count-limited'  => 'This category has the following {{PLURAL:$1|subcategory|$1 subcategories}}.',
+'category-article-count'         => '{{PLURAL:$2|This category contains only the following page.|The following {{PLURAL:$1|page is|$1 pages are}} in this category, out of $2 total.}}',
+'category-article-count-limited' => 'The following {{PLURAL:$1|page is|$1 pages are}} in the current category.',
+'category-file-count'            => '{{PLURAL:$2|This category contains only the following file.|The following {{PLURAL:$1|file is|$1 files are}} in this category, out of $2 total.}}',
+'category-file-count-limited'    => 'The following {{PLURAL:$1|file is|$1 files are}} in the current category.',
+'listingcontinuesabbrev'         => 'cont.',
+'spambot_username'               => 'MediaWiki spam cleanup',
+'spam_reverting'                 => 'Reverting to last version not containing links to $1',
+'spam_blanking'                  => 'All revisions contained links to $1, blanking',
 
 # Info page
 'infosubtitle'   => 'Information for page',
diff --git a/maintenance/archives/patch-category.sql b/maintenance/archives/patch-category.sql
new file mode 100644 (file)
index 0000000..416500c
--- /dev/null
@@ -0,0 +1,17 @@
+CREATE TABLE /*$wgDBprefix*/category ( -- One row per category, holding its member counts
+  cat_id int unsigned NOT NULL auto_increment, -- Primary key
+
+  cat_title varchar(255) binary NOT NULL, -- Category name; unique (see key below)
+
+  cat_pages int signed NOT NULL default 0, -- Count of all members; signed, may go negative until populated
+  cat_subcats int signed NOT NULL default 0, -- Count of members that are subcategories
+  cat_files int signed NOT NULL default 0, -- Count of members that are files
+
+  cat_hidden tinyint(1) unsigned NOT NULL default 0, -- NOTE(review): presumably a hidden-category flag; not used by the accompanying PHP -- confirm
+  
+  PRIMARY KEY (cat_id),
+  UNIQUE KEY (cat_title), -- Lets INSERT IGNORE on cat_title skip rows that already exist
+
+  KEY (cat_pages) -- Index on the total member count
+) /*$wgDBTableOptions*/;
+
diff --git a/maintenance/populateCategory.inc b/maintenance/populateCategory.inc
new file mode 100644 (file)
index 0000000..f17e5e8
--- /dev/null
@@ -0,0 +1,84 @@
+<?php
+/**
+ * @addtogroup Maintenance
+ * @author Simetrical
+ */
+
+define( 'REPORTING_INTERVAL', 1000 );
+
+/**
+ * Fill the category table by refreshing the counts of every category that
+ * appears in categorylinks, walking cl_to in ascending order.
+ *
+ * @param $begin string Only handle categories sorting after this name; '' means all
+ * @param $maxlag int Passed to wfWaitForSlaves() every REPORTING_INTERVAL categories
+ * @param $throttle int Milliseconds to sleep after each category
+ * @param $force bool Run even if updatelog already has the 'populate category' row
+ * @return bool False only when the completion row could not be inserted
+ * @throws MWException If Category::newFromName() does not return an object
+ */
+function populateCategory( $begin, $maxlag, $throttle, $force ) {
+       $dbw = wfGetDB( DB_MASTER );
+
+       # Unless forced, do nothing when a previous run has left its marker row.
+       if( !$force ) {
+               $done = $dbw->selectRow( 'updatelog', '1',
+                       array( 'ul_key' => 'populate category' ), __FUNCTION__ );
+               if( $done ) {
+                       echo "Category table already populated.  Use php ".
+                       "maintenance/populateCategory.php\n--force from the command line ".
+                       "to override.\n";
+                       return true;
+               }
+       }
+
+       $maxlag = intval( $maxlag );
+       $throttle = intval( $throttle );
+       # Resume point: only categories strictly after $begin, or all if it is ''.
+       if( $begin !== '' ) {
+               $where = 'cl_to > '.$dbw->addQuotes( $begin );
+       } else {
+               $where = null;
+       }
+       $i = 0;
+
+       while( true ) {
+               # Find which category to update
+               $row = $dbw->selectRow( 'categorylinks', 'cl_to', $where,
+                       __FUNCTION__, array( 'ORDER BY' => 'cl_to' ) );
+               if( !$row ) {
+                       # Done, hopefully.
+                       break;
+               }
+               $name = $row->cl_to;
+               # Next iteration continues with the first name after this one.
+               $where = 'cl_to > '.$dbw->addQuotes( $name );
+
+               # Use the row to update the category count
+               $cat = Category::newFromName( $name );
+               if( !is_object( $cat ) ) {
+                       var_dump( $cat );
+                       throw new MWException( "The category named $name is not valid?!" );
+               }
+               $cat->refreshCounts();
+
+               # Report progress and let the slaves catch up at regular intervals.
+               ++$i;
+               if( !($i % REPORTING_INTERVAL) ) {
+                       echo "$name\n";
+                       wfWaitForSlaves( $maxlag );
+               }
+               usleep( $throttle*1000 );
+       }
+
+       # Record completion so later runs can skip this work.
+       $logged = $dbw->insert( 'updatelog',
+               array( 'ul_key' => 'populate category' ), __FUNCTION__, 'IGNORE' );
+       if( $logged ) {
+               echo "Category population complete.\n";
+               return true;
+       } else {
+               echo "Could not insert category population row.\n";
+               return false;
+       }
+}
diff --git a/maintenance/populateCategory.php b/maintenance/populateCategory.php
new file mode 100644 (file)
index 0000000..2b007bd
--- /dev/null
@@ -0,0 +1,51 @@
+<?php
+/**
+ * @addtogroup Maintenance
+ * @author Simetrical
+ */
+
+$optionsWithArgs = array( 'begin', 'max-slave-lag', 'throttle' );
+
+require_once "commandLine.inc";
+require_once "populateCategory.inc";
+
+if( isset( $options['help'] ) ) {
+       echo <<<TEXT
+This script will populate the category table, added in MediaWiki 1.13.  It will
+print out progress indicators every 1000 categories it adds to the table.  The
+script is perfectly safe to run on large, live wikis, and running it multiple
+times is harmless.  You may want to use the throttling options if it's causing
+too much load; they will not affect correctness.
+
+If the script is stopped and later resumed, you can use the --begin option with
+the last printed progress indicator to pick up where you left off.  This is
+safe, because any newly-added categories before this cutoff will have been
+added after the software update and so will be populated anyway.
+
+When the script has finished, it will make a note of this in the database, and
+will not run again without the --force option.
+
+Usage:
+    php populateCategory.php [--max-slave-lag <seconds>] [--begin <name>]
+[--throttle <milliseconds>] [--force]
+
+    --begin: Only do categories whose names are alphabetically after the pro-
+vided name.  Default: empty (start from beginning).
+    --max-slave-lag: If slave lag exceeds this many seconds, wait until it
+drops before continuing.  Default: 10.
+    --throttle: Wait this many milliseconds after each category.  Default: 0.
+    --force: Run regardless of whether the database says it's been run already.
+TEXT;
+       exit( 0 );
+}
+
+# Keys must match the names in $optionsWithArgs, or user-supplied values are ignored.
+$defaults = array(
+       'begin' => '',
+       'max-slave-lag' => 10,
+       'throttle' => 0,
+       'force' => false
+);
+$options = array_merge( $defaults, $options );
+populateCategory( $options['begin'], $options['max-slave-lag'],
+       $options['throttle'], $options['force'] );
index 135c28e..111e397 100644 (file)
@@ -486,6 +486,39 @@ CREATE TABLE /*$wgDBprefix*/categorylinks (
 
 ) /*$wgDBTableOptions*/;
 
+--
+-- Track all existing categories.  Something is a category if 1) it has an
+-- entry somewhere in categorylinks, or 2) it once did.  Categories might not
+-- have corresponding pages, so they need to be tracked separately.
+--
+CREATE TABLE /*$wgDBprefix*/category (
+  -- Primary key
+  cat_id int unsigned NOT NULL auto_increment,
+
+  -- Name of the category, in the same form as page_title (with underscores).
+  -- If there is a category page corresponding to this category, by definition,
+  -- it has this name (in the Category namespace).
+  cat_title varchar(255) binary NOT NULL,
+
+  -- The numbers of member pages (including categories and media),
+  -- subcategories, and Image: namespace members, respectively.  These are
+  -- signed to make underflow more obvious.  We make the first number include
+  -- the second two for better sorting: subtracting for display is easy, adding
+  -- for ordering is not.
+  cat_pages int signed NOT NULL default 0,
+  cat_subcats int signed NOT NULL default 0,
+  cat_files int signed NOT NULL default 0,
+
+  -- Should the category be hidden from article views?
+  cat_hidden tinyint(1) unsigned NOT NULL default 0,
+  
+  PRIMARY KEY (cat_id),
+  UNIQUE KEY (cat_title),
+
+  -- For Special:Mostlinkedcategories
+  KEY (cat_pages)
+) /*$wgDBTableOptions*/;
+
 --
 -- Track links to external URLs
 --
index a1498e9..dc88aa2 100644 (file)
@@ -133,6 +133,8 @@ $wgMysqlUpdates = array(
        array( 'add_field', 'ipblocks',      'ipb_by_text',      'patch-ipb_by_text.sql' ),
        array( 'add_table', 'page_props',                        'patch-page_props.sql' ),
        array( 'add_table', 'updatelog',                         'patch-updatelog.sql' ),
+       array( 'add_table', 'category',                          'patch-category.sql' ),
+       array( 'do_category_population' ),
 );
 
 
@@ -1135,6 +1137,20 @@ function do_restrictions_update() {
        }
 }
 
+function do_category_population() {
+       # Populate once; skipped when update_row_exists() reports the 'populate category' marker.
+       if( !update_row_exists( 'populate category' ) ) {
+               require_once( 'populateCategory.inc' );
+               echo "Populating category table, printing progress markers.  " .
+                       "For large databases, you\nmay want to hit Ctrl-C and do this " .
+                       "manually with maintenance/\npopulateCategory.php.\n";
+               populateCategory( '', 10, 0, true ); # begin='', maxlag=10, throttle=0, force=true
+               echo "Done populating category table.\n";
+               return;
+       }
+       echo "...category table already populated.\n";
+}
+
 function
 pg_describe_table($table)
 {