From 421948c2380fc44d35fcb08fd60174cf98adc276 Mon Sep 17 00:00:00 2001 From: Alexandre Emsenhuber Date: Sat, 14 May 2011 17:11:32 +0000 Subject: [PATCH] Rewrote the article counting code and related: * (bug 26033, bug 24754) Added $wgArticleCountMethod to have a more flexible way to define which method to use to define if a page is an article or not and deprecated $wgUseCommaCount. There is now a new 'any' method to count any article that is in a content namespace and not a redirect. * (bug 11868) If using links to count articles, Article::isCountable() will now use the ParserOutput to check if there's a link instead of checking for the "[[" string. Changed Article::isCountable() to take a stdObject or false for the first parameters. If false is passed, the result will be based on the current article's state (i.e. database). The only call outside of the Article class is in DeleteAction (including extensions). * Removed this horror of Article::$mGoodAdjustment and Article::$mTotalAdjustment, replaced by the new $created parameter on Article::editUpdates(); simplified Article::createUpdates() * Updated Import.php to take advantage of the new parameter and make a single call to Article::editUpdates() --- RELEASE-NOTES-1.19 | 6 +++ includes/Article.php | 83 ++++++++++++++++++++----------- includes/DefaultSettings.php | 32 +++++++++--- includes/Import.php | 26 +++++----- includes/Setup.php | 4 ++ includes/SiteStats.php | 12 +++-- includes/actions/DeleteAction.php | 2 +- 7 files changed, 111 insertions(+), 54 deletions(-) diff --git a/RELEASE-NOTES-1.19 b/RELEASE-NOTES-1.19 index 2ebd04edc3..2874e5bf1d 100644 --- a/RELEASE-NOTES-1.19 +++ b/RELEASE-NOTES-1.19 @@ -16,11 +16,15 @@ production. 
* $wgAllowUserSkin (deprecated in 1.16) has now been removed * $wgExtraRandompageSQL (deprecated in 1.16) has now been removed * LogReader and LogViewer classes (deprecated in 1.14) have now been removed +* (bug 26033) Added $wgArticleCountMethod to select the method to use to say + whether a page is an article or not. $wgUseCommaCount is now deprecated. === New features in 1.19 === * (bug 28916) A way to to toggle mw.config legacy globals settings from LocalSettings.php has been created by introducing $wgLegacyJavaScriptGlobals. * (bug 28503) Support for ircs:// URL protocols +* (bug 26033) It is now possible to count all non-redirect pages in content + namespaces as articles === Bug fixes in 1.19 === * (bug 10154) Don't allow user to specify days beyond $wgRCMaxAge. @@ -38,6 +42,8 @@ used in Tiff files. * (bug 27864) Transcluding {{Special:Prefix}} with empty prefix now lists all pages. * (bug 18803) JPEG2000 images can no longer be uploaded as JPEG image. +* (bug 11868) If using links to count articles, the checking will now be based + on the real presence of an internal link instead of the "[[" string === API changes in 1.19 === * (bug 27790) add query type for querymodules to action=paraminfo diff --git a/includes/Article.php b/includes/Article.php index 4f1d5466fa..1db124e1e0 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -27,7 +27,6 @@ class Article { var $mContentLoaded = false; // !< var $mCounter = -1; // !< Not loaded var $mDataLoaded = false; // !< - var $mGoodAdjustment = 0; // !< var $mIsRedirect = false; // !< var $mLatest = false; // !< var $mOldId; // !< @@ -61,7 +60,6 @@ class Article { var $mTimestamp = ''; // !< var $mTitle; // !< Title object - var $mTotalAdjustment = 0; // !< var $mTouched = '19700101000000'; // !< /** @@ -260,7 +258,6 @@ class Article { $this->mRedirectTarget = null; # Title object if set $this->mLastRevision = null; # Latest revision $this->mTimestamp = ''; - $this->mGoodAdjustment = $this->mTotalAdjustment 
= 0; $this->mTouched = '19700101000000'; $this->mIsRedirect = false; $this->mRevIdFetched = 0; @@ -644,15 +641,43 @@ class Article { * Determine whether a page would be suitable for being counted as an * article in the site_stats table based on the title & its content * - * @param $text String: text to analyze - * @return bool + * @param $editInfo Object or false: object returned by prepareTextForEdit(), + * if false, the current database state will be used + * @return Boolean */ - public function isCountable( $text ) { - global $wgUseCommaCount; + public function isCountable( $editInfo = false ) { + global $wgArticleCountMethod; + + if ( !$this->mTitle->isContentPage() ) { + return false; + } + + $text = $editInfo ? $editInfo->pst : false; - $token = $wgUseCommaCount ? ',' : '[['; + if ( $this->isRedirect( $text ) ) { + return false; + } - return $this->mTitle->isContentPage() && !$this->isRedirect( $text ) && in_string( $token, $text ); + switch ( $wgArticleCountMethod ) { + case 'any': + return true; + case 'comma': + if ( $text === false ) { + $text = $this->getRawText(); + } + return in_string( ',', $text ); + case 'link': + if ( $editInfo ) { + // ParserOutput::getLinks() is a 2D array of page links, so + // to be really correct we would need to recurse in the array + // but the main array should only have items in it if there are + // links. + return (bool)count( $editInfo->output->getLinks() ); + } else { + return (bool)wfGetDB( DB_SLAVE )->selectField( 'pagelinks', 1, + array( 'pl_from' => $this->getId() ), __METHOD__ ); + } + } } /** @@ -2067,10 +2092,6 @@ class Article { $changed = ( strcmp( $text, $oldtext ) != 0 ); if ( $changed ) { - $this->mGoodAdjustment = (int)$this->isCountable( $text ) - - (int)$this->isCountable( $oldtext ); - $this->mTotalAdjustment = 0; - if ( !$this->mLatest ) { # Article gone missing wfDebug( __METHOD__ . 
": EDIT_UPDATE specified but article doesn't exist\n" ); @@ -2165,12 +2186,6 @@ class Article { # Create new article $status->value['new'] = true; - # Set statistics members - # We work out if it's countable after PST to avoid counter drift - # when articles are created with {{subst:}} - $this->mGoodAdjustment = (int)$this->isCountable( $text ); - $this->mTotalAdjustment = 1; - $dbw->begin(); # Add the page record; stake our claim on this title! @@ -2226,7 +2241,7 @@ class Article { $dbw->commit(); # Update links, etc. - $this->editUpdates( $text, $summary, $isminor, $now, $revisionId, true, $user ); + $this->editUpdates( $text, $summary, $isminor, $now, $revisionId, true, $user, true ); # Clear caches Article::onArticleCreate( $this->mTitle ); @@ -3064,7 +3079,7 @@ class Article { return false; } - $u = new SiteStatsUpdate( 0, 1, - (int)$this->isCountable( $this->getRawText() ), -1 ); + $u = new SiteStatsUpdate( 0, 1, - (int)$this->isCountable(), -1 ); array_push( $wgDeferredUpdateList, $u ); // Bitfields to further suppress the content @@ -3511,8 +3526,11 @@ class Article { * @param $newid Integer: rev_id value of the new revision * @param $changed Boolean: Whether or not the content actually changed * @param $user User object: User doing the edit + * @param $created Boolean: Whether the edit created the page */ - public function editUpdates( $text, $summary, $minoredit, $timestamp_of_pagechange, $newid, $changed = true, User $user = null ) { + public function editUpdates( $text, $summary, $minoredit, $timestamp_of_pagechange, $newid, + $changed = true, User $user = null, $created = false ) + { global $wgDeferredUpdateList, $wgUser, $wgEnableParserCache; wfProfileIn( __METHOD__ ); @@ -3564,10 +3582,19 @@ class Article { return; } - $u = new SiteStatsUpdate( 0, 1, $this->mGoodAdjustment, $this->mTotalAdjustment ); - array_push( $wgDeferredUpdateList, $u ); - $u = new SearchUpdate( $id, $title, $text ); - array_push( $wgDeferredUpdateList, $u ); + if ( !$changed ) 
{ + $good = 0; + $total = 0; + } elseif ( $created ) { + $good = (int)$this->isCountable( $editInfo ); + $total = 1; + } else { + $good = (int)$this->isCountable( $editInfo ) - (int)$this->isCountable(); + $total = 0; + } + + $wgDeferredUpdateList[] = new SiteStatsUpdate( 0, 1, $good, $total ); + $wgDeferredUpdateList[] = new SearchUpdate( $id, $title, $text ); # If this is another user's talk page, update newtalk # Don't do this if $changed = false otherwise some idiot can null-edit a @@ -3608,10 +3635,8 @@ class Article { * anymore. */ public function createUpdates( $rev ) { - $this->mGoodAdjustment = $this->isCountable( $rev->getText() ); - $this->mTotalAdjustment = 1; $this->editUpdates( $rev->getText(), $rev->getComment(), - $rev->isMinor(), wfTimestamp(), $rev->getId(), true ); + $rev->isMinor(), wfTimestamp(), $rev->getId(), true, null, true ); } /** diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index f1de742b27..0dffbc4d0f 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -2974,14 +2974,30 @@ $wgTranscludeCacheExpiry = 3600; */ /** - * Under which condition should a page in the main namespace be counted - * as a valid article? If $wgUseCommaCount is set to true, it will be - * counted if it contains at least one comma. If it is set to false - * (default), it will only be counted if it contains at least one [[wiki - * link]]. See http://www.mediawiki.org/wiki/Manual:Article_count - * - * Retroactively changing this variable will not affect - * the existing count (cf. maintenance/recount.sql). + * Method used to determine if a page in a content namespace should be counted + * as a valid article. + * + * Redirect pages will never be counted as valid articles. 
+ * + * This variable can have the following values: + * - 'any': all pages are considered as valid articles + * - 'comma': the page must contain a comma to be considered valid + * - 'link': the page must contain a [[wiki link]] to be considered valid + * - null: the value will be set at run time depending on $wgUseCommaCount: + * if $wgUseCommaCount is false, it will be 'link', if it is true + * it will be 'comma' + * + * See also http://www.mediawiki.org/wiki/Manual:Article_count + * + * Retroactively changing this variable will not affect the existing count; + * to update it, you will need to run the maintenance/updateArticleCount.php + * script. + */ +$wgArticleCountMethod = null; + +/** + * Backward compatibility setting, will set $wgArticleCountMethod if it is null. + * @deprecated in 1.19; use $wgArticleCountMethod instead */ $wgUseCommaCount = false; diff --git a/includes/Import.php b/includes/Import.php index 1c3bbd9a61..757b3d2f8b 100644 --- a/includes/Import.php +++ b/includes/Import.php @@ -1027,25 +1027,25 @@ class WikiRevision { $tempTitle = $GLOBALS['wgTitle']; $GLOBALS['wgTitle'] = $this->title; - if( $created ) { + if ( $created ) { wfDebug( __METHOD__ . ": running onArticleCreate\n" ); Article::onArticleCreate( $this->title ); - - wfDebug( __METHOD__ . ": running create updates\n" ); - $article->createUpdates( $revision ); - } elseif( $changed ) { wfDebug( __METHOD__ . ": running onArticleEdit\n" ); Article::onArticleEdit( $this->title ); - - wfDebug( __METHOD__ . ": running edit updates\n" ); - $article->editUpdates( - $this->getText(), - $this->getComment(), - $this->minor, - $this->timestamp, - $revId ); } + + wfDebug( __METHOD__ . 
": running updates\n" ); + $article->editUpdates( + $this->getText(), + $this->getComment(), + $this->minor, + $this->timestamp, + $revId, + true, + null, + $created ); + $GLOBALS['wgTitle'] = $tempTitle; return true; diff --git a/includes/Setup.php b/includes/Setup.php index 36a617215b..e3be061c85 100644 --- a/includes/Setup.php +++ b/includes/Setup.php @@ -293,6 +293,10 @@ if ( !$wgHtml5Version && $wgHtml5 && $wgAllowRdfaAttributes ) { # Blacklisted file extensions shouldn't appear on the "allowed" list $wgFileExtensions = array_diff ( $wgFileExtensions, $wgFileBlacklist ); +if ( $wgArticleCountMethod === null ) { + $wgArticleCountMethod = $wgUseCommaCount ? 'comma' : 'link'; +} + if ( $wgInvalidateCacheOnLocalSettingsChange ) { $wgCacheEpoch = max( $wgCacheEpoch, gmdate( 'YmdHis', @filemtime( "$IP/LocalSettings.php" ) ) ); } diff --git a/includes/SiteStats.php b/includes/SiteStats.php index 9914a3b343..974c62dbc1 100644 --- a/includes/SiteStats.php +++ b/includes/SiteStats.php @@ -285,18 +285,24 @@ class SiteStatsInit { * @return Integer */ public function articles() { - global $wgUseCommaCount; + global $wgArticleCountMethod; $tables = array( 'page' ); $conds = array( 'page_namespace' => MWNamespace::getContentNamespaces(), 'page_is_redirect' => 0, - 'page_len > 0' ); - if ( !$wgUseCommaCount ) { + if ( $wgArticleCountMethod == 'link' ) { $tables[] = 'pagelinks'; $conds[] = 'pl_from=page_id'; + } elseif ( $wgArticleCountMethod == 'comma' ) { + // To make a correct check for this, we would need, for each page, + // to load the text, maybe uncompress it, maybe decode it and then + // check if there's one comma. 
+ // But one thing we are sure is that if the page is empty, it can't + // contain a comma :) + $conds[] = 'page_len > 0'; } $this->mArticles = $this->db->selectField( $tables, 'COUNT(DISTINCT page_id)', diff --git a/includes/actions/DeleteAction.php b/includes/actions/DeleteAction.php index 41ac4becdb..87b3c1c1a9 100644 --- a/includes/actions/DeleteAction.php +++ b/includes/actions/DeleteAction.php @@ -245,7 +245,7 @@ class DeleteAction extends Action { return false; } - $updates = new SiteStatsUpdate( 0, 1, - (int)$page->isCountable( $page->getRawText() ), -1 ); + $updates = new SiteStatsUpdate( 0, 1, - (int)$page->isCountable(), -1 ); array_push( $wgDeferredUpdateList, $updates ); // Bitfields to further suppress the content -- 2.20.1