Patrol overhaul phase 1: Remove rcid parameters
authorMarius Hoch <hoo@online.de>
Sat, 29 Dec 2012 02:53:18 +0000 (03:53 +0100)
committerTim Starling <tstarling@wikimedia.org>
Sat, 25 May 2013 10:04:01 +0000 (12:04 +0200)
I've changed the logic in Article::showPatrolFooter to be able
to fetch the recent changes id and to only show the patrol link
in case the change hasn't yet been patrolled.

In case recentchanges patrolling is enabled this will try to
create a patrol link for the revision the user is currently
viewing. If only new page patrolling is enabled it tries to
create a patrol link for the first revision of the page.

Furthermore I've removed the passing around of &rcid parameters
within MediaWiki as those had several issues (some even security
related) and were only a workaround to protect the DB from some
queries, which is no longer needed.

This has already been partly implemented in a different manner in
r45778 but had to be reverted in r46542 due to performance issues.
This version shouldn't cause such issues as I'm only adding one or
two indexed database queries per page view.

I've written this new version of the patch with mostly
performance in mind and even tested the database queries it uses
against the replicated databases of enwiki on the toolserver. I'm
pretty sure this can't be implemented any faster without creating
a new index on the recentchanges table.

As I was on it I've implemented RecentChange::isInRCLifespan which
checks whether the given timestamp is new enough to may have a RC
row. That way we can avoid some DB queries for timestamps which are
older than the max RC age.

Fixes bugs:
(bug 15936) New page's patrol button should always be visible
(bug 35810) ! N pages non-patrol-able
(bug 36641) Patrol page link shows on non-existent revs

Change-Id: I1e24733cafbfdc51b7a5a9a1c1baf948e760fe1a

includes/Article.php
includes/ChangesList.php
includes/RecentChange.php
includes/diff/DifferenceEngine.php
includes/specials/SpecialNewpages.php

index a0d4438..9f54a52 100644 (file)
@@ -1037,8 +1037,7 @@ class Article implements Page {
                        $this->getContext()->getOutput()->addWikiMsg( 'anontalkpagetext' );
                }
 
-               # If we have been passed an &rcid= parameter, we want to give the user a
-               # chance to mark this new article as patrolled.
+               // Show a footer allowing the user to patrol the shown revision or page if possible
                $this->showPatrolFooter();
 
                wfRunHooks( 'ArticleViewFooter', array( $this ) );
@@ -1053,15 +1052,118 @@ class Article implements Page {
         * OutputPage::preventClickjacking() and load mediawiki.page.patrol.ajax.
         */
        public function showPatrolFooter() {
+               global $wgUseRCPatrol, $wgUseNPPatrol, $wgRCMaxAge;
+
                $request = $this->getContext()->getRequest();
                $outputPage = $this->getContext()->getOutput();
                $user = $this->getContext()->getUser();
-               $rcid = $request->getVal( 'rcid' );
+               $cache = wfGetMainCache();
+
+               // Conditions to potentially patrol the current revision
+               // patrolPage is set in case we want to patrol the first
+               // revision and not the current one (set in Special:NewPages)
+               $useRCPatrol = $wgUseRCPatrol && !$request->getBool( 'patrolpage' );
+
+               if ( !$this->getTitle()->quickUserCan( 'patrol', $user ) || ( !$wgUseNPPatrol && !$wgUseRCPatrol ) ) {
+                       // Patrolling is fully disabled or the user isn't allowed to
+                       return;
+               }
+
+               wfProfileIn( __METHOD__ );
+
+               if ( $useRCPatrol ) {
+                       // Check for cached results
+                       if ( $cache->get( wfMemcKey( 'NotPatrollableRevId', $this->getRevIdFetched() ) ) ) {
+                               wfProfileOut( __METHOD__ );
+                               return;
+                       }
+
+                       // We make use of a little index trick over here:
+                       // First we get the timestamp of the last revision and then
+                       // we look up the RC row by that as the timestamp is indexed
+                       // and usually very few rows exist for one timestamp
+                       // (While several thousand can exists for a single page)
+                       if ( !$this->mRevision ) {
+                               $this->mRevision = Revision::newFromId( $this->getRevIdFetched() );
+                       }
+
+                       if ( !$this->mRevision || !RecentChange::isInRCLifespan( $this->mRevision->getTimestamp(), 21600 )  ) {
+                               // The revision is more than 6 hours older than the Max RC age
+                               // no need to torture the DB any further (6h because the RC might not be cleaned out regularly)
+                               wfProfileOut( __METHOD__ );
+                               return;
+                       }
+                       $rc = RecentChange::newFromConds(
+                               array(
+                                       'rc_this_oldid' => $this->getRevIdFetched(),
+                                       'rc_timestamp' => $this->mRevision->getTimestamp(),
+                                       'rc_cur_id' => $this->getTitle()->getArticleID(),
+                                       'rc_patrolled' => 0
+                               ),
+                               __METHOD__,
+                               array( 'USE INDEX' => 'rc_timestamp' )
+                       );
+               } else {
+                       // RC patrol is disabled so we have to patrol the first
+                       // revision (new page patrol) in case it's in the RC table.
+                       // To achieve this we get the timestamp of the oldest revison
+                       // the revision table holds for the given page. Then we look
+                       // whether it's within the RC lifespan and if it is, we try
+                       // to get the recentchanges row belonging to that entry
+                       // (with rc_new = 1).
+
+                       // Check for cached results
+                       if ( $cache->get( wfMemcKey( 'NotPatrollablePage', $this->getTitle()->getArticleID() ) ) ) {
+                               wfProfileOut( __METHOD__ );
+                               return;
+                       }
+
+                       $dbr = wfGetDB( DB_SLAVE );
+                       $oldestRevisionTimestamp = $dbr->selectField(
+                               'revision',
+                               'MIN( rev_timestamp )',
+                               array( 'rev_page' => $this->getTitle()->getArticleID() ),
+                               __METHOD__
+                       );
+
+                       if ( !$oldestRevisionTimestamp || !RecentChange::isInRCLifespan( $oldestRevisionTimestamp, 21600 ) ) {
+                               // We either didn't find the oldest revision for the given page
+                               // or it's to old for the RC table (with 6h tolerance)
+                               wfProfileOut( __METHOD__ );
+                               return;
+                       }
+
+                       $rc = RecentChange::newFromConds(
+                               array(
+                                       'rc_new' => 1,
+                                       'rc_timestamp' => $oldestRevisionTimestamp,
+                                       'rc_namespace' => $this->getTitle()->getNamespace(),
+                                       'rc_cur_id' => $this->getTitle()->getArticleID(),
+                                       'rc_patrolled' => 0
+                               ),
+                               __METHOD__,
+                               array( 'USE INDEX' => 'new_name_timestamp' )
+                       );
+               }
+
+               wfProfileOut( __METHOD__ );
+
+               if ( !$rc ) {
+                       // No RC entry around
+
+                       // Cache the information we gathered above in case we can't patrol
+                       // Don't cache in case we can patrol as this could change
+                       if( $useRCPatrol ) {
+                               $cache->set( wfMemcKey( 'NotPatrollableRevId', $this->getRevIdFetched() ), '1', $wgRCMaxAge );
+                       } else {
+                               $cache->set( wfMemcKey( 'NotPatrollablePage', $this->getTitle()->getArticleID() ), '1', $wgRCMaxAge );
+                       }
 
-               if ( !$rcid || !$this->getTitle()->quickUserCan( 'patrol', $user ) ) {
                        return;
                }
 
+               $rcid = $rc->getAttribute( 'rc_id' );
+
                $token = $user->getEditToken( $rcid );
 
                $outputPage->preventClickjacking();
index 1b6b396..796b92a 100644 (file)
@@ -334,10 +334,6 @@ class ChangesList extends ContextSource {
                                'oldid' => $rc->mAttribs['rc_last_oldid']
                        );
 
-                       if ( $unpatrolled ) {
-                               $query['rcid'] = $rc->mAttribs['rc_id'];
-                       };
-
                        $diffLink = Linker::linkKnown(
                                $rc->getTitle(),
                                $this->message['diff'],
@@ -370,10 +366,6 @@ class ChangesList extends ContextSource {
                # patrolled yet, we need to give users a way to do so
                $params = array();
 
-               if ( $unpatrolled && $rc->mAttribs['rc_type'] == RC_NEW ) {
-                       $params['rcid'] = $rc->mAttribs['rc_id'];
-               }
-
                $articlelink = Linker::linkKnown(
                        $rc->getTitle(),
                        null,
@@ -752,8 +744,7 @@ class EnhancedChangesList extends ChangesList {
                if ( $type == RC_MOVE || $type == RC_MOVE_OVER_REDIRECT ) {
                // New unpatrolled pages
                } elseif ( $rc->unpatrolled && $type == RC_NEW ) {
-                       $clink = Linker::linkKnown( $rc->getTitle(), null, array(),
-                               array( 'rcid' => $rc->mAttribs['rc_id'] ) );
+                       $clink = Linker::linkKnown( $rc->getTitle() );
                // Log entries
                } elseif ( $type == RC_LOG ) {
                        if ( $logType ) {
@@ -789,14 +780,9 @@ class EnhancedChangesList extends ChangesList {
                # called too many times (50% of CPU time on RecentChanges!).
                $thisOldid = $rc->mAttribs['rc_this_oldid'];
                $lastOldid = $rc->mAttribs['rc_last_oldid'];
-               if ( $rc->unpatrolled ) {
-                       $rcIdQuery = array( 'rcid' => $rc->mAttribs['rc_id'] );
-               } else {
-                       $rcIdQuery = array();
-               }
+
                $querycur = $curIdEq + array( 'diff' => '0', 'oldid' => $thisOldid );
-               $querydiff = $curIdEq + array( 'diff' => $thisOldid, 'oldid' =>
-                       $lastOldid ) + $rcIdQuery;
+               $querydiff = $curIdEq + array( 'diff' => $thisOldid, 'oldid' => $lastOldid );
 
                if ( !$showdifflinks ) {
                        $curLink = $this->message['cur'];
@@ -823,7 +809,7 @@ class EnhancedChangesList extends ChangesList {
                        $lastLink = $this->message['last'];
                } else {
                        $lastLink = Linker::linkKnown( $rc->getTitle(), $this->message['last'],
-                               array(), $curIdEq + array( 'diff' => $thisOldid, 'oldid' => $lastOldid ) + $rcIdQuery );
+                               array(), $curIdEq + array( 'diff' => $thisOldid, 'oldid' => $lastOldid ) );
                }
 
                # Make user links
@@ -1088,9 +1074,6 @@ class EnhancedChangesList extends ChangesList {
                        } elseif ( !ChangesList::userCan( $rcObj, Revision::DELETED_TEXT, $this->getUser() ) ) {
                                $link = '<span class="history-deleted">' . $rcObj->timestamp . '</span> ';
                        } else {
-                               if ( $rcObj->unpatrolled && $type == RC_NEW ) {
-                                       $params['rcid'] = $rcObj->mAttribs['rc_id'];
-                               }
 
                                $link = Linker::linkKnown(
                                                $rcObj->getTitle(),
index b5d4a1c..953d9e9 100644 (file)
@@ -122,11 +122,12 @@ class RecentChange {
         *
         * @param array $conds of conditions
         * @param $fname Mixed: override the method name in profiling/logs
+        * @param $options Array Query options
         * @return RecentChange
         */
-       public static function newFromConds( $conds, $fname = __METHOD__ ) {
+       public static function newFromConds( $conds, $fname = __METHOD__, $options = array() ) {
                $dbr = wfGetDB( DB_SLAVE );
-               $row = $dbr->selectRow( 'recentchanges', self::selectFields(), $conds, $fname );
+               $row = $dbr->selectRow( 'recentchanges', self::selectFields(), $conds, $fname, $options );
                if ( $row !== false ) {
                        return self::newFromRow( $row );
                } else {
@@ -409,6 +410,9 @@ class RecentChange {
                        ),
                        __METHOD__
                );
+               // Invalidate the page cache after the page has been patrolled
+               // to make sure that the Patrol link isn't visible any longer!
+               $this->getTitle()->invalidateCache();
                return $dbw->affectedRows();
        }
 
@@ -862,4 +866,18 @@ class RecentChange {
                }
                return $ip;
        }
+
+       /**
+        * Check whether the given timestamp is new enough to have a RC row with a given tolerance
+        * as the recentchanges table might not be cleared out regularly (so older entries might exist)
+        * or rows which will be deleted soon shouldn't be included.
+        *
+        * @param $timestamp mixed MWTimestamp compatible timestamp
+        * @param $tolerance integer Tolerance in seconds
+        * @return bool
+        */
+       public static function isInRCLifespan( $timestamp, $tolerance = 0 ) {
+               global $wgRCMaxAge;
+               return wfTimestamp( TS_UNIX, $timestamp ) > time() - $tolerance - $wgRCMaxAge;
+       }
 }
index 4ee5014..c551107 100644 (file)
@@ -48,7 +48,6 @@ class DifferenceEngine extends ContextSource {
         * @var Title
         */
        var $mOldPage, $mNewPage;
-       var $mRcidMarkPatrolled;
 
        /**
         * @var Revision
@@ -80,8 +79,8 @@ class DifferenceEngine extends ContextSource {
         * Constructor
         * @param $context IContextSource context to use, anything else will be ignored
         * @param $old Integer old ID we want to show and diff with.
-        * @param string $new either 'prev' or 'next'.
-        * @param $rcid Integer ??? FIXME (default 0)
+        * @param $new String either 'prev' or 'next'.
+        * @param $rcid Integer Deprecated, no longer used!
         * @param $refreshCache boolean If set, refreshes the diff cache
         * @param $unhide boolean If set, allow viewing deleted revs
         */
@@ -96,7 +95,6 @@ class DifferenceEngine extends ContextSource {
 
                $this->mOldid = $old;
                $this->mNewid = $new;
-               $this->mRcidMarkPatrolled = intval( $rcid );  # force it to be an integer
                $this->mRefreshCache = $refreshCache;
                $this->unhide = $unhide;
        }
@@ -412,38 +410,39 @@ class DifferenceEngine extends ContextSource {
         * @return String
         */
        protected function markPatrolledLink() {
-               global $wgUseRCPatrol;
+               global $wgUseRCPatrol, $wgRCMaxAge;
+               $cache = wfGetMainCache();
 
                if ( $this->mMarkPatrolledLink === null ) {
                        // Prepare a change patrol link, if applicable
-                       if ( $wgUseRCPatrol && $this->mNewPage->quickUserCan( 'patrol', $this->getUser() ) ) {
-                               // If we've been given an explicit change identifier, use it; saves time
-                               if ( $this->mRcidMarkPatrolled ) {
-                                       $rcid = $this->mRcidMarkPatrolled;
-                                       $rc = RecentChange::newFromId( $rcid );
-                                       // Already patrolled?
-                                       $rcid = is_object( $rc ) && !$rc->getAttribute( 'rc_patrolled' ) ? $rcid : 0;
+                       if (
+                               // Is patrolling enabled and the user allowed to?
+                               $wgUseRCPatrol && $this->mNewPage->quickUserCan( 'patrol', $this->getUser() ) &&
+                               // Only do this if the revision isn't more than 6 hours older
+                               // than the Max RC age (6h because the RC might not be cleaned out regularly)
+                               RecentChange::isInRCLifespan( $this->mNewRev->getTimestamp(), 21600 ) &&
+                               // Maybe the result is cached
+                               !$cache->get( wfMemcKey( 'NotPatrollableRevId', $this->mNewid ) )
+                       ) {
+                               // Look for an unpatrolled change corresponding to this diff
+
+                               $db = wfGetDB( DB_SLAVE );
+                               $change = RecentChange::newFromConds(
+                                       array(
+                                               'rc_timestamp' => $db->timestamp( $this->mNewRev->getTimestamp() ),
+                                               'rc_this_oldid' => $this->mNewid,
+                                               'rc_last_oldid' => $this->mOldid,
+                                               'rc_patrolled' => 0
+                                       ),
+                                       __METHOD__,
+                                       array( 'USE INDEX' => 'rc_timestamp' )
+                               );
+
+                               if ( $change ) {
+                                       $rcid = $change->getAttribute( 'rc_id' );
                                } else {
-                                       // Look for an unpatrolled change corresponding to this diff
-                                       $db = wfGetDB( DB_SLAVE );
-                                       $change = RecentChange::newFromConds(
-                                               array(
-                                               // Redundant user,timestamp condition so we can use the existing index
-                                                       'rc_user_text' => $this->mNewRev->getRawUserText(),
-                                                       'rc_timestamp' => $db->timestamp( $this->mNewRev->getTimestamp() ),
-                                                       'rc_this_oldid' => $this->mNewid,
-                                                       'rc_last_oldid' => $this->mOldid,
-                                                       'rc_patrolled' => 0
-                                               ),
-                                               __METHOD__
-                                       );
-                                       if ( $change instanceof RecentChange ) {
-                                               $rcid = $change->mAttribs['rc_id'];
-                                               $this->mRcidMarkPatrolled = $rcid;
-                                       } else {
-                                               // None found
-                                               $rcid = 0;
-                                       }
+                                       // None found
+                                       $rcid = 0;
                                }
                                // Build the link
                                if ( $rcid ) {
@@ -462,6 +461,7 @@ class DifferenceEngine extends ContextSource {
                                                )
                                        ) . ']</span>';
                                } else {
+                                       $cache->set( wfMemcKey( 'NotPatrollableRevId', $this->mNewid ), '1', $wgRCMaxAge );
                                        $this->mMarkPatrolledLink = '';
                                }
                        } else {
index fc6151f..ae04061 100644 (file)
@@ -334,8 +334,11 @@ class SpecialNewpages extends IncludableSpecialPage {
 
                $query = array( 'redirect' => 'no' );
 
-               if ( $this->patrollable( $result ) ) {
-                       $query['rcid'] = $result->rc_id;
+               if( $this->patrollable( $result ) ) {
+                       // Tell Article.php that we want to patrol the first revision
+                       // and not the current one. Has effect if both recentchages and new page
+                       // patrolling are enabled, we set it everytime for link consistency though.
+                       $query['patrolpage'] = 1;
                }
 
                // Linker::linkKnown() uses 'known' and 'noclasses' options.