From: daniel Date: Tue, 8 May 2012 15:09:30 +0000 (+0200) Subject: Generalizing LinksUpdate to allow extensions to add arbitrary update handlers. X-Git-Tag: 1.31.0-rc.0~23616^2 X-Git-Url: http://git.cyclocoop.org/%28?a=commitdiff_plain;h=a12ce17c6e7c7a4d16c57f848d98292f949cc4df;p=lhc%2Fweb%2Fwiklou.git Generalizing LinksUpdate to allow extensions to add arbitrary update handlers. This supercedes I6d03bf2a, using better names for the new classes and incorporating the changes requested by Aaron. This change introduces the base class SecondaryDataUpdate to be used for any updates that need to be applied when a page is changed or deleted. Until now, this was done by the LinksUpdate class for updates and WikiPage::doDeletionUpdates upon deletion. This patch uses a list of SecondaryDataUpdates in both cases. This allows extensions (e.g. via the ContentHandler facility, once that is in) to easily specify what needs to be done when a page is updated or deleted in order to keep any secondary data stores (such as link tables) in sync. Note that limited transactional logic is also introduced, so SecondaryDataUpdate can be implemented to only commit their changes if all updates were performed sucessfully. Patch Set 2: fixing some coding style issues mentioned by Nikerabbit. Patch Set 4: some stuff I kept from the old LinksUpdate class needs cleanup, but might break extensions when changed. Marking as todo for now. Patch Set 5: fixed misnamed member in LinksDeletionUpdate (thanks Aaron). Change-Id: Ibe3e88fadd8c1d4063cf13bb6972f2a23569a73f --- diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 6710796a19..d22e17dde8 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -144,6 +144,7 @@ $wgAutoloadLocalClasses = array( 'Linker' => 'includes/Linker.php', 'LinkFilter' => 'includes/LinkFilter.php', 'LinksUpdate' => 'includes/LinksUpdate.php', + 'LinksDeletionUpdate' => 'includes/LinksUpdate.php', 'LocalisationCache' => 'includes/LocalisationCache.php', 'LocalisationCache_BulkLoad' => 'includes/LocalisationCache.php', 'MagicWord' => 'includes/MagicWord.php', @@ -195,6 +196,8 @@ $wgAutoloadLocalClasses = array( 'RevisionList' => 'includes/RevisionList.php', 'RSSFeed' => 'includes/Feed.php', 'Sanitizer' => 'includes/Sanitizer.php', + 'DataUpdate' => 'includes/DataUpdate.php', + 'SqlDataUpdate' => 'includes/SqlDataUpdate.php', 'ScopedPHPTimeout' => 'includes/ScopedPHPTimeout.php', 'SiteConfiguration' => 'includes/SiteConfiguration.php', 'SiteStats' => 'includes/SiteStats.php', diff --git a/includes/DataUpdate.php b/includes/DataUpdate.php new file mode 100644 index 0000000000..07e8223fd8 --- /dev/null +++ b/includes/DataUpdate.php @@ -0,0 +1,115 @@ +beginTransaction(); + $open_transactions[] = $update; + } + + // do work + foreach ( $updates as $update ) { + $update->doUpdate(); + } + + // commit transactions + while ( count( $open_transactions ) > 0 ) { + $trans = array_pop( $open_transactions ); + $trans->commitTransaction(); + } + } catch ( Exception $ex ) { + $exception = $ex; + wfDebug( "Caught exception, will rethrow after rollback: " . $ex->getMessage() ); + } + + // rollback remaining transactions + while ( count( $open_transactions ) > 0 ) { + $trans = array_pop( $open_transactions ); + $trans->rollbackTransaction(); + } + + if ( $exception ) { + throw $exception; // rethrow after cleanup + } + } + +} diff --git a/includes/LinksUpdate.php b/includes/LinksUpdate.php index 716e7d8072..236d7e43eb 100644 --- a/includes/LinksUpdate.php +++ b/includes/LinksUpdate.php @@ -19,12 +19,11 @@ * * @todo document (e.g. one-sentence top-level class description). */ -class LinksUpdate { +class LinksUpdate extends SqlDataUpdate { - /**@{{ - * @private - */ - var $mId, //!< Page ID of the article linked from + // @todo: make members protected, but make sure extensions don't break + + public $mId, //!< Page ID of the article linked from $mTitle, //!< Title object of the article linked from $mParserOutput, //!< Parser output $mLinks, //!< Map of title strings to IDs for the links in the document @@ -37,7 +36,6 @@ class LinksUpdate { $mDb, //!< Database connection reference $mOptions, //!< SELECT options to be used (array) $mRecursive; //!< Whether to queue jobs for recursive updates - /**@}}*/ /** * Constructor @@ -47,19 +45,18 @@ class LinksUpdate { * @param $recursive Boolean: queue jobs for recursive updates? */ function __construct( $title, $parserOutput, $recursive = true ) { - global $wgAntiLockFlags; + parent::__construct( ); - if ( $wgAntiLockFlags & ALF_NO_LINK_LOCK ) { - $this->mOptions = array(); - } else { - $this->mOptions = array( 'FOR UPDATE' ); + if ( !( $title instanceof Title ) ) { + throw new MWException( "The calling convention to LinksUpdate::LinksUpdate() has changed. " . + "Please see Article::editUpdates() for an invocation example.\n" ); } - $this->mDb = wfGetDB( DB_MASTER ); - if ( !is_object( $title ) ) { + if ( !( $parserOutput instanceof ParserOutput ) ) { throw new MWException( "The calling convention to LinksUpdate::__construct() has changed. " . "Please see WikiPage::doEditUpdates() for an invocation example.\n" ); } + $this->mTitle = $title; $this->mId = $title->getArticleID(); @@ -253,51 +250,6 @@ class LinksUpdate { wfProfileOut( __METHOD__ ); } - /** - * Invalidate the cache of a list of pages from a single namespace - * - * @param $namespace Integer - * @param $dbkeys Array - */ - function invalidatePages( $namespace, $dbkeys ) { - if ( !count( $dbkeys ) ) { - return; - } - - /** - * Determine which pages need to be updated - * This is necessary to prevent the job queue from smashing the DB with - * large numbers of concurrent invalidations of the same page - */ - $now = $this->mDb->timestamp(); - $ids = array(); - $res = $this->mDb->select( 'page', array( 'page_id' ), - array( - 'page_namespace' => $namespace, - 'page_title IN (' . $this->mDb->makeList( $dbkeys ) . ')', - 'page_touched < ' . $this->mDb->addQuotes( $now ) - ), __METHOD__ - ); - foreach ( $res as $row ) { - $ids[] = $row->page_id; - } - if ( !count( $ids ) ) { - return; - } - - /** - * Do the update - * We still need the page_touched condition, in case the row has changed since - * the non-locking select above. - */ - $this->mDb->update( 'page', array( 'page_touched' => $now ), - array( - 'page_id IN (' . $this->mDb->makeList( $ids ) . ')', - 'page_touched < ' . $this->mDb->addQuotes( $now ) - ), __METHOD__ - ); - } - /** * @param $cats */ @@ -849,3 +801,74 @@ class LinksUpdate { } } } + +/** + * Update object handling the cleanup of links tables after a page was deleted. + **/ +class LinksDeletionUpdate extends SqlDataUpdate { + + protected $mPage; //!< WikiPage the wikipage that was deleted + + /** + * Constructor + * + * @param $title Title of the page we're updating + * @param $parserOutput ParserOutput: output from a full parse of this page + * @param $recursive Boolean: queue jobs for recursive updates? + */ + function __construct( WikiPage $page ) { + parent::__construct( ); + + $this->mPage = $page; + } + + /** + * Do some database updates after deletion + */ + public function doUpdate() { + $title = $this->mPage->getTitle(); + $id = $this->mPage->getId(); + + # Delete restrictions for it + $this->mDb->delete( 'page_restrictions', array ( 'pr_page' => $id ), __METHOD__ ); + + # Fix category table counts + $cats = array(); + $res = $this->mDb->select( 'categorylinks', 'cl_to', array( 'cl_from' => $id ), __METHOD__ ); + + foreach ( $res as $row ) { + $cats [] = $row->cl_to; + } + + $this->mPage->updateCategoryCounts( array(), $cats ); + + # If using cascading deletes, we can skip some explicit deletes + if ( !$this->mDb->cascadingDeletes() ) { + $this->mDb->delete( 'revision', array( 'rev_page' => $id ), __METHOD__ ); + + # Delete outgoing links + $this->mDb->delete( 'pagelinks', array( 'pl_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'imagelinks', array( 'il_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'categorylinks', array( 'cl_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'templatelinks', array( 'tl_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'externallinks', array( 'el_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'langlinks', array( 'll_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'iwlinks', array( 'iwl_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'redirect', array( 'rd_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'page_props', array( 'pp_page' => $id ), __METHOD__ ); + } + + # If using cleanup triggers, we can skip some manual deletes + if ( !$this->mDb->cleanupTriggers() ) { + # Clean up recentchanges entries... + $this->mDb->delete( 'recentchanges', + array( 'rc_type != ' . RC_LOG, + 'rc_namespace' => $title->getNamespace(), + 'rc_title' => $title->getDBkey() ), + __METHOD__ ); + $this->mDb->delete( 'recentchanges', + array( 'rc_type != ' . RC_LOG, 'rc_cur_id' => $id ), + __METHOD__ ); + } + } +} \ No newline at end of file diff --git a/includes/SqlDataUpdate.php b/includes/SqlDataUpdate.php new file mode 100644 index 0000000000..f0204dfef2 --- /dev/null +++ b/includes/SqlDataUpdate.php @@ -0,0 +1,127 @@ +mOptions = array(); + } else { + $this->mOptions = array( 'FOR UPDATE' ); + } + + // @todo: get connection only when it's needed? make sure that doesn't break anything, especially transactions! + $this->mDb = wfGetDB( DB_MASTER ); + $this->mHasTransaction = false; + } + + /** + * Begin a database transaction. + * + * Because nested transactions are not supportred by the Database class, this implementation + * checkes Database::trxLevel() and only opens a transaction if none is yet active. + */ + public function beginTransaction() { + // NOTE: nested transactions are not supported, only start a transaction if none is open + if ( $this->mDb->trxLevel() === 0 ) { + $this->mDb->begin( get_class( $this ) . '::beginTransaction' ); + $this->mHasTransaction = true; + } + } + + /** + * Commit the database transaction started via beginTransaction (if any). + */ + public function commitTransaction() { + if ( $this->mHasTransaction ) { + $this->mDb->commit( get_class( $this ) . '::commitTransaction' ); + } + } + + /** + * Abort the database transaction started via beginTransaction (if any). + */ + public function abortTransaction() { + if ( $this->mHasTransaction ) { + $this->mDb->rollback( get_class( $this ) . '::abortTransaction' ); + } + } + + /** + * Invalidate the cache of a list of pages from a single namespace. + * This is intended for use by subclasses. + * + * @param $namespace Integer + * @param $dbkeys Array + */ + protected function invalidatePages( $namespace, Array $dbkeys ) { + if ( !count( $dbkeys ) ) { + return; + } + + /** + * Determine which pages need to be updated + * This is necessary to prevent the job queue from smashing the DB with + * large numbers of concurrent invalidations of the same page + */ + $now = $this->mDb->timestamp(); + $ids = array(); + $res = $this->mDb->select( 'page', array( 'page_id' ), + array( + 'page_namespace' => $namespace, + 'page_title' => $dbkeys, + 'page_touched < ' . $this->mDb->addQuotes( $now ) + ), __METHOD__ + ); + foreach ( $res as $row ) { + $ids[] = $row->page_id; + } + if ( !count( $ids ) ) { + return; + } + + /** + * Do the update + * We still need the page_touched condition, in case the row has changed since + * the non-locking select above. + */ + $this->mDb->update( 'page', array( 'page_touched' => $now ), + array( + 'page_id' => $ids, + 'page_touched < ' . $this->mDb->addQuotes( $now ) + ), __METHOD__ + ); + } + +} diff --git a/includes/WikiPage.php b/includes/WikiPage.php index 93e60e81b0..df610f1337 100644 --- a/includes/WikiPage.php +++ b/includes/WikiPage.php @@ -1679,9 +1679,9 @@ class WikiPage extends Page { $parserCache->save( $editInfo->output, $this, $editInfo->popts ); } - # Update the links tables - $u = new LinksUpdate( $this->mTitle, $editInfo->output ); - $u->doUpdate(); + # Update the links tables and other secondary data + $updates = $editInfo->output->getSecondaryDataUpdates( $this->mTitle ); + DataUpdate::runUpdates( $updates ); wfRunHooks( 'ArticleEditUpdates', array( &$this, &$editInfo, $options['changed'] ) ); @@ -2143,7 +2143,21 @@ class WikiPage extends Page { return WikiPage::DELETE_NO_REVISIONS; } - $this->doDeleteUpdates( $id ); + # update site status + DeferredUpdates::addUpdate( new SiteStatsUpdate( 0, 1, - (int)$this->isCountable(), -1 ) ); + + # remove secondary indexes, etc + $updates = $this->getDeletionUpdates( ); + DataUpdate::runUpdates( $updates ); + + # Clear caches + WikiPage::onArticleDelete( $this->mTitle ); + + # Reset this object + $this->clear(); + + # Clear the cached article id so the interface doesn't act like we exist + $this->mTitle->resetArticleID( 0 ); # Log the deletion, if the page was suppressed, log it at Oversight instead $logtype = $suppress ? 'suppress' : 'delete'; @@ -2163,68 +2177,6 @@ class WikiPage extends Page { return WikiPage::DELETE_SUCCESS; } - /** - * Do some database updates after deletion - * - * @param $id Int: page_id value of the page being deleted - */ - public function doDeleteUpdates( $id ) { - DeferredUpdates::addUpdate( new SiteStatsUpdate( 0, 1, - (int)$this->isCountable(), -1 ) ); - - $dbw = wfGetDB( DB_MASTER ); - - # Delete restrictions for it - $dbw->delete( 'page_restrictions', array ( 'pr_page' => $id ), __METHOD__ ); - - # Fix category table counts - $cats = array(); - $res = $dbw->select( 'categorylinks', 'cl_to', array( 'cl_from' => $id ), __METHOD__ ); - - foreach ( $res as $row ) { - $cats [] = $row->cl_to; - } - - $this->updateCategoryCounts( array(), $cats ); - - # If using cascading deletes, we can skip some explicit deletes - if ( !$dbw->cascadingDeletes() ) { - $dbw->delete( 'revision', array( 'rev_page' => $id ), __METHOD__ ); - - # Delete outgoing links - $dbw->delete( 'pagelinks', array( 'pl_from' => $id ), __METHOD__ ); - $dbw->delete( 'imagelinks', array( 'il_from' => $id ), __METHOD__ ); - $dbw->delete( 'categorylinks', array( 'cl_from' => $id ), __METHOD__ ); - $dbw->delete( 'templatelinks', array( 'tl_from' => $id ), __METHOD__ ); - $dbw->delete( 'externallinks', array( 'el_from' => $id ), __METHOD__ ); - $dbw->delete( 'langlinks', array( 'll_from' => $id ), __METHOD__ ); - $dbw->delete( 'iwlinks', array( 'iwl_from' => $id ), __METHOD__ ); - $dbw->delete( 'redirect', array( 'rd_from' => $id ), __METHOD__ ); - $dbw->delete( 'page_props', array( 'pp_page' => $id ), __METHOD__ ); - } - - # If using cleanup triggers, we can skip some manual deletes - if ( !$dbw->cleanupTriggers() ) { - # Clean up recentchanges entries... - $dbw->delete( 'recentchanges', - array( 'rc_type != ' . RC_LOG, - 'rc_namespace' => $this->mTitle->getNamespace(), - 'rc_title' => $this->mTitle->getDBkey() ), - __METHOD__ ); - $dbw->delete( 'recentchanges', - array( 'rc_type != ' . RC_LOG, 'rc_cur_id' => $id ), - __METHOD__ ); - } - - # Clear caches - self::onArticleDelete( $this->mTitle ); - - # Reset this object - $this->clear(); - - # Clear the cached article id so the interface doesn't act like we exist - $this->mTitle->resetArticleID( 0 ); - } - /** * Roll back the most recent consecutive set of edits to a page * from the same user; fails if there are no eligible edits to @@ -2793,6 +2745,7 @@ class WikiPage extends Page { if ( count( $templates_diff ) > 0 ) { # Whee, link updates time. + # Note: we are only interested in links here. We don't need to get other DataUpdate items from the parser output. $u = new LinksUpdate( $this->mTitle, $parserOutput, false ); $u->doUpdate(); } @@ -2921,6 +2874,16 @@ class WikiPage extends Page { global $wgUser; return $this->isParserCacheUsed( ParserOptions::newFromUser( $wgUser ), $oldid ); } + + public function getDeletionUpdates() { + $updates = array( + new LinksDeletionUpdate( $this ), + ); + + //@todo: make a hook to add update objects + //NOTE: deletion updates will be determined by the ContentHandler in the future + return $updates; + } } class PoolWorkArticleView extends PoolCounterWork { diff --git a/includes/api/ApiPurge.php b/includes/api/ApiPurge.php index 9e9320fb6c..7b421a6553 100644 --- a/includes/api/ApiPurge.php +++ b/includes/api/ApiPurge.php @@ -93,8 +93,8 @@ class ApiPurge extends ApiBase { true, true, $page->getLatest() ); # Update the links tables - $u = new LinksUpdate( $title, $p_result ); - $u->doUpdate(); + $updates = $p_result->getSecondaryDataUpdates( $title ); + DataUpdate::runUpdates( $updates ); $r['linkupdate'] = ''; diff --git a/includes/job/RefreshLinksJob.php b/includes/job/RefreshLinksJob.php index f711f8d021..7ccf00ddfc 100644 --- a/includes/job/RefreshLinksJob.php +++ b/includes/job/RefreshLinksJob.php @@ -61,8 +61,10 @@ class RefreshLinksJob extends Job { $parserOutput = $wgParser->parse( $revision->getText(), $this->title, $options, true, true, $revision->getId() ); wfProfileOut( __METHOD__.'-parse' ); wfProfileIn( __METHOD__.'-update' ); - $update = new LinksUpdate( $this->title, $parserOutput, false ); - $update->doUpdate(); + + $updates = $parserOutput->getSecondaryDataUpdates( $this->title, false ); + DataUpdate::runUpdates( $updates ); + wfProfileOut( __METHOD__.'-update' ); wfProfileOut( __METHOD__ ); return true; @@ -133,8 +135,10 @@ class RefreshLinksJob2 extends Job { $parserOutput = $wgParser->parse( $revision->getText(), $title, $options, true, true, $revision->getId() ); wfProfileOut( __METHOD__.'-parse' ); wfProfileIn( __METHOD__.'-update' ); - $update = new LinksUpdate( $title, $parserOutput, false ); - $update->doUpdate(); + + $updates = $parserOutput->getSecondaryDataUpdates( $title, false ); + DataUpdate::runUpdates( $updates ); + wfProfileOut( __METHOD__.'-update' ); wfWaitForSlaves(); } diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index 634078715c..d929f1a532 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -155,8 +155,9 @@ class ParserOutput extends CacheTime { $mProperties = array(), # Name/value pairs to be cached in the DB $mTOCHTML = '', # HTML of the TOC $mTimestamp; # Timestamp of the revision - private $mIndexPolicy = ''; # 'index' or 'noindex'? Any other value will result in no change. - private $mAccessedOptions = array(); # List of ParserOptions (stored in the keys) + private $mIndexPolicy = ''; # 'index' or 'noindex'? Any other value will result in no change. + private $mAccessedOptions = array(); # List of ParserOptions (stored in the keys) + private $mSecondaryDataUpdates = array(); # List of instances of SecondaryDataObject(), used to cause some information extracted from the page in a custom place. const EDITSECTION_REGEX = '#<(?:mw:)?editsection page="(.*?)" section="(.*?)"(?:/>|>(.*?)())#'; @@ -463,4 +464,40 @@ class ParserOutput extends CacheTime { function recordOption( $option ) { $this->mAccessedOptions[$option] = true; } + + /** + * Adds an update job to the output. Any update jobs added to the output will eventually bexecuted in order to + * store any secondary information extracted from the page's content. + * + * @param StorageUpdate $update + */ + public function addSecondaryDataUpdate( DataUpdate $update ) { + $this->mSecondaryDataUpdates[] = $update; + } + + /** + * Returns any DataUpdate jobs to be executed in order to store secondary information + * extracted from the page's content, including a LinksUpdate object for all links stored in + * this ParserOutput object. + * + * @param $title Title of the page we're updating. If not given, a title object will be created based on $this->getTitleText() + * @param $recursive Boolean: queue jobs for recursive updates? + * + * @return Array. An array of instances of DataUpdate + */ + public function getSecondaryDataUpdates( Title $title = null, $recursive = true ) { + if ( !$title ) { + $title = Title::newFromText( $this->getTitleText() ); + } + + $linksUpdate = new LinksUpdate( $title, $this, $recursive ); + + if ( !$this->mSecondaryDataUpdates ) { + return array( $linksUpdate ); + } else { + $updates = array_merge( $this->mSecondaryDataUpdates, array( $linksUpdate ) ); + } + + return $updates; + } }