From 285930668495b47e3ffe4ca7a0ddebe3135dce52 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Fri, 12 Apr 2019 21:38:55 -0700 Subject: [PATCH] Refactor edit stashing into a PageEditStash service Additional code cleanup: * Call setCacheTime() in parseAndStash instead of relying on the one in DerivedPageDataUpdater. * Improve the SPI logging by adding more extra fields. * Treat requests in CLI/job mode (aside from tests) like those from bots with regard to checking the stash. This should avoid stats/logging pollution. Change-Id: I8c6be919e399378e401a60502add0ecec7764d2d --- includes/MediaWikiServices.php | 9 + includes/ServiceWiring.php | 15 + includes/Storage/DerivedPageDataUpdater.php | 11 +- includes/Storage/PageEditStash.php | 504 ++++++++++++++++++ includes/api/ApiFormatFeedWrapper.php | 5 +- includes/api/ApiStashEdit.php | 344 +----------- maintenance/runJobs.php | 9 +- .../phpunit/includes/api/ApiStashEditTest.php | 62 ++- 8 files changed, 593 insertions(+), 366 deletions(-) create mode 100644 includes/Storage/PageEditStash.php diff --git a/includes/MediaWikiServices.php b/includes/MediaWikiServices.php index 655946f51d..c13d33f348 100644 --- a/includes/MediaWikiServices.php +++ b/includes/MediaWikiServices.php @@ -63,6 +63,7 @@ use Wikimedia\Services\ServiceContainer; use Wikimedia\Services\NoSuchServiceException; use MediaWiki\Interwiki\InterwikiLookup; use MagicWordFactory; +use MediaWiki\Storage\PageEditStash; /** * Service locator for MediaWiki core services. @@ -690,6 +691,14 @@ class MediaWikiServices extends ServiceContainer { return $this->getService( 'OldRevisionImporter' ); } + /** + * @return PageEditStash + * @since 1.34 + */ + public function getPageEditStash() { + return $this->getService( 'PageEditStash' ); + } + /** * @since 1.29 * @return Parser diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php index d39a0c79b2..a3a73e4346 100644 --- a/includes/ServiceWiring.php +++ b/includes/ServiceWiring.php @@ -62,6 +62,7 @@ use MediaWiki\Storage\BlobStore; use MediaWiki\Storage\BlobStoreFactory; use MediaWiki\Storage\NameTableStoreFactory; use MediaWiki\Storage\SqlBlobStore; +use MediaWiki\Storage\PageEditStash; return [ 'ActorMigration' => function ( MediaWikiServices $services ) : ActorMigration { @@ -350,6 +351,20 @@ return [ ); }, + 'PageEditStash' => function ( MediaWikiServices $services ) : PageEditStash { + $config = $services->getMainConfig(); + + return new PageEditStash( + ObjectCache::getLocalClusterInstance(), + $services->getDBLoadBalancer(), + LoggerFactory::getInstance( 'StashEdit' ), + $services->getStatsdDataFactory(), + defined( 'MEDIAWIKI_JOB_RUNNER' ) || $config->get( 'CommandLineMode' ) + ? PageEditStash::INITIATOR_JOB_OR_CLI + : PageEditStash::INITIATOR_USER + ); + }, + 'Parser' => function ( MediaWikiServices $services ) : Parser { return $services->getParserFactory()->create(); }, diff --git a/includes/Storage/DerivedPageDataUpdater.php b/includes/Storage/DerivedPageDataUpdater.php index 3dbe0a8573..401806b8ca 100644 --- a/includes/Storage/DerivedPageDataUpdater.php +++ b/includes/Storage/DerivedPageDataUpdater.php @@ -22,7 +22,6 @@ namespace MediaWiki\Storage; -use ApiStashEdit; use CategoryMembershipChangeJob; use Content; use ContentHandler; @@ -38,6 +37,7 @@ use LinksDeletionUpdate; use LinksUpdate; use LogicException; use MediaWiki\Edit\PreparedEdit; +use MediaWiki\MediaWikiServices; use MediaWiki\Revision\MutableRevisionRecord; use MediaWiki\Revision\RenderedRevision; use MediaWiki\Revision\RevisionRecord; @@ -755,9 +755,12 @@ class DerivedPageDataUpdater implements IDBAccessObject { // TODO: MCR: allow output for all slots to be stashed. if ( $useStash && $slotsUpdate->isModifiedSlot( SlotRecord::MAIN ) ) { - $mainContent = $slotsUpdate->getModifiedSlot( SlotRecord::MAIN )->getContent(); - $legacyUser = User::newFromIdentity( $user ); - $stashedEdit = ApiStashEdit::checkCache( $title, $mainContent, $legacyUser ); + $editStash = MediaWikiServices::getInstance()->getPageEditStash(); + $stashedEdit = $editStash->checkCache( + $title, + $slotsUpdate->getModifiedSlot( SlotRecord::MAIN )->getContent(), + User::newFromIdentity( $user ) + ); } $userPopts = ParserOptions::newFromUserAndLang( $user, $this->contLang ); diff --git a/includes/Storage/PageEditStash.php b/includes/Storage/PageEditStash.php new file mode 100644 index 0000000000..cc3e4bc45d --- /dev/null +++ b/includes/Storage/PageEditStash.php @@ -0,0 +1,504 @@ +cache = $cache; + $this->lb = $lb; + $this->logger = $logger; + $this->stats = $stats; + $this->initiator = $initiator; + } + + /** + * @param WikiPage $page + * @param Content $content Edit content + * @param User $user + * @param string $summary Edit summary + * @return string Class ERROR_* constant + */ + public function parseAndCache( WikiPage $page, Content $content, User $user, $summary ) { + $logger = $this->logger; + + $title = $page->getTitle(); + $key = $this->getStashKey( $title, $this->getContentHash( $content ), $user ); + $fname = __METHOD__; + + // Use the master DB to allow for fast blocking locks on the "save path" where this + // value might actually be used to complete a page edit. If the edit submission request + // happens before this edit stash requests finishes, then the submission will block until + // the stash request finishes parsing. For the lock acquisition below, there is not much + // need to duplicate parsing of the same content/user/summary bundle, so try to avoid + // blocking at all here. + $dbw = $this->lb->getConnection( DB_MASTER ); + if ( !$dbw->lock( $key, $fname, 0 ) ) { + // De-duplicate requests on the same key + return self::ERROR_BUSY; + } + /** @noinspection PhpUnusedLocalVariableInspection */ + $unlocker = new ScopedCallback( function () use ( $dbw, $key, $fname ) { + $dbw->unlock( $key, $fname ); + } ); + + $cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC; + + // Reuse any freshly build matching edit stash cache + $editInfo = $this->getStashValue( $key ); + if ( $editInfo && wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) { + $alreadyCached = true; + } else { + $format = $content->getDefaultFormat(); + $editInfo = $page->prepareContentForEdit( $content, null, $user, $format, false ); + $editInfo->output->setCacheTime( $editInfo->timestamp ); + $alreadyCached = false; + } + + $context = [ 'cachekey' => $key, 'title' => $title->getPrefixedText() ]; + + if ( $editInfo && $editInfo->output ) { + // Let extensions add ParserOutput metadata or warm other caches + Hooks::run( 'ParserOutputStashForEdit', + [ $page, $content, $editInfo->output, $summary, $user ] ); + + if ( $alreadyCached ) { + $logger->debug( "Parser output for key '{cachekey}' already cached.", $context ); + + return self::ERROR_NONE; + } + + $code = $this->storeStashValue( + $key, + $editInfo->pstContent, + $editInfo->output, + $editInfo->timestamp, + $user + ); + + if ( $code === true ) { + $logger->debug( "Cached parser output for key '{cachekey}'.", $context ); + + return self::ERROR_NONE; + } elseif ( $code === 'uncacheable' ) { + $logger->info( + "Uncacheable parser output for key '{cachekey}' [{code}].", + $context + [ 'code' => $code ] + ); + + return self::ERROR_UNCACHEABLE; + } else { + $logger->error( + "Failed to cache parser output for key '{cachekey}'.", + $context + [ 'code' => $code ] + ); + + return self::ERROR_CACHE; + } + } + + return self::ERROR_PARSE; + } + + /** + * Check that a prepared edit is in cache and still up-to-date + * + * This method blocks if the prepared edit is already being rendered, + * waiting until rendering finishes before doing final validity checks. + * + * The cache is rejected if template or file changes are detected. + * Note that foreign template or file transclusions are not checked. + * + * This returns an object with the following fields: + * - pstContent: the Content after pre-save-transform + * - output: the ParserOutput instance + * - timestamp: the timestamp of the parse + * - edits: author edit count if they are logged in or NULL otherwise + * + * @param Title $title + * @param Content $content + * @param User $user User to get parser options from + * @return stdClass|bool Returns edit stash object or false on cache miss + */ + public function checkCache( Title $title, Content $content, User $user ) { + if ( + // The context is not an HTTP POST request + !$user->getRequest()->wasPosted() || + // The context is a CLI script or a job runner HTTP POST request + $this->initiator !== self::INITIATOR_USER || + // The editor account is a known bot + $user->isBot() + ) { + // Avoid wasted queries and statsd pollution + return false; + } + + $logger = $this->logger; + + $key = $this->getStashKey( $title, $this->getContentHash( $content ), $user ); + $context = [ + 'key' => $key, + 'title' => $title->getPrefixedText(), + 'user' => $user->getName() + ]; + + $editInfo = $this->getAndWaitForStashValue( $key ); + if ( !is_object( $editInfo ) || !$editInfo->output ) { + $this->stats->increment( 'editstash.cache_misses.no_stash' ); + if ( $this->recentStashEntryCount( $user ) > 0 ) { + $logger->info( "Empty cache for key '{key}' but not for user.", $context ); + } else { + $logger->debug( "Empty cache for key '{key}'.", $context ); + } + + return false; + } + + $age = time() - wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() ); + $context['age'] = $age; + + $isCacheUsable = true; + if ( $age <= self::PRESUME_FRESH_TTL_SEC ) { + // Assume nothing changed in this time + $this->stats->increment( 'editstash.cache_hits.presumed_fresh' ); + $logger->debug( "Timestamp-based cache hit for key '{key}'.", $context ); + } elseif ( $user->isAnon() ) { + $lastEdit = $this->lastEditTime( $user ); + $cacheTime = $editInfo->output->getCacheTime(); + if ( $lastEdit < $cacheTime ) { + // Logged-out user made no local upload/template edits in the meantime + $this->stats->increment( 'editstash.cache_hits.presumed_fresh' ); + $logger->debug( "Edit check based cache hit for key '{key}'.", $context ); + } else { + $isCacheUsable = false; + $this->stats->increment( 'editstash.cache_misses.proven_stale' ); + $logger->info( "Stale cache for key '{key}' due to outside edits.", $context ); + } + } else { + if ( $editInfo->edits === $user->getEditCount() ) { + // Logged-in user made no local upload/template edits in the meantime + $this->stats->increment( 'editstash.cache_hits.presumed_fresh' ); + $logger->debug( "Edit count based cache hit for key '{key}'.", $context ); + } else { + $isCacheUsable = false; + $this->stats->increment( 'editstash.cache_misses.proven_stale' ); + $logger->info( "Stale cache for key '{key}'due to outside edits.", $context ); + } + } + + if ( !$isCacheUsable ) { + return false; + } + + if ( $editInfo->output->getFlag( 'vary-revision' ) ) { + // This can be used for the initial parse, e.g. for filters or doEditContent(), + // but a second parse will be triggered in doEditUpdates(). This is not optimal. + $logger->info( + "Cache for key '{key}' has vary_revision; post-insertion parse inevitable.", + $context + ); + } elseif ( $editInfo->output->getFlag( 'vary-revision-id' ) ) { + // Similar to the above if we didn't guess the ID correctly. + $logger->debug( + "Cache for key '{key}' has vary_revision_id; post-insertion parse possible.", + $context + ); + } + + return $editInfo; + } + + /** + * @param string $key + * @return bool|stdClass + */ + private function getAndWaitForStashValue( $key ) { + $editInfo = $this->getStashValue( $key ); + + if ( !$editInfo ) { + $start = microtime( true ); + // We ignore user aborts and keep parsing. Block on any prior parsing + // so as to use its results and make use of the time spent parsing. + // Skip this logic if there no master connection in case this method + // is called on an HTTP GET request for some reason. + $dbw = $this->lb->getAnyOpenConnection( $this->lb->getWriterIndex() ); + if ( $dbw && $dbw->lock( $key, __METHOD__, 30 ) ) { + $editInfo = $this->getStashValue( $key ); + $dbw->unlock( $key, __METHOD__ ); + } + + $timeMs = 1000 * max( 0, microtime( true ) - $start ); + $this->stats->timing( 'editstash.lock_wait_time', $timeMs ); + } + + return $editInfo; + } + + /** + * @param string $textHash + * @return string|bool Text or false if missing + */ + public function fetchInputText( $textHash ) { + $textKey = $this->cache->makeKey( 'stashedit', 'text', $textHash ); + + return $this->cache->get( $textKey ); + } + + /** + * @param string $text + * @param string $textHash + * @return bool Success + */ + public function stashInputText( $text, $textHash ) { + $textKey = $this->cache->makeKey( 'stashedit', 'text', $textHash ); + + return $this->cache->set( $textKey, $text, self::MAX_CACHE_TTL ); + } + + /** + * @param User $user + * @return string|null TS_MW timestamp or null + */ + private function lastEditTime( User $user ) { + $db = $this->lb->getConnection( DB_REPLICA ); + $actorQuery = ActorMigration::newMigration()->getWhere( $db, 'rc_user', $user, false ); + $time = $db->selectField( + [ 'recentchanges' ] + $actorQuery['tables'], + 'MAX(rc_timestamp)', + [ $actorQuery['conds'] ], + __METHOD__, + [], + $actorQuery['joins'] + ); + + return wfTimestampOrNull( TS_MW, $time ); + } + + /** + * Get hash of the content, factoring in model/format + * + * @param Content $content + * @return string + */ + private function getContentHash( Content $content ) { + return sha1( implode( "\n", [ + $content->getModel(), + $content->getDefaultFormat(), + $content->serialize( $content->getDefaultFormat() ) + ] ) ); + } + + /** + * Get the temporary prepared edit stash key for a user + * + * This key can be used for caching prepared edits provided: + * - a) The $user was used for PST options + * - b) The parser output was made from the PST using cannonical matching options + * + * @param Title $title + * @param string $contentHash Result of getContentHash() + * @param User $user User to get parser options from + * @return string + */ + private function getStashKey( Title $title, $contentHash, User $user ) { + return $this->cache->makeKey( + 'stashed-edit-info', + md5( $title->getPrefixedDBkey() ), + // Account for the edit model/text + $contentHash, + // Account for user name related variables like signatures + md5( $user->getId() . "\n" . $user->getName() ) + ); + } + + /** + * @param string $hash + * @return string + */ + private function getStashParserOutputKey( $hash ) { + return $this->cache->makeKey( 'stashed-edit-output', $hash ); + } + + /** + * @param string $key + * @return stdClass|bool Object map (pstContent,output,outputID,timestamp,edits) or false + */ + private function getStashValue( $key ) { + $stashInfo = $this->cache->get( $key ); + if ( !is_object( $stashInfo ) ) { + return false; + } + + $parserOutputKey = $this->getStashParserOutputKey( $stashInfo->outputID ); + $parserOutput = $this->cache->get( $parserOutputKey ); + if ( $parserOutput instanceof ParserOutput ) { + $stashInfo->output = $parserOutput; + + return $stashInfo; + } + + return false; + } + + /** + * Build a value to store in memcached based on the PST content and parser output + * + * This makes a simple version of WikiPage::prepareContentForEdit() as stash info + * + * @param string $key + * @param Content $pstContent Pre-Save transformed content + * @param ParserOutput $parserOutput + * @param string $timestamp TS_MW + * @param User $user + * @return string|bool True or an error code + */ + private function storeStashValue( + $key, + Content $pstContent, + ParserOutput $parserOutput, + $timestamp, + User $user + ) { + // If an item is renewed, mind the cache TTL determined by config and parser functions. + // Put an upper limit on the TTL for sanity to avoid extreme template/file staleness. + $age = time() - wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() ); + $ttl = min( $parserOutput->getCacheExpiry() - $age, self::MAX_CACHE_TTL ); + // Avoid extremely stale user signature timestamps (T84843) + if ( $parserOutput->getFlag( 'user-signature' ) ) { + $ttl = min( $ttl, self::MAX_SIGNATURE_TTL ); + } + + if ( $ttl <= 0 ) { + return 'uncacheable'; // low TTL due to a tag, magic word, or signature? + } + + // Store what is actually needed and split the output into another key (T204742) + $parserOutputID = md5( $key ); + $stashInfo = (object)[ + 'pstContent' => $pstContent, + 'outputID' => $parserOutputID, + 'timestamp' => $timestamp, + 'edits' => $user->getEditCount() + ]; + + $ok = $this->cache->set( $key, $stashInfo, $ttl ); + if ( $ok ) { + $ok = $this->cache->set( + $this->getStashParserOutputKey( $parserOutputID ), + $parserOutput, + $ttl + ); + } + + if ( $ok ) { + // These blobs can waste slots in low cardinality memcached slabs + $this->pruneExcessStashedEntries( $user, $key ); + } + + return $ok ? true : 'store_error'; + } + + /** + * @param User $user + * @param string $newKey + */ + private function pruneExcessStashedEntries( User $user, $newKey ) { + $key = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) ); + + $keyList = $this->cache->get( $key ) ?: []; + if ( count( $keyList ) >= self::MAX_CACHE_RECENT ) { + $oldestKey = array_shift( $keyList ); + $this->cache->delete( $oldestKey ); + } + + $keyList[] = $newKey; + $this->cache->set( $key, $keyList, 2 * self::MAX_CACHE_TTL ); + } + + /** + * @param User $user + * @return int + */ + private function recentStashEntryCount( User $user ) { + $key = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) ); + + return count( $this->cache->get( $key ) ?: [] ); + } +} diff --git a/includes/api/ApiFormatFeedWrapper.php b/includes/api/ApiFormatFeedWrapper.php index 262eb1f760..40cc738fff 100644 --- a/includes/api/ApiFormatFeedWrapper.php +++ b/includes/api/ApiFormatFeedWrapper.php @@ -79,7 +79,9 @@ class ApiFormatFeedWrapper extends ApiFormatBase { $data = $this->getResult()->getResultData(); if ( isset( $data['_feed'] ) && isset( $data['_feeditems'] ) ) { - $data['_feed']->httpHeaders(); + /** @var ChannelFeed $feed */ + $feed = $data['_feed']; + $feed->httpHeaders(); } else { // Error has occurred, print something useful ApiBase::dieDebug( __METHOD__, 'Invalid feed class/item' ); @@ -94,6 +96,7 @@ class ApiFormatFeedWrapper extends ApiFormatBase { public function execute() { $data = $this->getResult()->getResultData(); if ( isset( $data['_feed'] ) && isset( $data['_feeditems'] ) ) { + /** @var ChannelFeed $feed */ $feed = $data['_feed']; $items = $data['_feeditems']; diff --git a/includes/api/ApiStashEdit.php b/includes/api/ApiStashEdit.php index 51845627a6..d6d15c75d4 100644 --- a/includes/api/ApiStashEdit.php +++ b/includes/api/ApiStashEdit.php @@ -18,9 +18,7 @@ * @file */ -use MediaWiki\Logger\LoggerFactory; use MediaWiki\MediaWikiServices; -use Wikimedia\ScopedCallback; /** * Prepare an edit in shared cache so that it can be reused on edit @@ -36,18 +34,6 @@ use Wikimedia\ScopedCallback; * @since 1.25 */ class ApiStashEdit extends ApiBase { - const ERROR_NONE = 'stashed'; - const ERROR_PARSE = 'error_parse'; - const ERROR_CACHE = 'error_cache'; - const ERROR_UNCACHEABLE = 'uncacheable'; - const ERROR_BUSY = 'busy'; - - const PRESUME_FRESH_TTL_SEC = 30; - const MAX_CACHE_TTL = 300; // 5 minutes - const MAX_SIGNATURE_TTL = 60; - - const MAX_CACHE_RECENT = 2; - public function execute() { $user = $this->getUser(); $params = $this->extractRequestParams(); @@ -56,7 +42,7 @@ class ApiStashEdit extends ApiBase { $this->dieWithError( 'apierror-botsnotsupported' ); } - $cache = ObjectCache::getLocalClusterInstance(); + $editStash = MediaWikiServices::getInstance()->getPageEditStash(); $page = $this->getTitleOrPageId( $params ); $title = $page->getTitle(); @@ -79,8 +65,7 @@ class ApiStashEdit extends ApiBase { if ( !preg_match( '/^[0-9a-f]{40}$/', $textHash ) ) { $this->dieWithError( 'apierror-stashedit-missingtext', 'missingtext' ); } - $textKey = $cache->makeKey( 'stashedit', 'text', $textHash ); - $text = $cache->get( $textKey ); + $text = $editStash->fetchInputText( $textHash ); if ( !is_string( $text ) ) { $this->dieWithError( 'apierror-stashedit-missingtext', 'missingtext' ); } @@ -145,9 +130,8 @@ class ApiStashEdit extends ApiBase { if ( $user->pingLimiter( 'stashedit' ) ) { $status = 'ratelimited'; } else { - $status = self::parseAndStash( $page, $content, $user, $params['summary'] ); - $textKey = $cache->makeKey( 'stashedit', 'text', $textHash ); - $cache->set( $textKey, $text, self::MAX_CACHE_TTL ); + $status = $editStash->parseAndCache( $page, $content, $user, $params['summary'] ); + $editStash->stashInputText( $text, $textHash ); } $stats = MediaWikiServices::getInstance()->getStatsdDataFactory(); @@ -169,324 +153,12 @@ class ApiStashEdit extends ApiBase { * @param string $summary Edit summary * @return string ApiStashEdit::ERROR_* constant * @since 1.25 + * @deprecated Since 1.34 */ - public static function parseAndStash( WikiPage $page, Content $content, User $user, $summary ) { - $logger = LoggerFactory::getInstance( 'StashEdit' ); - - $title = $page->getTitle(); - $key = self::getStashKey( $title, self::getContentHash( $content ), $user ); - $fname = __METHOD__; - - // Use the master DB to allow for fast blocking locks on the "save path" where this - // value might actually be used to complete a page edit. If the edit submission request - // happens before this edit stash requests finishes, then the submission will block until - // the stash request finishes parsing. For the lock acquisition below, there is not much - // need to duplicate parsing of the same content/user/summary bundle, so try to avoid - // blocking at all here. - $dbw = wfGetDB( DB_MASTER ); - if ( !$dbw->lock( $key, $fname, 0 ) ) { - // De-duplicate requests on the same key - return self::ERROR_BUSY; - } - /** @noinspection PhpUnusedLocalVariableInspection */ - $unlocker = new ScopedCallback( function () use ( $dbw, $key, $fname ) { - $dbw->unlock( $key, $fname ); - } ); - - $cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC; - - // Reuse any freshly build matching edit stash cache - $editInfo = self::getStashValue( $key ); - if ( $editInfo && wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) { - $alreadyCached = true; - } else { - $format = $content->getDefaultFormat(); - $editInfo = $page->prepareContentForEdit( $content, null, $user, $format, false ); - $alreadyCached = false; - } - - if ( $editInfo && $editInfo->output ) { - // Let extensions add ParserOutput metadata or warm other caches - Hooks::run( 'ParserOutputStashForEdit', - [ $page, $content, $editInfo->output, $summary, $user ] ); - - $titleStr = (string)$title; - if ( $alreadyCached ) { - $logger->debug( "Already cached parser output for key '{cachekey}' ('{title}').", - [ 'cachekey' => $key, 'title' => $titleStr ] ); - return self::ERROR_NONE; - } - - $code = self::storeStashValue( - $key, - $editInfo->pstContent, - $editInfo->output, - $editInfo->timestamp, - $user - ); - - if ( $code === true ) { - $logger->debug( "Cached parser output for key '{cachekey}' ('{title}').", - [ 'cachekey' => $key, 'title' => $titleStr ] ); - return self::ERROR_NONE; - } elseif ( $code === 'uncacheable' ) { - $logger->info( - "Uncacheable parser output for key '{cachekey}' ('{title}') [{code}].", - [ 'cachekey' => $key, 'title' => $titleStr, 'code' => $code ] ); - return self::ERROR_UNCACHEABLE; - } else { - $logger->error( "Failed to cache parser output for key '{cachekey}' ('{title}').", - [ 'cachekey' => $key, 'title' => $titleStr, 'code' => $code ] ); - return self::ERROR_CACHE; - } - } - - return self::ERROR_PARSE; - } - - /** - * Check that a prepared edit is in cache and still up-to-date - * - * This method blocks if the prepared edit is already being rendered, - * waiting until rendering finishes before doing final validity checks. - * - * The cache is rejected if template or file changes are detected. - * Note that foreign template or file transclusions are not checked. - * - * The result is a map (pstContent,output,timestamp) with fields - * extracted directly from WikiPage::prepareContentForEdit(). - * - * @param Title $title - * @param Content $content - * @param User $user User to get parser options from - * @return stdClass|bool Returns false on cache miss - */ - public static function checkCache( Title $title, Content $content, User $user ) { - if ( $user->isBot() ) { - return false; // bots never stash - don't pollute stats - } - - $logger = LoggerFactory::getInstance( 'StashEdit' ); - $stats = MediaWikiServices::getInstance()->getStatsdDataFactory(); - - $key = self::getStashKey( $title, self::getContentHash( $content ), $user ); - $editInfo = self::getStashValue( $key ); - if ( !is_object( $editInfo ) ) { - $start = microtime( true ); - // We ignore user aborts and keep parsing. Block on any prior parsing - // so as to use its results and make use of the time spent parsing. - // Skip this logic if there no master connection in case this method - // is called on an HTTP GET request for some reason. - $lb = MediaWikiServices::getInstance()->getDBLoadBalancer(); - $dbw = $lb->getAnyOpenConnection( $lb->getWriterIndex() ); - if ( $dbw && $dbw->lock( $key, __METHOD__, 30 ) ) { - $editInfo = self::getStashValue( $key ); - $dbw->unlock( $key, __METHOD__ ); - } - - $timeMs = 1000 * max( 0, microtime( true ) - $start ); - $stats->timing( 'editstash.lock_wait_time', $timeMs ); - } - - if ( !is_object( $editInfo ) || !$editInfo->output ) { - $stats->increment( 'editstash.cache_misses.no_stash' ); - $logger->debug( "Empty cache for key '$key' ('$title'); user '{$user->getName()}'." ); - return false; - } - - $age = time() - wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() ); - if ( $age <= self::PRESUME_FRESH_TTL_SEC ) { - // Assume nothing changed in this time - $stats->increment( 'editstash.cache_hits.presumed_fresh' ); - $logger->debug( "Timestamp-based cache hit for key '$key' (age: $age sec)." ); - } elseif ( isset( $editInfo->edits ) && $editInfo->edits === $user->getEditCount() ) { - // Logged-in user made no local upload/template edits in the meantime - $stats->increment( 'editstash.cache_hits.presumed_fresh' ); - $logger->debug( "Edit count based cache hit for key '$key' (age: $age sec)." ); - } elseif ( $user->isAnon() - && self::lastEditTime( $user ) < $editInfo->output->getCacheTime() - ) { - // Logged-out user made no local upload/template edits in the meantime - $stats->increment( 'editstash.cache_hits.presumed_fresh' ); - $logger->debug( "Edit check based cache hit for key '$key' (age: $age sec)." ); - } else { - // User may have changed included content - $editInfo = false; - } - - if ( !$editInfo ) { - $stats->increment( 'editstash.cache_misses.proven_stale' ); - $logger->info( "Stale cache for key '$key'; old key with outside edits. (age: $age sec)" ); - } elseif ( $editInfo->output->getFlag( 'vary-revision' ) ) { - // This can be used for the initial parse, e.g. for filters or doEditContent(), - // but a second parse will be triggered in doEditUpdates(). This is not optimal. - $logger->info( "Cache for key '$key' ('$title') has vary_revision." ); - } elseif ( $editInfo->output->getFlag( 'vary-revision-id' ) ) { - // Similar to the above if we didn't guess the ID correctly. - $logger->info( "Cache for key '$key' ('$title') has vary_revision_id." ); - } - - return $editInfo; - } - - /** - * @param User $user - * @return string|null TS_MW timestamp or null - */ - private static function lastEditTime( User $user ) { - $db = wfGetDB( DB_REPLICA ); - $actorQuery = ActorMigration::newMigration()->getWhere( $db, 'rc_user', $user, false ); - $time = $db->selectField( - [ 'recentchanges' ] + $actorQuery['tables'], - 'MAX(rc_timestamp)', - [ $actorQuery['conds'] ], - __METHOD__, - [], - $actorQuery['joins'] - ); - - return wfTimestampOrNull( TS_MW, $time ); - } - - /** - * Get hash of the content, factoring in model/format - * - * @param Content $content - * @return string - */ - private static function getContentHash( Content $content ) { - return sha1( implode( "\n", [ - $content->getModel(), - $content->getDefaultFormat(), - $content->serialize( $content->getDefaultFormat() ) - ] ) ); - } - - /** - * Get the temporary prepared edit stash key for a user - * - * This key can be used for caching prepared edits provided: - * - a) The $user was used for PST options - * - b) The parser output was made from the PST using cannonical matching options - * - * @param Title $title - * @param string $contentHash Result of getContentHash() - * @param User $user User to get parser options from - * @return string - */ - private static function getStashKey( Title $title, $contentHash, User $user ) { - return ObjectCache::getLocalClusterInstance()->makeKey( - 'stashed-edit-info', - md5( $title->getPrefixedDBkey() ), - // Account for the edit model/text - $contentHash, - // Account for user name related variables like signatures - md5( $user->getId() . "\n" . $user->getName() ) - ); - } - - /** - * @param string $uuid - * @return string - */ - private static function getStashParserOutputKey( $uuid ) { - return ObjectCache::getLocalClusterInstance()->makeKey( 'stashed-edit-output', $uuid ); - } - - /** - * @param string $key - * @return stdClass|bool Object map (pstContent,output,outputID,timestamp,edits) or false - */ - private static function getStashValue( $key ) { - $cache = ObjectCache::getLocalClusterInstance(); - - $stashInfo = $cache->get( $key ); - if ( !is_object( $stashInfo ) ) { - return false; - } - - $parserOutputKey = self::getStashParserOutputKey( $stashInfo->outputID ); - $parserOutput = $cache->get( $parserOutputKey ); - if ( $parserOutput instanceof ParserOutput ) { - $stashInfo->output = $parserOutput; - - return $stashInfo; - } - - return false; - } - - /** - * Build a value to store in memcached based on the PST content and parser output - * - * This makes a simple version of WikiPage::prepareContentForEdit() as stash info - * - * @param string $key - * @param Content $pstContent Pre-Save transformed content - * @param ParserOutput $parserOutput - * @param string $timestamp TS_MW - * @param User $user - * @return string|bool True or an error code - */ - private static function storeStashValue( - $key, Content $pstContent, ParserOutput $parserOutput, $timestamp, User $user - ) { - // If an item is renewed, mind the cache TTL determined by config and parser functions. - // Put an upper limit on the TTL for sanity to avoid extreme template/file staleness. - $age = time() - wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() ); - $ttl = min( $parserOutput->getCacheExpiry() - $age, self::MAX_CACHE_TTL ); - // Avoid extremely stale user signature timestamps (T84843) - if ( $parserOutput->getFlag( 'user-signature' ) ) { - $ttl = min( $ttl, self::MAX_SIGNATURE_TTL ); - } - - if ( $ttl <= 0 ) { - return 'uncacheable'; // low TTL due to a tag, magic word, or signature? - } - - // Store what is actually needed and split the output into another key (T204742) - $parseroutputID = md5( $key ); - $stashInfo = (object)[ - 'pstContent' => $pstContent, - 'outputID' => $parseroutputID, - 'timestamp' => $timestamp, - 'edits' => $user->getEditCount() - ]; - - $cache = ObjectCache::getLocalClusterInstance(); - $ok = $cache->set( $key, $stashInfo, $ttl ); - if ( $ok ) { - $ok = $cache->set( - self::getStashParserOutputKey( $parseroutputID ), - $parserOutput, - $ttl - ); - } - - if ( $ok ) { - // These blobs can waste slots in low cardinality memcached slabs - self::pruneExcessStashedEntries( $cache, $user, $key ); - } - - return $ok ? true : 'store_error'; - } - - /** - * @param BagOStuff $cache - * @param User $user - * @param string $newKey - */ - private static function pruneExcessStashedEntries( BagOStuff $cache, User $user, $newKey ) { - $key = $cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) ); - - $keyList = $cache->get( $key ) ?: []; - if ( count( $keyList ) >= self::MAX_CACHE_RECENT ) { - $oldestKey = array_shift( $keyList ); - $cache->delete( $oldestKey ); - } + public function parseAndStash( WikiPage $page, Content $content, User $user, $summary ) { + $editStash = MediaWikiServices::getInstance()->getPageEditStash(); - $keyList[] = $newKey; - $cache->set( $key, $keyList, 2 * self::MAX_CACHE_TTL ); + return $editStash->parseAndCache( $page, $content, $user, $summary ); } public function getAllowedParams() { diff --git a/maintenance/runJobs.php b/maintenance/runJobs.php index 802a114fca..9354e4fb6c 100644 --- a/maintenance/runJobs.php +++ b/maintenance/runJobs.php @@ -21,13 +21,16 @@ * @ingroup Maintenance */ +if ( !defined( 'MEDIAWIKI' ) ) { + // So extensions (and other code) can check whether they're running in job mode. + // This is not defined if this script is included from installer/updater or phpunit. + define( 'MEDIAWIKI_JOB_RUNNER', true ); +} + require_once __DIR__ . '/Maintenance.php'; use MediaWiki\Logger\LoggerFactory; -// So extensions (and other code) can check whether they're running in job mode -define( 'MEDIAWIKI_JOB_RUNNER', true ); - /** * Maintenance script that runs pending jobs. * diff --git a/tests/phpunit/includes/api/ApiStashEditTest.php b/tests/phpunit/includes/api/ApiStashEditTest.php index a63f8aa1dc..c6ed8a7879 100644 --- a/tests/phpunit/includes/api/ApiStashEditTest.php +++ b/tests/phpunit/includes/api/ApiStashEditTest.php @@ -1,9 +1,13 @@ setService( 'PageEditStash', new PageEditStash( + new HashBagOStuff( [] ), + MediaWikiServices::getInstance()->getDBLoadBalancer(), + new NullLogger(), + new NullStatsdDataFactory(), + PageEditStash::INITIATOR_USER + ) ); + // Clear rate-limiting cache between tests $this->setMwGlobals( 'wgMainCacheType', 'hash' ); } + public function tearDown() { + parent::tearDown(); + } + /** * Make a stashedit API call with suitable default parameters * @@ -24,6 +37,7 @@ class ApiStashEditTest extends ApiTestCase { * sensible defaults filled in. To make a parameter actually not passed, set to null. * @param User $user User to do the request * @param string $expectedResult 'stashed', 'editconflict' + * @return array */ protected function doStash( array $params = [], User $user = null, $expectedResult = 'stashed' @@ -88,13 +102,11 @@ class ApiStashEditTest extends ApiTestCase { * @return string */ protected function getStashedText( $hash ) { - $cache = ObjectCache::getLocalClusterInstance(); - $key = $cache->makeKey( 'stashedit', 'text', $hash ); - return $cache->get( $key ); + return MediaWikiServices::getInstance()->getPageEditStash()->fetchInputText( $hash ); } /** - * Return a key that can be passed to the cache to obtain a PreparedEdit object. + * Return a key that can be passed to the cache to obtain a stashed edit object. * * @param string $title Title of page * @param string Content $text Content of edit @@ -107,8 +119,10 @@ class ApiStashEditTest extends ApiTestCase { if ( !$user ) { $user = $this->getTestSysop()->getUser(); } - $wrapper = TestingAccessWrapper::newFromClass( ApiStashEdit::class ); - return $wrapper->getStashKey( $titleObj, $wrapper->getContentHash( $content ), $user ); + $editStash = TestingAccessWrapper::newFromObject( + MediaWikiServices::getInstance()->getPageEditStash() ); + + return $editStash->getStashKey( $titleObj, $editStash->getContentHash( $content ), $user ); } public function testBasicEdit() { @@ -263,15 +277,15 @@ class ApiStashEditTest extends ApiTestCase { } /** - * Shortcut for calling ApiStashEdit::checkCache() without having to create Titles and Contents - * in every test. + * Shortcut for calling PageStashEdit::checkCache() without + * having to create Titles and Contents in every test. * * @param User $user * @param string $text The text of the article - * @return stdClass|bool Return value of ApiStashEdit::checkCache(), false if not in cache + * @return stdClass|bool Return value of PageStashEdit::checkCache(), false if not in cache */ protected function doCheckCache( User $user, $text = 'Content' ) { - return ApiStashEdit::checkCache( + return MediaWikiServices::getInstance()->getPageEditStash()->checkCache( Title::newFromText( __CLASS__ ), new WikitextContent( $text ), $user @@ -305,11 +319,11 @@ class ApiStashEditTest extends ApiTestCase { } public function testCheckCacheAnon() { - $user = new User(); + $user = User::newFromName( '174.5.4.6', false ); $this->doStash( [], $user ); - $this->assertInstanceOf( stdClass::class, $this->docheckCache( $user ) ); + $this->assertInstanceOf( stdClass::class, $this->doCheckCache( $user ) ); } /** @@ -320,7 +334,7 @@ class ApiStashEditTest extends ApiTestCase { * @param int $howOld How many seconds is "old" (we actually set it one second before this) */ protected function doStashOld( - User $user, $text = 'Content', $howOld = ApiStashEdit::PRESUME_FRESH_TTL_SEC + User $user, $text = 'Content', $howOld = PageEditStash::PRESUME_FRESH_TTL_SEC ) { $this->doStash( [ 'text' => $text ], $user ); @@ -328,7 +342,9 @@ class ApiStashEditTest extends ApiTestCase { // fake the time? $key = $this->getStashKey( __CLASS__, $text, $user ); - $cache = ObjectCache::getLocalClusterInstance(); + $editStash = TestingAccessWrapper::newFromObject( + MediaWikiServices::getInstance()->getPageEditStash() ); + $cache = $editStash->cache; $editInfo = $cache->get( $key ); $outputKey = $cache->makeKey( 'stashed-edit-output', $editInfo->outputID ); @@ -350,7 +366,7 @@ class ApiStashEditTest extends ApiTestCase { public function testCheckCacheOldNoEditsAnon() { // Specify a made-up IP address to make sure no edits are lying around - $user = User::newFromName( '192.0.2.77', false ); + $user = User::newFromName( '172.0.2.77', false ); $this->doStashOld( $user ); @@ -379,7 +395,9 @@ class ApiStashEditTest extends ApiTestCase { public function testSignatureTtl( $text, $ttl ) { $this->doStash( [ 'text' => $text ] ); - $cache = ObjectCache::getLocalClusterInstance(); + $editStash = TestingAccessWrapper::newFromObject( + MediaWikiServices::getInstance()->getPageEditStash() ); + $cache = $editStash->cache; $key = $this->getStashKey( __CLASS__, $text ); $wrapper = TestingAccessWrapper::newFromObject( $cache ); @@ -389,9 +407,9 @@ class ApiStashEditTest extends ApiTestCase { public function signatureProvider() { return [ - '~~~' => [ '~~~', ApiStashEdit::MAX_SIGNATURE_TTL ], - '~~~~' => [ '~~~~', ApiStashEdit::MAX_SIGNATURE_TTL ], - '~~~~~' => [ '~~~~~', ApiStashEdit::MAX_SIGNATURE_TTL ], + '~~~' => [ '~~~', PageEditStash::MAX_SIGNATURE_TTL ], + '~~~~' => [ '~~~~', PageEditStash::MAX_SIGNATURE_TTL ], + '~~~~~' => [ '~~~~~', PageEditStash::MAX_SIGNATURE_TTL ], ]; } -- 2.20.1