Optimize summary-based extension edit stash caches
authorAaron Schulz <aschulz@wikimedia.org>
Sat, 30 Jul 2016 06:42:05 +0000 (23:42 -0700)
committerAaron Schulz <aschulz@wikimedia.org>
Thu, 11 Aug 2016 00:50:19 +0000 (17:50 -0700)
* Send stash requests when the summary changes, so that things like
  AbuseFilter caching have a higher hit rate.
* Make the backend API skip parsing if a fresh cache is already present.
  This makes requests for summary-only changes much faster and more likely
  to finish in time.
* Avoid sending the full text if only the summary changed since the
  last successful stash. This works via an optional stashedtexthash
  parameter to the API.
* Also always apply the lock in parseAndStash(), even for VE.

Change-Id: I9bfd74cf05411853b675c6f54ff5d8934bcfc54c

includes/api/ApiStashEdit.php
includes/api/i18n/en.json
includes/api/i18n/qqq.json
resources/src/mediawiki.action/mediawiki.action.edit.stash.js

index 446a98c..297c0fb 100644 (file)
@@ -39,6 +39,7 @@ class ApiStashEdit extends ApiBase {
        const ERROR_PARSE = 'error_parse';
        const ERROR_CACHE = 'error_cache';
        const ERROR_UNCACHEABLE = 'uncacheable';
+       const ERROR_BUSY = 'busy';
 
        const PRESUME_FRESH_TTL_SEC = 30;
        const MAX_CACHE_TTL = 300; // 5 minutes
@@ -51,6 +52,7 @@ class ApiStashEdit extends ApiBase {
                        $this->dieUsage( 'This interface is not supported for bots', 'botsnotsupported' );
                }
 
+               $cache = ObjectCache::getLocalClusterInstance();
                $page = $this->getTitleOrPageId( $params );
                $title = $page->getTitle();
 
@@ -60,8 +62,23 @@ class ApiStashEdit extends ApiBase {
                        $this->dieUsage( 'Unsupported content model/format', 'badmodelformat' );
                }
 
-               // Trim and fix newlines so the key SHA1's match (see RequestContext::getText())
-               $text = rtrim( str_replace( "\r\n", "\n", $params['text'] ) );
+               if ( strlen( $params['stashedtexthash'] ) ) {
+                       // Load from cache since the client indicates the text is the same as last stash
+                       $textHash = $params['stashedtexthash'];
+                       $textKey = $cache->makeKey( 'stashedit', 'text', $textHash );
+                       $text = $cache->get( $textKey );
+                       if ( !is_string( $text ) ) {
+                               $this->dieUsage( 'No stashed text found with the given hash', 'missingtext' );
+                       }
+               } elseif ( $params['text'] !== null ) {
+                       // Trim and fix newlines so the key SHA1's match (see WebRequest::getText())
+                       $text = rtrim( str_replace( "\r\n", "\n", $params['text'] ) );
+                       $textHash = sha1( $text );
+               } else {
+                       $this->dieUsage(
+                               'The text or stashedtexthash parameter must be given', 'missingtextparam' );
+               }
+
                $textContent = ContentHandler::makeContent(
                        $text, $title, $params['contentmodel'], $params['contentformat'] );
 
@@ -113,24 +130,24 @@ class ApiStashEdit extends ApiBase {
                // The user will abort the AJAX request by pressing "save", so ignore that
                ignore_user_abort( true );
 
-               // Use the master DB for fast blocking locks
-               $dbw = wfGetDB( DB_MASTER );
-
-               // Get a key based on the source text, format, and user preferences
-               $key = self::getStashKey( $title, $content, $user );
-               // De-duplicate requests on the same key
                if ( $user->pingLimiter( 'stashedit' ) ) {
                        $status = 'ratelimited';
-               } elseif ( $dbw->lock( $key, __METHOD__, 1 ) ) {
-                       $status = self::parseAndStash( $page, $content, $user, $params['summary'] );
-                       $dbw->unlock( $key, __METHOD__ );
                } else {
-                       $status = 'busy';
+                       $status = self::parseAndStash( $page, $content, $user, $params['summary'] );
+                       $textKey = $cache->makeKey( 'stashedit', 'text', $textHash );
+                       $cache->set( $textKey, $text, self::MAX_CACHE_TTL );
                }
 
                $this->getStats()->increment( "editstash.cache_stores.$status" );
 
-               $this->getResult()->addValue( null, $this->getModuleName(), [ 'status' => $status ] );
+               $this->getResult()->addValue(
+                       null,
+                       $this->getModuleName(),
+                       [
+                               'status' => $status,
+                               'texthash' => $textHash
+                       ]
+               );
        }
 
        /**
@@ -145,17 +162,41 @@ class ApiStashEdit extends ApiBase {
                $cache = ObjectCache::getLocalClusterInstance();
                $logger = LoggerFactory::getInstance( 'StashEdit' );
 
-               $format = $content->getDefaultFormat();
-               $editInfo = $page->prepareContentForEdit( $content, null, $user, $format, false );
                $title = $page->getTitle();
+               $key = self::getStashKey( $title, self::getContentHash( $content ), $user );
 
-               if ( $editInfo && $editInfo->output ) {
-                       $key = self::getStashKey( $title, $content, $user );
+               // Use the master DB for fast blocking locks
+               $dbw = wfGetDB( DB_MASTER );
+               if ( !$dbw->lock( $key, __METHOD__, 1 ) ) {
+                       // De-duplicate requests on the same key
+                       return self::ERROR_BUSY;
+               }
+               $unlocker = new ScopedCallback( function () use ( $dbw, $key ) {
+                       $dbw->unlock( $key, __METHOD__ );
+               } );
+
+               $cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;
+
+               // Reuse any freshly build matching edit stash cache
+               $editInfo = $cache->get( $key );
+               if ( $editInfo && wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
+                       $alreadyCached = true;
+               } else {
+                       $format = $content->getDefaultFormat();
+                       $editInfo = $page->prepareContentForEdit( $content, null, $user, $format, false );
+                       $alreadyCached = false;
+               }
 
+               if ( $editInfo && $editInfo->output ) {
                        // Let extensions add ParserOutput metadata or warm other caches
                        Hooks::run( 'ParserOutputStashForEdit',
                                [ $page, $content, $editInfo->output, $summary, $user ] );
 
+                       if ( $alreadyCached ) {
+                               $logger->debug( "Already cached parser output for key '$key' ('$title')." );
+                               return self::ERROR_NONE;
+                       }
+
                        list( $stashInfo, $ttl, $code ) = self::buildStashValue(
                                $editInfo->pstContent,
                                $editInfo->output,
@@ -207,7 +248,7 @@ class ApiStashEdit extends ApiBase {
                $logger = LoggerFactory::getInstance( 'StashEdit' );
                $stats = RequestContext::getMain()->getStats();
 
-               $key = self::getStashKey( $title, $content, $user );
+               $key = self::getStashKey( $title, self::getContentHash( $content ), $user );
                $editInfo = $cache->get( $key );
                if ( !is_object( $editInfo ) ) {
                        $start = microtime( true );
@@ -282,6 +323,20 @@ class ApiStashEdit extends ApiBase {
                return wfTimestampOrNull( TS_MW, $time );
        }
 
+       /**
+        * Get hash of the content, factoring in model/format
+        *
+        * @param Content $content
+        * @return string
+        */
+       private static function getContentHash( Content $content ) {
+               return sha1( implode( "\n", [
+                       $content->getModel(),
+                       $content->getDefaultFormat(),
+                       $content->serialize( $content->getDefaultFormat() )
+               ] ) );
+       }
+
        /**
         * Get the temporary prepared edit stash key for a user
         *
@@ -290,22 +345,19 @@ class ApiStashEdit extends ApiBase {
         *   - b) The parser output was made from the PST using cannonical matching options
         *
         * @param Title $title
-        * @param Content $content
+        * @param string $contentHash Result of getContentHash()
         * @param User $user User to get parser options from
         * @return string
         */
-       private static function getStashKey( Title $title, Content $content, User $user ) {
-               $hash = sha1( implode( ':', [
+       private static function getStashKey( Title $title, $contentHash, User $user ) {
+               return ObjectCache::getLocalClusterInstance()->makeKey(
+                       'prepared-edit',
+                       md5( $title->getPrefixedDBkey() ),
                        // Account for the edit model/text
-                       $content->getModel(),
-                       $content->getDefaultFormat(),
-                       sha1( $content->serialize( $content->getDefaultFormat() ) ),
+                       $contentHash,
                        // Account for user name related variables like signatures
-                       $user->getId(),
-                       md5( $user->getName() )
-               ] ) );
-
-               return wfMemcKey( 'prepared-edit', md5( $title->getPrefixedDBkey() ), $hash );
+                       md5( $user->getId() . "\n" . $user->getName() )
+               );
        }
 
        /**
@@ -313,7 +365,7 @@ class ApiStashEdit extends ApiBase {
         *
         * This makes a simple version of WikiPage::prepareContentForEdit() as stash info
         *
-        * @param Content $pstContent
+        * @param Content $pstContent Pre-Save transformed content
         * @param ParserOutput $parserOutput
         * @param string $timestamp TS_MW
         * @param User $user
@@ -355,7 +407,11 @@ class ApiStashEdit extends ApiBase {
                        ],
                        'text' => [
                                ApiBase::PARAM_TYPE => 'text',
-                               ApiBase::PARAM_REQUIRED => true
+                               ApiBase::PARAM_DFLT => null
+                       ],
+                       'stashedtexthash' => [
+                               ApiBase::PARAM_TYPE => 'string',
+                               ApiBase::PARAM_DFLT => null
                        ],
                        'summary' => [
                                ApiBase::PARAM_TYPE => 'string',
index 7f30ef8..bd3ca73 100644 (file)
        "apihelp-stashedit-param-section": "Section number. <kbd>0</kbd> for the top section, <kbd>new</kbd> for a new section.",
        "apihelp-stashedit-param-sectiontitle": "The title for a new section.",
        "apihelp-stashedit-param-text": "Page content.",
+       "apihelp-stashedit-param-stashedtexthash": "Page content hash from a prior stash to use instead.",
        "apihelp-stashedit-param-contentmodel": "Content model of the new content.",
        "apihelp-stashedit-param-contentformat": "Content serialization format used for the input text.",
        "apihelp-stashedit-param-baserevid": "Revision ID of the base revision.",
index 6c47aa4..9167f94 100644 (file)
        "apihelp-stashedit-param-section": "{{doc-apihelp-param|stashedit|section}}",
        "apihelp-stashedit-param-sectiontitle": "{{doc-apihelp-param|stashedit|sectiontitle}}",
        "apihelp-stashedit-param-text": "{{doc-apihelp-param|stashedit|text}}",
+       "apihelp-stashedit-param-stashedtexthash": "{{doc-apihelp-param|stashedit|stashedtexthash}}",
        "apihelp-stashedit-param-contentmodel": "{{doc-apihelp-param|stashedit|contentmodel}}",
        "apihelp-stashedit-param-contentformat": "{{doc-apihelp-param|stashedit|contentformat}}",
        "apihelp-stashedit-param-baserevid": "{{doc-apihelp-param|stashedit|baserevid}}",
index 07c7d76..704287a 100644 (file)
                        format = $form.find( '[name=format]' ).val(),
                        revId = $form.find( '[name=parentRevId]' ).val(),
                        lastText = $text.textSelection( 'getContents' ),
-                       timer = null;
+                       lastSummary = $summary.textSelection( 'getContents' ),
+                       lastTextHash = null,
+                       lastPriority = 0,
+                       origSummary = lastSummary,
+                       timer = null,
+                       PRIORITY_LOW = 1,
+                       PRIORITY_HIGH = 2;
 
                // Send a request to stash the edit to the API.
                // If a request is in progress, abort it since its payload is stale and the API
                // may limit concurrent stash parses.
-               function stashEdit() {
+               function stashEdit( priority, hashForReuse ) {
                        if ( pending ) {
                                pending.abort();
+                               pending = null;
                        }
 
                        api.getToken( 'csrf' ).then( function ( token ) {
+                               // If applicable, just send the hash key to reuse the last text server-side
+                               var req, params;
+
+                               // Update tracking of the last text/summary sent out
                                lastText = $text.textSelection( 'getContents' );
+                               lastSummary = $summary.textSelection( 'getContents' );
+                               lastPriority = priority;
+                               lastTextHash = null; // "failed" until proven successful
 
-                               pending = api.post( {
+                               params = {
                                        action: 'stashedit',
                                        token: token,
                                        title: mw.config.get( 'wgPageName' ),
                                        section: section,
                                        sectiontitle: '',
-                                       text: lastText,
-                                       summary: $summary.textSelection( 'getContents' ),
+                                       summary: lastSummary,
                                        contentmodel: model,
                                        contentformat: format,
                                        baserevid: revId
+                               };
+                               if ( hashForReuse ) {
+                                       params.stashedtexthash = hashForReuse;
+                               } else {
+                                       params.text = lastText;
+                               }
+
+                               req = api.post( params );
+                               pending = req;
+                               req.then( function ( data ) {
+                                       if ( req === pending ) {
+                                               pending = null;
+                                       }
+                                       if ( data.stashedit && data.stashedit.texthash ) {
+                                               lastTextHash = data.stashedit.texthash;
+                                       }
                                } );
                        } );
                }
 
                // Check if edit body text changed since the last stashEdit() call or if no edit
                // stash calls have yet been made
-               function isChanged() {
+               function isTextChanged() {
                        var newText = $text.textSelection( 'getContents' );
                        return newText !== lastText;
                }
 
+               // Check if summary changed since the last stashEdit() call or if no edit
+               // stash calls have yet been made
+               function isSummaryChanged() {
+                       var newSummary = $summary.textSelection( 'getContents' );
+                       return newSummary !== lastSummary;
+               }
+
                function onEditorIdle() {
-                       if ( !isChanged() ) {
+                       var textChanged = isTextChanged(),
+                               summaryChanged = isSummaryChanged(),
+                               priority = textChanged ? PRIORITY_HIGH : PRIORITY_LOW;
+
+                       if ( !textChanged && !summaryChanged ) {
+                               return; // nothing to do
+                       }
+
+                       if ( pending && lastPriority > priority ) {
+                               // Stash requests for summary changes should wait on pending text change stashes
+                               pending.then( onEditorIdle );
                                return;
                        }
 
-                       stashEdit();
+                       stashEdit( priority, textChanged ? null : lastTextHash );
                }
 
-               function onTextKeyUp( e ) {
+               function onKeyUp( e ) {
                        // Ignore keystrokes that don't modify text, like cursor movements.
                        // See <http://www.javascripter.net/faq/keycodes.htm> and
                        // <http://www.quirksmode.org/js/keys.html>. We don't have to be exhaustive,
                        timer = setTimeout( onEditorIdle, idleTimeout );
                }
 
+               function onSummaryFocus() {
+                       // Summary typing is usually near the end of the workflow and involves less pausing.
+                       // Re-stash frequently in hopes of capturing the final summary before submission.
+                       idleTimeout = 1000;
+                       // Stash now since the text is likely the final version. The re-stashes based on the
+                       // summary are targeted at caching edit checks that need the final summary.
+                       onEditorIdle();
+               }
+
+               function onTextFocus() {
+                       // User returned to the text field...
+                       if ( $summary.textSelection( 'getContents' ) === origSummary ) {
+                               idleTimeout = 3000; // no summary yet; reset stash rate to default
+                       }
+               }
+
                function onFormLoaded() {
                        if (
                                // Reverts may involve use (undo) links; stash as they review the diff.
                                // probably save the page soon
                                || $.inArray( $form.find( '#mw-edit-mode' ).val(), [ 'preview', 'diff' ] ) > -1
                        ) {
-                               stashEdit();
+                               stashEdit( PRIORITY_HIGH, null );
                        }
                }
 
-               // We don't attempt to stash new section edits because in such cases
-               // the parser output varies on the edit summary (since it determines
-               // the new section's name).
+               // We don't attempt to stash new section edits because in such cases the parser output
+               // varies on the edit summary (since it determines the new section's name).
                if ( $form.find( 'input[name=wpSection]' ).val() === 'new' ) {
                        return;
                }
 
-               $text.on( { change: onEditorIdle, keyup: onTextKeyUp } );
-               $summary.on( { focus: onEditorIdle } );
+               $text.on( {
+                       change: onEditorIdle,
+                       keyup: onKeyUp,
+                       focus: onTextFocus
+               } );
+               $summary.on( {
+                       focus: onSummaryFocus,
+                       focusout: onEditorIdle,
+                       keyup: onKeyUp
+               } );
                onFormLoaded();
-
        } );
 }( mediaWiki, jQuery ) );