Only call ParserOptions::getNumberHeadings() when needed for the benefit of cache...

[lhc/web/wiklou.git] / includes / parser / Parser.php
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php

index 3061a2d..fa84d28 100644 (file)
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -55,6 +55,12 @@ class Parser {
          */
         const VERSION = '1.6.4';
  
+       /**
+        * Update this version number when the output of serialiseHalfParsedText()
+        * changes in an incompatible way
+        */
+       const HALF_PARSED_VERSION = 2;
+
         # Flags for Parser::setFunctionHook
         # Also available as global constants from Defines.php
         const SFH_NO_HASH = 1;
@@ -106,20 +112,37 @@ class Parser {
         var $mVariables, $mSubstWords; # Initialised by initialiseVariables()
         var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor
  
-
         # Cleared with clearState():
-       var $mOutput, $mAutonumber, $mDTopen, $mStripState;
+       var $mOutput, $mAutonumber, $mDTopen;
+
+       /**
+        * @var StripState
+        */
+       var $mStripState;
+
         var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
         var $mLinkHolders, $mLinkID;
         var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
         var $mTplExpandCache; # empty-frame expansion cache
         var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
         var $mExpensiveFunctionCount; # number of expensive parser function calls
+
+       /**
+        * @var User
+        */
         var $mUser; # User object; only used when doing pre-save transform
  
         # Temporary
         # These are variables reset at least once per parse regardless of $clearState
-       var $mOptions;      # ParserOptions object
+
+       /**
+        * @var ParserOptions
+        */
+       var $mOptions;
+
+       /**
+        * @var Title
+        */
         var $mTitle;        # Title context, used for self-link rendering and similar things
         var $mOutputType;   # Output type, one of the OT_xxx constants
         var $ot;            # Shortcut alias, see setOutputType()
@@ -199,7 +222,6 @@ class Parser {
                 $this->mLastSection = '';
                 $this->mDTopen = false;
                 $this->mIncludeCount = array();
-               $this->mStripState = new StripState;
                 $this->mArgStack = false;
                 $this->mInPre = false;
                 $this->mLinkHolders = new LinkHolderArray( $this );
@@ -222,6 +244,7 @@ class Parser {
                 # $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
                 # Changed to \x7f to allow XML double-parsing -- TS
                 $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
+               $this->mStripState = new StripState( $this->mUniqPrefix );
  
  
                 # Clear these on every parse, bug 4549
@@ -271,12 +294,7 @@ class Parser {
                 wfProfileIn( __METHOD__ );
                 wfProfileIn( $fname );
  
-               $this->mOptions = $options;
-               if ( $clearState ) {
-                       $this->clearState();
-               }
-
-               $this->setTitle( $title ); # Page title has to be set for the pre-processor
+               $this->startParse( $title, $options, self::OT_HTML, $clearState );
  
                 $oldRevisionId = $this->mRevisionId;
                 $oldRevisionObject = $this->mRevisionObject;
@@ -288,7 +306,7 @@ class Parser {
                         $this->mRevisionTimestamp = null;
                         $this->mRevisionUser = null;
                 }
-               $this->setOutputType( self::OT_HTML );
+
                 wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
                 # No more strip!
                 wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
@@ -329,18 +347,11 @@ class Parser {
                 }
  
                 /**
-                * A page get its title converted except:
-                * a) Language conversion is globally disabled
-                * b) Title convert is globally disabled
-                * c) The page is a redirect page
-                * d) User request with a "linkconvert" set to "no"
-                * e) A "nocontentconvert" magic word has been set
-                * f) A "notitleconvert" magic word has been set
-                * g) User sets "noconvertlink" in his/her preference
-                *
-                * Note that if a user tries to set a title in a conversion
-                * rule but content conversion was not done, then the parser
-                * won't pick it up.  This is probably expected behavior.
+                * A converted title will be provided in the output object if title and
+                * content conversion are enabled, the article text does not contain
+                * a conversion-suppressing double-underscore tag, and no
+                * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
+                * automatic link conversion.
                  */
                 if ( !( $wgDisableLangConversion
                                 || $wgDisableTitleConversion
@@ -361,23 +372,7 @@ class Parser {
  
                 wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
  
-//!JF Move to its own function
-
-               $uniq_prefix = $this->mUniqPrefix;
-               $matches = array();
-               $elements = array_keys( $this->mTransparentTagHooks );
-               $text = $this->extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
-
-               foreach ( $matches as $marker => $data ) {
-                       list( $element, $content, $params, $tag ) = $data;
-                       $tagName = strtolower( $element );
-                       if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
-                               $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
-                       } else {
-                               $output = $tag;
-                       }
-                       $this->mStripState->general->setPair( $marker, $output );
-               }
+               $text = $this->replaceTransparentTags( $text );
                 $text = $this->mStripState->unstripGeneral( $text );
  
                 $text = Sanitizer::normalizeCharReferences( $text );
@@ -465,12 +460,9 @@ class Parser {
          * Expand templates and variables in the text, producing valid, static wikitext.
          * Also removes comments.
          */
-       function preprocess( $text, $title, $options, $revid = null ) {
+       function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) {
                 wfProfileIn( __METHOD__ );
-               $this->mOptions = $options;
-               $this->clearState();
-               $this->setOutputType( self::OT_PREPROCESS );
-               $this->setTitle( $title );
+               $this->startParse( $title, $options, self::OT_PREPROCESS, true );
                 if ( $revid !== null ) {
                         $this->mRevisionId = $revid;
                 }
@@ -488,25 +480,23 @@ class Parser {
          * <noinclude>, <includeonly> etc. are parsed as for template transclusion,
          * comments, templates, arguments, tags hooks and parser functions are untouched.
          */
-       public function getPreloadText( $text, $title, $options ) {
+       public function getPreloadText( $text, Title $title, ParserOptions $options ) {
                 # Parser (re)initialisation
-               $this->mOptions = $options;
-               $this->clearState();
-               $this->setOutputType( self::OT_PLAIN );
-               $this->setTitle( $title );
+               $this->startParse( $title, $options, self::OT_PLAIN, true );
  
                 $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
                 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
-               return $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
+               $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
+               $text = $this->mStripState->unstripBoth( $text );
+               return $text;
         }
  
         /**
          * Get a random string
          *
-        * @private
          * @static
          */
-       static private function getRandomString() {
+       static public function getRandomString() {
                 return dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
         }
  
@@ -632,6 +622,13 @@ class Parser {
                 return $this->mLinkID++;
         }
  
+       function setLinkID( $id ) {
+               $this->mLinkID = $id;
+       }
+
+       /**
+        * @return Language
+        */
         function getFunctionLang() {
                 global $wgLang, $wgContLang;
  
@@ -802,7 +799,7 @@ class Parser {
         function insertStripItem( $text ) {
                 $rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
                 $this->mMarkerIndex++;
-               $this->mStripState->general->setPair( $rnd, $text );
+               $this->mStripState->addGeneral( $rnd, $text );
                 return $rnd;
         }
  
@@ -1129,10 +1126,9 @@ class Parser {
                                 throw new MWException( __METHOD__.': unrecognised match type "' .
                                         substr( $m[0], 0, 20 ) . '"' );
                         }
-                       $url = wfMsgForContent( $urlmsg, $id);
+                       $url = wfMsgForContent( $urlmsg, $id );
                         $sk = $this->mOptions->getSkin( $this->mTitle );
-                       $la = $sk->getExternalLinkAttributes( "external $CssClass" );
-                       return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
+                       return $sk->makeExternalLink( $url, "{$keyword} {$id}", true, $CssClass );
                 } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
                         # ISBN
                         $isbn = $m[5];
@@ -2467,6 +2463,7 @@ class Parser {
                 }
                 if ( $stack > 0 ) {
                         wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" );
+                       wfProfileOut( __METHOD__ );
                         return false;
                 }
                 wfProfileOut( __METHOD__ );
@@ -2563,25 +2560,25 @@ class Parser {
                                 $value = wfEscapeWikiText( $this->mTitle->getText() );
                                 break;
                         case 'pagenamee':
-                               $value = $this->mTitle->getPartialURL();
+                               $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
                                 break;
                         case 'fullpagename':
                                 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
                                 break;
                         case 'fullpagenamee':
-                               $value = $this->mTitle->getPrefixedURL();
+                               $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
                                 break;
                         case 'subpagename':
                                 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
                                 break;
                         case 'subpagenamee':
-                               $value = $this->mTitle->getSubpageUrlForm();
+                               $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
                                 break;
                         case 'basepagename':
                                 $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
                                 break;
                         case 'basepagenamee':
-                               $value = wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) );
+                               $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) );
                                 break;
                         case 'talkpagename':
                                 if ( $this->mTitle->canTalk() ) {
@@ -2594,7 +2591,7 @@ class Parser {
                         case 'talkpagenamee':
                                 if ( $this->mTitle->canTalk() ) {
                                         $talkPage = $this->mTitle->getTalkPage();
-                                       $value = $talkPage->getPrefixedUrl();
+                                       $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() );
                                 } else {
                                         $value = '';
                                 }
@@ -2605,7 +2602,7 @@ class Parser {
                                 break;
                         case 'subjectpagenamee':
                                 $subjPage = $this->mTitle->getSubjectPage();
-                               $value = $subjPage->getPrefixedUrl();
+                               $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() );
                                 break;
                         case 'revisionid':
                                 # Let the edit saving system know we should parse the page
@@ -3362,12 +3359,12 @@ class Parser {
                                 $text = $rev->getText();
                         } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
                                 global $wgContLang;
-                               $message = $wgContLang->lcfirst( $title->getText() );
-                               $text = wfMsgForContentNoTrans( $message );
-                               if ( wfEmptyMsg( $message, $text ) ) {
+                               $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
+                               if ( !$message->exists() ) {
                                         $text = false;
                                         break;
                                 }
+                               $text = $message->plain();
                         } else {
                                 break;
                         }
@@ -3551,9 +3548,9 @@ class Parser {
                 if ( $markerType === 'none' ) {
                         return $output;
                 } elseif ( $markerType === 'nowiki' ) {
-                       $this->mStripState->nowiki->setPair( $marker, $output );
+                       $this->mStripState->addNoWiki( $marker, $output );
                 } elseif ( $markerType === 'general' ) {
-                       $this->mStripState->general->setPair( $marker, $output );
+                       $this->mStripState->addGeneral( $marker, $output );
                 } else {
                         throw new MWException( __METHOD__.': invalid marker type' );
                 }
@@ -3688,8 +3685,6 @@ class Parser {
         function formatHeadings( $text, $origText, $isMain=true ) {
                 global $wgMaxTocLevel, $wgContLang, $wgHtml5, $wgExperimentalHtmlIds;
  
-               $doNumberHeadings = $this->mOptions->getNumberHeadings();
-
                 # Inhibit editsection links if requested in the page
                 if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
                         $showEditLink = 0;
@@ -3907,7 +3902,7 @@ class Parser {
                         }
  
                         # Don't number the heading if it is the only one (looks silly)
-                       if ( $doNumberHeadings && count( $matches[3] ) > 1) {
+                       if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
                                 # the two are different if the line contains a link
                                 $headline = $numbering . ' ' . $headline;
                         }
@@ -3967,10 +3962,10 @@ class Parser {
                                 // We use a page and section attribute to stop the language converter from converting these important bits
                                 // of data, but put the headline hint inside a content block because the language converter is supposed to
                                 // be able to convert that piece of data.
-                               $editlink = '<editsection page="' . htmlspecialchars($editlinkArgs[0]);
+                               $editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]);
                                 $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"';
                                 if ( isset($editlinkArgs[2]) ) {
-                                       $editlink .= '>' . $editlinkArgs[2] . '</editsection>';
+                                       $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
                                 } else {
                                         $editlink .= '/>';
                                 }
@@ -3995,7 +3990,7 @@ class Parser {
                         if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
                                 $toc .= $sk->tocUnindent( $prevtoclevel - 1 );
                         }
-                       $toc = $sk->tocList( $toc );
+                       $toc = $sk->tocList( $toc, $this->mOptions->getUserLang() );
                         $this->mOutput->setTOCHTML( $toc );
                 }
  
@@ -4047,20 +4042,16 @@ class Parser {
          * @return String: the altered wiki markup
          */
         public function preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState = true ) {
-               $this->mOptions = $options;
-               $this->setTitle( $title );
+               $this->startParse( $title, $options, self::OT_WIKI, $clearState );
                 $this->setUser( $user );
-               $this->setOutputType( self::OT_WIKI );
-
-               if ( $clearState ) {
-                       $this->clearState();
-               }
  
                 $pairs = array(
                         "\r\n" => "\n",
                 );
                 $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );
-               $text = $this->pstPass2( $text, $user );
+               if( $options->getPreSaveTransform() ) {
+                       $text = $this->pstPass2( $text, $user );
+               }
                 $text = $this->mStripState->unstripBoth( $text );
  
                 $this->setUser( null ); #Reset
@@ -4097,9 +4088,9 @@ class Parser {
                 # whatever crap the system uses, localised or not, so we cannot
                 # ship premade translations.
                 $key = 'timezone-' . strtolower( trim( $tzMsg ) );
-               $value = wfMsgForContent( $key );
-               if ( !wfEmptyMsg( $key, $value ) ) {
-                       $tzMsg = $value;
+               $msg = wfMessage( $key )->inContentLanguage();
+               if ( $msg->exists() ) {
+                       $tzMsg = $msg->text();
                 }
  
                 date_default_timezone_set( $oldtz );
@@ -4110,6 +4101,9 @@ class Parser {
                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
                 $text = $this->replaceVariables( $text );
  
+               # This works almost by chance, as the replaceVariables are done before the getUserSig(),
+               # which may corrupt this parser instance via its wfMsgExt( parsemag ) call-
+
                 # Signatures
                 $sigText = $this->getUserSig( $user );
                 $text = strtr( $text, array(
@@ -4155,6 +4149,8 @@ class Parser {
          * validated, ready-to-insert wikitext.
          * If you have pre-fetched the nickname or the fancySig option, you can
          * specify them here to save a database query.
+        * Do not reuse this parser instance after calling getUserSig(),
+        * as it may have changed if it's the $wgParser.
          *
          * @param $user User
          * @param $nickname String: nickname to use or false to use user's default nickname
@@ -4273,9 +4269,12 @@ class Parser {
          * Set up some variables which are usually set up in parse()
          * so that an external function can call some class members with confidence
          */
-       public function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
+       public function startExternalParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
+               $this->startParse( $title, $options, $outputType, $clearState );
+       }
+
+       private function startParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
                 $this->setTitle( $title );
-               $options->resetUsage();
                 $this->mOptions = $options;
                 $this->setOutputType( $outputType );
                 if ( $clearState ) {
@@ -4288,10 +4287,10 @@ class Parser {
          *
          * @param $text String: the text to preprocess
          * @param $options ParserOptions: options
+        * @param $title Title object or null to use $wgTitle
          * @return String
          */
-       public function transformMsg( $text, $options ) {
-               global $wgTitle;
+       public function transformMsg( $text, $options, $title = null ) {
                 static $executing = false;
  
                 # Guard against infinite recursion
@@ -4301,7 +4300,16 @@ class Parser {
                 $executing = true;
  
                 wfProfileIn( __METHOD__ );
-               $text = $this->preprocess( $text, $wgTitle, $options );
+               if ( !$title ) {
+                       global $wgTitle;
+                       $title = $wgTitle;
+               }
+               if ( !$title ) {
+                       # It's not uncommon having a null $wgTitle in scripts. See r80898
+                       # Create a ghost title in such case
+                       $title = Title::newFromText( 'Dwimmerlaik' );
+               }
+               $text = $this->preprocess( $text, $title, $options );
  
                 $executing = false;
                 wfProfileOut( __METHOD__ );
@@ -4322,6 +4330,7 @@ class Parser {
          */
         public function setHook( $tag, $callback ) {
                 $tag = strtolower( $tag );
+               if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
                 $oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
                 $this->mTagHooks[$tag] = $callback;
                 if ( !in_array( $tag, $this->mStripList ) ) {
@@ -4333,6 +4342,7 @@ class Parser {
  
         function setTransparentTagHook( $tag, $callback ) {
                 $tag = strtolower( $tag );
+               if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" );
                 $oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
                 $this->mTransparentTagHooks[$tag] = $callback;
  
@@ -4437,6 +4447,7 @@ class Parser {
          */
         function setFunctionTagHook( $tag, $callback, $flags ) {
                 $tag = strtolower( $tag );
+               if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
                 $old = isset( $this->mFunctionTagHooks[$tag] ) ?
                         $this->mFunctionTagHooks[$tag] : null;
                 $this->mFunctionTagHooks[$tag] = array( $callback, $flags );
@@ -4526,7 +4537,7 @@ class Parser {
                         if ( strpos( $matches[0], '%' ) !== false ) {
                                 $matches[1] = rawurldecode( $matches[1] );
                         }
-                       $tp = Title::newFromText( $matches[1] );
+                       $tp = Title::newFromText( $matches[1], NS_FILE );
                         $nt =& $tp;
                         if ( is_null( $nt ) ) {
                                 # Bogus title. Ignore these so we don't bomb out later.
@@ -4851,6 +4862,30 @@ class Parser {
                 return array_merge( array_keys( $this->mTransparentTagHooks ), array_keys( $this->mTagHooks ) );
         }
  
+       /**
+        * Replace transparent tags in $text with the values given by the callbacks.
+        *
+        * Transparent tag hooks are like regular XML-style tag hooks, except they
+        * operate late in the transformation sequence, on HTML instead of wikitext.
+        */
+       function replaceTransparentTags( $text ) {
+               $matches = array();
+               $elements = array_keys( $this->mTransparentTagHooks );
+               $text = $this->extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix );
+
+               foreach ( $matches as $marker => $data ) {
+                       list( $element, $content, $params, $tag ) = $data;
+                       $tagName = strtolower( $element );
+                       if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
+                               $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
+                       } else {
+                               $output = $tag;
+                       }
+                       $this->mStripState->addGeneral( $marker, $output );
+               }
+               return $text;
+       }
+
         /**
          * Break wikitext input into sections, and either pull or replace
          * some particular section's text.
@@ -4877,11 +4912,8 @@ class Parser {
          *                 for "replace", the whole page with the section replaced.
          */
         private function extractSections( $text, $section, $mode, $newText='' ) {
-               global $wgTitle;
-               $this->mOptions = new ParserOptions;
-               $this->clearState();
-               $this->setTitle( $wgTitle ); # not generally used but removes an ugly failure mode
-               $this->setOutputType( self::OT_PLAIN );
+               global $wgTitle; # not generally used but removes an ugly failure mode
+               $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
                 $outText = '';
                 $frame = $this->getPreprocessor()->newFrame();
  
@@ -4986,9 +5018,9 @@ class Parser {
         }
  
         /**
-        * This function returns $oldtext after the content of the section 
+        * This function returns $oldtext after the content of the section
          * specified by $section has been replaced with $text.
-        * 
+        *
          * @param $text String: former text of the article
          * @param $section Numeric: section identifier
          * @param $text String: replacing text
@@ -5084,7 +5116,11 @@ class Parser {
  
         /**
          * Accessor for $mDefaultSort
-        * Will use the title/prefixed title if none is set
+        * Will use the empty string if none is set.
+        *
+        * This value is treated as a prefix, so the
+        * empty string is equivalent to sorting by
+        * page name.
          *
          * @return string
          */
@@ -5092,7 +5128,7 @@ class Parser {
                 if ( $this->mDefaultSort !== false ) {
                         return $this->mDefaultSort;
                 } else {
-                       return $this->mTitle->getCategorySortkey();
+                       return '';
                 }
         }
  
@@ -5168,15 +5204,12 @@ class Parser {
         /**
          * strip/replaceVariables/unstrip for preprocessor regression testing
          */
-       function testSrvus( $text, $title, $options, $outputType = self::OT_HTML ) {
-               $this->mOptions = $options;
-               $this->clearState();
+       function testSrvus( $text, $title, ParserOptions $options, $outputType = self::OT_HTML ) {
                 if ( !$title instanceof Title ) {
                         $title = Title::newFromText( $title );
                 }
-               $this->mTitle = $title;
-               $options->resetUsage();
-               $this->setOutputType( $outputType );
+               $this->startParse( $title, $options, $outputType, true );
+
                 $text = $this->replaceVariables( $text );
                 $text = $this->mStripState->unstripBoth( $text );
                 $text = Sanitizer::removeHTMLtags( $text );
@@ -5198,6 +5231,17 @@ class Parser {
                 return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
         }
  
+       /**
+        * Call a callback function on all regions of the given text that are not
+        * inside strip markers, and replace those regions with the return value
+        * of the callback. For example, with input:
+        *
+        *  aaa<MARKER>bbb
+        *
+        * This will call the callback function twice, with 'aaa' and 'bbb'. Those
+        * two strings will be replaced with the value returned by the callback in
+        * each case.
+        */
         function markerSkipCallback( $s, $callback ) {
                 $i = 0;
                 $out = '';
@@ -5222,168 +5266,68 @@ class Parser {
                 return $out;
         }
  
-       function serialiseHalfParsedText( $text ) {
-               $data = array();
-               $data['text'] = $text;
-
-               # First, find all strip markers, and store their
-               #  data in an array.
-               $stripState = new StripState;
-               $pos = 0;
-               while ( ( $start_pos = strpos( $text, $this->mUniqPrefix, $pos ) )
-                       && ( $end_pos = strpos( $text, self::MARKER_SUFFIX, $pos ) ) )
-               {
-                       $end_pos += strlen( self::MARKER_SUFFIX );
-                       $marker = substr( $text, $start_pos, $end_pos-$start_pos );
-
-                       if ( !empty( $this->mStripState->general->data[$marker] ) ) {
-                               $replaceArray = $stripState->general;
-                               $stripText = $this->mStripState->general->data[$marker];
-                       } elseif ( !empty( $this->mStripState->nowiki->data[$marker] ) ) {
-                               $replaceArray = $stripState->nowiki;
-                               $stripText = $this->mStripState->nowiki->data[$marker];
-                       } else {
-                               throw new MWException( "Hanging strip marker: '$marker'." );
-                       }
-
-                       $replaceArray->setPair( $marker, $stripText );
-                       $pos = $end_pos;
-               }
-               $data['stripstate'] = $stripState;
-
-               # Now, find all of our links, and store THEIR
-               #  data in an array! :)
-               $links = array( 'internal' => array(), 'interwiki' => array() );
-               $pos = 0;
-
-               # Internal links
-               while ( ( $start_pos = strpos( $text, '<!--LINK ', $pos ) ) ) {
-                       list( $ns, $trail ) = explode( ':', substr( $text, $start_pos + strlen( '<!--LINK ' ) ), 2 );
-
-                       $ns = trim( $ns );
-                       if ( empty( $links['internal'][$ns] ) ) {
-                               $links['internal'][$ns] = array();
-                       }
-
-                       $key = trim( substr( $trail, 0, strpos( $trail, '-->' ) ) );
-                       $links['internal'][$ns][] = $this->mLinkHolders->internals[$ns][$key];
-                       $pos = $start_pos + strlen( "<!--LINK $ns:$key-->" );
-               }
-
-               $pos = 0;
-
-               # Interwiki links
-               while ( ( $start_pos = strpos( $text, '<!--IWLINK ', $pos ) ) ) {
-                       $data = substr( $text, $start_pos );
-                       $key = trim( substr( $data, 0, strpos( $data, '-->' ) ) );
-                       $links['interwiki'][] = $this->mLinkHolders->interwiki[$key];
-                       $pos = $start_pos + strlen( "<!--IWLINK $key-->" );
-               }
-
-               $data['linkholder'] = $links;
-
+       /**
+        * Save the parser state required to convert the given half-parsed text to
+        * HTML. "Half-parsed" in this context means the output of
+        * recursiveTagParse() or internalParse(). This output has strip markers
+        * from replaceVariables (extensionSubstitution() etc.), and link
+        * placeholders from replaceLinkHolders().
+        *
+        * Returns an array which can be serialized and stored persistently. This
+        * array can later be loaded into another parser instance with
+        * unserializeHalfParsedText(). The text can then be safely incorporated into
+        * the return value of a parser hook.
+        */
+       function serializeHalfParsedText( $text ) {
+               wfProfileIn( __METHOD__ );
+               $data = array(
+                       'text' => $text,
+                       'version' => self::HALF_PARSED_VERSION,
+                       'stripState' => $this->mStripState->getSubState( $text ),
+                       'linkHolders' => $this->mLinkHolders->getSubArray( $text )
+               );
+               wfProfileOut( __METHOD__ );
                 return $data;
         }
  
         /**
-        * TODO: document
-        * @param $data Array
-        * @param $intPrefix String unique identifying prefix
+        * Load the parser state given in the $data array, which is assumed to
+        * have been generated by serializeHalfParsedText(). The text contents is
+        * extracted from the array, and its markers are transformed into markers
+        * appropriate for the current Parser instance. This transformed text is
+        * returned, and can be safely included in the return value of a parser
+        * hook.
+        *
+        * If the $data array has been stored persistently, the caller should first
+        * check whether it is still valid, by calling isValidHalfParsedText().
+        *
+        * @param $data Serialized data
          * @return String
          */
-       function unserialiseHalfParsedText( $data, $intPrefix = null ) {
-               if ( !$intPrefix ) {
-                       $intPrefix = self::getRandomString();
+       function unserializeHalfParsedText( $data ) {
+               if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
+                       throw new MWException( __METHOD__.': invalid version' );
                 }
  
                 # First, extract the strip state.
-               $stripState = $data['stripstate'];
-               $this->mStripState->general->merge( $stripState->general );
-               $this->mStripState->nowiki->merge( $stripState->nowiki );
-
-               # Now, extract the text, and renumber links
-               $text = $data['text'];
-               $links = $data['linkholder'];
+               $texts = array( $data['text'] );
+               $texts = $this->mStripState->merge( $data['stripState'], $texts );
  
-               # Internal...
-               foreach ( $links['internal'] as $ns => $nsLinks ) {
-                       foreach ( $nsLinks as $key => $entry ) {
-                               $newKey = $intPrefix . '-' . $key;
-                               $this->mLinkHolders->internals[$ns][$newKey] = $entry;
-
-                               $text = str_replace( "<!--LINK $ns:$key-->", "<!--LINK $ns:$newKey-->", $text );
-                       }
-               }
-
-               # Interwiki...
-               foreach ( $links['interwiki'] as $key => $entry ) {
-                       $newKey = "$intPrefix-$key";
-                       $this->mLinkHolders->interwikis[$newKey] = $entry;
-
-                       $text = str_replace( "<!--IWLINK $key-->", "<!--IWLINK $newKey-->", $text );
-               }
+               # Now renumber links
+               $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );
  
                 # Should be good to go.
-               return $text;
-       }
-}
-
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class StripState {
-       var $general, $nowiki;
-
-       function __construct() {
-               $this->general = new ReplacementArray;
-               $this->nowiki = new ReplacementArray;
-       }
-
-       function unstripGeneral( $text ) {
-               wfProfileIn( __METHOD__ );
-               do {
-                       $oldText = $text;
-                       $text = $this->general->replace( $text );
-               } while ( $text !== $oldText );
-               wfProfileOut( __METHOD__ );
-               return $text;
-       }
-
-       function unstripNoWiki( $text ) {
-               wfProfileIn( __METHOD__ );
-               do {
-                       $oldText = $text;
-                       $text = $this->nowiki->replace( $text );
-               } while ( $text !== $oldText );
-               wfProfileOut( __METHOD__ );
-               return $text;
-       }
-
-       function unstripBoth( $text ) {
-               wfProfileIn( __METHOD__ );
-               do {
-                       $oldText = $text;
-                       $text = $this->general->replace( $text );
-                       $text = $this->nowiki->replace( $text );
-               } while ( $text !== $oldText );
-               wfProfileOut( __METHOD__ );
-               return $text;
+               return $texts[0];
         }
-}
-
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class OnlyIncludeReplacer {
-       var $output = '';
  
-       function replace( $matches ) {
-               if ( substr( $matches[1], -1 ) === "\n" ) {
-                       $this->output .= substr( $matches[1], 0, -1 );
-               } else {
-                       $this->output .= $matches[1];
-               }
+       /**
+        * Returns true if the given array, presumed to be generated by
+        * serializeHalfParsedText(), is compatible with the current version of the
+        * parser.
+        *
+        * @param $data Array.
+        */
+       function isValidHalfParsedText( $data ) {
+               return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
         }
  }