*/
const VERSION = '1.6.4';
+ /**
+ * Update this version number when the output of serialiseHalfParsedText()
+ * changes in an incompatible way
+ */
+ const HALF_PARSED_VERSION = 2;
+
# Flags for Parser::setFunctionHook
# Also available as global constants from Defines.php
const SFH_NO_HASH = 1;
var $mVariables, $mSubstWords; # Initialised by initialiseVariables()
var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor
-
# Cleared with clearState():
- var $mOutput, $mAutonumber, $mDTopen, $mStripState;
+ var $mOutput, $mAutonumber, $mDTopen;
+
+ /**
+ * @var StripState
+ */
+ var $mStripState;
+
var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
var $mLinkHolders, $mLinkID;
var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
var $mTplExpandCache; # empty-frame expansion cache
var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
var $mExpensiveFunctionCount; # number of expensive parser function calls
+
+ /**
+ * @var User
+ */
var $mUser; # User object; only used when doing pre-save transform
# Temporary
* @var ParserOptions
*/
var $mOptions;
+
+ /**
+ * @var Title
+ */
var $mTitle; # Title context, used for self-link rendering and similar things
var $mOutputType; # Output type, one of the OT_xxx constants
var $ot; # Shortcut alias, see setOutputType()
$this->mLastSection = '';
$this->mDTopen = false;
$this->mIncludeCount = array();
- $this->mStripState = new StripState;
$this->mArgStack = false;
$this->mInPre = false;
$this->mLinkHolders = new LinkHolderArray( $this );
# $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
# Changed to \x7f to allow XML double-parsing -- TS
$this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
+ $this->mStripState = new StripState( $this->mUniqPrefix );
# Clear these on every parse, bug 4549
wfProfileIn( __METHOD__ );
wfProfileIn( $fname );
- $this->startExternalParse( $title, $options, self::OT_HTML, $clearState );
+ $this->startParse( $title, $options, self::OT_HTML, $clearState );
$oldRevisionId = $this->mRevisionId;
$oldRevisionObject = $this->mRevisionObject;
/**
* A converted title will be provided in the output object if title and
- * content conversion are enabled, the article text does not contain
- * a conversion-suppressing double-underscore tag, and no
+ * content conversion are enabled, the article text does not contain
+ * a conversion-suppressing double-underscore tag, and no
* {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
* automatic link conversion.
*/
wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
-//!JF Move to its own function
-
- $uniq_prefix = $this->mUniqPrefix;
- $matches = array();
- $elements = array_keys( $this->mTransparentTagHooks );
- $text = $this->extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
-
- foreach ( $matches as $marker => $data ) {
- list( $element, $content, $params, $tag ) = $data;
- $tagName = strtolower( $element );
- if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
- $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
- } else {
- $output = $tag;
- }
- $this->mStripState->general->setPair( $marker, $output );
- }
+ $text = $this->replaceTransparentTags( $text );
$text = $this->mStripState->unstripGeneral( $text );
$text = Sanitizer::normalizeCharReferences( $text );
*/
function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) {
wfProfileIn( __METHOD__ );
- $this->startExternalParse( $title, $options, self::OT_PREPROCESS, true );
+ $this->startParse( $title, $options, self::OT_PREPROCESS, true );
if ( $revid !== null ) {
$this->mRevisionId = $revid;
}
*/
public function getPreloadText( $text, Title $title, ParserOptions $options ) {
# Parser (re)initialisation
- $this->startExternalParse( $title, $options, self::OT_PLAIN, true );
+ $this->startParse( $title, $options, self::OT_PLAIN, true );
$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
- return $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
+ $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
+ $text = $this->mStripState->unstripBoth( $text );
+ return $text;
}
/**
* Get a random string
*
- * @private
* @static
*/
- static private function getRandomString() {
+ static public function getRandomString() {
return dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
}
return $this->mLinkID++;
}
+ function setLinkID( $id ) {
+ $this->mLinkID = $id;
+ }
+
/**
* @return Language
*/
function insertStripItem( $text ) {
$rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
$this->mMarkerIndex++;
- $this->mStripState->general->setPair( $rnd, $text );
+ $this->mStripState->addGeneral( $rnd, $text );
return $rnd;
}
if ( $markerType === 'none' ) {
return $output;
} elseif ( $markerType === 'nowiki' ) {
- $this->mStripState->nowiki->setPair( $marker, $output );
+ $this->mStripState->addNoWiki( $marker, $output );
} elseif ( $markerType === 'general' ) {
- $this->mStripState->general->setPair( $marker, $output );
+ $this->mStripState->addGeneral( $marker, $output );
} else {
throw new MWException( __METHOD__.': invalid marker type' );
}
function formatHeadings( $text, $origText, $isMain=true ) {
global $wgMaxTocLevel, $wgContLang, $wgHtml5, $wgExperimentalHtmlIds;
- $doNumberHeadings = $this->mOptions->getNumberHeadings();
-
# Inhibit editsection links if requested in the page
if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
$showEditLink = 0;
}
# Don't number the heading if it is the only one (looks silly)
- if ( $doNumberHeadings && count( $matches[3] ) > 1) {
+ if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
# the two are different if the line contains a link
$headline = $numbering . ' ' . $headline;
}
if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
$toc .= $sk->tocUnindent( $prevtoclevel - 1 );
}
- $toc = $sk->tocList( $toc );
+ $toc = $sk->tocList( $toc, $this->mOptions->getUserLang() );
$this->mOutput->setTOCHTML( $toc );
}
* @return String: the altered wiki markup
*/
public function preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState = true ) {
- $this->startExternalParse( $title, $options, self::OT_WIKI, $clearState );
+ $this->startParse( $title, $options, self::OT_WIKI, $clearState );
$this->setUser( $user );
$pairs = array(
# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
$text = $this->replaceVariables( $text );
- # This works almost by chance, as the replaceVariables are done before the getUserSig(),
+ # This works almost by chance, as the replaceVariables are done before the getUserSig(),
# which may corrupt this parser instance via its wfMsgExt( parsemag ) call-
# Signatures
* so that an external function can call some class members with confidence
*/
public function startExternalParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
+ $this->startParse( $title, $options, $outputType, $clearState );
+ }
+
+ private function startParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
$this->setTitle( $title );
$this->mOptions = $options;
$this->setOutputType( $outputType );
return array_merge( array_keys( $this->mTransparentTagHooks ), array_keys( $this->mTagHooks ) );
}
+ /**
+ * Replace transparent tags in $text with the values given by the callbacks.
+ *
+ * Transparent tag hooks are like regular XML-style tag hooks, except they
+ * operate late in the transformation sequence, on HTML instead of wikitext.
+ */
+ function replaceTransparentTags( $text ) {
+ $matches = array();
+ $elements = array_keys( $this->mTransparentTagHooks );
+ $text = $this->extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix );
+
+ foreach ( $matches as $marker => $data ) {
+ list( $element, $content, $params, $tag ) = $data;
+ $tagName = strtolower( $element );
+ if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
+ $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
+ } else {
+ $output = $tag;
+ }
+ $this->mStripState->addGeneral( $marker, $output );
+ }
+ return $text;
+ }
+
/**
* Break wikitext input into sections, and either pull or replace
* some particular section's text.
*/
private function extractSections( $text, $section, $mode, $newText='' ) {
global $wgTitle; # not generally used but removes an ugly failure mode
- $this->startExternalParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
+ $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
$outText = '';
$frame = $this->getPreprocessor()->newFrame();
}
/**
- * This function returns $oldtext after the content of the section
+ * This function returns $oldtext after the content of the section
* specified by $section has been replaced with $text.
- *
+ *
* @param $text String: former text of the article
* @param $section Numeric: section identifier
* @param $text String: replacing text
if ( !$title instanceof Title ) {
$title = Title::newFromText( $title );
}
- $this->startExternalParse( $title, $options, $outputType, true );
+ $this->startParse( $title, $options, $outputType, true );
$text = $this->replaceVariables( $text );
$text = $this->mStripState->unstripBoth( $text );
return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
}
+ /**
+ * Call a callback function on all regions of the given text that are not
+ * inside strip markers, and replace those regions with the return value
+ * of the callback. For example, with input:
+ *
+ * aaa<MARKER>bbb
+ *
+ * This will call the callback function twice, with 'aaa' and 'bbb'. Those
+ * two strings will be replaced with the value returned by the callback in
+ * each case.
+ */
function markerSkipCallback( $s, $callback ) {
$i = 0;
$out = '';
return $out;
}
- function serialiseHalfParsedText( $text ) {
- $data = array();
- $data['text'] = $text;
-
- # First, find all strip markers, and store their
- # data in an array.
- $stripState = new StripState;
- $pos = 0;
- while ( ( $start_pos = strpos( $text, $this->mUniqPrefix, $pos ) )
- && ( $end_pos = strpos( $text, self::MARKER_SUFFIX, $pos ) ) )
- {
- $end_pos += strlen( self::MARKER_SUFFIX );
- $marker = substr( $text, $start_pos, $end_pos-$start_pos );
-
- if ( !empty( $this->mStripState->general->data[$marker] ) ) {
- $replaceArray = $stripState->general;
- $stripText = $this->mStripState->general->data[$marker];
- } elseif ( !empty( $this->mStripState->nowiki->data[$marker] ) ) {
- $replaceArray = $stripState->nowiki;
- $stripText = $this->mStripState->nowiki->data[$marker];
- } else {
- throw new MWException( "Hanging strip marker: '$marker'." );
- }
-
- $replaceArray->setPair( $marker, $stripText );
- $pos = $end_pos;
- }
- $data['stripstate'] = $stripState;
-
- # Now, find all of our links, and store THEIR
- # data in an array! :)
- $links = array( 'internal' => array(), 'interwiki' => array() );
- $pos = 0;
-
- # Internal links
- while ( ( $start_pos = strpos( $text, '<!--LINK ', $pos ) ) ) {
- list( $ns, $trail ) = explode( ':', substr( $text, $start_pos + strlen( '<!--LINK ' ) ), 2 );
-
- $ns = trim( $ns );
- if ( empty( $links['internal'][$ns] ) ) {
- $links['internal'][$ns] = array();
- }
-
- $key = trim( substr( $trail, 0, strpos( $trail, '-->' ) ) );
- $links['internal'][$ns][] = $this->mLinkHolders->internals[$ns][$key];
- $pos = $start_pos + strlen( "<!--LINK $ns:$key-->" );
- }
-
- $pos = 0;
-
- # Interwiki links
- while ( ( $start_pos = strpos( $text, '<!--IWLINK ', $pos ) ) ) {
- $data = substr( $text, $start_pos );
- $key = trim( substr( $data, 0, strpos( $data, '-->' ) ) );
- $links['interwiki'][] = $this->mLinkHolders->interwiki[$key];
- $pos = $start_pos + strlen( "<!--IWLINK $key-->" );
- }
-
- $data['linkholder'] = $links;
-
+ /**
+ * Save the parser state required to convert the given half-parsed text to
+ * HTML. "Half-parsed" in this context means the output of
+ * recursiveTagParse() or internalParse(). This output has strip markers
+ * from replaceVariables (extensionSubstitution() etc.), and link
+ * placeholders from replaceLinkHolders().
+ *
+ * Returns an array which can be serialized and stored persistently. This
+ * array can later be loaded into another parser instance with
+ * unserializeHalfParsedText(). The text can then be safely incorporated into
+ * the return value of a parser hook.
+ */
+ function serializeHalfParsedText( $text ) {
+ wfProfileIn( __METHOD__ );
+ $data = array(
+ 'text' => $text,
+ 'version' => self::HALF_PARSED_VERSION,
+ 'stripState' => $this->mStripState->getSubState( $text ),
+ 'linkHolders' => $this->mLinkHolders->getSubArray( $text )
+ );
+ wfProfileOut( __METHOD__ );
return $data;
}
/**
- * TODO: document
- * @param $data Array
- * @param $intPrefix String unique identifying prefix
+ * Load the parser state given in the $data array, which is assumed to
+ * have been generated by serializeHalfParsedText(). The text contents is
+ * extracted from the array, and its markers are transformed into markers
+ * appropriate for the current Parser instance. This transformed text is
+ * returned, and can be safely included in the return value of a parser
+ * hook.
+ *
+ * If the $data array has been stored persistently, the caller should first
+ * check whether it is still valid, by calling isValidHalfParsedText().
+ *
+ * @param $data Serialized data
* @return String
*/
- function unserialiseHalfParsedText( $data, $intPrefix = null ) {
- if ( !$intPrefix ) {
- $intPrefix = self::getRandomString();
+ function unserializeHalfParsedText( $data ) {
+ if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
+ throw new MWException( __METHOD__.': invalid version' );
}
# First, extract the strip state.
- $stripState = $data['stripstate'];
- $this->mStripState->general->merge( $stripState->general );
- $this->mStripState->nowiki->merge( $stripState->nowiki );
-
- # Now, extract the text, and renumber links
- $text = $data['text'];
- $links = $data['linkholder'];
-
- # Internal...
- foreach ( $links['internal'] as $ns => $nsLinks ) {
- foreach ( $nsLinks as $key => $entry ) {
- $newKey = $intPrefix . '-' . $key;
- $this->mLinkHolders->internals[$ns][$newKey] = $entry;
-
- $text = str_replace( "<!--LINK $ns:$key-->", "<!--LINK $ns:$newKey-->", $text );
- }
- }
+ $texts = array( $data['text'] );
+ $texts = $this->mStripState->merge( $data['stripState'], $texts );
- # Interwiki...
- foreach ( $links['interwiki'] as $key => $entry ) {
- $newKey = "$intPrefix-$key";
- $this->mLinkHolders->interwikis[$newKey] = $entry;
-
- $text = str_replace( "<!--IWLINK $key-->", "<!--IWLINK $newKey-->", $text );
- }
+ # Now renumber links
+ $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );
# Should be good to go.
- return $text;
- }
-}
-
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class StripState {
- var $general, $nowiki;
-
- function __construct() {
- $this->general = new ReplacementArray;
- $this->nowiki = new ReplacementArray;
- }
-
- function unstripGeneral( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->general->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-
- function unstripNoWiki( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->nowiki->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
+ return $texts[0];
}
- function unstripBoth( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->general->replace( $text );
- $text = $this->nowiki->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-}
-
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class OnlyIncludeReplacer {
- var $output = '';
-
- function replace( $matches ) {
- if ( substr( $matches[1], -1 ) === "\n" ) {
- $this->output .= substr( $matches[1], 0, -1 );
- } else {
- $this->output .= $matches[1];
- }
+ /**
+ * Returns true if the given array, presumed to be generated by
+ * serializeHalfParsedText(), is compatible with the current version of the
+ * parser.
+ *
+ * @param $data Array.
+ */
+ function isValidHalfParsedText( $data ) {
+ return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
}
}