* produces altered wiki markup.
* preprocess()
* removes HTML comments and expands templates
- * cleanSig()
+ * cleanSig() / cleanSigInSig()
* Cleans a signature before saving it to preferences
- * extractSections()
- * Extracts sections from an article for section editing
+ * getSection()
+ * Return the content of a section from an article for section editing
+ * replaceSection()
+ * Replaces a section by number inside an article
* getPreloadText()
* Removes <noinclude> sections, and <includeonly> tags.
*
*/
const VERSION = '1.6.4';
+ /**
+ * Update this version number when the output of serialiseHalfParsedText()
+ * changes in an incompatible way
+ */
+ const HALF_PARSED_VERSION = 2;
+
# Flags for Parser::setFunctionHook
# Also available as global constants from Defines.php
const SFH_NO_HASH = 1;
const MARKER_SUFFIX = "-QINU\x7f";
# Persistent:
- var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables;
- var $mSubstWords, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex;
- var $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList;
- var $mVarCache, $mConf, $mFunctionTagHooks;
-
+ var $mTagHooks = array();
+ var $mTransparentTagHooks = array();
+ var $mFunctionHooks = array();
+ var $mFunctionSynonyms = array( 0 => array(), 1 => array() );
+ var $mFunctionTagHooks = array();
+ var $mStripList = array();
+ var $mDefaultStripList = array();
+ var $mVarCache = array();
+ var $mImageParams = array();
+ var $mImageParamsMagicArray = array();
+ var $mMarkerIndex = 0;
+ var $mFirstCall = true;
+ var $mVariables, $mSubstWords; # Initialised by initialiseVariables()
+ var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor
# Cleared with clearState():
- var $mOutput, $mAutonumber, $mDTopen, $mStripState;
+ var $mOutput, $mAutonumber, $mDTopen;
+
+ /**
+ * @var StripState
+ */
+ var $mStripState;
+
var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
var $mLinkHolders, $mLinkID;
var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
var $mExpensiveFunctionCount; # number of expensive parser function calls
+ /**
+ * @var User
+ */
+ var $mUser; # User object; only used when doing pre-save transform
+
# Temporary
# These are variables reset at least once per parse regardless of $clearState
- var $mOptions; # ParserOptions object
+
+ /**
+ * @var ParserOptions
+ */
+ var $mOptions;
+
+ /**
+ * @var Title
+ */
var $mTitle; # Title context, used for self-link rendering and similar things
var $mOutputType; # Output type, one of the OT_xxx constants
var $ot; # Shortcut alias, see setOutputType()
+ var $mRevisionObject; # The revision object of the specified revision ID
var $mRevisionId; # ID to display in {{REVISIONID}} tags
var $mRevisionTimestamp; # The timestamp of the specified revision ID
+ var $mRevisionUser; # User to display in {{REVISIONUSER}} tag
var $mRevIdForTs; # The revision ID which was used to fetch the timestamp
/**
*/
function __construct( $conf = array() ) {
$this->mConf = $conf;
- $this->mTagHooks = array();
- $this->mTransparentTagHooks = array();
- $this->mFunctionHooks = array();
- $this->mFunctionTagHooks = array();
- $this->mFunctionSynonyms = array( 0 => array(), 1 => array() );
- $this->mDefaultStripList = $this->mStripList = array();
$this->mUrlProtocols = wfUrlProtocols();
$this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
'[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S';
- $this->mVarCache = array();
if ( isset( $conf['preprocessorClass'] ) ) {
$this->mPreprocessorClass = $conf['preprocessorClass'];
} elseif ( extension_loaded( 'domxml' ) ) {
} else {
$this->mPreprocessorClass = 'Preprocessor_Hash';
}
- $this->mMarkerIndex = 0;
- $this->mFirstCall = true;
}
/**
$this->firstCallInit();
}
$this->mOutput = new ParserOutput;
+ $this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );
$this->mAutonumber = 0;
$this->mLastSection = '';
$this->mDTopen = false;
$this->mIncludeCount = array();
- $this->mStripState = new StripState;
$this->mArgStack = false;
$this->mInPre = false;
$this->mLinkHolders = new LinkHolderArray( $this );
$this->mLinkID = 0;
- $this->mRevisionTimestamp = $this->mRevisionId = null;
+ $this->mRevisionObject = $this->mRevisionTimestamp =
+ $this->mRevisionId = $this->mRevisionUser = null;
$this->mVarCache = array();
+ $this->mUser = null;
/**
* Prefix for temporary replacement strings for the multipass parser.
# $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
# Changed to \x7f to allow XML double-parsing -- TS
$this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
+ $this->mStripState = new StripState( $this->mUniqPrefix );
# Clear these on every parse, bug 4549
wfProfileOut( __METHOD__ );
}
- function setOutputType( $ot ) {
- $this->mOutputType = $ot;
- # Shortcut alias
- $this->ot = array(
- 'html' => $ot == self::OT_HTML,
- 'wiki' => $ot == self::OT_WIKI,
- 'pre' => $ot == self::OT_PREPROCESS,
- 'plain' => $ot == self::OT_PLAIN,
- );
- }
-
- /**
- * Set the context title
- */
- function setTitle( $t ) {
- if ( !$t || $t instanceof FakeTitle ) {
- $t = Title::newFromText( 'NO TITLE' );
- }
-
- if ( strval( $t->getFragment() ) !== '' ) {
- # Strip the fragment to avoid various odd effects
- $this->mTitle = clone $t;
- $this->mTitle->setFragment( '' );
- } else {
- $this->mTitle = $t;
- }
- }
-
- /**
- * Accessor for mUniqPrefix.
- *
- * @public
- */
- function uniqPrefix() {
- if ( !isset( $this->mUniqPrefix ) ) {
- # @todo Fixme: this is probably *horribly wrong*
- # LanguageConverter seems to want $wgParser's uniqPrefix, however
- # if this is called for a parser cache hit, the parser may not
- # have ever been initialized in the first place.
- # Not really sure what the heck is supposed to be going on here.
- return '';
- # throw new MWException( "Accessing uninitialized mUniqPrefix" );
- }
- return $this->mUniqPrefix;
- }
-
/**
* Convert wikitext to HTML
* Do not call this function recursively.
wfProfileIn( __METHOD__ );
wfProfileIn( $fname );
- if ( $clearState ) {
- $this->clearState();
- }
-
- $this->mOptions = $options;
- $this->setTitle( $title ); # Page title has to be set for the pre-processor
+ $this->startParse( $title, $options, self::OT_HTML, $clearState );
$oldRevisionId = $this->mRevisionId;
+ $oldRevisionObject = $this->mRevisionObject;
$oldRevisionTimestamp = $this->mRevisionTimestamp;
+ $oldRevisionUser = $this->mRevisionUser;
if ( $revid !== null ) {
$this->mRevisionId = $revid;
+ $this->mRevisionObject = null;
$this->mRevisionTimestamp = null;
+ $this->mRevisionUser = null;
}
- $this->setOutputType( self::OT_HTML );
+
wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
# No more strip!
wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
}
/**
- * A page get its title converted except:
- * a) Language conversion is globally disabled
- * b) Title convert is globally disabled
- * c) The page is a redirect page
- * d) User request with a "linkconvert" set to "no"
- * e) A "nocontentconvert" magic word has been set
- * f) A "notitleconvert" magic word has been set
- * g) User sets "noconvertlink" in his/her preference
- *
- * Note that if a user tries to set a title in a conversion
- * rule but content conversion was not done, then the parser
- * won't pick it up. This is probably expected behavior.
+ * A converted title will be provided in the output object if title and
+ * content conversion are enabled, the article text does not contain
+ * a conversion-suppressing double-underscore tag, and no
+ * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
+ * automatic link conversion.
*/
if ( !( $wgDisableLangConversion
|| $wgDisableTitleConversion
|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
|| isset( $this->mDoubleUnderscores['notitleconvert'] )
- || $this->mOutput->getDisplayTitle() !== false ) )
+ || $this->mOutput->getDisplayTitle() !== false ) )
{
$convruletitle = $wgContLang->getConvRuleTitle();
if ( $convruletitle ) {
wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
-//!JF Move to its own function
-
- $uniq_prefix = $this->mUniqPrefix;
- $matches = array();
- $elements = array_keys( $this->mTransparentTagHooks );
- $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
-
- foreach ( $matches as $marker => $data ) {
- list( $element, $content, $params, $tag ) = $data;
- $tagName = strtolower( $element );
- if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
- $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
- } else {
- $output = $tag;
- }
- $this->mStripState->general->setPair( $marker, $output );
- }
+ $text = $this->replaceTransparentTags( $text );
$text = $this->mStripState->unstripGeneral( $text );
$text = Sanitizer::normalizeCharReferences( $text );
- if ( ( $wgUseTidy && $this->mOptions->mTidy ) || $wgAlwaysUseTidy ) {
+ if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
$text = MWTidy::tidy( $text );
} else {
# attempt to sanitize at least some nesting problems
$PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/$wgExpensiveParserFunctionLimit\n";
$limitReport =
"NewPP limit report\n" .
- "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
+ "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
"Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
"Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
$PFreport;
$this->mOutput->setText( $text );
$this->mRevisionId = $oldRevisionId;
+ $this->mRevisionObject = $oldRevisionObject;
$this->mRevisionTimestamp = $oldRevisionTimestamp;
+ $this->mRevisionUser = $oldRevisionUser;
wfProfileOut( $fname );
wfProfileOut( __METHOD__ );
* If $frame is not provided, then template variables (e.g., {{{1}}}) within $text are not expanded
*
* @param $text String: text extension wants to have parsed
- * @param PPFrame $frame: The frame to use for expanding any template variables
+ * @param $frame PPFrame: The frame to use for expanding any template variables
*/
function recursiveTagParse( $text, $frame=false ) {
wfProfileIn( __METHOD__ );
* Expand templates and variables in the text, producing valid, static wikitext.
* Also removes comments.
*/
- function preprocess( $text, $title, $options, $revid = null ) {
+ function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) {
wfProfileIn( __METHOD__ );
- $this->clearState();
- $this->setOutputType( self::OT_PREPROCESS );
- $this->mOptions = $options;
- $this->setTitle( $title );
+ $this->startParse( $title, $options, self::OT_PREPROCESS, true );
if ( $revid !== null ) {
$this->mRevisionId = $revid;
}
* <noinclude>, <includeonly> etc. are parsed as for template transclusion,
* comments, templates, arguments, tags hooks and parser functions are untouched.
*/
- public function getPreloadText( $text, $title, $options ) {
+ public function getPreloadText( $text, Title $title, ParserOptions $options ) {
# Parser (re)initialisation
- $this->clearState();
- $this->setOutputType( self::OT_PLAIN );
- $this->mOptions = $options;
- $this->setTitle( $title );
+ $this->startParse( $title, $options, self::OT_PLAIN, true );
$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
- return $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
+ $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
+ $text = $this->mStripState->unstripBoth( $text );
+ return $text;
}
/**
* Get a random string
*
- * @private
* @static
*/
- function getRandomString() {
+ static public function getRandomString() {
return dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
}
- function &getTitle() { return $this->mTitle; }
- function getOptions() { return $this->mOptions; }
- function getRevisionId() { return $this->mRevisionId; }
- function getOutput() { return $this->mOutput; }
- function nextLinkID() { return $this->mLinkID++; }
+ /**
+ * Set the current user.
+ * Should only be used when doing pre-save transform.
+ *
+ * The value is stored in $this->mUser. While that member is null, getUser()
+ * falls back to the user of the ParserOptions object.
+ *
+ * @param $user Mixed: User object or null (to reset)
+ */
+ function setUser( $user ) {
+ $this->mUser = $user;
+ }
+ /**
+ * Return the unique marker prefix held in mUniqPrefix.
+ *
+ * @return String: the prefix, or '' when mUniqPrefix has not been set
+ */
+ public function uniqPrefix() {
+ 	if ( isset( $this->mUniqPrefix ) ) {
+ 		return $this->mUniqPrefix;
+ 	}
+ 	# @todo Fixme: falling back to an empty string is probably *horribly wrong*.
+ 	# LanguageConverter seems to want $wgParser's uniqPrefix, however
+ 	# if this is called for a parser cache hit, the parser may not
+ 	# have ever been initialized in the first place.
+ 	# Not really sure what the heck is supposed to be going on here.
+ 	# (Alternative that was left disabled: throw an MWException for
+ 	# "Accessing uninitialized mUniqPrefix".)
+ 	return '';
+ }
+
+ /**
+ * Set the context title.
+ *
+ * A missing title or a FakeTitle is replaced by the title 'NO TITLE'.
+ * A title carrying a fragment is cloned and the clone's fragment is
+ * emptied, so the caller's object is left untouched.
+ *
+ * @param $t Title object, FakeTitle, or a false-ish value
+ */
+ function setTitle( $t ) {
+ 	$usable = $t && !( $t instanceof FakeTitle );
+ 	if ( !$usable ) {
+ 		$t = Title::newFromText( 'NO TITLE' );
+ 	}
+
+ 	$hasFragment = strval( $t->getFragment() ) !== '';
+ 	if ( !$hasFragment ) {
+ 		$this->mTitle = $t;
+ 		return;
+ 	}
+
+ 	# Strip the fragment to avoid various odd effects
+ 	$this->mTitle = clone $t;
+ 	$this->mTitle->setFragment( '' );
+ }
+
+ /**
+ * Accessor for the Title object
+ *
+ * Returns whatever is currently held in $this->mTitle (the context title
+ * used for self-link rendering and similar things).
+ *
+ * @return Title object
+ */
+ function getTitle() {
+ return $this->mTitle;
+ }
+
+ /**
+ * Accessor/mutator for the Title object
+ *
+ * Delegates to wfSetVar() on $this->mTitle. The normalisation done by
+ * setTitle() ('NO TITLE' fallback, fragment stripping) is not applied here.
+ *
+ * @param $x New Title object or null to just get the current one
+ * @return Title object (presumably the value held before the call;
+ *   confirm against wfSetVar())
+ */
+ function Title( $x = null ) {
+ return wfSetVar( $this->mTitle, $x );
+ }
+
+ /**
+ * Set the output type and rebuild the $this->ot shortcut array.
+ *
+ * $this->ot gets one boolean per mode ('html', 'wiki', 'pre', 'plain'),
+ * each telling whether $ot equals the matching OT_xxx class constant.
+ *
+ * @param $ot Integer: new value, one of the OT_xxx constants
+ */
+ function setOutputType( $ot ) {
+ 	$this->mOutputType = $ot;
+
+ 	# Shortcut alias
+ 	$flags = array();
+ 	$flags['html'] = ( $ot == self::OT_HTML );
+ 	$flags['wiki'] = ( $ot == self::OT_WIKI );
+ 	$flags['pre'] = ( $ot == self::OT_PREPROCESS );
+ 	$flags['plain'] = ( $ot == self::OT_PLAIN );
+ 	$this->ot = $flags;
+ }
+
+ /**
+ * Accessor/mutator for the output type
+ *
+ * Delegates to wfSetVar() on $this->mOutputType only; the $this->ot
+ * shortcut array built by setOutputType() is not refreshed here.
+ *
+ * @param $x New value or null to just get the current one
+ * @return Integer (presumably the value held before the call; confirm
+ *   against wfSetVar())
+ */
+ function OutputType( $x = null ) {
+ return wfSetVar( $this->mOutputType, $x );
+ }
+
+ /**
+ * Get the ParserOutput object
+ *
+ * Returns whatever is currently held in $this->mOutput.
+ *
+ * @return ParserOutput object
+ */
+ function getOutput() {
+ return $this->mOutput;
+ }
+
+ /**
+ * Get the ParserOptions object
+ *
+ * Returns whatever is currently held in $this->mOptions.
+ *
+ * @return ParserOptions object
+ */
+ function getOptions() {
+ return $this->mOptions;
+ }
+
+ /**
+ * Accessor/mutator for the ParserOptions object
+ *
+ * Delegates to wfSetVar() on $this->mOptions.
+ *
+ * @param $x New value or null to just get the current one
+ * @return Current ParserOptions object (presumably the one held before the
+ *   call; confirm against wfSetVar())
+ */
+ function Options( $x = null ) {
+ return wfSetVar( $this->mOptions, $x );
+ }
+
+ /**
+ * Return the current value of $this->mLinkID and then advance it by one
+ * (post-increment).
+ */
+ function nextLinkID() {
+ return $this->mLinkID++;
+ }
+
+ /**
+ * Overwrite $this->mLinkID, the counter that nextLinkID() reads and
+ * increments.
+ *
+ * @param $id New counter value
+ */
+ function setLinkID( $id ) {
+ $this->mLinkID = $id;
+ }
+
+ /**
+ * @return Language
+ */
function getFunctionLang() {
global $wgLang, $wgContLang;
}
}
+ /**
+ * Get the User object to work with: $this->mUser when it is not null,
+ * otherwise the user reported by the ParserOptions object.
+ *
+ * @return User object
+ */
+ function getUser() {
+ 	return ( $this->mUser !== null )
+ 		? $this->mUser
+ 		: $this->mOptions->getUser();
+ }
+
/**
* Get a preprocessor object
+ *
+ * @return Preprocessor instance
*/
function getPreprocessor() {
if ( !isset( $this->mPreprocessor ) ) {
*
* @param $elements list of element names. Comments are always extracted.
* @param $text Source text string.
+ * @param $matches Out parameter, Array: extracted tags
* @param $uniq_prefix
+ * @return String: stripped text
*
- * @public
* @static
*/
- function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
+ public function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
static $n = 1;
$stripped = '';
$matches = array();
function insertStripItem( $text ) {
$rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
$this->mMarkerIndex++;
- $this->mStripState->general->setPair( $rnd, $text );
+ $this->mStripState->addGeneral( $rnd, $text );
return $rnd;
}
*/
function doTableStuff( $text ) {
wfProfileIn( __METHOD__ );
-
+
$lines = StringUtils::explode( "\n", $text );
- $text = null;
$out = '';
$td_history = array(); # Is currently a td tag open?
$last_tag_history = array(); # Save history of last lag activated (td, th or caption)
foreach ( $lines as $outLine ) {
$line = trim( $outLine );
- if ( $line === '' ) { # empty line, go to next line
+ if ( $line === '' ) { # empty line, go to next line
$out .= $outLine."\n";
continue;
}
$last_tag = array_pop( $last_tag_history );
if ( array_pop( $td_history ) ) {
- $previous = "</{$last_tag}>{$previous}";
+ $previous = "</{$last_tag}>\n{$previous}";
}
if ( $first_character === '|' ) {
$df = DateFormatter::getInstance();
$text = $df->reformat( $this->mOptions->getDateFormat(), $text );
}
- $text = $this->doAllQuotes( $text );
$text = $this->replaceInternalLinks( $text );
+ $text = $this->doAllQuotes( $text );
$text = $this->replaceExternalLinks( $text );
# replaceInternalLinks may sometimes leave behind
(\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . '
(?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number
ISBN\s+(\b # m[5]: ISBN, capture number
- (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
- (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
- [0-9Xx] # check digit
- \b)
+ (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
+ (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
+ [0-9Xx] # check digit
+ \b)
)!x', array( &$this, 'magicLinkCallback' ), $text );
wfProfileOut( __METHOD__ );
return $text;
return $this->makeFreeExternalLink( $m[0] );
} elseif ( isset( $m[4] ) && $m[4] !== '' ) {
# RFC or PMID
- $CssClass = '';
if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
$keyword = 'RFC';
$urlmsg = 'rfcurl';
throw new MWException( __METHOD__.': unrecognised match type "' .
substr( $m[0], 0, 20 ) . '"' );
}
- $url = wfMsg( $urlmsg, $id);
- $sk = $this->mOptions->getSkin();
- $la = $sk->getExternalLinkAttributes( "external $CssClass" );
- return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
+ $url = wfMsgForContent( $urlmsg, $id );
+ $sk = $this->mOptions->getSkin( $this->mTitle );
+ return $sk->makeExternalLink( $url, "{$keyword} {$id}", true, $CssClass );
} elseif ( isset( $m[5] ) && $m[5] !== '' ) {
# ISBN
$isbn = $m[5];
global $wgContLang;
wfProfileIn( __METHOD__ );
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
$trail = '';
# The characters '<' and '>' (which were escaped by
# First, do some preliminary work. This may shift some apostrophes from
# being mark-up to being text. It also counts the number of occurrences
# of bold and italics mark-ups.
- $i = 0;
$numbold = 0;
$numitalics = 0;
- foreach ( $arr as $r ) {
+ for ( $i = 0; $i < count( $arr ); $i++ ) {
if ( ( $i % 2 ) == 1 ) {
# If there are ever four apostrophes, assume the first is supposed to
# be text, and the remaining three constitute mark-up for bold text.
$numbold++;
}
}
- $i++;
}
# If there is an odd number of both bold and italics, it is likely
/**
* Replace external links (REL)
*
- * Note: this is all very hackish and the order of execution matters a lot.
+ * Note: this is all very hackish and the order of execution matters a lot.
* Make sure to run maintenance/parserTests.php if you change this code.
*
* @private
global $wgContLang;
wfProfileIn( __METHOD__ );
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
$s = array_shift( $bits );
* (depending on configuration, namespace, and the URL's domain) and/or a
* target attribute (depending on configuration).
*
- * @param string $url Optional URL, to extract the domain from for rel =>
+ * @param $url String: optional URL, to extract the domain from for rel =>
* nofollow if appropriate
- * @return array Associative array of HTML attributes
+ * @return Array: associative array of HTML attributes
*/
function getExternalLinkAttribs( $url = false ) {
$attribs = array();
/**
* Replace unusual URL escape codes with their equivalent characters
- * @param string
- * @return string
- * @static
+ *
+ * @param $url String
+ * @return String
+ *
* @todo This can merge genuinely required bits in the path or query string,
* breaking legit URLs. A proper fix would treat the various parts of
* the URL differently; as a workaround, just use the output for
/**
* Callback function used in replaceUnusualEscapes().
* Replaces unusual URL escape codes with their equivalent character
- * @static
- * @private
*/
private static function replaceUnusualEscapesCallback( $matches ) {
$char = urldecode( $matches[0] );
* @private
*/
function maybeMakeExternalImage( $url ) {
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
$imagesexception = !empty( $imagesfrom );
$text = false;
$imagematch = false;
}
if ( $this->mOptions->getAllowExternalImages()
- || ( $imagesexception && $imagematch ) ) {
+ || ( $imagesexception && $imagematch ) ) {
if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
# Image found
$text = $sk->makeExternalImage( $url );
/**
* Process [[ ]] wikilinks
- * @return processed text
+ * @return String: processed text
*
* @private
*/
$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
}
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
$holders = new LinkHolderArray( $this );
- # split the entire text string on occurences of [[
+ # split the entire text string on occurrences of [[
$a = StringUtils::explode( '[[', ' ' . $s );
# get the first element (all text up to first [[), and remove the space we added
$s = $a->current();
}
if ( $wgContLang->hasVariants() ) {
- $selflink = $wgContLang->convertLinkToAllVariants( $this->mTitle->getPrefixedText() );
+ $selflink = $wgContLang->autoConvertToAllVariants( $this->mTitle->getPrefixedText() );
} else {
$selflink = array( $this->mTitle->getPrefixedText() );
}
# fix up urlencoded title texts
if ( strpos( $m[1], '%' ) !== false ) {
# Should anchors '#' also be rejected?
- $m[1] = str_replace( array('<', '>'), array('<', '>'), urldecode( $m[1] ) );
+ $m[1] = str_replace( array('<', '>'), array('<', '>'), rawurldecode( $m[1] ) );
}
$trail = $m[3];
} elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption
$might_be_img = true;
$text = $m[2];
if ( strpos( $m[1], '%' ) !== false ) {
- $m[1] = urldecode( $m[1] );
+ $m[1] = rawurldecode( $m[1] );
}
$trail = "";
} else { # Invalid form; output directly
$wasblank = ( $text == '' );
if ( $wasblank ) {
$text = $link;
+ } else {
+ # Bug 4598 madness. Handle the quotes only if they come from the alternate part
+ # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
+ # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
+ # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
+ $text = $this->doQuotes( $text );
}
# Link not escaped by : , create the various objects
* Strip the whitespace Category links produce, see bug 87
* @todo We might want to use trim($tmp, "\n") here.
*/
- $s .= trim( $prefix . $trail, "\n" ) == '' ? '': $prefix . $trail;
+ $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;
wfProfileOut( __METHOD__."-category" );
continue;
* breaking URLs in the following text without breaking trails on the
* wiki links, it's been made into a horrible function.
*
- * @param Title $nt
- * @param string $text
- * @param string $query
- * @param string $trail
- * @param string $prefix
- * @return string HTML-wikitext mix oh yuck
+ * @param $nt Title
+ * @param $text String
+ * @param $query String
+ * @param $trail String
+ * @param $prefix String
+ * @return String: HTML-wikitext mix oh yuck
*/
function makeKnownLinkHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
list( $inside, $trail ) = Linker::splitTrail( $trail );
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
# FIXME: use link() instead of deprecated makeKnownLinkObj()
$link = $sk->makeKnownLinkObj( $nt, $text, $query, $inside, $prefix );
return $this->armorLinks( $link ) . $trail;
* Not needed quite as much as it used to be since free links are a bit
* more sensible these days. But bracketed links are still an issue.
*
- * @param string more-or-less HTML
- * @return string less-or-more HTML with NOPARSE bits
+ * @param $text String: more-or-less HTML
+ * @return String: less-or-more HTML with NOPARSE bits
*/
function armorLinks( $text ) {
return preg_replace( '/\b(' . wfUrlProtocols() . ')/',
/**
* Return true if subpage links should be expanded on this page.
- * @return bool
+ * @return Boolean
*/
function areSubpagesAllowed() {
# Some namespaces don't allow subpages
/**
* Handle link to subpage if necessary
- * @param string $target the source of the link
- * @param string &$text the link text, modified as necessary
+ *
+ * @param $target String: the source of the link
+ * @param &$text String: the link text, modified as necessary
* @return string the full name of the link
* @private
*/
$this->mLastSection = '';
return $result;
}
+
/**
* getCommon() returns the length of the longest common substring
* of both arguments, starting at the beginning of both.
}
return $i;
}
+
/**
* These next three functions open, continue, and close the list
* element appropriate to the prefix character passed into them.
/**
* Make lists from lines starting with ':', '*', '#', etc. (DBL)
*
- * @param $linestart bool whether or not this is at the start of a line.
+ * @param $text String
+ * @param $linestart Boolean: whether or not this is at the start of a line.
* @private
* @return string the lists rendered as HTML
*/
if ( $preOpenMatch and !$preCloseMatch ) {
$this->mInPre = true;
}
- if ( $closematch ) {
- $inBlockElem = false;
- } else {
- $inBlockElem = true;
- }
+ $inBlockElem = !$closematch;
} elseif ( !$inBlockElem && !$this->mInPre ) {
if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) ) {
# pre
/**
* Split up a string on ':', ignoring any occurences inside tags
* to prevent illegal overlapping.
- * @param string $str the string to split
- * @param string &$before set to everything before the ':'
- * @param string &$after set to everything after the ':'
- * return string the position of the ':', or false if none found
+ *
+ * @param $str String: the string to split
+ * @param &$before String: set to everything before the ':'
+ * @param &$after String: set to everything after the ':'
+ * @return String: the position of the ':', or false if none found
*/
function findColonNoLinks( $str, &$before, &$after ) {
wfProfileIn( __METHOD__ );
}
if ( $stack > 0 ) {
wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" );
+ wfProfileOut( __METHOD__ );
return false;
}
wfProfileOut( __METHOD__ );
* @private
*/
function getVariableValue( $index, $frame=false ) {
- global $wgContLang, $wgSitename, $wgServer, $wgServerName;
- global $wgScriptPath, $wgStylePath;
+ global $wgContLang, $wgSitename, $wgServer;
+ global $wgArticlePath, $wgScriptPath, $wgStylePath;
/**
* Some of these require message or data lookups and can be
$value = wfEscapeWikiText( $this->mTitle->getText() );
break;
case 'pagenamee':
- $value = $this->mTitle->getPartialURL();
+ $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
break;
case 'fullpagename':
$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
break;
case 'fullpagenamee':
- $value = $this->mTitle->getPrefixedURL();
+ $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
break;
case 'subpagename':
$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
break;
case 'subpagenamee':
- $value = $this->mTitle->getSubpageUrlForm();
+ $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
break;
case 'basepagename':
$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
break;
case 'basepagenamee':
- $value = wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) );
+ $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) );
break;
case 'talkpagename':
if ( $this->mTitle->canTalk() ) {
case 'talkpagenamee':
if ( $this->mTitle->canTalk() ) {
$talkPage = $this->mTitle->getTalkPage();
- $value = $talkPage->getPrefixedUrl();
+ $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() );
} else {
$value = '';
}
break;
case 'subjectpagenamee':
$subjPage = $this->mTitle->getSubjectPage();
- $value = $subjPage->getPrefixedUrl();
+ $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() );
break;
case 'revisionid':
# Let the edit saving system know we should parse the page
case 'currentversion':
$value = SpecialVersion::getVersion();
break;
+ case 'articlepath':
+ return $wgArticlePath;
case 'sitename':
return $wgSitename;
case 'server':
return $wgServer;
case 'servername':
- return $wgServerName;
+ wfSuppressWarnings(); # May give an E_WARNING in PHP < 5.3.3
+ $serverName = parse_url( $wgServer, PHP_URL_HOST );
+ wfRestoreWarnings();
+ return $serverName ? $serverName : $wgServer;
case 'scriptpath':
return $wgScriptPath;
case 'stylepath':
case 'directionmark':
return $wgContLang->getDirMark();
case 'contentlanguage':
- global $wgContLanguageCode;
- return $wgContLanguageCode;
+ global $wgLanguageCode;
+ return $wgLanguageCode;
default:
$ret = null;
if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$this->mVarCache, &$index, &$ret, &$frame ) ) ) {
* Preprocess some wikitext and return the document tree.
* This is the ghost of replace_variables().
*
- * @param string $text The text to parse
- * @param integer flags Bitwise combination of:
+ * @param $text String: The text to parse
+ * @param $flags Integer: bitwise combination of:
* self::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being
* included. Default is to assume a direct page view.
*
* self::OT_PREPROCESS: templates but not extension tags
* self::OT_HTML: all templates and extension tags
*
- * @param string $tex The text to transform
- * @param PPFrame $frame Object describing the arguments passed to the template.
+ * @param $text String: the text to transform
+ * @param $frame PPFrame Object describing the arguments passed to the template.
* Arguments may also be provided as an associative array, as was the usual case before MW1.12.
* Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly.
- * @param bool $argsOnly Only do argument (triple-brace) expansion, not double-brace expansion
+ * @param $argsOnly Boolean: only do argument (triple-brace) expansion, not double-brace expansion
* @private
*/
function replaceVariables( $text, $frame = false, $argsOnly = false ) {
* Warn the user when a parser limitation is reached
* Will warn at most once the user per limitation type
*
- * @param string $limitationType, should be one of:
+ * @param $limitationType String: should be one of:
* 'expensive-parserfunction' (corresponding messages:
* 'expensive-parserfunction-warning',
* 'expensive-parserfunction-category')
* 'post-expand-template-inclusion' (corresponding messages:
* 'post-expand-template-inclusion-warning',
* 'post-expand-template-inclusion-category')
- * @params int $current, $max When an explicit limit has been
+ * @param $current Current value
+ * @param $max Maximum allowed, when an explicit limit has been
* exceeded, provide the values (optional)
*/
function limitationWarn( $limitationType, $current=null, $max=null) {
* Return the text of a template, after recursively
* replacing any variables or templates within the template.
*
- * @param array $piece The parts of the template
+ * @param $piece Array: the parts of the template
* $piece['title']: the title, i.e. the part before the |
* $piece['parts']: the parameter array
* $piece['lineStart']: whether the brace was at the start of a line
- * @param PPFrame The current frame, contains template arguments
- * @return string the text of the template
+ * @param $frame PPFrame The current frame, contains template arguments
+ * @return String: the text of the template
* @private
*/
function braceSubstitution( $piece, $frame ) {
$originalTitle = $part1;
# $args is a list of argument nodes, starting from index 0, not including $part1
+ # *** FIXME if piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object
$args = ( null == $piece['parts'] ) ? array() : $piece['parts'];
wfProfileOut( __METHOD__.'-setup' );
if ( $id !== false ) {
$text = $this->getVariableValue( $id, $frame );
if ( MagicWord::getCacheTTL( $id ) > -1 ) {
- $this->mOutput->mContainsOldMagic = true;
+ $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
}
$found = true;
}
if ( !$title->equals( $cacheTitle ) ) {
$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
- array( $title->getNamespace(),$cdb = $title->getDBkey() );
+ array( $title->getNamespace(), $cdb = $title->getDBkey() );
}
return array( $dom, $title );
* Fetch the unparsed text of a template and register a reference to it.
*/
function fetchTemplateAndTitle( $title ) {
- $templateCb = $this->mOptions->getTemplateCallback();
+ $templateCb = $this->mOptions->getTemplateCallback(); # Defaults to Parser::statelessFetchTemplate()
$stuff = call_user_func( $templateCb, $title, $this );
$text = $stuff['text'];
$finalTitle = isset( $stuff['finalTitle'] ) ? $stuff['finalTitle'] : $title;
$text = $rev->getText();
} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
global $wgContLang;
- $message = $wgContLang->lcfirst( $title->getText() );
- $text = wfMsgForContentNoTrans( $message );
- if ( wfEmptyMsg( $message, $text ) ) {
+ $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
+ if ( !$message->exists() ) {
$text = false;
break;
}
+ $text = $message->plain();
} else {
break;
}
global $wgEnableScaryTranscluding;
if ( !$wgEnableScaryTranscluding ) {
- return wfMsg('scarytranscludedisabled');
+ return wfMsgForContent('scarytranscludedisabled');
}
$url = $title->getFullUrl( "action=$action" );
if ( strlen( $url ) > 255 ) {
- return wfMsg( 'scarytranscludetoolong' );
+ return wfMsgForContent( 'scarytranscludetoolong' );
}
return $this->fetchScaryTemplateMaybeFromCache( $url );
}
$text = Http::get( $url );
if ( !$text ) {
- return wfMsg( 'scarytranscludefailed', $url );
+ return wfMsgForContent( 'scarytranscludefailed', $url );
}
$dbw = wfGetDB( DB_MASTER );
$text = $frame->getArgument( $argName );
if ( $text === false && $parts->getLength() > 0
&& (
- $this->ot['html']
- || $this->ot['pre']
- || ( $this->ot['wiki'] && $frame->isTemplate() )
+ $this->ot['html']
+ || $this->ot['pre']
+ || ( $this->ot['wiki'] && $frame->isTemplate() )
)
) {
# No match in frame, use the supplied default
* Return the text to be used for a given extension tag.
* This is the ghost of strip().
*
- * @param array $params Associative array of parameters:
+ * @param $params Associative array of parameters:
* name PPNode for the tag name
* attr PPNode for unparsed text where tag attributes are thought to be
* attributes Optional associative array of parsed attributes
* inner Contents of extension element
* noClose Original text did not have a close tag
- * @param PPFrame $frame
+ * @param $frame PPFrame
*/
function extensionSubstitution( $params, $frame ) {
- global $wgRawHtml, $wgContLang;
-
$name = $frame->expand( $params['name'] );
$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
if ( $markerType === 'none' ) {
return $output;
} elseif ( $markerType === 'nowiki' ) {
- $this->mStripState->nowiki->setPair( $marker, $output );
+ $this->mStripState->addNoWiki( $marker, $output );
} elseif ( $markerType === 'general' ) {
- $this->mStripState->general->setPair( $marker, $output );
+ $this->mStripState->addGeneral( $marker, $output );
} else {
throw new MWException( __METHOD__.': invalid marker type' );
}
/**
* Increment an include size counter
*
- * @param string $type The type of expansion
- * @param integer $size The size of the text
- * @return boolean False if this inclusion would take it over the maximum, true otherwise
+ * @param $type String: the type of expansion
+ * @param $size Integer: the size of the text
+ * @return Boolean: false if this inclusion would take it over the maximum, true otherwise
*/
function incrementIncludeSize( $type, $size ) {
- if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize( $type ) ) {
+ if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
return false;
} else {
$this->mIncludeSizes[$type] += $size;
/**
* Increment the expensive function count
*
- * @return boolean False if the limit has been exceeded
+ * @return Boolean: false if the limit has been exceeded
*/
function incrementExpensiveFunctionCount() {
global $wgExpensiveParserFunctionLimit;
$this->mShowToc = false;
}
if ( isset( $this->mDoubleUnderscores['hiddencat'] ) && $this->mTitle->getNamespace() == NS_CATEGORY ) {
- $this->mOutput->setProperty( 'hiddencat', 'y' );
$this->addTrackingCategory( 'hidden-category-category' );
}
# (bug 8068) Allow control over whether robots index a page.
$this->addTrackingCategory( 'index-category' );
}
+ # Cache all double underscores in the database
+ foreach ( $this->mDoubleUnderscores as $key => $val ) {
+ $this->mOutput->setProperty( $key, '' );
+ }
+
wfProfileOut( __METHOD__ );
return $text;
}
/**
* Add a tracking category, getting the title from a system message,
* or print a debug message if the title is invalid.
- * @param $msg String message key
- * @return Bool whether the addition was successful
+ *
+ * @param $msg String: message key
+ * @return Boolean: whether the addition was successful
*/
protected function addTrackingCategory( $msg ) {
$cat = wfMsgForContent( $msg );
* It loops through all headlines, collects the necessary data, then splits up the
* string and re-inserts the newly formatted headlines.
*
- * @param string $text
- * @param string $origText Original, untouched wikitext
- * @param boolean $isMain
+ * @param $text String
+ * @param $origText String: original, untouched wikitext
+ * @param $isMain Boolean
* @private
*/
function formatHeadings( $text, $origText, $isMain=true ) {
global $wgMaxTocLevel, $wgContLang, $wgHtml5, $wgExperimentalHtmlIds;
- $doNumberHeadings = $this->mOptions->getNumberHeadings();
- $showEditLink = $this->mOptions->getEditSection();
-
- # Do not call quickUserCan unless necessary
- if ( $showEditLink && !$this->mTitle->quickUserCan( 'edit' ) ) {
- $showEditLink = 0;
- }
-
# Inhibit editsection links if requested in the page
- if ( isset( $this->mDoubleUnderscores['noeditsection'] ) || $this->mOptions->getIsPrintable() ) {
+ if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
$showEditLink = 0;
+ } else {
+ $showEditLink = $this->mOptions->getEditSection();
+ }
+ if ( $showEditLink ) {
+ $this->mOutput->setEditSectionTokens( true );
}
# Get all headlines for numbering them and adding funky stuff like [edit]
}
# We need this to perform operations on the HTML
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
# headline counter
$headlineCount = 0;
$head = array();
$sublevelCount = array();
$levelCount = array();
- $toclevel = 0;
$level = 0;
$prevlevel = 0;
$toclevel = 0;
$node = $root->getFirstChild();
$byteOffset = 0;
$tocraw = array();
+ $refers = array();
foreach ( $matches[3] as $headline ) {
$isTemplate = false;
if ( $toclevel ) {
$prevlevel = $level;
- $prevtoclevel = $toclevel;
}
$level = $matches[1][$headlineCount];
# For the anchor, strip out HTML-y stuff period
$safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );
- $safeHeadline = preg_replace( '/[ _]+/', ' ', $safeHeadline );
- $safeHeadline = trim( $safeHeadline );
+ $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
# Save headline for section edit hint before it's escaped
$headlineHint = $safeHeadline;
'noninitial' );
}
- # HTML names must be case-insensitively unique (bug 10721). FIXME:
- # Does this apply to Unicode characters? Because we aren't
- # handling those here.
+ # HTML names must be case-insensitively unique (bug 10721).
+ # This does not apply to Unicode characters per
+ # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison
+ # FIXME: We may be changing them depending on the current locale.
$arrayKey = strtolower( $safeHeadline );
if ( $legacyHeadline === false ) {
$legacyArrayKey = false;
}
# Don't number the heading if it is the only one (looks silly)
- if ( $doNumberHeadings && count( $matches[3] ) > 1) {
+ if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
# the two are different if the line contains a link
$headline = $numbering . ' ' . $headline;
}
while ( $node && !$isTemplate ) {
if ( $node->getName() === 'h' ) {
$bits = $node->splitHeading();
- if ( $bits['i'] == $sectionIndex )
+ if ( $bits['i'] == $sectionIndex ) {
break;
+ }
}
$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
# give headline the correct <h#> tag
if ( $showEditLink && $sectionIndex !== false ) {
+ // Output edit section links as markers with styles that can be customized by skins
if ( $isTemplate ) {
# Put a T flag in the section identifier, to indicate to extractSections()
# that sections inside <includeonly> should be counted.
- $editlink = $sk->doEditSectionLink( Title::newFromText( $titleText ), "T-$sectionIndex" );
+ $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
} else {
- $editlink = $sk->doEditSectionLink( $this->mTitle, $sectionIndex, $headlineHint );
+ $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint );
+ }
+ // We use a bit of pseudo-XML for editsection markers, because the language converter is run later on.
+ // Using a UNIQ-style marker leads to the converter corrupting the tokens when it converts text,
+ // and trying to insert strip tags fails too. At this point all real user-supplied tags have already been
+ // escaped, so we don't have to worry about a user trying to input one of these markers directly.
+ // We use page and section attributes to stop the language converter from converting these important bits
+ // of data, but put the headline hint inside a content block because the language converter is supposed to
+ // be able to convert that piece of data.
+ $editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]);
+ $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"';
+ if ( isset($editlinkArgs[2]) ) {
+ $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
+ } else {
+ $editlink .= '/>';
}
} else {
$editlink = '';
if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
$toc .= $sk->tocUnindent( $prevtoclevel - 1 );
}
- $toc = $sk->tocList( $toc );
+ $toc = $sk->tocList( $toc, $this->mOptions->getUserLang() );
$this->mOutput->setTOCHTML( $toc );
}
}
}
- /**
- * Merge $tree2 into $tree1 by replacing the section with index
- * $section in $tree1 and its descendants with the sections in $tree2.
- * Note that in the returned section tree, only the 'index' and
- * 'byteoffset' fields are guaranteed to be correct.
- * @param $tree1 array Section tree from ParserOutput::getSectons()
- * @param $tree2 array Section tree
- * @param $section int Section index
- * @param $title Title Title both section trees come from
- * @param $len2 int Length of the original wikitext for $tree2
- * @return array Merged section tree
- */
- public static function mergeSectionTrees( $tree1, $tree2, $section, $title, $len2 ) {
- global $wgContLang;
- $newTree = array();
- $targetLevel = false;
- $merged = false;
- $lastLevel = 1;
- $nextIndex = 1;
- $numbering = array( 0 );
- $titletext = $title->getPrefixedDBkey();
- foreach ( $tree1 as $s ) {
- if ( $targetLevel !== false ) {
- if ( $s['level'] <= $targetLevel ) {
- # We've skipped enough
- $targetLevel = false;
- } else {
- continue;
- }
- }
- if ( $s['index'] != $section ||
- $s['fromtitle'] != $titletext ) {
- self::incrementNumbering( $numbering,
- $s['toclevel'], $lastLevel );
-
- # Rewrite index, byteoffset and number
- if ( $s['fromtitle'] == $titletext ) {
- $s['index'] = $nextIndex++;
- if ( $merged ) {
- $s['byteoffset'] += $len2;
- }
- }
- $s['number'] = implode( '.', array_map(
- array( $wgContLang, 'formatnum' ),
- $numbering ) );
- $lastLevel = $s['toclevel'];
- $newTree[] = $s;
- } else {
- # We're at $section
- # Insert sections from $tree2 here
- foreach ( $tree2 as $s2 ) {
- # Rewrite the fields in $s2
- # before inserting it
- $s2['toclevel'] += $s['toclevel'] - 1;
- $s2['level'] += $s['level'] - 1;
- $s2['index'] = $nextIndex++;
- $s2['byteoffset'] += $s['byteoffset'];
-
- self::incrementNumbering( $numbering,
- $s2['toclevel'], $lastLevel );
- $s2['number'] = implode( '.', array_map(
- array( $wgContLang, 'formatnum' ),
- $numbering ) );
- $lastLevel = $s2['toclevel'];
- $newTree[] = $s2;
- }
- # Skip all descendants of $section in $tree1
- $targetLevel = $s['level'];
- $merged = true;
- }
- }
- return $newTree;
- }
-
- /**
- * Increment a section number. Helper function for mergeSectionTrees()
- * @param $number array Array representing a section number
- * @param $level int Current TOC level (depth)
- * @param $lastLevel int Level of previous TOC entry
- */
- private static function incrementNumbering( &$number, $level, $lastLevel ) {
- if ( $level > $lastLevel ) {
- $number[$level - 1] = 1;
- } elseif ( $level < $lastLevel ) {
- foreach ( $number as $key => $unused )
- if ( $key >= $level ) {
- unset( $number[$key] );
- }
- $number[$level - 1]++;
- } else {
- $number[$level - 1]++;
- }
- }
-
/**
* Transform wiki markup when saving a page by doing \r\n -> \n
* conversion, substituting signatures, {{subst:}} templates, etc.
*
- * @param string $text the text to transform
- * @param Title &$title the Title object for the current article
- * @param User $user the User object describing the current user
- * @param ParserOptions $options parsing options
- * @param bool $clearState whether to clear the parser state first
- * @return string the altered wiki markup
- * @public
+ * @param $text String: the text to transform
+ * @param $title Title: the Title object for the current article
+ * @param $user User: the User object describing the current user
+ * @param $options ParserOptions: parsing options
+ * @param $clearState Boolean: whether to clear the parser state first
+ * @return String: the altered wiki markup
*/
- function preSaveTransform( $text, Title $title, $user, $options, $clearState = true ) {
- $this->mOptions = $options;
- $this->setTitle( $title );
- $this->setOutputType( self::OT_WIKI );
-
- if ( $clearState ) {
- $this->clearState();
- }
+ public function preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState = true ) {
+ $this->startParse( $title, $options, self::OT_WIKI, $clearState );
+ $this->setUser( $user );
$pairs = array(
"\r\n" => "\n",
);
$text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );
- $text = $this->pstPass2( $text, $user );
+ if( $options->getPreSaveTransform() ) {
+ $text = $this->pstPass2( $text, $user );
+ }
$text = $this->mStripState->unstripBoth( $text );
+
+ $this->setUser( null ); #Reset
+
return $text;
}
# whatever crap the system uses, localised or not, so we cannot
# ship premade translations.
$key = 'timezone-' . strtolower( trim( $tzMsg ) );
- $value = wfMsgForContent( $key );
- if ( !wfEmptyMsg( $key, $value ) ) {
- $tzMsg = $value;
+ $msg = wfMessage( $key )->inContentLanguage();
+ if ( $msg->exists() ) {
+ $tzMsg = $msg->text();
}
date_default_timezone_set( $oldtz );
# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
$text = $this->replaceVariables( $text );
+ # This works almost by chance, as replaceVariables() is called before getUserSig(),
+ # which may corrupt this parser instance via its wfMsgExt( parsemag ) call.
+
# Signatures
$sigText = $this->getUserSig( $user );
$text = strtr( $text, array(
* validated, ready-to-insert wikitext.
* If you have pre-fetched the nickname or the fancySig option, you can
* specify them here to save a database query.
+ * Do not reuse this parser instance after calling getUserSig(),
+ * as it may have changed if it's the $wgParser.
*
- * @param User $user
+ * @param $user User
+ * @param $nickname String: nickname to use or false to use user's default nickname
+ * @param $fancySig Boolean: whether the nickname is the complete signature
+ * or null to use default value
* @return string
*/
function getUserSig( &$user, $nickname = false, $fancySig = null ) {
/**
* Check that the user's signature contains no bad XML
*
- * @param string $text
+ * @param $text String
* @return mixed An expanded string, or false if invalid.
*/
function validateSig( $text ) {
* 1) Strip ~~~, ~~~~ and ~~~~~ out of signatures @see cleanSigInSig
* 2) Substitute all transclusions
*
- * @param string $text
+ * @param $text String
* @param $parsing Whether we're cleaning (preferences save) or parsing
- * @return string Signature text
+ * @return String: signature text
*/
function cleanSig( $text, $parsing = false ) {
if ( !$parsing ) {
global $wgTitle;
+ $this->mOptions = new ParserOptions;
$this->clearState();
$this->setTitle( $wgTitle );
- $this->mOptions = new ParserOptions;
$this->setOutputType = self::OT_PREPROCESS;
}
/**
* Strip ~~~, ~~~~ and ~~~~~ out of signatures
- * @param string $text
- * @return string Signature text with /~{3,5}/ removed
+ *
+ * @param $text String
+ * @return String: signature text with /~{3,5}/ removed
*/
function cleanSigInSig( $text ) {
$text = preg_replace( '/~{3,5}/', '', $text );
/**
* Set up some variables which are usually set up in parse()
* so that an external function can call some class members with confidence
- * @public
*/
- function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
+ public function startExternalParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
+ $this->startParse( $title, $options, $outputType, $clearState );
+ }
+
+ private function startParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
$this->setTitle( $title );
$this->mOptions = $options;
$this->setOutputType( $outputType );
/**
* Wrapper for preprocess()
*
- * @param string $text the text to preprocess
- * @param ParserOptions $options options
- * @return string
- * @public
+ * @param $text String: the text to preprocess
+ * @param $options ParserOptions: options
+ * @param $title Title object or null to use $wgTitle
+ * @return String
*/
- function transformMsg( $text, $options ) {
- global $wgTitle;
+ public function transformMsg( $text, $options, $title = null ) {
static $executing = false;
# Guard against infinite recursion
$executing = true;
wfProfileIn( __METHOD__ );
- $text = $this->preprocess( $text, $wgTitle, $options );
+ if ( !$title ) {
+ global $wgTitle;
+ $title = $wgTitle;
+ }
+ if ( !$title ) {
+ # It's not uncommon to have a null $wgTitle in scripts. See r80898
+ # Create a ghost title in such a case
+ $title = Title::newFromText( 'Dwimmerlaik' );
+ }
+ $text = $this->preprocess( $text, $title, $options );
$executing = false;
wfProfileOut( __METHOD__ );
* Transform and return $text. Use $parser for any required context, e.g. use
* $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
*
- * @public
- *
- * @param mixed $tag The tag to use, e.g. 'hook' for <hook>
- * @param mixed $callback The callback function (and object) to use for the tag
- *
+ * @param $tag Mixed: the tag to use, e.g. 'hook' for <hook>
+ * @param $callback Mixed: the callback function (and object) to use for the tag
* @return The old value of the mTagHooks array associated with the hook
*/
- function setHook( $tag, $callback ) {
+ public function setHook( $tag, $callback ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
$this->mTagHooks[$tag] = $callback;
if ( !in_array( $tag, $this->mStripList ) ) {
function setTransparentTagHook( $tag, $callback ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" );
$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
$this->mTransparentTagHooks[$tag] = $callback;
* nowiki Wiki markup in the return value should be escaped
* isHTML The returned text is HTML, armour it against wikitext transformation
*
- * @public
- *
- * @param string $id The magic word ID
- * @param mixed $callback The callback function (and object) to use
- * @param integer $flags a combination of the following flags:
+ * @param $id String: The magic word ID
+ * @param $callback Mixed: the callback function (and object) to use
+ * @param $flags Integer: a combination of the following flags:
* SFH_NO_HASH No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
*
* SFH_OBJECT_ARGS Pass the template arguments as PPNode objects instead of text. This
*
* @return The old callback function for this name, if any
*/
- function setFunctionHook( $id, $callback, $flags = 0 ) {
+ public function setFunctionHook( $id, $callback, $flags = 0 ) {
global $wgContLang;
$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
/**
* Get all registered function hook identifiers
*
- * @return array
+ * @return Array
*/
function getFunctionHooks() {
return array_keys( $this->mFunctionHooks );
*/
function setFunctionTagHook( $tag, $callback, $flags ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
$old = isset( $this->mFunctionTagHooks[$tag] ) ?
$this->mFunctionTagHooks[$tag] : null;
$this->mFunctionTagHooks[$tag] = array( $callback, $flags );
/**
* Replace <!--LINK--> link placeholders with plain text of links
* (not HTML-formatted).
- * @param string $text
- * @return string
+ *
+ * @param $text String
+ * @return String
*/
function replaceLinkHoldersText( $text ) {
return $this->mLinkHolders->replaceText( $text );
$ig->setParser( $this );
$ig->setHideBadImages();
$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );
- $ig->useSkin( $this->mOptions->getSkin() );
+ $ig->useSkin( $this->mOptions->getSkin( $this->mTitle ) );
$ig->mRevisionId = $this->mRevisionId;
if ( isset( $params['showfilename'] ) ) {
}
if ( strpos( $matches[0], '%' ) !== false ) {
- $matches[1] = urldecode( $matches[1] );
+ $matches[1] = rawurldecode( $matches[1] );
}
- $tp = Title::newFromText( $matches[1] );
+ $tp = Title::newFromText( $matches[1], NS_FILE );
$nt =& $tp;
if ( is_null( $nt ) ) {
# Bogus title. Ignore these so we don't bomb out later.
/**
* Parse image options text and use it to make an image
- * @param Title $title
- * @param string $options
- * @param LinkHolderArray $holders
+ *
+ * @param $title Title
+ * @param $options String
+ * @param $holders LinkHolderArray
*/
function makeImage( $title, $options, $holders = false ) {
# Check if the options text is of the form "options|alt text"
# * text-bottom
$parts = StringUtils::explode( "|", $options );
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
# Give extensions a chance to select the file revision for us
$skip = $time = $descQuery = false;
}
# Get the file
- $imagename = $title->getDBkey();
$file = wfFindFile( $title, array( 'time' => $time ) );
# Get parameter map
$handler = $file ? $file->getHandler() : false;
if ( preg_match( "/^($prots)$chars+$/", $value, $m ) ) {
$paramName = 'link-url';
$this->mOutput->addExternalLink( $value );
+ if ( $this->mOptions->getExternalLinkTarget() ) {
+ $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
+ }
$validated = true;
}
} else {
# Will the image be presented in a frame, with the caption below?
$imageIsFramed = isset( $params['frame']['frame'] ) ||
- isset( $params['frame']['framed'] ) ||
- isset( $params['frame']['thumbnail'] ) ||
- isset( $params['frame']['manualthumb'] );
+ isset( $params['frame']['framed'] ) ||
+ isset( $params['frame']['thumbnail'] ) ||
+ isset( $params['frame']['manualthumb'] );
# In the old days, [[Image:Foo|text...]] would set alt text. Later it
# came to also set the caption, ordinary text after the image -- which
wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params ) );
# Linker does the rest
- $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'], $time, $descQuery );
+ $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'], $time, $descQuery, $this->mOptions->getThumbSize() );
# Give the handler a chance to modify the parser object
if ( $handler ) {
$this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
}
- /**#@+
+ /**
* Callback from the Sanitizer for expanding items found in HTML attribute
* values, so they can be safely tested and escaped.
- * @param string $text
- * @param PPFrame $frame
- * @return string
+ *
+ * @param $text String
+ * @param $frame PPFrame
+ * @return String
* @private
*/
function attributeStripCallback( &$text, $frame = false ) {
return $text;
}
- /**#@-*/
-
- /**#@+
- * Accessor/mutator
- */
- function Title( $x = null ) { return wfSetVar( $this->mTitle, $x ); }
- function Options( $x = null ) { return wfSetVar( $this->mOptions, $x ); }
- function OutputType( $x = null ) { return wfSetVar( $this->mOutputType, $x ); }
- /**#@-*/
-
- /**#@+
+ /**
* Accessor
*/
function getTags() {
# Names of the transparent tag hooks are listed first, then the regular tag hooks
$transparentNames = array_keys( $this->mTransparentTagHooks );
$regularNames = array_keys( $this->mTagHooks );
return array_merge( $transparentNames, $regularNames );
}
- /**#@-*/
+ /**
+ * Replace transparent tags in $text with the values given by the callbacks.
+ *
+ * Transparent tag hooks are like regular XML-style tag hooks, except they
+ * operate late in the transformation sequence, on HTML instead of wikitext.
+ */
+ function replaceTransparentTags( $text ) {
+ $found = array();
+ $tagNames = array_keys( $this->mTransparentTagHooks );
+ $text = $this->extractTagsAndParams( $tagNames, $text, $found, $this->mUniqPrefix );
+
+ foreach ( $found as $marker => $data ) {
+ list( $element, $content, $params, $tag ) = $data;
+ $hookKey = strtolower( $element );
+ if ( !isset( $this->mTransparentTagHooks[$hookKey] ) ) {
+ # No callback registered under the lower-cased name: use $tag as the output
+ $output = $tag;
+ } else {
+ $output = call_user_func_array( $this->mTransparentTagHooks[$hookKey], array( $content, $params, $this ) );
+ }
+ # Register the output under its marker in the general strip state
+ $this->mStripState->addGeneral( $marker, $output );
+ }
+ return $text;
+ }
/**
* Break wikitext input into sections, and either pull or replace
*
* External callers should use the getSection and replaceSection methods.
*
- * @param string $text Page wikitext
- * @param string $section A section identifier string of the form:
+ * @param $text String: Page wikitext
+ * @param $section String: a section identifier string of the form:
* <flag1> - <flag2> - ... - <section number>
*
* Currently the only recognised flag is "T", which means the target section number
* pull the given section along with its lower-level subsections. If the section is
* not found, $mode=get will return $newtext, and $mode=replace will return $text.
*
- * @param string $mode One of "get" or "replace"
- * @param string $newText Replacement text for section data.
- * @return string for "get", the extracted section text.
- * for "replace", the whole page with the section replaced.
+ * @param $mode String: one of "get" or "replace"
+ * @param $newText String: replacement text for section data.
+ * @return String: for "get", the extracted section text.
+ * for "replace", the whole page with the section replaced.
*/
private function extractSections( $text, $section, $mode, $newText='' ) {
- global $wgTitle;
- $this->clearState();
- $this->setTitle( $wgTitle ); # not generally used but removes an ugly failure mode
- $this->mOptions = new ParserOptions;
- $this->setOutputType( self::OT_PLAIN );
+ global $wgTitle; # not generally used but removes an ugly failure mode
+ $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
$outText = '';
$frame = $this->getPreprocessor()->newFrame();
# Section zero doesn't nest, level=big
$targetLevel = 1000;
} else {
- while ( $node ) {
- if ( $node->getName() === 'h' ) {
- $bits = $node->splitHeading();
+ while ( $node ) {
+ if ( $node->getName() === 'h' ) {
+ $bits = $node->splitHeading();
if ( $bits['i'] == $sectionIndex ) {
- $targetLevel = $bits['level'];
+ $targetLevel = $bits['level'];
break;
}
}
*
* If a section contains subsections, these are also returned.
*
- * @param string $text text to look in
- * @param string $section section identifier
- * @param string $deftext default to return if section is not found
+ * @param $text String: text to look in
+ * @param $section String: section identifier
+ * @param $deftext String: default to return if section is not found
* @return string text of the requested section
*/
public function getSection( $text, $section, $deftext='' ) {
# Delegate to extractSections() in "get" mode; $deftext is handed over as its $newText argument
$sectionText = $this->extractSections( $text, $section, "get", $deftext );
return $sectionText;
}
+ /**
+ * This function returns $oldtext after the content of the section
+ * specified by $section has been replaced with $text.
+ *
+ * @param $oldtext String: former text of the article
+ * @param $section Numeric: section identifier
+ * @param $text String: replacing text
+ * @return String: modified text
+ */
public function replaceSection( $oldtext, $section, $text ) {
# Delegate to extractSections() in "replace" mode, with $text as the replacement content
$updatedText = $this->extractSections( $oldtext, $section, "replace", $text );
return $updatedText;
}
+ /**
+ * Get the ID of the revision we are parsing
+ *
+ * @return Mixed: integer or null
+ */
+ function getRevisionId() {
+ # Plain accessor for the mRevisionId property
+ $revisionId = $this->mRevisionId;
+ return $revisionId;
+ }
+
+ /**
+ * Get the revision object for $this->mRevisionId
+ *
+ * @return Mixed: either a Revision object or null
+ */
+ protected function getRevisionObject() {
+ # Load the revision only when nothing is cached yet and a revision ID is set
+ if ( $this->mRevisionObject === null && $this->mRevisionId !== null ) {
+ $this->mRevisionObject = Revision::newFromId( $this->mRevisionId );
+ }
+
+ # Either the cached or freshly loaded value, or null when there was no ID to load from
+ return $this->mRevisionObject;
+ }
+
/**
* Get the timestamp associated with the current revision, adjusted for
* the default server-local timestamp
function getRevisionTimestamp() {
if ( is_null( $this->mRevisionTimestamp ) ) {
wfProfileIn( __METHOD__ );
- global $wgContLang;
- $dbr = wfGetDB( DB_SLAVE );
- $timestamp = $dbr->selectField( 'revision', 'rev_timestamp',
- array( 'rev_id' => $this->mRevisionId ), __METHOD__ );
-
- # Normalize timestamp to internal MW format for timezone processing.
- # This has the added side-effect of replacing a null value with
- # the current time, which gives us more sensible behavior for
- # previews.
- $timestamp = wfTimestamp( TS_MW, $timestamp );
-
- # The cryptic '' timezone parameter tells to use the site-default
- # timezone offset instead of the user settings.
- #
- # Since this value will be saved into the parser cache, served
- # to other users, and potentially even used inside links and such,
- # it needs to be consistent for all visitors.
- $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
+
+ $revObject = $this->getRevisionObject();
+ $timestamp = $revObject ? $revObject->getTimestamp() : false;
+
+ if( $timestamp !== false ) {
+ global $wgContLang;
+
+ # The cryptic '' timezone parameter tells to use the site-default
+ # timezone offset instead of the user settings.
+ #
+ # Since this value will be saved into the parser cache, served
+ # to other users, and potentially even used inside links and such,
+ # it needs to be consistent for all visitors.
+ $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
+ }
wfProfileOut( __METHOD__ );
}
/**
* Get the name of the user that edited the last revision
+ *
+ * @return String: user name
*/
function getRevisionUser() {
- # if this template is subst: the revision id will be blank,
- # so just use the current user's name
- if ( $this->mRevisionId ) {
- $revision = Revision::newFromId( $this->mRevisionId );
- $revuser = $revision->getUserText();
- } else {
- global $wgUser;
- $revuser = $wgUser->getName();
+ if( is_null( $this->mRevisionUser ) ) {
+ $revObject = $this->getRevisionObject();
+
+ # if this template is subst: the revision id will be blank,
+ # so just use the current user's name
+ if( $revObject ) {
+ $this->mRevisionUser = $revObject->getUserText();
+ } elseif( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
+ $this->mRevisionUser = $this->getUser()->getName();
+ }
}
- return $revuser;
+ return $this->mRevisionUser;
}
/**
*/
public function setDefaultSort( $sort ) {
$this->mDefaultSort = $sort;
+ $this->mOutput->setProperty( 'defaultsort', $sort );
}
/**
* Accessor for $mDefaultSort
- * Will use the title/prefixed title if none is set
+ * Will use the empty string if none is set.
+ *
+ * This value is treated as a prefix, so the
+ * empty string is equivalent to sorting by
+ * page name.
*
* @return string
*/
public function getDefaultSort() {
- global $wgCategoryPrefixedDefaultSortkey;
if ( $this->mDefaultSort !== false ) {
return $this->mDefaultSort;
- } elseif ( $this->mTitle->getNamespace() == NS_CATEGORY ||
- !$wgCategoryPrefixedDefaultSortkey )
- {
- return $this->mTitle->getText();
} else {
- return $this->mTitle->getPrefixedText();
+ return '';
}
}
public function guessSectionNameFromWikiText( $text ) {
# Strip out wikitext links(they break the anchor)
$text = $this->stripSectionName( $text );
- $headline = Sanitizer::decodeCharReferences( $text );
- # strip out HTML
- $headline = StringUtils::delimiterReplace( '<', '>', '', $headline );
- $headline = trim( $headline );
- $sectionanchor = '#' . urlencode( str_replace( ' ', '_', $headline ) );
- $replacearray = array(
- '%3A' => ':',
- '%' => '.'
- );
- return str_replace(
- array_keys( $replacearray ),
- array_values( $replacearray ),
- $sectionanchor );
+ $text = Sanitizer::normalizeSectionNameWhitespace( $text );
+ return '#' . Sanitizer::escapeId( $text, 'noninitial' );
+ }
+
+ /**
+ * Same as guessSectionNameFromWikiText(), but produces legacy anchors
+ * instead. For use in redirects, since IE6 interprets Redirect: headers
+ * as something other than UTF-8 (apparently?), resulting in breakage.
+ *
+ * @param $text String: The section name
+ * @return string An anchor
+ */
+ public function guessLegacySectionNameFromWikiText( $text ) {
+ # Strip out wikitext links(they break the anchor)
+ $text = $this->stripSectionName( $text );
+ $text = Sanitizer::normalizeSectionNameWhitespace( $text );
+ return '#' . Sanitizer::escapeId( $text, array( 'noninitial', 'legacy' ) );
}
/**
* to create valid section anchors by mimicking the output of the
* parser when headings are parsed.
*
- * @param $text string Text string to be stripped of wikitext
+ * @param $text String: text string to be stripped of wikitext
* for use in a Section anchor
* @return Filtered text string
*/
return $text;
}
- function srvus( $text ) {
- return $this->testSrvus( $text, $this->mOutputType );
- }
-
/**
* strip/replaceVariables/unstrip for preprocessor regression testing
*/
- function testSrvus( $text, $title, $options, $outputType = self::OT_HTML ) {
- $this->clearState();
+ function testSrvus( $text, $title, ParserOptions $options, $outputType = self::OT_HTML ) {
if ( !$title instanceof Title ) {
$title = Title::newFromText( $title );
}
- $this->mTitle = $title;
- $this->mOptions = $options;
- $this->setOutputType( $outputType );
+ $this->startParse( $title, $options, $outputType, true );
+
$text = $this->replaceVariables( $text );
$text = $this->mStripState->unstripBoth( $text );
$text = Sanitizer::removeHTMLtags( $text );
return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
}
+ /**
+ * Call a callback function on all regions of the given text that are not
+ * inside strip markers, and replace those regions with the return value
+ * of the callback. For example, with input:
+ *
+ * aaa<MARKER>bbb
+ *
+ * This will call the callback function twice, with 'aaa' and 'bbb'. Those
+ * two strings will be replaced with the value returned by the callback in
+ * each case.
+ */
function markerSkipCallback( $s, $callback ) {
$i = 0;
$out = '';
return $out;
}
- function serialiseHalfParsedText( $text ) {
- $data = array();
- $data['text'] = $text;
-
- # First, find all strip markers, and store their
- # data in an array.
- $stripState = new StripState;
- $pos = 0;
- while ( ( $start_pos = strpos( $text, $this->mUniqPrefix, $pos ) )
- && ( $end_pos = strpos( $text, self::MARKER_SUFFIX, $pos ) ) )
- {
- $end_pos += strlen( self::MARKER_SUFFIX );
- $marker = substr( $text, $start_pos, $end_pos-$start_pos );
-
- if ( !empty( $this->mStripState->general->data[$marker] ) ) {
- $replaceArray = $stripState->general;
- $stripText = $this->mStripState->general->data[$marker];
- } elseif ( !empty( $this->mStripState->nowiki->data[$marker] ) ) {
- $replaceArray = $stripState->nowiki;
- $stripText = $this->mStripState->nowiki->data[$marker];
- } else {
- throw new MWException( "Hanging strip marker: '$marker'." );
- }
-
- $replaceArray->setPair( $marker, $stripText );
- $pos = $end_pos;
- }
- $data['stripstate'] = $stripState;
-
- # Now, find all of our links, and store THEIR
- # data in an array! :)
- $links = array( 'internal' => array(), 'interwiki' => array() );
- $pos = 0;
-
- # Internal links
- while ( ( $start_pos = strpos( $text, '<!--LINK ', $pos ) ) ) {
- list( $ns, $trail ) = explode( ':', substr( $text, $start_pos + strlen( '<!--LINK ' ) ), 2 );
-
- $ns = trim( $ns );
- if ( empty( $links['internal'][$ns] ) ) {
- $links['internal'][$ns] = array();
- }
-
- $key = trim( substr( $trail, 0, strpos( $trail, '-->' ) ) );
- $links['internal'][$ns][] = $this->mLinkHolders->internals[$ns][$key];
- $pos = $start_pos + strlen( "<!--LINK $ns:$key-->" );
- }
-
- $pos = 0;
-
- # Interwiki links
- while ( ( $start_pos = strpos( $text, '<!--IWLINK ', $pos ) ) ) {
- $data = substr( $text, $start_pos );
- $key = trim( substr( $data, 0, strpos( $data, '-->' ) ) );
- $links['interwiki'][] = $this->mLinkHolders->interwiki[$key];
- $pos = $start_pos + strlen( "<!--IWLINK $key-->" );
- }
-
- $data['linkholder'] = $links;
-
+ /**
+ * Save the parser state required to convert the given half-parsed text to
+ * HTML. "Half-parsed" in this context means the output of
+ * recursiveTagParse() or internalParse(). This output has strip markers
+ * from replaceVariables (extensionSubstitution() etc.), and link
+ * placeholders from replaceLinkHolders().
+ *
+ * Returns an array which can be serialized and stored persistently. This
+ * array can later be loaded into another parser instance with
+ * unserializeHalfParsedText(). The text can then be safely incorporated into
+ * the return value of a parser hook.
+ */
+ function serializeHalfParsedText( $text ) {
+ wfProfileIn( __METHOD__ );
+ $data = array(
+ 'text' => $text,
+ 'version' => self::HALF_PARSED_VERSION,
+ 'stripState' => $this->mStripState->getSubState( $text ),
+ 'linkHolders' => $this->mLinkHolders->getSubArray( $text )
+ );
+ wfProfileOut( __METHOD__ );
return $data;
}
/**
- * TODO: document
- * @param $data Array
- * @param $intPrefix String unique identifying prefix
+ * Load the parser state given in the $data array, which is assumed to
+ * have been generated by serializeHalfParsedText(). The text content is
+ * extracted from the array, and its markers are transformed into markers
+ * appropriate for the current Parser instance. This transformed text is
+ * returned, and can be safely included in the return value of a parser
+ * hook.
+ *
+ * If the $data array has been stored persistently, the caller should first
+ * check whether it is still valid, by calling isValidHalfParsedText().
+ *
+ * @param $data Array: serialized data
* @return String
*/
- function unserialiseHalfParsedText( $data, $intPrefix = null ) {
- if ( !$intPrefix ) {
- $intPrefix = $this->getRandomString();
+ function unserializeHalfParsedText( $data ) {
+ if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
+ throw new MWException( __METHOD__.': invalid version' );
}
# First, extract the strip state.
- $stripState = $data['stripstate'];
- $this->mStripState->general->merge( $stripState->general );
- $this->mStripState->nowiki->merge( $stripState->nowiki );
-
- # Now, extract the text, and renumber links
- $text = $data['text'];
- $links = $data['linkholder'];
+ $texts = array( $data['text'] );
+ $texts = $this->mStripState->merge( $data['stripState'], $texts );
- # Internal...
- foreach ( $links['internal'] as $ns => $nsLinks ) {
- foreach ( $nsLinks as $key => $entry ) {
- $newKey = $intPrefix . '-' . $key;
- $this->mLinkHolders->internals[$ns][$newKey] = $entry;
-
- $text = str_replace( "<!--LINK $ns:$key-->", "<!--LINK $ns:$newKey-->", $text );
- }
- }
-
- # Interwiki...
- foreach ( $links['interwiki'] as $key => $entry ) {
- $newKey = "$intPrefix-$key";
- $this->mLinkHolders->interwikis[$newKey] = $entry;
-
- $text = str_replace( "<!--IWLINK $key-->", "<!--IWLINK $newKey-->", $text );
- }
+ # Now renumber links
+ $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );
# Should be good to go.
- return $text;
+ return $texts[0];
}
-}
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class StripState {
- var $general, $nowiki;
-
- function __construct() {
- $this->general = new ReplacementArray;
- $this->nowiki = new ReplacementArray;
- }
-
- function unstripGeneral( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->general->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-
- function unstripNoWiki( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->nowiki->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-
- function unstripBoth( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->general->replace( $text );
- $text = $this->nowiki->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-}
-
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class OnlyIncludeReplacer {
- var $output = '';
-
- function replace( $matches ) {
- if ( substr( $matches[1], -1 ) === "\n" ) {
- $this->output .= substr( $matches[1], 0, -1 );
- } else {
- $this->output .= $matches[1];
- }
+ /**
+ * Returns true if the given array, presumed to be generated by
+ * serializeHalfParsedText(), is compatible with the current version of the
+ * parser, i.e. it has a 'version' entry equal to
+ * self::HALF_PARSED_VERSION.
+ *
+ * @param $data Array: data to check
+ * @return Boolean
+ */
+ function isValidHalfParsedText( $data ) {
+ return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
}
}