* produces altered wiki markup.
* preprocess()
* removes HTML comments and expands templates
- * cleanSig()
+ * cleanSig() / cleanSigInSig()
* Cleans a signature before saving it to preferences
- * extractSections()
- * Extracts sections from an article for section editing
+ * getSection()
+ * Return the content of a section from an article for section editing
+ * replaceSection()
+ * Replaces a section by number inside an article
* getPreloadText()
* Removes <noinclude> sections, and <includeonly> tags.
*
*/
const VERSION = '1.6.4';
+ /**
+ * Update this version number when the output of serialiseHalfParsedText()
+ * changes in an incompatible way
+ */
+ const HALF_PARSED_VERSION = 2;
+
# Flags for Parser::setFunctionHook
# Also available as global constants from Defines.php
const SFH_NO_HASH = 1;
const MARKER_SUFFIX = "-QINU\x7f";
# Persistent:
- var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables;
- var $mSubstWords, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex;
- var $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList;
- var $mVarCache, $mConf, $mFunctionTagHooks;
-
+ var $mTagHooks = array();
+ var $mTransparentTagHooks = array();
+ var $mFunctionHooks = array();
+ var $mFunctionSynonyms = array( 0 => array(), 1 => array() );
+ var $mFunctionTagHooks = array();
+ var $mStripList = array();
+ var $mDefaultStripList = array();
+ var $mVarCache = array();
+ var $mImageParams = array();
+ var $mImageParamsMagicArray = array();
+ var $mMarkerIndex = 0;
+ var $mFirstCall = true;
+ var $mVariables, $mSubstWords; # Initialised by initialiseVariables()
+ var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor
# Cleared with clearState():
- var $mOutput, $mAutonumber, $mDTopen, $mStripState;
+ var $mOutput, $mAutonumber, $mDTopen;
+
+ /**
+ * @var StripState
+ */
+ var $mStripState;
+
var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
var $mLinkHolders, $mLinkID;
var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
var $mExpensiveFunctionCount; # number of expensive parser function calls
+ /**
+ * @var User
+ */
+ var $mUser; # User object; only used when doing pre-save transform
+
# Temporary
# These are variables reset at least once per parse regardless of $clearState
- var $mOptions; # ParserOptions object
+
+ /**
+ * @var ParserOptions
+ */
+ var $mOptions;
+
+ /**
+ * @var Title
+ */
var $mTitle; # Title context, used for self-link rendering and similar things
var $mOutputType; # Output type, one of the OT_xxx constants
var $ot; # Shortcut alias, see setOutputType()
+ var $mRevisionObject; # The revision object of the specified revision ID
var $mRevisionId; # ID to display in {{REVISIONID}} tags
var $mRevisionTimestamp; # The timestamp of the specified revision ID
+ var $mRevisionUser; # Userto display in {{REVISIONUSER}} tag
var $mRevIdForTs; # The revision ID which was used to fetch the timestamp
/**
*/
function __construct( $conf = array() ) {
$this->mConf = $conf;
- $this->mTagHooks = array();
- $this->mTransparentTagHooks = array();
- $this->mFunctionHooks = array();
- $this->mFunctionTagHooks = array();
- $this->mFunctionSynonyms = array( 0 => array(), 1 => array() );
- $this->mDefaultStripList = $this->mStripList = array();
$this->mUrlProtocols = wfUrlProtocols();
$this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
'[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S';
- $this->mVarCache = array();
if ( isset( $conf['preprocessorClass'] ) ) {
$this->mPreprocessorClass = $conf['preprocessorClass'];
} elseif ( extension_loaded( 'domxml' ) ) {
} else {
$this->mPreprocessorClass = 'Preprocessor_Hash';
}
- $this->mMarkerIndex = 0;
- $this->mFirstCall = true;
}
/**
$this->firstCallInit();
}
$this->mOutput = new ParserOutput;
+ $this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );
$this->mAutonumber = 0;
$this->mLastSection = '';
$this->mDTopen = false;
$this->mIncludeCount = array();
- $this->mStripState = new StripState;
$this->mArgStack = false;
$this->mInPre = false;
$this->mLinkHolders = new LinkHolderArray( $this );
$this->mLinkID = 0;
- $this->mRevisionTimestamp = $this->mRevisionId = null;
+ $this->mRevisionObject = $this->mRevisionTimestamp =
+ $this->mRevisionId = $this->mRevisionUser = null;
$this->mVarCache = array();
+ $this->mUser = null;
/**
* Prefix for temporary replacement strings for the multipass parser.
# $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
# Changed to \x7f to allow XML double-parsing -- TS
$this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
+ $this->mStripState = new StripState( $this->mUniqPrefix );
# Clear these on every parse, bug 4549
wfProfileIn( __METHOD__ );
wfProfileIn( $fname );
- if ( $clearState ) {
- $this->clearState();
- }
-
- $this->mOptions = $options;
- $this->setTitle( $title ); # Page title has to be set for the pre-processor
+ $this->startParse( $title, $options, self::OT_HTML, $clearState );
$oldRevisionId = $this->mRevisionId;
+ $oldRevisionObject = $this->mRevisionObject;
$oldRevisionTimestamp = $this->mRevisionTimestamp;
+ $oldRevisionUser = $this->mRevisionUser;
if ( $revid !== null ) {
$this->mRevisionId = $revid;
+ $this->mRevisionObject = null;
$this->mRevisionTimestamp = null;
+ $this->mRevisionUser = null;
}
- $this->setOutputType( self::OT_HTML );
+
wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
# No more strip!
wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
}
/**
- * A page get its title converted except:
- * a) Language conversion is globally disabled
- * b) Title convert is globally disabled
- * c) The page is a redirect page
- * d) User request with a "linkconvert" set to "no"
- * e) A "nocontentconvert" magic word has been set
- * f) A "notitleconvert" magic word has been set
- * g) User sets "noconvertlink" in his/her preference
- *
- * Note that if a user tries to set a title in a conversion
- * rule but content conversion was not done, then the parser
- * won't pick it up. This is probably expected behavior.
+ * A converted title will be provided in the output object if title and
+ * content conversion are enabled, the article text does not contain
+ * a conversion-suppressing double-underscore tag, and no
+ * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
+ * automatic link conversion.
*/
if ( !( $wgDisableLangConversion
|| $wgDisableTitleConversion
|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
|| isset( $this->mDoubleUnderscores['notitleconvert'] )
- || $this->mOutput->getDisplayTitle() !== false ) )
+ || $this->mOutput->getDisplayTitle() !== false ) )
{
$convruletitle = $wgContLang->getConvRuleTitle();
if ( $convruletitle ) {
wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
-//!JF Move to its own function
-
- $uniq_prefix = $this->mUniqPrefix;
- $matches = array();
- $elements = array_keys( $this->mTransparentTagHooks );
- $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
-
- foreach ( $matches as $marker => $data ) {
- list( $element, $content, $params, $tag ) = $data;
- $tagName = strtolower( $element );
- if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
- $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
- } else {
- $output = $tag;
- }
- $this->mStripState->general->setPair( $marker, $output );
- }
+ $text = $this->replaceTransparentTags( $text );
$text = $this->mStripState->unstripGeneral( $text );
$text = Sanitizer::normalizeCharReferences( $text );
- if ( ( $wgUseTidy && $this->mOptions->mTidy ) || $wgAlwaysUseTidy ) {
+ if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
$text = MWTidy::tidy( $text );
} else {
# attempt to sanitize at least some nesting problems
$PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/$wgExpensiveParserFunctionLimit\n";
$limitReport =
"NewPP limit report\n" .
- "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
+ "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
"Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
"Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
$PFreport;
$this->mOutput->setText( $text );
$this->mRevisionId = $oldRevisionId;
+ $this->mRevisionObject = $oldRevisionObject;
$this->mRevisionTimestamp = $oldRevisionTimestamp;
+ $this->mRevisionUser = $oldRevisionUser;
wfProfileOut( $fname );
wfProfileOut( __METHOD__ );
* Expand templates and variables in the text, producing valid, static wikitext.
* Also removes comments.
*/
- function preprocess( $text, $title, $options, $revid = null ) {
+ function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) {
wfProfileIn( __METHOD__ );
- $this->clearState();
- $this->setOutputType( self::OT_PREPROCESS );
- $this->mOptions = $options;
- $this->setTitle( $title );
+ $this->startParse( $title, $options, self::OT_PREPROCESS, true );
if ( $revid !== null ) {
$this->mRevisionId = $revid;
}
* <noinclude>, <includeonly> etc. are parsed as for template transclusion,
* comments, templates, arguments, tags hooks and parser functions are untouched.
*/
- public function getPreloadText( $text, $title, $options ) {
+ public function getPreloadText( $text, Title $title, ParserOptions $options ) {
# Parser (re)initialisation
- $this->clearState();
- $this->setOutputType( self::OT_PLAIN );
- $this->mOptions = $options;
- $this->setTitle( $title );
+ $this->startParse( $title, $options, self::OT_PLAIN, true );
$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
- return $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
+ $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
+ $text = $this->mStripState->unstripBoth( $text );
+ return $text;
}
/**
* Get a random string
*
- * @private
* @static
*/
- static function getRandomString() {
+ static public function getRandomString() {
return dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
}
+ /**
+ * Set the current user.
+ * Should only be used when doing pre-save transform.
+ *
+ * @param $user Mixed: User object or null (to reset)
+ */
+ function setUser( $user ) {
+ $this->mUser = $user;
+ }
+
/**
* Accessor for mUniqPrefix.
*
* Set the context title
*/
function setTitle( $t ) {
- if ( !$t || $t instanceof FakeTitle ) {
- $t = Title::newFromText( 'NO TITLE' );
- }
+ if ( !$t || $t instanceof FakeTitle ) {
+ $t = Title::newFromText( 'NO TITLE' );
+ }
if ( strval( $t->getFragment() ) !== '' ) {
# Strip the fragment to avoid various odd effects
*
* @return Title object
*/
- function &getTitle() {
+ function getTitle() {
return $this->mTitle;
}
return $this->mLinkID++;
}
+ function setLinkID( $id ) {
+ $this->mLinkID = $id;
+ }
+
+ /**
+ * @return Language
+ */
function getFunctionLang() {
global $wgLang, $wgContLang;
}
}
+ /**
+ * Get a User object either from $this->mUser, if set, or from the
+ * ParserOptions object otherwise
+ *
+ * @return User object
+ */
+ function getUser() {
+ if ( !is_null( $this->mUser ) ) {
+ return $this->mUser;
+ }
+ return $this->mOptions->getUser();
+ }
+
/**
* Get a preprocessor object
*
*
* @param $elements list of element names. Comments are always extracted.
* @param $text Source text string.
- * @param $matches Out parameter, Array: extarcted tags
+ * @param $matches Out parameter, Array: extracted tags
* @param $uniq_prefix
* @return String: stripped text
*
function insertStripItem( $text ) {
$rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
$this->mMarkerIndex++;
- $this->mStripState->general->setPair( $rnd, $text );
+ $this->mStripState->addGeneral( $rnd, $text );
return $rnd;
}
*/
function doTableStuff( $text ) {
wfProfileIn( __METHOD__ );
-
+
$lines = StringUtils::explode( "\n", $text );
- $text = null;
$out = '';
$td_history = array(); # Is currently a td tag open?
$last_tag_history = array(); # Save history of last lag activated (td, th or caption)
$has_opened_tr = array(); # Did this table open a <tr> element?
$indent_level = 0; # indent level of the table
- $table_tag = 'table';
- $tr_tag = 'tr';
- $th_tag = 'th';
- $td_tag = 'td';
- $caption_tag = 'caption';
-
- $extra_table_attribs = null;
- $extra_tr_attribs = null;
- $extra_td_attribs = null;
-
- $convert_style = false;
-
foreach ( $lines as $outLine ) {
$line = trim( $outLine );
- if ( $line === '' ) { # empty line, go to next line
+ if ( $line === '' ) { # empty line, go to next line
$out .= $outLine."\n";
continue;
}
$indent_level = strlen( $matches[1] );
$attributes = $this->mStripState->unstripBoth( $matches[2] );
+ $attributes = Sanitizer::fixTagAttributes( $attributes , 'table' );
- $attr = Sanitizer::decodeTagAttributes( $attributes );
-
- $mode = @$attr['mode'];
- if ( !$mode ) $mode = 'data';
-
- if ( $mode == 'grid' || $mode == 'layout' ) {
- $table_tag = 'div';
- $tr_tag = 'div';
- $th_tag = 'div';
- $td_tag = 'div';
- $caption_tag = 'div';
-
- $extra_table_attribs = array( 'class' => 'grid-table' );
- $extra_tr_attribs = array( 'class' => 'grid-row' );
- $extra_td_attribs = array( 'class' => 'grid-cell' );
-
- $convert_style = true;
- }
-
- if ($convert_style) $attr['style'] = Sanitizer::styleFromAttributes( $attr );
- $attr = Sanitizer::validateTagAttributes( $attr, $table_tag );
- $attributes = Sanitizer::collapseTagAttributes( $attr, $extra_table_attribs );
-
- $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<$table_tag{$attributes}>";
+ $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
array_push( $td_history , false );
array_push( $last_tag_history , '' );
array_push( $tr_history , false );
continue;
} elseif ( substr( $line , 0 , 2 ) === '|}' ) {
# We are ending a table
- $line = "</$table_tag>" . substr( $line , 2 );
+ $line = '</table>' . substr( $line , 2 );
$last_tag = array_pop( $last_tag_history );
if ( !array_pop( $has_opened_tr ) ) {
- $line = "<$tr_tag><$td_tag></$td_tag></$tr_tag>{$line}";
+ $line = "<tr><td></td></tr>{$line}";
}
if ( array_pop( $tr_history ) ) {
- $line = "</$tr_tag>{$line}";
+ $line = "</tr>{$line}";
}
if ( array_pop( $td_history ) ) {
# Whats after the tag is now only attributes
$attributes = $this->mStripState->unstripBoth( $line );
-
- $attr = Sanitizer::decodeTagAttributes( $attributes );
- if ($convert_style) $attr['style'] = Sanitizer::styleFromAttributes( $attr );
- $attr = Sanitizer::validateTagAttributes( $attr, $tr_tag );
- $attributes = Sanitizer::collapseTagAttributes( $attr, $extra_tr_attribs );
-
+ $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
array_pop( $tr_attributes );
array_push( $tr_attributes, $attributes );
array_push( $has_opened_tr , true );
if ( array_pop( $tr_history ) ) {
- $line = "</$tr_tag>";
+ $line = '</tr>';
}
if ( array_pop( $td_history ) ) {
if ( $first_character !== '+' ) {
$tr_after = array_pop( $tr_attributes );
if ( !array_pop( $tr_history ) ) {
- $previous = "<$tr_tag{$tr_after}>\n";
+ $previous = "<tr{$tr_after}>\n";
}
array_push( $tr_history , true );
array_push( $tr_attributes , '' );
}
if ( $first_character === '|' ) {
- $last_tag = $td_tag;
+ $last_tag = 'td';
} elseif ( $first_character === '!' ) {
- $last_tag = $th_tag;
+ $last_tag = 'th';
} elseif ( $first_character === '+' ) {
- $last_tag = $caption_tag;
+ $last_tag = 'caption';
} else {
$last_tag = '';
}
# A cell could contain both parameters and data
$cell_data = explode( '|' , $cell , 2 );
- $attributes = '';
-
# Bug 553: Note that a '|' inside an invalid link should not
# be mistaken as delimiting cell parameters
if ( strpos( $cell_data[0], '[[' ) !== false ) {
- if ($extra_td_attribs) $attributes = Sanitizer::collapseTagAttributes( $extra_td_attribs );
- $cell = "{$previous}<{$last_tag}{$attributes}>{$cell}";
+ $cell = "{$previous}<{$last_tag}>{$cell}";
} elseif ( count( $cell_data ) == 1 ) {
- if ($extra_td_attribs) $attributes = Sanitizer::collapseTagAttributes( $extra_td_attribs );
- $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[0]}";
+ $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
} else {
$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
-
- $attr = Sanitizer::decodeTagAttributes( $attributes );
- if ($convert_style) $attr['style'] = Sanitizer::styleFromAttributes( $attr );
- $attr = Sanitizer::validateTagAttributes( $attr, $last_tag );
- $attributes = Sanitizer::collapseTagAttributes( $attr, $extra_td_attribs );
-
+ $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
$cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
}
# Closing open td, tr && table
while ( count( $td_history ) > 0 ) {
if ( array_pop( $td_history ) ) {
- $out .= "</$td_tag>\n";
+ $out .= "</td>\n";
}
if ( array_pop( $tr_history ) ) {
- $out .= "</$tr_tag>\n";
+ $out .= "</tr>\n";
}
if ( !array_pop( $has_opened_tr ) ) {
- $out .= "<$tr_tag><$td_tag></$td_tag></$tr_tag>\n" ;
+ $out .= "<tr><td></td></tr>\n" ;
}
- $out .= "</$table_tag>\n";
+ $out .= "</table>\n";
}
# Remove trailing line-ending (b/c)
}
# special case: don't return empty table
- if ( $out === "<$table_tag>\n<$tr_tag><$td_tag></$td_tag></$tr_tag>\n</$table_tag>" ) {
+ if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
$out = '';
}
(\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . '
(?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number
ISBN\s+(\b # m[5]: ISBN, capture number
- (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
- (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
- [0-9Xx] # check digit
- \b)
+ (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
+ (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
+ [0-9Xx] # check digit
+ \b)
)!x', array( &$this, 'magicLinkCallback' ), $text );
wfProfileOut( __METHOD__ );
return $text;
return $this->makeFreeExternalLink( $m[0] );
} elseif ( isset( $m[4] ) && $m[4] !== '' ) {
# RFC or PMID
- $CssClass = '';
if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
$keyword = 'RFC';
$urlmsg = 'rfcurl';
throw new MWException( __METHOD__.': unrecognised match type "' .
substr( $m[0], 0, 20 ) . '"' );
}
- $url = wfMsg( $urlmsg, $id);
- $sk = $this->mOptions->getSkin();
- $la = $sk->getExternalLinkAttributes( "external $CssClass" );
- return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
+ $url = wfMsgForContent( $urlmsg, $id );
+ $sk = $this->mOptions->getSkin( $this->mTitle );
+ return $sk->makeExternalLink( $url, "{$keyword} {$id}", true, $CssClass );
} elseif ( isset( $m[5] ) && $m[5] !== '' ) {
# ISBN
$isbn = $m[5];
global $wgContLang;
wfProfileIn( __METHOD__ );
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
$trail = '';
# The characters '<' and '>' (which were escaped by
# First, do some preliminary work. This may shift some apostrophes from
# being mark-up to being text. It also counts the number of occurrences
# of bold and italics mark-ups.
- $i = 0;
$numbold = 0;
$numitalics = 0;
- foreach ( $arr as $r ) {
+ for ( $i = 0; $i < count( $arr ); $i++ ) {
if ( ( $i % 2 ) == 1 ) {
# If there are ever four apostrophes, assume the first is supposed to
# be text, and the remaining three constitute mark-up for bold text.
$numbold++;
}
}
- $i++;
}
# If there is an odd number of both bold and italics, it is likely
/**
* Replace external links (REL)
*
- * Note: this is all very hackish and the order of execution matters a lot.
+ * Note: this is all very hackish and the order of execution matters a lot.
* Make sure to run maintenance/parserTests.php if you change this code.
*
* @private
global $wgContLang;
wfProfileIn( __METHOD__ );
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
$s = array_shift( $bits );
* @private
*/
function maybeMakeExternalImage( $url ) {
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
$imagesexception = !empty( $imagesfrom );
$text = false;
$imagematch = false;
}
if ( $this->mOptions->getAllowExternalImages()
- || ( $imagesexception && $imagematch ) ) {
+ || ( $imagesexception && $imagematch ) ) {
if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
# Image found
$text = $sk->makeExternalImage( $url );
/**
* Process [[ ]] wikilinks
- * @return processed text
+ * @return String: processed text
*
* @private
*/
$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
}
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
$holders = new LinkHolderArray( $this );
- # split the entire text string on occurences of [[
+ # split the entire text string on occurences of [[
$a = StringUtils::explode( '[[', ' ' . $s );
# get the first element (all text up to first [[), and remove the space we added
$s = $a->current();
}
if ( $wgContLang->hasVariants() ) {
- $selflink = $wgContLang->convertLinkToAllVariants( $this->mTitle->getPrefixedText() );
+ $selflink = $wgContLang->autoConvertToAllVariants( $this->mTitle->getPrefixedText() );
} else {
$selflink = array( $this->mTitle->getPrefixedText() );
}
# fix up urlencoded title texts
if ( strpos( $m[1], '%' ) !== false ) {
# Should anchors '#' also be rejected?
- $m[1] = str_replace( array('<', '>'), array('<', '>'), urldecode( $m[1] ) );
+ $m[1] = str_replace( array('<', '>'), array('<', '>'), rawurldecode( $m[1] ) );
}
$trail = $m[3];
} elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption
$might_be_img = true;
$text = $m[2];
if ( strpos( $m[1], '%' ) !== false ) {
- $m[1] = urldecode( $m[1] );
+ $m[1] = rawurldecode( $m[1] );
}
$trail = "";
} else { # Invalid form; output directly
$text = $link;
} else {
# Bug 4598 madness. Handle the quotes only if they come from the alternate part
- # [[Lista d''e paise d''o munno]] -> <a href="">Lista d''e paise d''o munno</a>
- # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']] -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
- $text = $this->doQuotes($text);
+ # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
+ # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
+ # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
+ $text = $this->doQuotes( $text );
}
# Link not escaped by : , create the various objects
* Strip the whitespace Category links produce, see bug 87
* @todo We might want to use trim($tmp, "\n") here.
*/
- $s .= trim( $prefix . $trail, "\n" ) == '' ? '': $prefix . $trail;
+ $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;
wfProfileOut( __METHOD__."-category" );
continue;
*/
function makeKnownLinkHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
list( $inside, $trail ) = Linker::splitTrail( $trail );
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
# FIXME: use link() instead of deprecated makeKnownLinkObj()
$link = $sk->makeKnownLinkObj( $nt, $text, $query, $inside, $prefix );
return $this->armorLinks( $link ) . $trail;
if ( $preOpenMatch and !$preCloseMatch ) {
$this->mInPre = true;
}
- if ( $closematch ) {
- $inBlockElem = false;
- } else {
- $inBlockElem = true;
- }
+ $inBlockElem = !$closematch;
} elseif ( !$inBlockElem && !$this->mInPre ) {
if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) ) {
# pre
}
if ( $stack > 0 ) {
wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" );
+ wfProfileOut( __METHOD__ );
return false;
}
wfProfileOut( __METHOD__ );
* @private
*/
function getVariableValue( $index, $frame=false ) {
- global $wgContLang, $wgSitename, $wgServer, $wgServerName;
- global $wgScriptPath, $wgStylePath;
+ global $wgContLang, $wgSitename, $wgServer;
+ global $wgArticlePath, $wgScriptPath, $wgStylePath;
/**
* Some of these require message or data lookups and can be
$value = wfEscapeWikiText( $this->mTitle->getText() );
break;
case 'pagenamee':
- $value = $this->mTitle->getPartialURL();
+ $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
break;
case 'fullpagename':
$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
break;
case 'fullpagenamee':
- $value = $this->mTitle->getPrefixedURL();
+ $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
break;
case 'subpagename':
$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
break;
case 'subpagenamee':
- $value = $this->mTitle->getSubpageUrlForm();
+ $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
break;
case 'basepagename':
$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
break;
case 'basepagenamee':
- $value = wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) );
+ $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) );
break;
case 'talkpagename':
if ( $this->mTitle->canTalk() ) {
case 'talkpagenamee':
if ( $this->mTitle->canTalk() ) {
$talkPage = $this->mTitle->getTalkPage();
- $value = $talkPage->getPrefixedUrl();
+ $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() );
} else {
$value = '';
}
break;
case 'subjectpagenamee':
$subjPage = $this->mTitle->getSubjectPage();
- $value = $subjPage->getPrefixedUrl();
+ $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() );
break;
case 'revisionid':
# Let the edit saving system know we should parse the page
case 'currentversion':
$value = SpecialVersion::getVersion();
break;
+ case 'articlepath':
+ return $wgArticlePath;
case 'sitename':
return $wgSitename;
case 'server':
return $wgServer;
case 'servername':
- return $wgServerName;
+ wfSuppressWarnings(); # May give an E_WARNING in PHP < 5.3.3
+ $serverName = parse_url( $wgServer, PHP_URL_HOST );
+ wfRestoreWarnings();
+ return $serverName ? $serverName : $wgServer;
case 'scriptpath':
return $wgScriptPath;
case 'stylepath':
case 'directionmark':
return $wgContLang->getDirMark();
case 'contentlanguage':
- global $wgContLanguageCode;
- return $wgContLanguageCode;
+ global $wgLanguageCode;
+ return $wgLanguageCode;
default:
$ret = null;
if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$this->mVarCache, &$index, &$ret, &$frame ) ) ) {
$originalTitle = $part1;
# $args is a list of argument nodes, starting from index 0, not including $part1
+ # *** FIXME if piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object
$args = ( null == $piece['parts'] ) ? array() : $piece['parts'];
wfProfileOut( __METHOD__.'-setup' );
if ( !$title->equals( $cacheTitle ) ) {
$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
- array( $title->getNamespace(),$cdb = $title->getDBkey() );
+ array( $title->getNamespace(), $cdb = $title->getDBkey() );
}
return array( $dom, $title );
$text = $rev->getText();
} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
global $wgContLang;
- $message = $wgContLang->lcfirst( $title->getText() );
- $text = wfMsgForContentNoTrans( $message );
- if ( wfEmptyMsg( $message, $text ) ) {
+ $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
+ if ( !$message->exists() ) {
$text = false;
break;
}
+ $text = $message->plain();
} else {
break;
}
global $wgEnableScaryTranscluding;
if ( !$wgEnableScaryTranscluding ) {
- return wfMsg('scarytranscludedisabled');
+ return wfMsgForContent('scarytranscludedisabled');
}
$url = $title->getFullUrl( "action=$action" );
if ( strlen( $url ) > 255 ) {
- return wfMsg( 'scarytranscludetoolong' );
+ return wfMsgForContent( 'scarytranscludetoolong' );
}
return $this->fetchScaryTemplateMaybeFromCache( $url );
}
$text = Http::get( $url );
if ( !$text ) {
- return wfMsg( 'scarytranscludefailed', $url );
+ return wfMsgForContent( 'scarytranscludefailed', $url );
}
$dbw = wfGetDB( DB_MASTER );
$text = $frame->getArgument( $argName );
if ( $text === false && $parts->getLength() > 0
&& (
- $this->ot['html']
- || $this->ot['pre']
- || ( $this->ot['wiki'] && $frame->isTemplate() )
+ $this->ot['html']
+ || $this->ot['pre']
+ || ( $this->ot['wiki'] && $frame->isTemplate() )
)
) {
# No match in frame, use the supplied default
if ( $markerType === 'none' ) {
return $output;
} elseif ( $markerType === 'nowiki' ) {
- $this->mStripState->nowiki->setPair( $marker, $output );
+ $this->mStripState->addNoWiki( $marker, $output );
} elseif ( $markerType === 'general' ) {
- $this->mStripState->general->setPair( $marker, $output );
+ $this->mStripState->addGeneral( $marker, $output );
} else {
throw new MWException( __METHOD__.': invalid marker type' );
}
* @return Boolean: false if this inclusion would take it over the maximum, true otherwise
*/
function incrementIncludeSize( $type, $size ) {
- if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize( $type ) ) {
+ if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
return false;
} else {
$this->mIncludeSizes[$type] += $size;
$this->mOutput->setIndexPolicy( 'index' );
$this->addTrackingCategory( 'index-category' );
}
-
+
# Cache all double underscores in the database
foreach ( $this->mDoubleUnderscores as $key => $val ) {
$this->mOutput->setProperty( $key, '' );
function formatHeadings( $text, $origText, $isMain=true ) {
global $wgMaxTocLevel, $wgContLang, $wgHtml5, $wgExperimentalHtmlIds;
- $doNumberHeadings = $this->mOptions->getNumberHeadings();
-
# Inhibit editsection links if requested in the page
if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
$showEditLink = 0;
} else {
$showEditLink = $this->mOptions->getEditSection();
}
+ if ( $showEditLink ) {
+ $this->mOutput->setEditSectionTokens( true );
+ }
# Get all headlines for numbering them and adding funky stuff like [edit]
# links - this is for later, but we need the number of headlines right now
}
# We need this to perform operations on the HTML
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
# headline counter
$headlineCount = 0;
$head = array();
$sublevelCount = array();
$levelCount = array();
- $toclevel = 0;
$level = 0;
$prevlevel = 0;
$toclevel = 0;
$node = $root->getFirstChild();
$byteOffset = 0;
$tocraw = array();
+ $refers = array();
foreach ( $matches[3] as $headline ) {
$isTemplate = false;
if ( $toclevel ) {
$prevlevel = $level;
- $prevtoclevel = $toclevel;
}
$level = $matches[1][$headlineCount];
'noninitial' );
}
- # HTML names must be case-insensitively unique (bug 10721). FIXME:
- # Does this apply to Unicode characters? Because we aren't
- # handling those here.
+ # HTML names must be case-insensitively unique (bug 10721).
+ # This does not apply to Unicode characters per
+ # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison
+ # FIXME: We may be changing them depending on the current locale.
$arrayKey = strtolower( $safeHeadline );
if ( $legacyHeadline === false ) {
$legacyArrayKey = false;
}
# Don't number the heading if it is the only one (looks silly)
- if ( $doNumberHeadings && count( $matches[3] ) > 1) {
+ if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
# the two are different if the line contains a link
$headline = $numbering . ' ' . $headline;
}
while ( $node && !$isTemplate ) {
if ( $node->getName() === 'h' ) {
$bits = $node->splitHeading();
- if ( $bits['i'] == $sectionIndex )
+ if ( $bits['i'] == $sectionIndex ) {
break;
+ }
}
$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
# give headline the correct <h#> tag
if ( $showEditLink && $sectionIndex !== false ) {
+ // Output edit section links as markers with styles that can be customized by skins
if ( $isTemplate ) {
# Put a T flag in the section identifier, to indicate to extractSections()
# that sections inside <includeonly> should be counted.
- $editlink = $sk->doEditSectionLink( Title::newFromText( $titleText ), "T-$sectionIndex" );
+ $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
} else {
- $editlink = $sk->doEditSectionLink( $this->mTitle, $sectionIndex, $headlineHint );
+ $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint );
+ }
+ // We use a bit of pesudo-xml for editsection markers. The language converter is run later on
+ // Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff
+ // And trying to insert strip tags fails too. At this point all real inputted tags have already been escaped
+ // so we don't have to worry about a user trying to input one of these markers directly.
+ // We use a page and section attribute to stop the language converter from converting these important bits
+ // of data, but put the headline hint inside a content block because the language converter is supposed to
+ // be able to convert that piece of data.
+ $editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]);
+ $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"';
+ if ( isset($editlinkArgs[2]) ) {
+ $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
+ } else {
+ $editlink .= '/>';
}
} else {
$editlink = '';
if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
$toc .= $sk->tocUnindent( $prevtoclevel - 1 );
}
- $toc = $sk->tocList( $toc );
+ $toc = $sk->tocList( $toc, $this->mOptions->getUserLang() );
$this->mOutput->setTOCHTML( $toc );
}
* conversion, substitting signatures, {{subst:}} templates, etc.
*
* @param $text String: the text to transform
- * @param &$title Title: the Title object for the current article
+ * @param $title Title: the Title object for the current article
* @param $user User: the User object describing the current user
* @param $options ParserOptions: parsing options
* @param $clearState Boolean: whether to clear the parser state first
* @return String: the altered wiki markup
*/
- public function preSaveTransform( $text, Title $title, $user, $options, $clearState = true ) {
- $this->mOptions = $options;
- $this->setTitle( $title );
- $this->setOutputType( self::OT_WIKI );
-
- if ( $clearState ) {
- $this->clearState();
- }
+ public function preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState = true ) {
+ $this->startParse( $title, $options, self::OT_WIKI, $clearState );
+ $this->setUser( $user );
$pairs = array(
"\r\n" => "\n",
);
$text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );
- $text = $this->pstPass2( $text, $user );
+ if( $options->getPreSaveTransform() ) {
+ $text = $this->pstPass2( $text, $user );
+ }
$text = $this->mStripState->unstripBoth( $text );
+
+ $this->setUser( null ); #Reset
+
return $text;
}
# whatever crap the system uses, localised or not, so we cannot
# ship premade translations.
$key = 'timezone-' . strtolower( trim( $tzMsg ) );
- $value = wfMsgForContent( $key );
- if ( !wfEmptyMsg( $key, $value ) ) {
- $tzMsg = $value;
+ $msg = wfMessage( $key )->inContentLanguage();
+ if ( $msg->exists() ) {
+ $tzMsg = $msg->text();
}
date_default_timezone_set( $oldtz );
# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
$text = $this->replaceVariables( $text );
+ # This works almost by chance, as the replaceVariables are done before the getUserSig(),
+ # which may corrupt this parser instance via its wfMsgExt( parsemag ) call-
+
# Signatures
$sigText = $this->getUserSig( $user );
$text = strtr( $text, array(
* validated, ready-to-insert wikitext.
* If you have pre-fetched the nickname or the fancySig option, you can
* specify them here to save a database query.
+ * Do not reuse this parser instance after calling getUserSig(),
+ * as it may have changed if it's the $wgParser.
*
* @param $user User
* @param $nickname String: nickname to use or false to use user's default nickname
function cleanSig( $text, $parsing = false ) {
if ( !$parsing ) {
global $wgTitle;
+ $this->mOptions = new ParserOptions;
$this->clearState();
$this->setTitle( $wgTitle );
- $this->mOptions = new ParserOptions;
$this->setOutputType = self::OT_PREPROCESS;
}
* Set up some variables which are usually set up in parse()
* so that an external function can call some class members with confidence
*/
- public function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
+ public function startExternalParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
+ $this->startParse( $title, $options, $outputType, $clearState );
+ }
+
+ private function startParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
$this->setTitle( $title );
$this->mOptions = $options;
$this->setOutputType( $outputType );
*
* @param $text String: the text to preprocess
* @param $options ParserOptions: options
+ * @param $title Title object or null to use $wgTitle
* @return String
*/
- public function transformMsg( $text, $options ) {
- global $wgTitle;
+ public function transformMsg( $text, $options, $title = null ) {
static $executing = false;
# Guard against infinite recursion
$executing = true;
wfProfileIn( __METHOD__ );
- $text = $this->preprocess( $text, $wgTitle, $options );
+ if ( !$title ) {
+ global $wgTitle;
+ $title = $wgTitle;
+ }
+ if ( !$title ) {
+ # It's not uncommon having a null $wgTitle in scripts. See r80898
+ # Create a ghost title in such case
+ $title = Title::newFromText( 'Dwimmerlaik' );
+ }
+ $text = $this->preprocess( $text, $title, $options );
$executing = false;
wfProfileOut( __METHOD__ );
*/
public function setHook( $tag, $callback ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
$this->mTagHooks[$tag] = $callback;
if ( !in_array( $tag, $this->mStripList ) ) {
function setTransparentTagHook( $tag, $callback ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" );
$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
$this->mTransparentTagHooks[$tag] = $callback;
*/
function setFunctionTagHook( $tag, $callback, $flags ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
$old = isset( $this->mFunctionTagHooks[$tag] ) ?
$this->mFunctionTagHooks[$tag] : null;
$this->mFunctionTagHooks[$tag] = array( $callback, $flags );
$ig->setParser( $this );
$ig->setHideBadImages();
$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );
- $ig->useSkin( $this->mOptions->getSkin() );
+ $ig->useSkin( $this->mOptions->getSkin( $this->mTitle ) );
$ig->mRevisionId = $this->mRevisionId;
if ( isset( $params['showfilename'] ) ) {
}
if ( strpos( $matches[0], '%' ) !== false ) {
- $matches[1] = urldecode( $matches[1] );
+ $matches[1] = rawurldecode( $matches[1] );
}
- $tp = Title::newFromText( $matches[1] );
+ $tp = Title::newFromText( $matches[1], NS_FILE );
$nt =& $tp;
if ( is_null( $nt ) ) {
# Bogus title. Ignore these so we don't bomb out later.
# * text-bottom
$parts = StringUtils::explode( "|", $options );
- $sk = $this->mOptions->getSkin();
+ $sk = $this->mOptions->getSkin( $this->mTitle );
# Give extensions a chance to select the file revision for us
$skip = $time = $descQuery = false;
}
# Get the file
- $imagename = $title->getDBkey();
$file = wfFindFile( $title, array( 'time' => $time ) );
# Get parameter map
$handler = $file ? $file->getHandler() : false;
if ( preg_match( "/^($prots)$chars+$/", $value, $m ) ) {
$paramName = 'link-url';
$this->mOutput->addExternalLink( $value );
+ if ( $this->mOptions->getExternalLinkTarget() ) {
+ $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
+ }
$validated = true;
}
} else {
# Will the image be presented in a frame, with the caption below?
$imageIsFramed = isset( $params['frame']['frame'] ) ||
- isset( $params['frame']['framed'] ) ||
- isset( $params['frame']['thumbnail'] ) ||
- isset( $params['frame']['manualthumb'] );
+ isset( $params['frame']['framed'] ) ||
+ isset( $params['frame']['thumbnail'] ) ||
+ isset( $params['frame']['manualthumb'] );
# In the old days, [[Image:Foo|text...]] would set alt text. Later it
# came to also set the caption, ordinary text after the image -- which
wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params ) );
# Linker does the rest
- $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'], $time, $descQuery );
+ $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'], $time, $descQuery, $this->mOptions->getThumbSize() );
# Give the handler a chance to modify the parser object
if ( $handler ) {
return array_merge( array_keys( $this->mTransparentTagHooks ), array_keys( $this->mTagHooks ) );
}
+ /**
+ * Replace transparent tags in $text with the values given by the callbacks.
+ *
+ * Transparent tag hooks are like regular XML-style tag hooks, except they
+ * operate late in the transformation sequence, on HTML instead of wikitext.
+ */
+ function replaceTransparentTags( $text ) {
+ $matches = array();
+ $elements = array_keys( $this->mTransparentTagHooks );
+ $text = $this->extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix );
+
+ foreach ( $matches as $marker => $data ) {
+ list( $element, $content, $params, $tag ) = $data;
+ $tagName = strtolower( $element );
+ if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
+ $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
+ } else {
+ $output = $tag;
+ }
+ $this->mStripState->addGeneral( $marker, $output );
+ }
+ return $text;
+ }
+
/**
* Break wikitext input into sections, and either pull or replace
* some particular section's text.
* for "replace", the whole page with the section replaced.
*/
private function extractSections( $text, $section, $mode, $newText='' ) {
- global $wgTitle;
- $this->clearState();
- $this->setTitle( $wgTitle ); # not generally used but removes an ugly failure mode
- $this->mOptions = new ParserOptions;
- $this->setOutputType( self::OT_PLAIN );
+ global $wgTitle; # not generally used but removes an ugly failure mode
+ $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
$outText = '';
$frame = $this->getPreprocessor()->newFrame();
return $this->extractSections( $text, $section, "get", $deftext );
}
+ /**
+ * This function returns $oldtext after the content of the section
+ * specified by $section has been replaced with $text.
+ *
+ * @param $text String: former text of the article
+ * @param $section Numeric: section identifier
+ * @param $text String: replacing text
+ * #return String: modified text
+ */
public function replaceSection( $oldtext, $section, $text ) {
return $this->extractSections( $oldtext, $section, "replace", $text );
}
return $this->mRevisionId;
}
+ /**
+ * Get the revision object for $this->mRevisionId
+ *
+ * @return either a Revision object or null
+ */
+ protected function getRevisionObject() {
+ if ( !is_null( $this->mRevisionObject ) ) {
+ return $this->mRevisionObject;
+ }
+ if ( is_null( $this->mRevisionId ) ) {
+ return null;
+ }
+
+ $this->mRevisionObject = Revision::newFromId( $this->mRevisionId );
+ return $this->mRevisionObject;
+ }
+
/**
* Get the timestamp associated with the current revision, adjusted for
* the default server-local timestamp
function getRevisionTimestamp() {
if ( is_null( $this->mRevisionTimestamp ) ) {
wfProfileIn( __METHOD__ );
- global $wgContLang;
- $dbr = wfGetDB( DB_SLAVE );
- $timestamp = $dbr->selectField( 'revision', 'rev_timestamp',
- array( 'rev_id' => $this->mRevisionId ), __METHOD__ );
-
- # Normalize timestamp to internal MW format for timezone processing.
- # This has the added side-effect of replacing a null value with
- # the current time, which gives us more sensible behavior for
- # previews.
- $timestamp = wfTimestamp( TS_MW, $timestamp );
-
- # The cryptic '' timezone parameter tells to use the site-default
- # timezone offset instead of the user settings.
- #
- # Since this value will be saved into the parser cache, served
- # to other users, and potentially even used inside links and such,
- # it needs to be consistent for all visitors.
- $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
+
+ $revObject = $this->getRevisionObject();
+ $timestamp = $revObject ? $revObject->getTimestamp() : false;
+
+ if( $timestamp !== false ) {
+ global $wgContLang;
+
+ # The cryptic '' timezone parameter tells to use the site-default
+ # timezone offset instead of the user settings.
+ #
+ # Since this value will be saved into the parser cache, served
+ # to other users, and potentially even used inside links and such,
+ # it needs to be consistent for all visitors.
+ $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
+ }
wfProfileOut( __METHOD__ );
}
* @return String: user name
*/
function getRevisionUser() {
- # if this template is subst: the revision id will be blank,
- # so just use the current user's name
- if ( $this->mRevisionId ) {
- $revision = Revision::newFromId( $this->mRevisionId );
- $revuser = $revision->getUserText();
- } else {
- global $wgUser;
- $revuser = $wgUser->getName();
+ if( is_null( $this->mRevisionUser ) ) {
+ $revObject = $this->getRevisionObject();
+
+ # if this template is subst: the revision id will be blank,
+ # so just use the current user's name
+ if( $revObject ) {
+ $this->mRevisionUser = $revObject->getUserText();
+ } elseif( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
+ $this->mRevisionUser = $this->getUser()->getName();
+ }
}
- return $revuser;
+ return $this->mRevisionUser;
}
/**
/**
* Accessor for $mDefaultSort
- * Will use the title/prefixed title if none is set
+ * Will use the empty string if none is set.
+ *
+ * This value is treated as a prefix, so the
+ * empty string is equivalent to sorting by
+ * page name.
*
* @return string
*/
if ( $this->mDefaultSort !== false ) {
return $this->mDefaultSort;
} else {
- return $this->mTitle->getCategorySortkey();
+ return '';
}
}
return '#' . Sanitizer::escapeId( $text, 'noninitial' );
}
+ /**
+ * Same as guessSectionNameFromWikiText(), but produces legacy anchors
+ * instead. For use in redirects, since IE6 interprets Redirect: headers
+ * as something other than UTF-8 (apparently?), resulting in breakage.
+ *
+ * @param $text String: The section name
+ * @return string An anchor
+ */
+ public function guessLegacySectionNameFromWikiText( $text ) {
+ # Strip out wikitext links(they break the anchor)
+ $text = $this->stripSectionName( $text );
+ $text = Sanitizer::normalizeSectionNameWhitespace( $text );
+ return '#' . Sanitizer::escapeId( $text, array( 'noninitial', 'legacy' ) );
+ }
+
/**
* Strips a text string of wikitext for use in a section anchor
*
return $text;
}
- function srvus( $text ) {
- return $this->testSrvus( $text, $this->mOutputType );
- }
-
/**
* strip/replaceVariables/unstrip for preprocessor regression testing
*/
- function testSrvus( $text, $title, $options, $outputType = self::OT_HTML ) {
- $this->clearState();
+ function testSrvus( $text, $title, ParserOptions $options, $outputType = self::OT_HTML ) {
if ( !$title instanceof Title ) {
$title = Title::newFromText( $title );
}
- $this->mTitle = $title;
- $this->mOptions = $options;
- $this->setOutputType( $outputType );
+ $this->startParse( $title, $options, $outputType, true );
+
$text = $this->replaceVariables( $text );
$text = $this->mStripState->unstripBoth( $text );
$text = Sanitizer::removeHTMLtags( $text );
return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
}
+ /**
+ * Call a callback function on all regions of the given text that are not
+ * inside strip markers, and replace those regions with the return value
+ * of the callback. For example, with input:
+ *
+ * aaa<MARKER>bbb
+ *
+ * This will call the callback function twice, with 'aaa' and 'bbb'. Those
+ * two strings will be replaced with the value returned by the callback in
+ * each case.
+ */
function markerSkipCallback( $s, $callback ) {
$i = 0;
$out = '';
return $out;
}
- function serialiseHalfParsedText( $text ) {
- $data = array();
- $data['text'] = $text;
-
- # First, find all strip markers, and store their
- # data in an array.
- $stripState = new StripState;
- $pos = 0;
- while ( ( $start_pos = strpos( $text, $this->mUniqPrefix, $pos ) )
- && ( $end_pos = strpos( $text, self::MARKER_SUFFIX, $pos ) ) )
- {
- $end_pos += strlen( self::MARKER_SUFFIX );
- $marker = substr( $text, $start_pos, $end_pos-$start_pos );
-
- if ( !empty( $this->mStripState->general->data[$marker] ) ) {
- $replaceArray = $stripState->general;
- $stripText = $this->mStripState->general->data[$marker];
- } elseif ( !empty( $this->mStripState->nowiki->data[$marker] ) ) {
- $replaceArray = $stripState->nowiki;
- $stripText = $this->mStripState->nowiki->data[$marker];
- } else {
- throw new MWException( "Hanging strip marker: '$marker'." );
- }
-
- $replaceArray->setPair( $marker, $stripText );
- $pos = $end_pos;
- }
- $data['stripstate'] = $stripState;
-
- # Now, find all of our links, and store THEIR
- # data in an array! :)
- $links = array( 'internal' => array(), 'interwiki' => array() );
- $pos = 0;
-
- # Internal links
- while ( ( $start_pos = strpos( $text, '<!--LINK ', $pos ) ) ) {
- list( $ns, $trail ) = explode( ':', substr( $text, $start_pos + strlen( '<!--LINK ' ) ), 2 );
-
- $ns = trim( $ns );
- if ( empty( $links['internal'][$ns] ) ) {
- $links['internal'][$ns] = array();
- }
-
- $key = trim( substr( $trail, 0, strpos( $trail, '-->' ) ) );
- $links['internal'][$ns][] = $this->mLinkHolders->internals[$ns][$key];
- $pos = $start_pos + strlen( "<!--LINK $ns:$key-->" );
- }
-
- $pos = 0;
-
- # Interwiki links
- while ( ( $start_pos = strpos( $text, '<!--IWLINK ', $pos ) ) ) {
- $data = substr( $text, $start_pos );
- $key = trim( substr( $data, 0, strpos( $data, '-->' ) ) );
- $links['interwiki'][] = $this->mLinkHolders->interwiki[$key];
- $pos = $start_pos + strlen( "<!--IWLINK $key-->" );
- }
-
- $data['linkholder'] = $links;
-
+ /**
+ * Save the parser state required to convert the given half-parsed text to
+ * HTML. "Half-parsed" in this context means the output of
+ * recursiveTagParse() or internalParse(). This output has strip markers
+ * from replaceVariables (extensionSubstitution() etc.), and link
+ * placeholders from replaceLinkHolders().
+ *
+ * Returns an array which can be serialized and stored persistently. This
+ * array can later be loaded into another parser instance with
+ * unserializeHalfParsedText(). The text can then be safely incorporated into
+ * the return value of a parser hook.
+ */
+ function serializeHalfParsedText( $text ) {
+ wfProfileIn( __METHOD__ );
+ $data = array(
+ 'text' => $text,
+ 'version' => self::HALF_PARSED_VERSION,
+ 'stripState' => $this->mStripState->getSubState( $text ),
+ 'linkHolders' => $this->mLinkHolders->getSubArray( $text )
+ );
+ wfProfileOut( __METHOD__ );
return $data;
}
/**
- * TODO: document
- * @param $data Array
- * @param $intPrefix String unique identifying prefix
+ * Load the parser state given in the $data array, which is assumed to
+ * have been generated by serializeHalfParsedText(). The text contents is
+ * extracted from the array, and its markers are transformed into markers
+ * appropriate for the current Parser instance. This transformed text is
+ * returned, and can be safely included in the return value of a parser
+ * hook.
+ *
+ * If the $data array has been stored persistently, the caller should first
+ * check whether it is still valid, by calling isValidHalfParsedText().
+ *
+ * @param $data Serialized data
* @return String
*/
- function unserialiseHalfParsedText( $data, $intPrefix = null ) {
- if ( !$intPrefix ) {
- $intPrefix = $this->getRandomString();
+ function unserializeHalfParsedText( $data ) {
+ if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
+ throw new MWException( __METHOD__.': invalid version' );
}
# First, extract the strip state.
- $stripState = $data['stripstate'];
- $this->mStripState->general->merge( $stripState->general );
- $this->mStripState->nowiki->merge( $stripState->nowiki );
-
- # Now, extract the text, and renumber links
- $text = $data['text'];
- $links = $data['linkholder'];
-
- # Internal...
- foreach ( $links['internal'] as $ns => $nsLinks ) {
- foreach ( $nsLinks as $key => $entry ) {
- $newKey = $intPrefix . '-' . $key;
- $this->mLinkHolders->internals[$ns][$newKey] = $entry;
-
- $text = str_replace( "<!--LINK $ns:$key-->", "<!--LINK $ns:$newKey-->", $text );
- }
- }
+ $texts = array( $data['text'] );
+ $texts = $this->mStripState->merge( $data['stripState'], $texts );
- # Interwiki...
- foreach ( $links['interwiki'] as $key => $entry ) {
- $newKey = "$intPrefix-$key";
- $this->mLinkHolders->interwikis[$newKey] = $entry;
-
- $text = str_replace( "<!--IWLINK $key-->", "<!--IWLINK $newKey-->", $text );
- }
+ # Now renumber links
+ $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );
# Should be good to go.
- return $text;
+ return $texts[0];
}
-}
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class StripState {
- var $general, $nowiki;
-
- function __construct() {
- $this->general = new ReplacementArray;
- $this->nowiki = new ReplacementArray;
- }
-
- function unstripGeneral( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->general->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-
- function unstripNoWiki( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->nowiki->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-
- function unstripBoth( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->general->replace( $text );
- $text = $this->nowiki->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-}
-
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class OnlyIncludeReplacer {
- var $output = '';
-
- function replace( $matches ) {
- if ( substr( $matches[1], -1 ) === "\n" ) {
- $this->output .= substr( $matches[1], 0, -1 );
- } else {
- $this->output .= $matches[1];
- }
+ /**
+ * Returns true if the given array, presumed to be generated by
+ * serializeHalfParsedText(), is compatible with the current version of the
+ * parser.
+ *
+ * @param $data Array.
+ */
+ function isValidHalfParsedText( $data ) {
+ return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
}
}