* (which in turn the browser understands, and can display).
*
* <pre>
- * There are five main entry points into the Parser class:
+ * There are six main entry points into the Parser class:
* parse()
* produces HTML output
* preSaveTransform().
* Cleans a signature before saving it to preferences
* extractSections()
* Extracts sections from an article for section editing
+ * getTransclusionText()
+ * Extracts the text of a template with only <includeonly>, etc., parsed
*
* Globals used:
* objects: $wgLang, $wgContLang
const OT_WIKI = 2;
const OT_PREPROCESS = 3;
const OT_MSG = 3;
+ const OT_INCLUDES = 4;
// Marker Suffix needs to be accessible statically.
const MARKER_SUFFIX = "-QINU\x7f";
*/
# Persistent:
var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables,
- $mSubsts, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex,
+ $mSubstWords, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex,
$mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList,
$mVarCache, $mConf, $mFunctionTagHooks;
$this->mFunctionHooks = array();
$this->mFunctionTagHooks = array();
$this->mFunctionSynonyms = array( 0 => array(), 1 => array() );
- $this->mDefaultStripList = $this->mStripList = array( 'nowiki', 'gallery', 'a' );
+ $this->mDefaultStripList = $this->mStripList = array();
$this->mUrlProtocols = wfUrlProtocols();
$this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
'[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S';
wfProfileIn( __METHOD__ );
- $this->setHook( 'pre', array( $this, 'renderPreTag' ) );
CoreParserFunctions::register( $this );
+ CoreTagHooks::register( $this );
$this->initialiseVariables();
wfRunHooks( 'ParserFirstCallInit', array( &$this ) );
// won't pick it up. This is probably expected behavior.
if ( $wgContLang->getConvRuleTitle() ) {
$this->mOutput->setTitleText( $wgContLang->getConvRuleTitle() );
- } elseif ( !( $wgDisableLangConversion
- || isset( $this->mDoubleUnderscores['notitleconvert'] ) ) ) {
- $this->mOutput->setTitleText( $wgContLang->convert( $title ) );
}
$text = $this->mStripState->unstripNoWiki( $text );
wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
-//!JF Move to its own function
-
- $uniq_prefix = $this->mUniqPrefix;
- $matches = array();
- $elements = array_keys( $this->mTransparentTagHooks );
- $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
-
- foreach( $matches as $marker => $data ) {
- list( $element, $content, $params, $tag ) = $data;
- $tagName = strtolower( $element );
- if( isset( $this->mTransparentTagHooks[$tagName] ) ) {
- $output = call_user_func_array( $this->mTransparentTagHooks[$tagName],
- array( $content, $params, $this ) );
- } else {
- $output = $tag;
+ if ( $this->mTransparentTagHooks ) {
+ //!JF Move to its own function
+ $uniq_prefix = $this->mUniqPrefix;
+ $matches = array();
+ $elements = array_keys( $this->mTransparentTagHooks );
+ $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
+
+ foreach( $matches as $marker => $data ) {
+ list( $element, $content, $params, $tag ) = $data;
+ $tagName = strtolower( $element );
+ if( isset( $this->mTransparentTagHooks[$tagName] ) ) {
+ $output = call_user_func_array( $this->mTransparentTagHooks[$tagName],
+ array( $content, $params, $this ) );
+ } else {
+ $output = $tag;
+ }
+ $this->mStripState->general->setPair( $marker, $output );
}
- $this->mStripState->general->setPair( $marker, $output );
}
+
+ # This was originally inserted for transparent tag hooks (now deprecated)
+ # but some extensions (notably <poem>) rely on the extra unstripGeneral()
+ # after unstripNoWiki() so they can modify the contents of <nowiki> tags.
$text = $this->mStripState->unstripGeneral( $text );
$text = Sanitizer::normalizeCharReferences( $text );
return $text;
}
+ /**
+ * Get the wikitext of a page as though it was transcluded.
+ *
+ * Specifically <includeonly> etc. are parsed, redirects are followed, comments
+ * are removed, but template arguments and parser functions are untouched.
+ *
+ * The parser state is cleared and the output type is set to OT_INCLUDES
+ * before the template DOM is fetched via getTemplateDom(); that DOM is then
+ * expanded in a new preprocessor frame with PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES.
+ *
+ * This is not called by the parser itself, see braceSubstitution for its transclusion.
+ *
+ * @param mixed $title page to fetch, handed straight to getTemplateDom()
+ *   (presumably a Title object; confirm against getTemplateDom())
+ * @param mixed $options stored in $this->mOptions
+ *   (presumably a ParserOptions object; confirm against callers)
+ * @return mixed whatever PPFrame::expand() returns for the template DOM
+ *   (presumably the wikitext string; confirm against the preprocessor)
+ */
+ public function getTransclusionText( $title, $options ) {
+ // Must initialize first
+ $this->clearState();
+ $this->setOutputType( self::OT_INCLUDES );
+ $this->mOptions = $options;
+ $this->setTitle( new FakeTitle );
+
+ list( $text, $title ) = $this->getTemplateDom( $title );
+ $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
+ return $this->getPreprocessor()->newFrame()->expand( $text, $flags );
+ }
+
/**
* Get a random string
*
* Get a list of strippable XML-like elements
*/
function getStripList() {
- global $wgRawHtml;
- $elements = $this->mStripList;
- if( $wgRawHtml ) {
- $elements[] = 'html';
- }
- if( $this->mOptions->getUseTeX() ) {
- $elements[] = 'math';
- }
- return $elements;
+ return $this->mStripList;
}
/**
if ( !$tc ) {
$tc = Title::legalChars() . '#%';
# Match a link having the form [[namespace:link|alternate]]trail
- $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
+ $e1 = "/^([{$tc}]*)(\\|.*?)?]](.*)\$/sD";
# Match cases where there is no "]]", which might still be images
$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
}
wfProfileIn( __METHOD__."-e1" );
if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
- $text = $m[2];
+
+ if( $m[2] === '' ) {
+ $text = '';
+ } elseif( $m[2] === '|' ) {
+ $text = $this->getPipeTrickText( $m[1] );
+ } else {
+ $text = substr( $m[2], 1 );
+ }
+
# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
# the real problem is with the $e1 regex
$text .= ']'; # so that replaceExternalLinks($text) works later
$m[3] = substr( $m[3], 1 );
}
+
+ # Handle pipe-trick for [[|<blah>]]
+ $lnk = $m[1] === '' ? $this->getPipeTrickLink( $text ) : $m[1];
# fix up urlencoded title texts
- if( strpos( $m[1], '%' ) !== false ) {
+ if( strpos( $lnk, '%' ) !== false ) {
# Should anchors '#' also be rejected?
- $m[1] = str_replace( array('<', '>'), array('<', '>'), urldecode($m[1]) );
+ $lnk = str_replace( array('<', '>'), array('<', '>'), urldecode($lnk) );
}
+
$trail = $m[3];
} elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption
$might_be_img = true;
$text = $m[2];
- if ( strpos( $m[1], '%' ) !== false ) {
- $m[1] = urldecode($m[1]);
- }
+ $lnk = strpos( $m[1], '%' ) === false ? $m[1] : urldecode( $m[1] );
$trail = "";
} else { # Invalid form; output directly
$s .= $prefix . '[[' . $line ;
# Don't allow internal links to pages containing
# PROTO: where PROTO is a valid URL protocol; these
# should be external links.
- if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $m[1] ) ) {
+ if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $lnk ) ) {
$s .= $prefix . '[[' . $line ;
wfProfileOut( __METHOD__."-misc" );
continue;
# Make subpage if necessary
if ( $useSubpages ) {
- $link = $this->maybeDoSubpageLink( $m[1], $text );
+ $link = $this->maybeDoSubpageLink( $lnk, $text );
} else {
- $link = $m[1];
+ $link = $lnk;
}
- $noforce = (substr( $m[1], 0, 1 ) !== ':');
+ $noforce = (substr( $lnk, 0, 1 ) !== ':');
if (!$noforce) {
# Strip off leading ':'
$link = substr( $link, 1 );
}
# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
$s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail;
+ } else {
+ $s .= $prefix . $trail;
}
$this->mOutput->addImage( $nt->getDBkey() );
wfProfileOut( __METHOD__."-image" );
return Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
}
+ /**
+ * From the [[title|]] return link-text as though the user typed [[title|link-text]]
+ *
+ * Thin wrapper that delegates to Linker::getPipeTrickText().
+ *
+ * @param string $link from [[$link|]]
+ * @return string $text for [[$link|$text]]
+ */
+ function getPipeTrickText( $link ) {
+ return Linker::getPipeTrickText( $link );
+ }
+
+ /**
+ * From the [[|link-text]] return the title as though the user typed [[title|link-text]]
+ *
+ * Delegates to Linker::getPipeTrickLink(). The title to resolve the link
+ * against is not a parameter of this method: the parser's own title
+ * ($this->mTitle) is always passed.
+ *
+ * @param string $text from [[|$text]]
+ * @return string $link for [[$link|$text]]
+ */
+ function getPipeTrickLink( $text ) {
+ return Linker::getPipeTrickLink( $text, $this->mTitle );
+ }
+
/**#@+
* Used by doBlockLevels()
* @private
$subjPage = $this->mTitle->getSubjectPage();
$value = $subjPage->getPrefixedUrl();
break;
+ case 'pipetrick':
+ $text = $this->mTitle->getText();
+ $value = $this->getPipeTrickText( $text );
+ break;
+ case 'pipetricke':
+ $text = $this->mTitle->getText();
+ $value = wfUrlEncode( str_replace( ' ', '_', $this->getPipeTrickText( $text ) ) );
+ break;
case 'revisionid':
// Let the edit saving system know we should parse the page
// *after* a revision ID has been assigned.
$substIDs = MagicWord::getSubstIDs();
$this->mVariables = new MagicWordArray( $variableIDs );
- $this->mSubsts = new MagicWordArray( $substIDs );
+ $this->mSubstWords = new MagicWordArray( $substIDs );
wfProfileOut( __METHOD__ );
}
wfProfileIn( __METHOD__.'-modifiers' );
if ( !$found ) {
- $substMatch = $this->mSubsts->matchStartAndRemove( $part1 );
+ $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
# Possibilities for substMatch: "subst", "safesubst" or FALSE
- # Whether to include depends also on whether we are in the pre-save-transform
- #
- # safesubst || (subst && PST) || (false && !PST) => transclude (skip the if)
- # (false && PST) || (subst && !PST) => return input (handled by if)
- if ( $substMatch != 'safesubst' && ($substMatch == 'subst' xor $this->ot['wiki']) ) {
+ # Decide whether to expand template or keep wikitext as-is.
+ if ( $this->ot['wiki'] )
+ {
+ if ( $substMatch === false ) {
+ $literal = true; # literal when in PST with no prefix
+ } else {
+ $literal = false; # expand when in PST with subst: or safesubst:
+ }
+ } else {
+ if ( $substMatch == 'subst' ) {
+ $literal = true; # literal when not in PST with plain subst:
+ } else {
+ $literal = false; # expand when not in PST with safesubst: or no prefix
+ }
+ }
+ if ( $literal ) {
$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
$isLocalObj = true;
$found = true;
$name = $frame->expand( $params['name'] );
$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
-
$marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX;
$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower($name)] ) &&
( $this->ot['html'] || $this->ot['pre'] );
+ if ( $isFunctionTag ) {
+ $markerType = 'none';
+ } else {
+ $markerType = 'general';
+ }
if ( $this->ot['html'] || $isFunctionTag ) {
$name = strtolower( $name );
$attributes = Sanitizer::decodeTagAttributes( $attrText );
if ( isset( $params['attributes'] ) ) {
$attributes = $attributes + $params['attributes'];
}
- switch ( $name ) {
- case 'html':
- if( $wgRawHtml ) {
- $output = $content;
- break;
- } else {
- throw new MWException( '<html> extension tag encountered unexpectedly' );
- }
- case 'nowiki':
- $content = strtr($content, array('-{' => '-{', '}-' => '}-'));
- $output = Xml::escapeTagsOnly( $content );
- break;
- case 'gallery':
- $output = $this->renderImageGallery( $content, $attributes );
- break;
- case 'a':
- $output = $this->renderHyperlink( $content, $attributes, $frame );
- break;
- case 'math':
- if ( $this->mOptions->getUseTeX() ) {
- $output = $wgContLang->armourMath(
- MathRenderer::renderMath( $content, $attributes ) );
- break;
- }
- /* else let a tag hook handle it (bug 21222) */
- default:
- if( isset( $this->mTagHooks[$name] ) ) {
- # Workaround for PHP bug 35229 and similar
- if ( !is_callable( $this->mTagHooks[$name] ) ) {
- throw new MWException( "Tag hook for $name is not callable\n" );
- }
- $output = call_user_func_array( $this->mTagHooks[$name],
- array( $content, $attributes, $this, $frame ) );
- } elseif( isset( $this->mFunctionTagHooks[$name] ) ) {
- list( $callback, $flags ) = $this->mFunctionTagHooks[$name];
- if( !is_callable( $callback ) )
- throw new MWException( "Tag hook for $name is not callable\n" );
-
- $output = call_user_func_array( $callback,
- array( &$this, $frame, $content, $attributes ) );
- } else {
- $output = '<span class="error">Invalid tag extension name: ' .
- htmlspecialchars( $name ) . '</span>';
- }
+
+ if( isset( $this->mTagHooks[$name] ) ) {
+ # Workaround for PHP bug 35229 and similar
+ if ( !is_callable( $this->mTagHooks[$name] ) ) {
+ throw new MWException( "Tag hook for $name is not callable\n" );
+ }
+ $output = call_user_func_array( $this->mTagHooks[$name],
+ array( $content, $attributes, $this, $frame ) );
+ } elseif( isset( $this->mFunctionTagHooks[$name] ) ) {
+ list( $callback, $flags ) = $this->mFunctionTagHooks[$name];
+ if( !is_callable( $callback ) )
+ throw new MWException( "Tag hook for $name is not callable\n" );
+
+ $output = call_user_func_array( $callback,
+ array( &$this, $frame, $content, $attributes ) );
+ } else {
+ $output = '<span class="error">Invalid tag extension name: ' .
+ htmlspecialchars( $name ) . '</span>';
+ }
+
+ if ( is_array( $output ) ) {
+ // Extract flags to local scope (to override $markerType)
+ $flags = $output;
+ $output = $flags[0];
+ unset( $flags[0] );
+ extract( $flags );
}
} else {
if ( is_null( $attrText ) ) {
}
}
- if( $isFunctionTag ) {
+ if( $markerType === 'none' ) {
return $output;
- } elseif ( $name === 'html' || $name === 'nowiki' ) {
+ } elseif ( $markerType === 'nowiki' ) {
$this->mStripState->nowiki->setPair( $marker, $output );
- } else {
+ } elseif ( $markerType === 'general' ) {
$this->mStripState->general->setPair( $marker, $output );
+ } else {
+ throw new MWException( __METHOD__.': invalid marker type' );
}
return $marker;
}
* @private
*/
function formatHeadings( $text, $origText, $isMain=true ) {
- global $wgMaxTocLevel, $wgContLang, $wgExperimentalHtmlIds;
+ global $wgMaxTocLevel, $wgContLang, $wgHtml5, $wgExperimentalHtmlIds;
$doNumberHeadings = $this->mOptions->getNumberHeadings();
$showEditLink = $this->mOptions->getEditSection();
# Save headline for section edit hint before it's escaped
$headlineHint = $safeHeadline;
- if ( $wgExperimentalHtmlIds ) {
+ if ( $wgHtml5 && $wgExperimentalHtmlIds ) {
# For reverse compatibility, provide an id that's
# HTML4-compatible, like we used to.
#
'~~~' => $sigText
) );
- # Context links: [[|name]] and [[name (context)|]]
- #
+ # Links of the form [[|<blah>]] or [[<blah>|]] perform pipe tricks
+ # Note that '#' is only accepted in the [[<blah>|]] form, the one position where it works.
global $wgLegalTitleChars;
- $tc = "[$wgLegalTitleChars]";
- $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!
-
- $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/"; # [[ns:page (context)|]]
- $p4 = "/\[\[(:?$nc+:|:|)($tc+?)(($tc+))\\|]]/"; # [[ns:page(context)|]]
- $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]]
- $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]]
-
- # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
- $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
- $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
- $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );
-
- $t = $this->mTitle->getText();
- $m = array();
- if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
- $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
- } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
- $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
- } else {
- # if there's no context, don't bother duplicating the title
- $text = preg_replace( $p2, '[[\\1]]', $text );
- }
+ $pipeTrickRe = "/\[\[(?:(\\|)([$wgLegalTitleChars]+)|([#$wgLegalTitleChars]+)\\|)\]\]/";
+ $text = preg_replace_callback( $pipeTrickRe, array( $this, 'pstPipeTrickCallback' ), $text );
# Trim trailing whitespace
$text = rtrim( $text );
return $text;
}
+ /**
+ * Called from pstPass2 to perform the pipe trick on links.
+ * Original was either [[|text]] or [[link|]]
+ *
+ * Used as a preg_replace_callback() callback, so $m holds the regex match groups.
+ *
+ * @param array $m match groups: $m[1] is "|" for the [[|text]] form and ""
+ *   otherwise, $m[2] is the text of [[|text]], $m[3] is the link of [[link|]]
+ * @return string "[[link]]" when the resulting link and text are identical,
+ *   otherwise "[[link|text]]"
+ */
+ function pstPipeTrickCallback( $m )
+ {
+ if( $m[1] ) { # [[|<blah>]]
+ $text = $m[2];
+ $link = $this->getPipeTrickLink( $text );
+ } else { # [[<blah>|]]
+ $link = $m[3];
+ $text = $this->getPipeTrickText( $link );
+ }
+
+ return $link === $text ? "[[$link]]" : "[[$link|$text]]";
+ }
+
/**
* Fetch the user's signature text, if any, and normalize to
* validated, ready-to-insert wikitext.
return $oldVal;
}
+ /* An old work-around for bug 2257 - deprecated 2010-02-13 */
function setTransparentTagHook( $tag, $callback ) {
+ wfDeprecated( __METHOD__ );
$tag = strtolower( $tag );
$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
$this->mTransparentTagHooks[$tag] = $callback;
return $this->mLinkHolders->replaceText( $text );
}
- /**
- * Tag hook handler for 'pre'.
- */
- function renderPreTag( $text, $attribs ) {
- // Backwards-compatibility hack
- $content = StringUtils::delimiterReplace( '<nowiki>', '</nowiki>', '$1', $text, 'i' );
-
- $attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' );
- return Xml::openElement( 'pre', $attribs ) .
- Xml::escapeTagsOnly( $content ) .
- '</pre>';
- }
-
- /**
- * Tag hook handler for 'a'. Renders a HTML <a> tag, allowing most attributes, filtering href against
- * allowed protocols and spam blacklist.
- **/
- function renderHyperlink( $text, $params, $frame = false ) {
- foreach ( $params as $name => $value ) {
- $params[ $name ] = $this->replaceVariables( $value, $frame );
- }
-
- $whitelist = Sanitizer::attributeWhitelist( 'a' );
- $params = Sanitizer::validateAttributes( $params, $whitelist );
-
- $content = $this->recursiveTagParse( trim( $text ), $frame );
-
- if ( isset( $params[ 'href' ] ) ) {
- $href = $params[ 'href' ];
- $this->mOutput->addExternalLink( $href );
- unset( $params[ 'href' ] );
- } else {
- # Non-link <a> tag
- return Xml::openElement( 'a', $params ) . $content . Xml::closeElement( 'a' );
- }
-
- $sk = $this->mOptions->getSkin();
- $html = $sk->makeExternalLink( $href, $content, false, '', $params );
-
- return $html;
- }
-
/**
* Renders an image gallery from a text with one line per image.
* text labels may be given by using |-style alternative text. E.g.