* settings:
* $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
* $wgNamespacesWithSubpages, $wgAllowExternalImages*,
- * $wgLocaltimezone
+ * $wgLocaltimezone, $wgAllowSpecialInclusion*
*
* * only within ParserOptions
* </pre>
$this->mStripState = array();
$this->mArgStack = array();
$this->mInPre = false;
- $this->mInterwikiLinkHolders = array();
+ $this->mInterwikiLinkHolders = array(
+ 'texts' => array(),
+ 'titles' => array()
+ );
$this->mLinkHolders = array(
'namespaces' => array(),
'dbkeys' => array(),
$text = $this->internalParse( $text );
- $dashReplace = array(
- '/ - /' => " – ", # N dash
- '/(?<=[0-9])-(?=[0-9])/' => "–", # N dash between numbers
- '/ -- /' => " — " # M dash
- );
- $text = preg_replace( array_keys($dashReplace), array_values($dashReplace), $text );
-
$text = $this->unstrip( $text, $this->mStripState );
'/(.) (?=\\?|:|;|!|\\302\\273)/' => '\\1 \\2',
# french spaces, Guillemet-right
'/(\\302\\253) /' => '\\1 ',
- '/<hr *>/i' => '<hr />',
- '/<br *>/i' => '<br />',
'/<center *>/i' => '<div class="center">',
'/<\\/center *>/i' => '</div>',
);
$this->replaceLinkHolders( $text );
+ $dashReplace = array(
+ '/ - /' => " – ", # N dash
+ '/(?<=[\d])-(?=[\d])/' => "–", # N dash between numbers
+ '/ -- /' => " — " # M dash
+ );
+ $text = preg_replace( array_keys($dashReplace), array_values($dashReplace), $text );
+
# the position of the convert() call should not be changed. it
# assumes that the links are all replaces and the only thing left
# is the <nowiki> mark.
* @access private
* @static
*/
- function extractTags($tag, $text, &$content, $uniq_prefix = ''){
+ function extractTagsAndParams($tag, $text, &$content, &$tags, &$params, $uniq_prefix = ''){
+ # Replaces every <$tag ...>...</$tag> section of $text (or every HTML
+ # comment when $tag is STRIP_COMMENTS) with a unique marker and returns
+ # the text with those markers in place. For each marker the by-reference
+ # arrays receive:
+ # $content[$marker] - the text between the opening and the closing tag
+ # $tags[$marker] - the opening tag, rebuilt as "<$tag$attributes>"
+ # $params[$marker] - the result of Sanitizer::decodeTagAttributes()
+ # A marker is $uniq_prefix, '-', $tag, a random string and an 8-digit
+ # upper-case hex counter starting at 1.
$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
if ( !$content ) {
$content = array( );
}
$n = 1;
$stripped = '';
+
+ if ( !$tags ) {
+ $tags = array( );
+ }
+
+ if ( !$params ) {
+ $params = array( );
+ }
+
+ // Hack to support short XML style tags
+ $text = preg_replace( "/<$tag(\\s+[^>]*|\\s*)\\/>/i", "<$tag\\1></$tag>", $text );
+
+ # The empty group in the comment pattern keeps the number of captured
+ # pieces the same for both kinds of start pattern.
+ if( $tag == STRIP_COMMENTS ) {
+ $start = '/<!--()/';
+ $end = '/-->/';
+ } else {
+ $start = "/<$tag(\\s+[^>]*|\\s*)>/i";
+ $end = "/<\\/$tag\\s*>/i";
+ }
while ( '' != $text ) {
- if($tag==STRIP_COMMENTS) {
- $p = preg_split( '/<!--/', $text, 2 );
- } else {
- $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
- }
+ # $p is: text before the tag, captured attributes, rest of the text.
+ $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
$stripped .= $p[0];
- if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
- $text = '';
+ if( count( $p ) < 3 ) {
+ # No further opening tag: everything left is already in $stripped.
+ break;
+ }
+ $attributes = $p[1];
+ $inside = $p[2];
+
+ $marker = $rnd . sprintf('%08X', $n++);
+ $stripped .= $marker;
+
+ $tags[$marker] = "<$tag$attributes>";
+ $params[$marker] = Sanitizer::decodeTagAttributes( $attributes );
+
+ $q = preg_split( $end, $inside, 2 );
+ $content[$marker] = $q[0];
+ if( count( $q ) < 2 ) {
+ # No end tag -- let it run out to the end of the text.
+ # preg_split() always yields at least one element, so a missing
+ # end tag shows up as exactly one element; testing "< 1" would
+ # never match and $q[1] below would be read while undefined.
+ break;
} else {
- if($tag==STRIP_COMMENTS) {
- $q = preg_split( '/-->/i', $p[1], 2 );
- } else {
- $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
- }
- $marker = $rnd . sprintf('%08X', $n++);
- $content[$marker] = $q[0];
- $stripped .= $marker;
$text = $q[1];
}
}
return $stripped;
}
+ /**
+ * Convenience wrapper around extractTagsAndParams()
+ * for callers that do not need the $tags and $params output arrays,
+ * i.e. for tags that never carry parameters, like <nowiki>
+ *
+ * @access private
+ * @static
+ */
+ function extractTags( $tag, $text, &$content, $uniq_prefix = '' ) {
+ # Throwaway by-reference targets; whatever is written to them is dropped.
+ $ignoredTags = array();
+ $ignoredParams = array();
+
+ $stripped = Parser::extractTagsAndParams( $tag, $text, $content,
+ $ignoredTags, $ignoredParams, $uniq_prefix );
+ return $stripped;
+ }
+
/**
* Strips and renders nowiki, pre, math, hiero
* If $render is set, performs necessary rendering operations on plugins
$pre_content = array();
$comment_content = array();
$ext_content = array();
+ $ext_tags = array();
+ $ext_params = array();
$gallery_content = array();
# Replace any instances of the placeholders
# Extensions
foreach ( $this->mTagHooks as $tag => $callback ) {
$ext_content[$tag] = array();
- $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
+ $text = Parser::extractTagsAndParams( $tag, $text, $ext_content[$tag],
+ $ext_tags[$tag], $ext_params[$tag], $uniq_prefix );
foreach( $ext_content[$tag] as $marker => $content ) {
+ $full_tag = $ext_tags[$tag][$marker];
+ $params = $ext_params[$tag][$marker];
if ( $render ) {
- $ext_content[$tag][$marker] = $callback( $content );
+ $ext_content[$tag][$marker] = $callback( $content, $params );
} else {
- $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
+ $ext_content[$tag][$marker] = "$full_tag$content</$tag>";
}
}
}
$fname = 'Parser::internalParse';
wfProfileIn( $fname );
- $text = Sanitizer::removeHTMLtags( $text );
+ $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'replaceVariables' ) );
$text = $this->replaceVariables( $text, $args );
$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
# the real problem is with the $e1 regex
# See bug 1300.
- if (preg_match( "/^\](.*)/s", $m[3], $n ) ) {
+ #
+ # Still some problems for cases where the ] is meant to be outside punctuation,
+ # and no image is in sight. See bug 2095.
+ #
+ if( $text !== '' && preg_match( "/^\](.*)/s", $m[3], $n ) ) {
$text .= ']'; # so that replaceExternalLinks($text) works later
$m[3] = $n[1];
}
if ( $ns == NS_CATEGORY ) {
wfProfileIn( "$fname-category" );
- $t = $nt->getText();
+ $t = $wgContLang->convert($nt->getText());
$s = rtrim($s . "\n"); # bug 87
$wgLinkCache->suspend(); # Don't save in links/brokenlinks
} else {
$sortkey = $text;
}
+ $sortkey = $wgContLang->convertCategoryKey( $sortkey );
$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
$this->mOutput->addCategoryLink( $t );
$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
continue;
}
- if ( $nt->isAlwaysKnown() ) {
+ if( $nt->isLocal() && $nt->isAlwaysKnown() ) {
+ /**
+ * Skip lookups for special pages and self-links.
+ * External interwiki links are not included here because
+ * the HTTP urls would break output in the next parse step;
+ * they will have placeholders kept.
+ */
$s .= $sk->makeKnownLinkObj( $nt, $text, '', $trail, $prefix );
} else {
/**
list( $inside, $trail ) = Linker::splitTrail( $trail );
if ( $nt->isExternal() ) {
- $iwRecord = array( $nt->getPrefixedDBkey(), $prefix.$text.$inside );
- $nr = array_push($this->mInterwikiLinkHolders, $iwRecord);
+ $nr = array_push( $this->mInterwikiLinkHolders['texts'], $prefix.$text.$inside );
+ $this->mInterwikiLinkHolders['titles'][] =& $nt;
$retVal = '<!--IWLINK '. ($nr-1) ."-->{$trail}";
} else {
$nr = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() );
}
# Load from database
- $itcamefromthedatabase = false;
+ $replaceHeadings = false;
+ $isHTML = false;
$lastPathLevel = $this->mTemplatePath;
if ( !$found ) {
$ns = NS_TEMPLATE;
# Check for excessive inclusion
$dbk = $title->getPrefixedDBkey();
if ( $this->incrementIncludeCount( $dbk ) ) {
- # This should never be reached.
- $article = new Article( $title );
- $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
- if ( $articleContent !== false ) {
- $found = true;
- $text = $linestart . $articleContent;
- $itcamefromthedatabase = true;
+ if ( $title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() ) {
+ # Capture special page output
+ $text = SpecialPage::capturePath( $title );
+ if ( $text && !is_object( $text ) ) {
+ $found = true;
+ $noparse = true;
+ $isHTML = true;
+ $this->mOutput->setCacheTime( -1 );
+ }
+ } else {
+ $article = new Article( $title );
+ $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
+ if ( $articleContent !== false ) {
+ $found = true;
+ $text = $articleContent;
+ $replaceHeadings = true;
+ }
}
}
# If the title is valid but undisplayable, make a link to it
if ( $this->mOutputType == OT_HTML && !$found ) {
- $text = $linestart . '[['.$title->getPrefixedText().']]';
+ $text = '[['.$title->getPrefixedText().']]';
$found = true;
}
# Template cache array insertion
if( $found ) {
$this->mTemplates[$part1] = $text;
+ $text = $linestart . $text;
}
}
}
if( $this->mOutputType == OT_HTML ) {
$text = $this->strip( $text, $this->mStripState );
- $text = Sanitizer::removeHTMLtags( $text );
+ $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'replaceVariables' ), $assocArgs );
}
$text = $this->replaceVariables( $text, $assocArgs );
wfProfileOut( $fname );
return $matches[0];
} else {
- # replace ==section headers==
- # XXX this needs to go away once we have a better parser.
- if ( $this->mOutputType != OT_WIKI && $itcamefromthedatabase ) {
- if( !is_null( $title ) )
- $encodedname = base64_encode($title->getPrefixedDBkey());
- else
- $encodedname = base64_encode("");
- $m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
- PREG_SPLIT_DELIM_CAPTURE);
- $text = '';
- $nsec = 0;
- for( $i = 0; $i < count($m); $i += 2 ) {
- $text .= $m[$i];
- if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
- $hl = $m[$i + 1];
- if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
- $text .= $hl;
- continue;
+ if ( $isHTML ) {
+ # Replace raw HTML by a placeholder
+ # Add a blank line preceding, to prevent it from mucking up
+ # immediately preceding headings
+ $text = "\n\n" . $this->insertStripItem( $text, $this->mStripState );
+ } else {
+ # replace ==section headers==
+ # XXX this needs to go away once we have a better parser.
+ if ( $this->mOutputType != OT_WIKI && $replaceHeadings ) {
+ if( !is_null( $title ) )
+ $encodedname = base64_encode($title->getPrefixedDBkey());
+ else
+ $encodedname = base64_encode("");
+ $m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
+ PREG_SPLIT_DELIM_CAPTURE);
+ $text = '';
+ $nsec = 0;
+ for( $i = 0; $i < count($m); $i += 2 ) {
+ $text .= $m[$i];
+ if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
+ $hl = $m[$i + 1];
+ if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
+ $text .= $hl;
+ continue;
+ }
+ preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
+ $text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
+ . $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];
+
+ $nsec++;
}
- preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
- $text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
- . $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];
-
- $nsec++;
}
}
}
+
# Prune lower levels off the recursion check path
$this->mTemplatePath = $lastPathLevel;
* @access private
*/
function formatHeadings( $text, $isMain=true ) {
- global $wgInputEncoding, $wgMaxTocLevel, $wgContLang, $wgLinkHolders, $wgInterwikiLinkHolders;
+ global $wgMaxTocLevel, $wgContLang, $wgLinkHolders, $wgInterwikiLinkHolders;
$doNumberHeadings = $this->mOptions->getNumberHeadings();
$doShowToc = true;
"\$this->mLinkHolders['texts'][\$1]",
$canonized_headline );
$canonized_headline = preg_replace( '/<!--IWLINK ([0-9]*)-->/e',
- "\$this->mInterwikiLinkHolders[\$1][1]",
+ "\$this->mInterwikiLinkHolders['texts'][\$1]",
$canonized_headline );
# strip out HTML
$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
$tocline = trim( $canonized_headline );
- $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
+ $canonized_headline = urlencode( Sanitizer::decodeCharReferences( str_replace(' ', '_', $tocline) ) );
$replacearray = array(
'%3A' => ':',
'%' => '.'
while ( $s = $dbr->fetchObject($res) ) {
$title = Title::makeTitle( $s->page_namespace, $s->page_title );
$pdbk = $title->getPrefixedDBkey();
- $wgLinkCache->addGoodLink( $s->page_id, $pdbk );
+ $wgLinkCache->addGoodLinkObj( $s->page_id, $title );
if ( $threshold > 0 ) {
$size = $s->page_len;
$searchkey = "<!--LINK $key-->";
$title = $this->mLinkHolders['titles'][$key];
if ( empty( $colours[$pdbk] ) ) {
- $wgLinkCache->addBadLink( $pdbk );
+ $wgLinkCache->addBadLinkObj( $title );
$colours[$pdbk] = 0;
$wgOutputReplace[$searchkey] = $sk->makeBrokenLinkObj( $title,
$this->mLinkHolders['texts'][$key],
# Now process interwiki link holders
# This is quite a bit simpler than internal links
- if ( !empty( $this->mInterwikiLinkHolders ) ) {
+ if ( !empty( $this->mInterwikiLinkHolders['texts'] ) ) {
wfProfileIn( $fname.'-interwiki' );
# Make interwiki link HTML
$wgOutputReplace = array();
- foreach( $this->mInterwikiLinkHolders as $i => $lh ) {
- $s = $sk->makeLink( $lh[0], $lh[1] );
- $wgOutputReplace[] = $s;
+ foreach( $this->mInterwikiLinkHolders['texts'] as $key => $link ) {
+ $title = $this->mInterwikiLinkHolders['titles'][$key];
+ $wgOutputReplace[$key] = $sk->makeLinkObj( $title, $link );
}
$text = preg_replace_callback(
$text );
wfProfileOut( $fname.'-interwiki' );
}
-
+
wfProfileOut( $fname );
return $colours;
}
+
+ /**
+ * Replace <!--LINK n--> and <!--IWLINK n--> link placeholders with the
+ * plain text of the links (not HTML-formatted).
+ * Each placeholder is resolved by replaceLinkHoldersTextCallback().
+ * @param string $text
+ * @return string
+ */
+ function replaceLinkHoldersText( $text ) {
+ $fname = 'Parser::replaceLinkHoldersText';
+ wfProfileIn( $fname );
+
+ # Group 1 is the placeholder type, group 2 the holder key.
+ $text = preg_replace_callback(
+ '/<!--(LINK|IWLINK) (.*?)-->/',
+ array( &$this, 'replaceLinkHoldersTextCallback' ),
+ $text );
+
+ wfProfileOut( $fname );
+ return $text;
+ }
+
+ /**
+ * Resolve a single matched link placeholder to its stored link text.
+ *
+ * @param array $matches [0] whole placeholder, [1] 'LINK' or 'IWLINK', [2] holder key
+ * @return string the stored text, or the untouched placeholder when the
+ * type or key is not known
+ * @access private
+ */
+ function replaceLinkHoldersTextCallback( $matches ) {
+ $kind = $matches[1];
+ $index = $matches[2];
+
+ # Internal link holder
+ if( $kind == 'LINK' && isset( $this->mLinkHolders['texts'][$index] ) ) {
+ return $this->mLinkHolders['texts'][$index];
+ }
+
+ # Interwiki link holder
+ if( $kind == 'IWLINK' && isset( $this->mInterwikiLinkHolders['texts'][$index] ) ) {
+ return $this->mInterwikiLinkHolders['texts'][$index];
+ }
+
+ # Nothing stored for this placeholder: leave it as it was.
+ return $matches[0];
+ }
/**
* Renders an image gallery from a text with one line per image.
*/
function renderImageGallery( $text ) {
# Setup the parser
- global $wgUser, $wgParser, $wgTitle;
+ global $wgUser, $wgTitle;
$parserOptions = ParserOptions::newFromUser( $wgUser );
-
+ $localParser = new Parser();
+
global $wgLinkCache;
$ig = new ImageGallery();
$ig->setShowBytes( false );
$label = '';
}
- $html = $wgParser->parse( $label , $wgTitle, $parserOptions );
+ $html = $localParser->parse( $label , $wgTitle, $parserOptions );
$html = $html->mText;
$ig->add( new Image( $nt ), $html );
# remember to set an alignment, don't render immediately
$align = 'none';
} elseif ( $wgUseImageResize && ! is_null( $match = $mwWidth->matchVariableStartToEnd($val) ) ) {
+ wfDebug( "MAG_IMG_WIDTH match: $match\n" );
# $match is the image width in pixels
if ( preg_match( '/^([0-9]*)x([0-9]*)$/', $match, $m ) ) {
$width = intval( $m[1] );
}
}
# Strip bad stuff out of the alt text
- $alt = $caption;
- $this->replaceLinkHolders( $alt );
+ $alt = $this->replaceLinkHoldersText( $caption );
$alt = Sanitizer::stripAllTags( $alt );
# Linker does the rest
class ParserOutput
{
var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
- var $mCacheTime; # Used in ParserCache
+ var $mCacheTime; # Timestamp on this article, or -1 for uncacheable. Used in ParserCache.
var $mVersion; # Compatibility check
var $mTitleText; # title text of the chosen language variant
*/
function expired( $touched ) {
global $wgCacheEpoch;
- return $this->getCacheTime() <= $touched ||
+ return $this->getCacheTime() == -1 || // parser says it's uncacheable
+ $this->getCacheTime() <= $touched ||
$this->getCacheTime() <= $wgCacheEpoch ||
!isset( $this->mVersion ) ||
version_compare( $this->mVersion, MW_PARSER_VERSION, "lt" );
var $mDateFormat; # Date format index
var $mEditSection; # Create "edit section" links
var $mNumberHeadings; # Automatically number headings
+ var $mAllowSpecialInclusion; # Allow inclusion of special pages
function getUseTeX() { return $this->mUseTeX; }
function getUseDynamicDates() { return $this->mUseDynamicDates; }
function getDateFormat() { return $this->mDateFormat; }
function getEditSection() { return $this->mEditSection; }
function getNumberHeadings() { return $this->mNumberHeadings; }
+ # Whether inclusion of special pages is allowed (see $mAllowSpecialInclusion)
+ function getAllowSpecialInclusion() { return $this->mAllowSpecialInclusion; }
+
function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
+ # Stores $x in $mAllowSpecialInclusion through wfSetVar() and returns its result
+ # (presumably the previous value -- confirm against wfSetVar)
+ function setAllowSpecialInclusion( $x ) { return wfSetVar( $this->mAllowSpecialInclusion, $x ); }
function setSkin( &$x ) { $this->mSkin =& $x; }
+ /**
+ * Presumably the PHP 4 style constructor (a method named after its class).
+ * Fills the new object by calling initialiseFromUser() with the global $wgUser.
+ */
+ function ParserOptions() {
+ global $wgUser;
+ # initialiseFromUser() declares its parameter as &$userInput, so the
+ # global is handed over by reference.
+ $this->initialiseFromUser( $wgUser );
+ }
+
/**
* Get parser options
* @static
/** Get user options */
function initialiseFromUser( &$userInput ) {
- global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
+ global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages,
+ $wgAllowSpecialInclusion;
$fname = 'ParserOptions::initialiseFromUser';
wfProfileIn( $fname );
if ( !$userInput ) {
$this->mDateFormat = $user->getOption( 'date' );
$this->mEditSection = $user->getOption( 'editsection' );
$this->mNumberHeadings = $user->getOption( 'numberheadings' );
+ $this->mAllowSpecialInclusion = $wgAllowSpecialInclusion;
wfProfileOut( $fname );
}
}