<?php
-// require_once('Tokenizer.php');
-
/**
* File for Parser and related classes
*
# only once and last
$text = $this->doBlockLevels( $text, $linestart );
$text = $this->unstripNoWiki( $text, $this->mStripState );
- if($wgUseTidy) {
- $text = $this->tidy($text);
- }
$this->mOutput->setText( $text );
wfProfileOut( $fname );
return $this->mOutput;
/**
* interface with html tidy, used if $wgUseTidy = true
*
- * @access private
+ * @access public
+ * @static
*/
function tidy ( $text ) {
global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
wfProfileIn( $fname );
$cleansource = '';
+ $opts = '';
switch(strtoupper($wgOutputEncoding)) {
case 'ISO-8859-1':
- $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
+ $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
break;
case 'UTF-8':
- $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
+ $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
break;
default:
- $wgTidyOpts .= ' -raw';
+ $opts .= ' -raw';
}
$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
1 => array('pipe', 'w'),
2 => array('file', '/dev/null', 'a')
);
- $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
+ $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes);
if (is_resource($process)) {
fwrite($pipes[0], $wrappedtext);
fclose($pipes[0]);
$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
}
$text = $this->doAllQuotes( $text );
- $text = $this->replaceExternalLinks( $text );
- $text = $this->doMagicLinks( $text );
$text = $this->replaceInternalLinks ( $text );
# Another call to replace links and images inside captions of images
$text = $this->replaceInternalLinks ( $text );
-
+ $text = $this->replaceExternalLinks( $text );
+ $text = $this->doMagicLinks( $text );
$text = $this->doTableStuff( $text );
$text = $this->formatHeadings( $text, $isMain );
$sk =& $this->mOptions->getSkin();
*/
function replaceInternalLinks( $s ) {
global $wgLang, $wgContLang, $wgLinkCache;
- global $wgNamespacesWithSubpages;
static $fname = 'Parser::replaceInternalLinks' ;
wfProfileIn( $fname );
continue;
}
- # Valid link forms:
- # Foobar -- normal
- # :Foobar -- override special treatment of prefix (images, language links)
- # /Foobar -- convert to CurrentPage/Foobar
- # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
-
- # Look at the first character
- $c = substr($m[1],0,1);
- $noforce = ($c != ':');
-
- # subpage
- if( $c == '/' ) {
- # / at end means we don't want the slash to be shown
- if(substr($m[1],-1,1)=='/') {
- $m[1]=substr($m[1],1,strlen($m[1])-2);
- $noslash=$m[1];
- } else {
- $noslash=substr($m[1],1);
- }
-
- # Some namespaces don't allow subpages
- if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
- # subpages allowed here
- $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
- if( '' == $text ) {
- $text= $m[1];
- } # this might be changed for ugliness reasons
- } else {
- # no subpage allowed, use standard link
- $link = $noslash;
- }
-
- } elseif( $noforce ) { # no subpage
- $link = $m[1];
- } else {
- # We don't want to keep the first character
- $link = substr( $m[1], 1 );
+ # Don't allow internal links to pages containing
+ # PROTO: where PROTO is a valid URL protocol; these
+ # should be external links.
+ if (preg_match('/((?:'.URL_PROTOCOLS.'):)/', $m[1])) {
+ $s .= $prefix . '[[' . $line ;
+ continue;
+ }
+
+ # Make subpage if necessary
+ $link = $this->maybeDoSubpageLink( $m[1], $text );
+
+ $noforce = (substr($m[1], 0, 1) != ':');
+ if (!$noforce) {
+ # Strip off leading ':'
+ $link = substr($link, 1);
}
$wasblank = ( '' == $text );
return $s;
}
+ /**
+  * Handle link to subpage if necessary.
+  *
+  * Valid link forms:
+  *   Foobar    -- normal
+  *   :Foobar   -- override special treatment of prefix (images, language links)
+  *   /Foobar   -- convert to CurrentPage/Foobar
+  *   /Foobar/  -- convert to CurrentPage/Foobar, strip the initial / from text
+  *
+  * @param $target string the source of the link
+  * @param &$text string the link text; only modified when it is '' on entry
+  *               and a subpage link is actually built, in which case it
+  *               receives the target ('/Foobar' as written, or 'Foobar'
+  *               for the '/Foobar/' form)
+  * @return string the full name of the link
+  * @access private
+  */
+ function maybeDoSubpageLink($target, &$text) {
+ 	global $wgNamespacesWithSubpages;
+
+ 	$fname = 'Parser::maybeDoSubpageLink';
+ 	wfProfileIn( $fname );
+
+ 	# Look at the first character. substr() is used rather than a string
+ 	# offset so that an empty target simply falls through to the
+ 	# "no subpage" branch instead of raising an uninitialized-offset notice.
+ 	if( substr( $target, 0, 1 ) === '/' ) {
+ 		# / at end means we don't want the slash to be shown
+ 		if( substr( $target, -1, 1 ) == '/' ) {
+ 			$target = substr( $target, 1, -1 );
+ 			$noslash = $target;
+ 		} else {
+ 			$noslash = substr( $target, 1 );
+ 		}
+
+ 		# Some namespaces don't allow subpages
+ 		if( !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) {
+ 			# subpages allowed here
+ 			$ret = $this->mTitle->getPrefixedText() . '/' . trim( $noslash );
+ 			if( '' === $text ) {
+ 				$text = $target;
+ 			} # this might be changed for ugliness reasons
+ 		} else {
+ 			# no subpage allowed, use standard link
+ 			$ret = $target;
+ 		}
+ 	} else {
+ 		# no subpage
+ 		$ret = $target;
+ 	}
+
+ 	wfProfileOut( $fname );
+ 	return $ret;
+ }
+
/**#@+
* Used by doBlockLevels()
* @access private
# So we check for : in the remainder text to split up the
# title and definition, without b0rking links.
# FIXME: This is not foolproof. Something better in Tokenizer might help.
- if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
+ if( preg_match( '/^(.*?):(.*)$/', $t, $match ) ) {
$term = $match[1];
$output .= $term . $this->nextItem( ':' );
$t = $match[2];
if ( ';' == $char ) {
# FIXME: This is dupe of code above
- if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
+ if( preg_match( '/^(.*?):(.*)$/', $t, $match ) ) {
$term = $match[1];
$output .= $term . $this->nextItem( ':' );
$t = $match[2];
* @access private
*/
function initialiseVariables() {
+ $fname = 'Parser::initialiseVariables';
+ wfProfileIn( $fname );
global $wgVariableIDs;
# Start from an empty table each time this is called, then fill it with
# one entry set per ID listed in $wgVariableIDs.
$this->mVariables = array();
foreach ( $wgVariableIDs as $id ) {
# The magic word object registers itself into mVariables together with
# the value computed for this ID (exact key layout is decided by
# MagicWord::addToArray, which is not visible here).
$mw =& MagicWord::get( $id );
$mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
}
+ wfProfileOut( $fname );
}
/**
$text = preg_replace_callback( "/{{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
}
# Template substitution
- $regex = '/{{(['.$titleChars.']*)(\\|.*?|)}}/s';
+ $regex = '/(\\n|{)?{{(['.$titleChars.']*)(\\|.*?|)}}/s';
$text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );
array_pop( $this->mArgStack );
$found = false;
$nowiki = false;
$noparse = false;
- $itcamefromthedatabase = false;
$title = NULL;
+ # Need to know if the template comes at the start of a line,
+ # to treat the beginning of the template like the beginning
+ # of a line for tables and block-level elements.
+ $linestart = $matches[1];
+
# $part1 is the bit before the first |, and must contain only title characters
# $args is a list of arguments, starting from index 0, not including $part1
- $part1 = $matches[1];
- # If the second subpattern matched anything, it will start with |
+ $part1 = $matches[2];
+ # If the third subpattern matched anything, it will start with |
- $args = $this->getTemplateArgs($matches[2]);
+ $args = $this->getTemplateArgs($matches[3]);
$argc = count( $args );
- # {{{}}}
- if ( strpos( $matches[0], '{{{' ) !== false ) {
+ # Don't parse {{{}}} because that's only for template arguments
+ if ( $linestart === '{' ) {
$text = $matches[0];
$found = true;
$noparse = true;
$mwInt =& MagicWord::get( MAG_INT );
if ( $mwInt->matchStartAndRemove( $part1 ) ) {
if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
- $text = wfMsgReal( $part1, $args, true );
+ $text = $linestart . wfMsgReal( $part1, $args, true );
$found = true;
}
}
$mwNs = MagicWord::get( MAG_NS );
if ( $mwNs->matchStartAndRemove( $part1 ) ) {
if ( intval( $part1 ) ) {
- $text = $wgContLang->getNsText( intval( $part1 ) );
+ $text = $linestart . $wgContLang->getNsText( intval( $part1 ) );
$found = true;
} else {
$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
if ( !is_null( $index ) ) {
- $text = $wgContLang->getNsText( $index );
+ $text = $linestart . $wgContLang->getNsText( $index );
$found = true;
}
}
$title = Title::newFromText( $part1 );
if ( !is_null( $title ) ) {
if ( $argc > 0 ) {
- $text = $title->$func( $args[0] );
+ $text = $linestart . $title->$func( $args[0] );
} else {
- $text = $title->$func();
+ $text = $linestart . $title->$func();
}
$found = true;
}
if ( !$found && $argc == 1 ) {
$mwGrammar =& MagicWord::get( MAG_GRAMMAR );
if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
- $text = $wgContLang->convertGrammar( $args[0], $part1 );
+ $text = $linestart . $wgContLang->convertGrammar( $args[0], $part1 );
$found = true;
}
}
# Did we encounter this template already? If yes, it is in the cache
# and we need to check for loops.
if ( !$found && isset( $this->mTemplates[$part1] ) ) {
+ # set $text to cached message.
+ $text = $linestart . $this->mTemplates[$part1];
+ $found = true;
+
# Infinite loop test
if ( isset( $this->mTemplatePath[$part1] ) ) {
$noparse = true;
$found = true;
+ $text .= '<!-- WARNING: template loop detected -->';
}
- # set $text to cached message.
- $text = $this->mTemplates[$part1];
- $found = true;
}
# Load from database
+ $itcamefromthedatabase = false;
if ( !$found ) {
- $title = Title::newFromText( $part1, NS_TEMPLATE );
+ $ns = NS_TEMPLATE;
+ # $subpage has to be a real variable: declaring it inline in the call
+ # ($subpage='') hands maybeDoSubpageLink() a temporary for its by-reference
+ # parameter, so the value it writes back is lost and the check below
+ # could never succeed.
+ $subpage = '';
+ $part1 = $this->maybeDoSubpageLink( $part1, $subpage );
+ if ($subpage !== '') {
+ 	# maybeDoSubpageLink() only fills in the text when it built a subpage
+ 	# link, so resolve the page in the current title's namespace rather
+ 	# than in the template namespace.
+ 	$ns = $this->mTitle->getNamespace();
+ }
+ $title = Title::newFromText( $part1, $ns );
if ( !is_null( $title ) && !$title->isExternal() ) {
# Check for excessive inclusion
$dbk = $title->getPrefixedDBkey();
$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
if ( $articleContent !== false ) {
$found = true;
- $text = $articleContent;
+ $text = $linestart . $articleContent;
$itcamefromthedatabase = true;
}
}
# If the title is valid but undisplayable, make a link to it
if ( $this->mOutputType == OT_HTML && !$found ) {
- $text = '[['.$title->getPrefixedText().']]';
+ $text = $linestart . '[['.$title->getPrefixedText().']]';
$found = true;
}
if ( $this->mOutputType == OT_HTML && !is_null( $title ) ) {
$wgLinkCache->addLinkObj( $title );
}
+
+ # If the template begins with a table or block-level
+ # element, it should be treated as beginning a new line.
+ # The newline must be written in double quotes: '\n' in single quotes is
+ # a literal backslash followed by "n" and can never equal the newline
+ # captured from the template regex, which made this test always true.
+ if ($linestart !== "\n" && preg_match('/^({\\||:|;|#|\*)/', $text)) {
+ 	$text = "\n" . $text;
+ }
}
# Empties the template path
$encodedname = base64_encode($title->getPrefixedDBkey());
else
$encodedname = base64_encode("");
- $matches = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
+ $m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
PREG_SPLIT_DELIM_CAPTURE);
$text = '';
$nsec = 0;
- for( $i = 0; $i < count($matches); $i += 2 ) {
- $text .= $matches[$i];
- if (!isset($matches[$i + 1]) || $matches[$i + 1] == "") continue;
- $hl = $matches[$i + 1];
+ for( $i = 0; $i < count($m); $i += 2 ) {
+ $text .= $m[$i];
+ if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
+ $hl = $m[$i + 1];
if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
$text .= $hl;
continue;
$nsec++;
}
}
+ }
+
+ # Empties the template path
+ $this->mTemplatePath = array();
+ if ( !$found ) {
+ return $matches[0];
+ } else {
return $text;
}
}