<?php
-include_once('Tokenizer.php');
+require_once('Tokenizer.php');
-# PHP Parser
-#
-# Converts wikitext to HTML.
+if( $GLOBALS['wgUseWikiHiero'] ){
+ require_once('extensions/wikihiero/wikihiero.php');
+}
+if( $GLOBALS['wgUseTimeline'] ){
+ require_once('extensions/timeline/Timeline.php');
+}
+
+# PHP Parser
#
-# Globals used:
-# objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
+# Processes wiki markup
+#
+# There are two main entry points into the Parser class: parse() and preSaveTransform().
+# The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
+#
+# Globals used:
+# objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
#
# NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
#
# settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
-# $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
+# $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
# $wgLocaltimezone
#
# * only within ParserOptions
+#
+#
+#----------------------------------------
+# Variable substitution O(N^2) attack
+#-----------------------------------------
+# Without countermeasures, it would be possible to attack the parser by saving a page
+# filled with a large number of inclusions of large pages. The size of the generated
+# page would be proportional to the square of the input size. Hence, we limit the number
+# of inclusions of any given page, thus bringing any attack back to O(N).
+#
+
+# Cap on repeated inclusions of the same page; presumably the limit
+# described in the "O(N^2) attack" note in the file header (the code
+# that enforces it is not in this part of the file).
+define( "MAX_INCLUDE_REPEAT", 5 );
+
+# Allowed values for $mOutputType.
+# parse() selects OT_HTML; strip() only renders extension tags to
+# HTML when the output type is OT_HTML.
+define( "OT_HTML", 1 );
+define( "OT_WIKI", 2 );
+define( "OT_MSG", 3 );
+
+# Special $tag value for extractTags(): when passed, extractTags()
+# extracts <!-- HTML comments --> rather than <XML>-style tags.
+# This should not be anything we may want to use in wikisyntax.
+define( "STRIP_COMMENTS", "HTMLCommentStrip" );
+
+# Prefix of the placeholder markers produced by strip() and
+# insertStripItem().
+define( "UNIQ_PREFIX", "NaodW29");
class Parser
{
# Cleared with clearState():
- var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
+ var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
+ var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
# Temporary:
- var $mOptions, $mTitle;
+ var $mOptions, $mTitle, $mOutputType;
+ # Constructor (PHP 4 style: named after the class).
+ # Gives the state members listed below their empty starting values;
+ # members not assigned here keep their declared defaults.
function Parser()
{
$this->mAutonumber = 0;
$this->mLastSection = "";
$this->mDTopen = false;
- $this->mStripState = false;
+ $this->mVariables = false;
+ $this->mIncludeCount = array();
+ # Empty strip state: strip() builds a fresh one on first use, and
+ # unstrip() can iterate it safely even if nothing was stripped.
+ $this->mStripState = array();
+ $this->mArgStack = array();
}
-
+
# First pass--just handle <nowiki> sections, pass the rest off
- # to doWikiPass2() which does all the real work.
+ # to internalParse() which does all the real work.
#
# Returns a ParserOutput
#
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
+ global $wgUseTidy;
$fname = "Parser::parse";
wfProfileIn( $fname );
if ( $clearState ) {
$this->clearState();
}
-
+
$this->mOptions = $options;
$this->mTitle =& $title;
-
+ # parse() always produces HTML; strip() reads this to decide whether
+ # stripped sections are rendered or merely re-wrapped in their tags.
+ $this->mOutputType = OT_HTML;
+
$stripState = NULL;
- $text = $this->strip( $text, $this->mStripState, true );
- $text = $this->doWikiPass2( $text, $linestart );
+ # Stripped sections are replaced by markers before the wiki passes
+ # run and are put back by unstrip() afterwards.
+ $text = $this->strip( $text, $this->mStripState );
+ $text = $this->internalParse( $text, $linestart );
$text = $this->unstrip( $text, $this->mStripState );
-
+ # Clean up special characters, only run once, next-to-last before doBlockLevels
+ if(!$wgUseTidy) {
+ $fixtags = array(
+ "/<hr *>/i" => '<hr/>',
+ "/<br *>/i" => '<br/>',
+ "/<center *>/i"=>'<div class="center">',
+ "/<\\/center *>/i" => '</div>',
+ # Clean up spare ampersands; note that we probably ought to be
+ # more careful about named entities.
+ # An ampersand that does not start a numeric or named entity is
+ # escaped to &amp;. The hex class is A-F/a-f only: "A-f" would
+ # also admit G-Z and several punctuation characters.
+ '/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
+ );
+ $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+ } else {
+ # With tidy enabled only <center> is rewritten here.
+ $fixtags = array(
+ "/<center *>/i"=>'<div class="center">',
+ "/<\\/center *>/i" => '</div>'
+ );
+ $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+ }
+ # only once and last
+ $text = $this->doBlockLevels( $text, $linestart );
+ if($wgUseTidy) {
+ $text = $this->tidy($text);
+ }
$this->mOutput->setText( $text );
wfProfileOut( $fname );
return $this->mOutput;
{
return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
}
-
- # Strips <nowiki>, <pre> and <math>
- # Returns the text, and fills an array with data needed in unstrip()
- #
- function strip( $text, &$state, $render = true )
- {
- $state = array(
- 'nwlist' => array(),
- 'nwsecs' => 0,
- 'nwunq' => Parser::getRandomString(),
- 'mathlist' => array(),
- 'mathsecs' => 0,
- 'mathunq' => Parser::getRandomString(),
- 'prelist' => array(),
- 'presecs' => 0,
- 'preunq' => Parser::getRandomString()
- );
+ # Replaces all occurrences of <$tag>content</$tag> in the text
+ # with a random marker and returns the new text. the output parameter
+ # $content will be an associative array filled with data on the form
+ # $unique_marker => content.
+
+ # If $content is already set, the additional entries will be appended
+
+ # If $tag is set to STRIP_COMMENTS, the function will extract
+ # <!-- HTML comments -->
+
+ /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
+ # $tag         tag name without brackets, or STRIP_COMMENTS
+ # $text        text to scan
+ # $content     in/out: array of marker => extracted content; entries
+ #              are appended if it is already set
+ # $uniq_prefix string prepended to every generated marker
+ # Returns $text with every extracted section replaced by its marker.
+ $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
+ if ( !$content ) {
+ $content = array( );
+ }
+ $n = 1;
$stripped = "";
- $stripped2 = "";
- $stripped3 = "";
-
- # Replace any instances of the placeholders
- $text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
- $text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
- $text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );
-
+
while ( "" != $text ) {
- $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
+ # Split once at the next opening delimiter: $p[0] is the text
+ # before it, $p[1] (if present) everything after it.
+ if($tag==STRIP_COMMENTS) {
+ $p = preg_split( "/<!--/i", $text, 2 );
+ } else {
+ $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
+ }
$stripped .= $p[0];
- if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
- $text = "";
+ if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { 
+ $text = ""; 
} else {
- $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
- ++$state['nwsecs'];
-
- if ( $render ) {
- $state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
+ if($tag==STRIP_COMMENTS) {
+ $q = preg_split( "/-->/i", $p[1], 2 );
} else {
- $state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
+ $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
}
-
- $stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
+ $marker = $rnd . sprintf("%08X", $n++);
+ $content[$marker] = $q[0];
+ $stripped .= $marker;
- $text = $q[1];
+ # With no closing delimiter preg_split() returns a single element:
+ # the rest of the text has become the content and nothing is left
+ # to scan, so avoid reading the undefined $q[1].
+ $text = isset( $q[1] ) ? $q[1] : "";
}
}
+ return $stripped;
+ }
- if( $this->mOptions->getUseTeX() ) {
- while ( "" != $stripped ) {
- $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
- $stripped2 .= $p[0];
- if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
- $stripped = "";
- } else {
- $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
- ++$state['mathsecs'];
+ # Strips and renders <nowiki>, <pre>, <math>, <hiero>
+ # If $render is set, performs necessary rendering operations on plugins
+ # Returns the text, and fills an array with data needed in unstrip()
+ # If the $state is already a valid strip state, it adds to the state
- if ( $render ) {
- $state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
- } else {
- $state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
- }
-
- $stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
- $stripped = $q[1];
- }
+ # When $stripcomments is set, HTML comments <!-- like this -->
+ # will be stripped in addition to other tags. This is important
+ # for section editing, where these comments cause confusion when
+ # counting the sections in the wikisource
+ function strip( $text, &$state, $stripcomments = false )
+ {
+ # Stripped sections are only rendered when producing HTML; for any
+ # other output type the original tag is put back around the content.
+ $render = ($this->mOutputType == OT_HTML);
+ $nowiki_content = array();
+ $hiero_content = array();
+ $math_content = array();
+ $pre_content = array();
+ $comment_content = array();
+
+ # Replace any instances of the placeholders
+ $uniq_prefix = UNIQ_PREFIX;
+ #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
+
+ $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
+ foreach( $nowiki_content as $marker => $content ){
+ if( $render ){
+ $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
+ } else {
+ $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
}
- } else {
- $stripped2 = $stripped;
}
- while ( "" != $stripped2 ) {
- $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
- $stripped3 .= $p[0];
- if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
- $stripped2 = "";
+ $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
+ foreach( $hiero_content as $marker => $content ){
+ if( $render && $GLOBALS['wgUseWikiHiero']){
+ $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
+ } elseif( $render ) {
+ # Extension disabled: show the source as literal text. Stripped
+ # content is re-inserted by unstrip() after HTML sanitising, so it
+ # must be escaped here (same escaping as <nowiki> content).
+ $hiero_content[$marker] = "&lt;hiero&gt;" . wfEscapeHTMLTagsOnly( $content ) . "&lt;/hiero&gt;";
} else {
- $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
- ++$state['presecs'];
+ $hiero_content[$marker] = "<hiero>$content</hiero>";
+ }
+ }
- if ( $render ) {
- $state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
+ $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
+ foreach( $math_content as $marker => $content ){
+ if( $render ) {
+ if( $this->mOptions->getUseTeX() ) {
+ $math_content[$marker] = renderMath( $content );
} else {
- $state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
+ # TeX disabled: show the source as literal text, with a proper
+ # closing tag and with the user-supplied content escaped.
+ $math_content[$marker] = "&lt;math&gt;" . wfEscapeHTMLTagsOnly( $content ) . "&lt;/math&gt;";
}
-
- $stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
- $stripped2 = $q[1];
+ } else {
+ $math_content[$marker] = "<math>$content</math>";
+ }
+ }
+
+ $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
+ foreach( $pre_content as $marker => $content ){
+ if( $render ){
+ $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
+ } else {
+ $pre_content[$marker] = "<pre>$content</pre>";
}
}
- return $stripped3;
+ if($stripcomments) {
+ $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
+ foreach( $comment_content as $marker => $content ){
+ $comment_content[$marker] = "<!--$content-->";
+ }
+ }
+
+ # Merge state with the pre-existing state, if there is one.
+ # The key order matters: unstrip() expands the groups in reverse.
+ $extracted = array(
+ 'nowiki' => $nowiki_content,
+ 'hiero' => $hiero_content,
+ 'math' => $math_content,
+ 'pre' => $pre_content,
+ 'comment' => $comment_content
+ );
+ if ( $state ) {
+ # A state seeded elsewhere (insertStripItem() creates one without
+ # a 'comment' group) may lack some keys, so check each one before
+ # applying the array union.
+ foreach ( $extracted as $kind => $items ) {
+ $state[$kind] = isset( $state[$kind] ) ? $state[$kind] + $items : $items;
+ }
+ } else {
+ $state = $extracted;
+ }
+ return $text;
}
function unstrip( $text, &$state )
{
- for ( $i = 1; $i <= $state['presecs']; ++$i ) {
- $text = str_replace( $state['preunq'] . sprintf("%08X", $i), $state['prelist'][$i], $text );
+ # Replaces every marker recorded in $state (an array of groups, each
+ # mapping marker => replacement text) by its replacement and returns
+ # the resulting text.
+ if ( !is_array( $state ) ) {
+ return $text;
+ }
+ # Must expand in reverse order, otherwise nested tags will be corrupted
+ # Iterating reversed copies (rather than end()/prev() with a
+ # "!== false" test) keeps a stored value of false from ending the
+ # loop early and leaving the remaining markers in the output.
+ foreach ( array_reverse( $state, true ) as $contentDict ) {
+ foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
+ $text = str_replace( $marker, $content, $text );
+ }
}
- for ( $i = 1; $i <= $state['mathsecs']; ++$i ) {
- $text = str_replace( $state['mathunq'] . sprintf("%08X", $i), $state['mathlist'][$i], $text );
- }
+ return $text;
+ }
+
+ # Add an item to the strip state
+ # Returns the unique tag which must be inserted into the stripped text
+ # The tag will be replaced with the original text in unstrip()
- for ( $i = 1; $i <= $state['nwsecs']; ++$i ) {
- $text = str_replace( $state['nwunq'] . sprintf("%08X", $i), $state['nwlist'][$i], $text );
+ function insertStripItem( $text, &$state )
+ {
+ # $text   text that unstrip() should put back in place of the marker
+ # $state  in/out: strip state; created here if it is still empty
+ $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
+ if ( !$state ) {
+ # Seed every group strip() merges into, so that a later strip() on
+ # this state finds all of its keys ('comment' included).
+ $state = array(
+ 'nowiki' => array(),
+ 'hiero' => array(),
+ 'math' => array(),
+ 'pre' => array(),
+ 'comment' => array(),
+ 'item' => array()
+ );
}
- return $text;
+ $state['item'][$rnd] = $text;
+ return $rnd;
}
+ # This method generates the list of subcategories and pages for a category
function categoryMagic ()
{
global $wgLang , $wgUser ;
- if ( !$this->mOptions->getUseCategoryMagic() ) return ;
- $id = $this->mTitle->getArticleID() ;
- $cat = ucfirst ( wfMsg ( "category" ) ) ;
- $ti = $this->mTitle->getText() ;
- $ti = explode ( ":" , $ti , 2 ) ;
- if ( $cat != $ti[0] ) return "" ;
- $r = "<br break=all>\n" ;
+ if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
- $articles = array() ;
- $parents = array () ;
- $children = array() ;
+ $cns = Namespace::getCategory() ;
+ if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
+
+ $r = "<br style=\"clear:both;\"/>\n";
-# $sk =& $this->mGetSkin();
$sk =& $wgUser->getSkin() ;
- $doesexist = false ;
- if ( $doesexist ) {
- $sql = "SELECT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
+ $articles = array() ;
+ $children = array() ;
+ $data = array () ;
+ $id = $this->mTitle->getArticleID() ;
+
+ # For existing categories
+ if( $id ) {
+ $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
+ $res = wfQuery ( $sql, DB_READ ) ;
+ while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
} else {
- $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
+ # For non-existing categories
+ $t = wfStrencode( $this->mTitle->getPrefixedDBKey() );
+ $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ;
+ $res = wfQuery ( $sql, DB_READ ) ;
+ while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
}
- $res = wfQuery ( $sql, DB_READ ) ;
- while ( $x = wfFetchObject ( $res ) )
+ # For all pages that link to this category
+ foreach ( $data AS $x )
{
- # $t = new Title ;
- # $t->newFromDBkey ( $x->l_from ) ;
- # $t = $t->getText() ;
$t = $wgLang->getNsText ( $x->cur_namespace ) ;
if ( $t != "" ) $t .= ":" ;
$t .= $x->cur_title ;
- $y = explode ( ":" , $t , 2 ) ;
- if ( count ( $y ) == 2 && $y[0] == $cat ) {
- array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
+ if ( $x->cur_namespace == $cns ) {
+ array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
} else {
- array_push ( $articles , $sk->makeLink ( $t ) ) ;
+ array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
}
}
wfFreeResult ( $res ) ;
- # Children
+ # Showing subcategories
if ( count ( $children ) > 0 )
{
asort ( $children ) ;
$r .= implode ( ", " , $children ) ;
}
- # Articles
+ # Showing pages in this category
if ( count ( $articles ) > 0 )
{
+ $ti = $this->mTitle->getText() ;
asort ( $articles ) ;
- $h = wfMsg( "category_header", $ti[1] );
+ $h = wfMsg( "category_header", $ti );
$r .= "<h2>{$h}</h2>\n" ;
$r .= implode ( ", " , $articles ) ;
}
{
if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
$htmlattrs = $this->getHTMLattrs() ;
-
+
# Strip non-approved attributes from the tag
$t = preg_replace(
"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
# Strip javascript "expression" from stylesheets. Brute force approach:
# If anythin offensive is found, all attributes of the HTML tag are dropped
- if( preg_match(
+ if( preg_match(
"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
wfMungeToUtf8( $t ) ) )
{
return trim ( $t ) ;
}
+ /* interface with html tidy, used if $wgUseTidy = true */
+ # Wraps $text in a minimal XHTML document, pipes it through the
+ # external tidy binary and returns whatever tidy wrote to stdout.
+ # If nothing comes back, an error heading followed by the escaped
+ # input document is returned instead.
+ function tidy ( $text ) {
+     global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
+     global $wgInputEncoding, $wgOutputEncoding;
+     $cleansource = '';
+     # Build the encoding switch on a local copy: appending to the global
+     # would add another switch on every call.
+     $opts = $wgTidyOpts;
+     switch(strtoupper($wgOutputEncoding)) {
+         case 'ISO-8859-1':
+             $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
+             break;
+         case 'UTF-8':
+             $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
+             break;
+         default:
+             $opts .= ' -raw';
+     }
+
+     $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
+' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
+'<head><title>test</title></head><body>'.$text.'</body></html>';
+     # stdin and stdout are pipes; tidy's stderr is discarded.
+     $descriptorspec = array(
+         0 => array("pipe", "r"),
+         1 => array("pipe", "w"),
+         2 => array("file", "/dev/null", "a")
+     );
+     # NOTE(review): binary, config path and options are interpolated
+     # into the command line unescaped; they are assumed to be trusted
+     # site configuration.
+     $process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
+     if (is_resource($process)) {
+         fwrite($pipes[0], $text);
+         fclose($pipes[0]);
+         while (!feof($pipes[1])) {
+             $cleansource .= fgets($pipes[1], 1024);
+         }
+         fclose($pipes[1]);
+         proc_close($process);
+     }
+     if( $cleansource == '' && $text != '') {
+         return '<h2>'.wfMsg('seriousxhtmlerrors').'</h2><pre>'.htmlspecialchars($text).'</pre>';
+     } else {
+         return $cleansource;
+     }
+ }
+
function doTableStuff ( $t )
{
$t = explode ( "\n" , $t ) ;
$ltr = array () ; # tr attributes
foreach ( $t AS $k => $x )
{
- $x = rtrim ( $x ) ;
+ $x = trim ( $x ) ;
$fc = substr ( $x , 0 , 1 ) ;
if ( "{|" == substr ( $x , 0 , 2 ) )
{
- $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
+ $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
array_push ( $td , false ) ;
array_push ( $ltd , "" ) ;
array_push ( $tr , false ) ;
$t[$k] = $z ;
}
/* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
- {
+ {
$z = trim ( substr ( $x , 2 ) ) ;
$t[$k] = "<caption>{$z}</caption>\n" ;
}*/
{
$z = "" ;
if ( $fc != "+" )
- {
+ {
$tra = array_pop ( $ltr ) ;
if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
array_push ( $tr , true ) ;
$l = array_pop ( $ltd ) ;
if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
- if ( $fc == "|" ) $l = "TD" ;
- else if ( $fc == "!" ) $l = "TH" ;
- else if ( $fc == "+" ) $l = "CAPTION" ;
+ if ( $fc == "|" ) $l = "td" ;
+ else if ( $fc == "!" ) $l = "th" ;
+ else if ( $fc == "+" ) $l = "caption" ;
else $l = "" ;
array_push ( $ltd , $l ) ;
$y = explode ( "|" , $theline , 2 ) ;
return $t ;
}
- # Well, OK, it's actually about 14 passes. But since all the
- # hard lifting is done inside PHP's regex code, it probably
- # wouldn't speed things up much to add a real parser.
- #
- function doWikiPass2( $text, $linestart )
+ // set isMain=false if you call from a template etc. and don't want to do stuff
+ // like TOC insertion for that content
+ function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
- $fname = "OutputPage::doWikiPass2";
+ $fname = "Parser::internalParse";
wfProfileIn( $fname );
-
+
$text = $this->removeHTMLtags( $text );
- $text = $this->replaceVariables( $text );
+ $text = $this->replaceVariables( $text, $args );
# $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
- $text = str_replace ( "<HR>", "<hr>", $text );
$text = $this->doHeadings( $text );
- $text = $this->doBlockLevels( $text, $linestart );
-
if($this->mOptions->getUseDynamicDates()) {
global $wgDateFormatter;
$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
}
-
$text = $this->replaceExternalLinks( $text );
- $text = $this->replaceInternalLinks ( $text );
+ $text = $this->doTokenizedParser ( $text );
$text = $this->doTableStuff ( $text ) ;
-
- $text = $this->formatHeadings( $text );
-
+ $text = $this->formatHeadings( $text, $isMain );
$sk =& $this->mOptions->getSkin();
$text = $sk->transformContent( $text );
- $text .= $this->categoryMagic () ;
+
+ if ( !isset ( $this->categoryMagicDone ) ) {
+ $text .= $this->categoryMagic () ;
+ $this->categoryMagicDone = true ;
+ }
wfProfileOut( $fname );
return $text;
/* private */ function replaceExternalLinks( $text )
{
- $fname = "OutputPage::replaceExternalLinks";
+ $fname = "Parser::replaceExternalLinks";
wfProfileIn( $fname );
$text = $this->subReplaceExternalLinks( $text, "http", true );
$text = $this->subReplaceExternalLinks( $text, "https", true );
wfProfileOut( $fname );
return $text;
}
-
+
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
-
- # this is the list of separators that should be ignored if they
+
+ # this is the list of separators that should be ignored if they
# are the last character of an URL but that should be included
# if they occur within the URL, e.g. "go to www.foo.com, where .."
# in this case, the last comma should not become part of the URL,
# but in "www.foo.com/123,2342,32.htm" it should.
- $sep = ",;\.:";
+ $sep = ",;\.:";
$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
$images = "gif|png|jpg|jpeg";
# that the content of the string should be inserted there).
$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
"((?i){$images})([^{$uc}]|$)/";
-
+
$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
$sk =& $this->mOptions->getSkin();
} else if ( preg_match( $e2, $line, $m ) ) {
$link = "{$protocol}:{$m[1]}";
$text = $m[2];
- $trail = $m[3];
+ $trail = $m[3];
} else {
$s .= "[{$protocol}:" . $line;
continue;
}
- if ( $this->mOptions->getPrintable() ) $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
- else $paren = "";
+ if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
+ $paren = "";
+ } else {
+ # Expand the URL for printable version
+ $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
+ }
$la = $sk->getExternalLinkAttributes( $link, $text );
$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
/* private */ function handle3Quotes( &$state, $token )
{
- if ( $state["strong"] ) {
- if ( $state["em"] && $state["em"] > $state["strong"] )
+ if ( $state["strong"] !== false ) {
+ if ( $state["em"] !== false && $state["em"] > $state["strong"] )
{
# ''' lala ''lala '''
$s = "</em></strong><em>";
$state["strong"] = FALSE;
} else {
$s = "<strong>";
- $state["strong"] = $token["pos"];
+ $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
}
return $s;
}
/* private */ function handle2Quotes( &$state, $token )
{
- if ( $state["em"] ) {
- if ( $state["strong"] && $state["strong"] > $state["em"] )
+ if ( $state["em"] !== false ) {
+ if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
{
# ''lala'''lala'' ....'''
$s = "</strong></em><strong>";
$state["em"] = FALSE;
} else {
$s = "<em>";
- $state["em"] = $token["pos"];
+ $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
+
}
return $s;
}
-
+
+ # Handles a ''''' token. $state["em"] and $state["strong"] are false
+ # while the tag is closed, otherwise the position of the token that
+ # opened it. Updates $state and returns the HTML tags to emit.
/* private */ function handle5Quotes( &$state, $token )
{
- if ( $state["em"] && $state["strong"] ) {
+ $s = "";
+ # Tokens without a position still mark the tag as open (true), the
+ # same fallback every branch below now shares.
+ $pos = isset($token["pos"]) ? $token["pos"] : true;
+ if ( $state["em"] !== false && $state["strong"] !== false ) {
if ( $state["em"] < $state["strong"] ) {
$s .= "</strong></em>";
} else {
$s .= "</em></strong>";
}
$state["strong"] = $state["em"] = FALSE;
- } elseif ( $state["em"] ) {
+ } elseif ( $state["em"] !== false ) {
$s .= "</em><strong>";
$state["em"] = FALSE;
- $state["strong"] = $token["pos"];
+ $state["strong"] = $pos;
- } elseif ( $state["strong"] ) {
+ } elseif ( $state["strong"] !== false ) {
$s .= "</strong><em>";
$state["strong"] = FALSE;
- $state["em"] = $token["pos"];
+ $state["em"] = $pos;
} else { # not $em and not $strong
$s .= "<strong><em>";
- $state["strong"] = $state["em"] = $token["pos"];
+ $state["strong"] = $state["em"] = $pos;
}
return $s;
}
- /* private */ function replaceInternalLinks( $str )
+ /* private */ function doTokenizedParser( $str )
{
global $wgLang; # for language specific parser hook
+ global $wgUploadDirectory, $wgUseTimeline;
$tokenizer=Tokenizer::newFromString( $str );
$tokenStack = array();
-
+
$s="";
$state["em"] = FALSE;
$state["strong"] = FALSE;
$tagIsOpen = FALSE;
$threeopen = false;
-
+
# The tokenizer splits the text into tokens and returns them one by one.
# Every call to the tokenizer returns a new token.
while ( $token = $tokenizer->nextToken() )
# simple text with no further markup
$txt = $token["text"];
break;
+ case "blank":
+ # Text that contains blanks that have to be converted to
+ # non-breakable spaces for French.
+ # U+202F NARROW NO-BREAK SPACE might be a better choice, but
+ # browser support for Unicode spacing is poor.
+ $txt = str_replace( " ", " ", $token["text"] );
+ break;
case "[[[":
# remember the tag opened with 3 [
$threeopen = true;
array_push( $tokenStack, $token );
$txt="";
break;
-
+
case "]]]":
case "]]":
# link close tag.
# get text from stack, glue it together, and call the code to handle a
# link
-
+
if ( count( $tokenStack ) == 0 )
{
# stack empty. Found a ]] without an opening [[
}
$lastToken = array_pop( $tokenStack );
}
-
+
$txt = $linkText ."]]";
-
+
if( isset( $lastToken["text"] ) ) {
$prefix = $lastToken["text"];
} else {
$prefix = "";
}
$nextToken = $tokenizer->previewToken();
- if ( $nextToken["type"] == "text" )
+ if ( $nextToken["type"] == "text" )
{
# Preview just looks at it. Now we have to fetch it.
$nextToken = $tokenizer->nextToken();
$txt .= $nextToken["text"];
}
- $txt = $this->handleInternalLink( $txt, $prefix );
+ $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
- # did the tag start with 3 [ ?
+ # did the tag start with 3 [ ?
if($threeopen) {
# show the first as text
$txt = "[".$txt;
$threeopen=false;
}
-
+
}
$tagIsOpen = (count( $tokenStack ) != 0);
break;
case "----":
- $txt = "\n<hr>\n";
+ $txt = "\n<hr />\n";
break;
case "'''":
# This and the three next ones handle quotes
$txt = $this->doMagicISBN( $tokenizer );
}
break;
+ case "<timeline>":
+ if ( $wgUseTimeline &&
+ "" != ( $timelinesrc = $tokenizer->readAllUntil("</timeline>") ) )
+ {
+ $txt = renderTimeline( $timelinesrc );
+ } else {
+ $txt=$token["text"];
+ }
+ break;
default:
# Call language specific Hook.
$txt = $wgLang->processToken( $token, $tokenStack );
{
global $wgLang, $wgLinkCache;
global $wgNamespacesWithSubpages, $wgLanguageCode;
- static $fname = "OutputPage::replaceInternalLinks" ;
+ static $fname = "Parser::handleInternalLink" ;
wfProfileIn( $fname );
wfProfileIn( "$fname-setup" );
#$e2 = "/^(.*)\\b(\\w+)\$/suD";
#$e2 = "/^(.*\\s)(\\S+)\$/suD";
static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
-
+
# Special and Media are pseudo-namespaces; no pages actually exist in them
static $image = FALSE;
if ( !$image ) { $image = Namespace::getImage(); }
if ( !$special ) { $special = Namespace::getSpecial(); }
if ( !$media ) { $media = Namespace::getMedia(); }
- if ( !$category ) { $category = wfMsg ( "category" ) ; }
-
+ if ( !$category ) { $category = Namespace::getCategory(); }
+
$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
wfProfileOut( "$fname-setup" );
$s = "";
-
+
if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
$text = $m[2];
- $trail = $m[3];
+ $trail = $m[3];
} else { # Invalid form; output directly
$s .= $prefix . "[[" . $line ;
return $s;
}
-
+
/* Valid link forms:
Foobar -- normal
:Foobar -- override special treatment of prefix (images, language links)
$noforce = ($c != ":");
if( $c == "/" ) { # subpage
if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
- $m[1]=substr($m[1],1,strlen($m[1])-2);
+ $m[1]=substr($m[1],1,strlen($m[1])-2);
$noslash=$m[1];
} else {
$noslash=substr($m[1],1);
if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
if( "" == $text ) {
- $text= $m[1];
+ $text= $m[1];
} # this might be changed for ugliness reasons
} else {
$link = $noslash; # no subpage allowed, use standard link
if( $noforce ) {
if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
- $s .= $prefix . $trail;
- return $s;
+ $s .= $prefix . $trail ;
+ return (trim($s) == '')? '': $s;
}
if( $ns == $image ) {
$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
$wgLinkCache->addImageLinkObj( $nt );
return $s;
}
+ if ( $ns == $category ) {
+ $t = $nt->getText() ;
+ $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
+ $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
+ $this->mOutput->mCategoryLinks[] = $t ;
+ $s .= $prefix . $trail ;
+ return $s ;
+ }
}
if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
( strpos( $link, "#" ) == FALSE ) ) {
- $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
+ # Self-links are handled specially; generally de-link and change to bold.
+ $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
return $s;
}
- # Category feature
- $catns = strtoupper ( $nt->getDBkey () ) ;
- $catns = explode ( ":" , $catns ) ;
- if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
- else $catns = "" ;
- if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
- $t = explode ( ":" , $nt->getText() ) ;
- array_shift ( $t ) ;
- $t = implode ( ":" , $t ) ;
- $t = $wgLang->ucFirst ( $t ) ;
- $nnt = Title::newFromText ( $category.":".$t ) ;
- $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
- $this->mCategoryLinks[] = $t ;
- $s .= $prefix . $trail ;
- return $s ;
- }
if( $ns == $media ) {
$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
$wgLinkCache->addImageLinkObj( $nt );
+ # Close whatever section is currently recorded in $this->mLastSection.
+ # Returns the closing tag followed by a newline, or an empty string when
+ # no section is open. As a side effect the parser leaves <pre> mode
+ # (mInPre) and forgets the last section.
/* private */ function closeParagraph()
{
$result = "";
- if ( 0 != strcmp( "p", $this->mLastSection ) &&
- 0 != strcmp( "", $this->mLastSection ) ) {
- $result = "</" . $this->mLastSection . ">";
+ if ( '' != $this->mLastSection ) {
+ $result = "</" . $this->mLastSection . ">\n";
}
+ $this->mInPre = false;
$this->mLastSection = "";
- return $result."\n";
+ return $result;
}
# getCommon() returns the length of the longest common substring
# of both arguments, starting at the beginning of both.
return $text."\n";
}
- /* private */ function doBlockLevels( $text, $linestart )
- {
- $fname = "OutputPage::doBlockLevels";
+ # Turn line-oriented markup into block-level HTML: lists (* # : ;),
+ # paragraphs and space-indented <pre> blocks.
+ #
+ # $text is split on "\n" and handled one line at a time.
+ # $linestart: when false, the first line is copied to the output
+ # untouched instead of being examined for block markup.
+ # Returns the accumulated output string.
+ #
+ # State kept across lines: $lastPrefix (list prefix of the previous list
+ # line), $inBlockElem (inside an explicit HTML block element),
+ # $paragraphStack (a pending paragraph opener that is only emitted once a
+ # following line decides how to use it), $this->mLastSection and
+ # $this->mInPre.
+ /* private */ function doBlockLevels( $text, $linestart ) {
+ $fname = "Parser::doBlockLevels";
wfProfileIn( $fname );
+
# Parsing through the text line by line. The main thing
# happening here is handling of block-level elements p, pre,
# and making lists from lines starting with * # : etc.
#
- $a = explode( "\n", $text );
- $text = $lastPref = "";
- $this->mDTopen = $inBlockElem = false;
+ $textLines = explode( "\n", $text );
- if ( ! $linestart ) { $text .= array_shift( $a ); }
- foreach ( $a as $t ) {
- if ( "" != $text ) { $text .= "\n"; }
+ $lastPrefix = $output = $lastLine = '';
+ $this->mDTopen = $inBlockElem = false;
+ $prefixLength = 0;
+ $paragraphStack = false;
- $oLine = $t;
- $opl = strlen( $lastPref );
- $npl = strspn( $t, "*#:;" );
- $pref = substr( $t, 0, $npl );
- $pref2 = str_replace( ";", ":", $pref );
- $t = substr( $t, $npl );
+ if ( !$linestart ) {
+ $output .= array_shift( $textLines );
+ }
+ foreach ( $textLines as $oLine ) {
+ $lastPrefixLength = strlen( $lastPrefix );
+ $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
+ $preOpenMatch = preg_match("/<pre/i", $oLine );
+ if (!$this->mInPre) {
+ $this->mInPre = !empty($preOpenMatch);
+ }
+ if ( !$this->mInPre ) {
+ # Multiple prefixes may abut each other for nested lists.
+ $prefixLength = strspn( $oLine, "*#:;" );
+ $pref = substr( $oLine, 0, $prefixLength );
+
+ # ";" and ":" are treated as the same prefix character when
+ # comparing with the previous line's prefix.
+ $pref2 = str_replace( ";", ":", $pref );
+ $t = substr( $oLine, $prefixLength );
+ } else {
+ # Don't interpret any other prefixes in preformatted text
+ $prefixLength = 0;
+ $pref = $pref2 = '';
+ $t = $oLine;
+ }
- if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
- $text .= $this->nextItem( substr( $pref, -1 ) );
+ # List generation
+ if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
+ # Same as the last item, so no need to deal with nesting or opening stuff
+ $output .= $this->nextItem( substr( $pref, -1 ) );
+ $paragraphStack = false;
if ( ";" == substr( $pref, -1 ) ) {
- $cpos = strpos( $t, ":" );
- if ( ! ( false === $cpos ) ) {
- $term = substr( $t, 0, $cpos );
- $text .= $term . $this->nextItem( ":" );
- $t = substr( $t, $cpos + 1 );
+ # The one nasty exception: definition lists work like this:
+ # ; title : definition text
+ # So we check for : in the remainder text to split up the
+ # title and definition, without b0rking links.
+ # The colon only counts when preceded by whitespace or by a
+ # literal &nbsp; entity.
+ # FIXME: This is not foolproof. Something better in Tokenizer might help.
+ if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
+ $term = $match[1];
+ $output .= $term . $this->nextItem( ":" );
+ $t = $match[2];
}
}
- } else if (0 != $npl || 0 != $opl) {
- $cpl = $this->getCommon( $pref, $lastPref );
-
- while ( $cpl < $opl ) {
- $text .= $this->closeList( $lastPref{$opl-1} );
- --$opl;
+ } elseif( $prefixLength || $lastPrefixLength ) {
+ # Either open or close a level...
+ $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
+ $paragraphStack = false;
+
+ while( $commonPrefixLength < $lastPrefixLength ) {
+ $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
+ --$lastPrefixLength;
}
- if ( $npl <= $cpl && $cpl > 0 ) {
- $text .= $this->nextItem( $pref{$cpl-1} );
+ if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
+ $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
}
- while ( $npl > $cpl ) {
- $char = substr( $pref, $cpl, 1 );
- $text .= $this->openList( $char );
+ while ( $prefixLength > $commonPrefixLength ) {
+ $char = substr( $pref, $commonPrefixLength, 1 );
+ $output .= $this->openList( $char );
if ( ";" == $char ) {
- $cpos = strpos( $t, ":" );
- if ( ! ( false === $cpos ) ) {
- $term = substr( $t, 0, $cpos );
- $text .= $term . $this->nextItem( ":" );
- $t = substr( $t, $cpos + 1 );
+ # FIXME: This is dupe of code above
+ if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
+ $term = $match[1];
+ $output .= $term . $this->nextItem( ":" );
+ $t = $match[2];
}
}
- ++$cpl;
+ ++$commonPrefixLength;
}
- $lastPref = $pref2;
+ $lastPrefix = $pref2;
}
- if ( 0 == $npl ) { # No prefix--go to paragraph mode
- if ( preg_match(
- "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
- $text .= $this->closeParagraph();
- $inBlockElem = true;
- }
- if ( ! $inBlockElem ) {
- if ( " " == $t{0} ) {
- $newSection = "pre";
- # $t = wfEscapeHTML( $t );
+ if( 0 == $prefixLength ) {
+ # No prefix (not in list)--go to paragraph mode
+ $uniq_prefix = UNIQ_PREFIX;
+ // XXX: use a stack for nestable elements like span, table and div
+ $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
+ $closematch = preg_match(
+ "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
+ "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
+ if ( $openmatch or $closematch ) {
+ $paragraphStack = false;
+ $output .= $this->closeParagraph();
+ # closeParagraph() cleared mInPre; re-enter <pre> mode when this
+ # line opens a <pre> that it does not also close.
+ if($preOpenMatch and !$preCloseMatch) {
+ $this->mInPre = true;
+ }
+ if ( $closematch ) {
+ $inBlockElem = false;
+ } else {
+ $inBlockElem = true;
}
- else { $newSection = "p"; }
-
- if ( 0 == strcmp( "", trim( $oLine ) ) ) {
- $text .= $this->closeParagraph();
- $text .= "<" . $newSection . ">";
- } else if ( 0 != strcmp( $this->mLastSection,
- $newSection ) ) {
- $text .= $this->closeParagraph();
- if ( 0 != strcmp( "p", $newSection ) ) {
- $text .= "<" . $newSection . ">";
+ } else if ( !$inBlockElem ) {
+ if ( " " == $t{0} ) {
+ // pre
+ if ($this->mLastSection != 'pre') {
+ $paragraphStack = false;
+ $output .= $this->closeParagraph().'<pre>';
+ $this->mLastSection = 'pre';
+ }
+ } else {
+ // paragraph
+ if ( '' == trim($t) ) {
+ # Blank line: a second consecutive blank emits the pending
+ # opener plus <br/>; the first one only queues an opener.
+ if ( $paragraphStack ) {
+ $output .= $paragraphStack.'<br/>';
+ $paragraphStack = false;
+ $this->mLastSection = 'p';
+ } else {
+ if ($this->mLastSection != 'p' ) {
+ $output .= $this->closeParagraph();
+ $this->mLastSection = '';
+ $paragraphStack = "<p>";
+ } else {
+ $paragraphStack = '</p><p>';
+ }
+ }
+ } else {
+ if ( $paragraphStack ) {
+ $output .= $paragraphStack;
+ $paragraphStack = false;
+ $this->mLastSection = 'p';
+ } else if ($this->mLastSection != 'p') {
+ $output .= $this->closeParagraph().'<p>';
+ $this->mLastSection = 'p';
+ }
}
}
- $this->mLastSection = $newSection;
- }
- if ( $inBlockElem &&
- preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
- $inBlockElem = false;
}
}
- $text .= $t;
+ # While a paragraph opener is pending the line itself is not emitted.
+ if ($paragraphStack === false) {
+ $output .= $t."\n";
+ }
}
- while ( $npl ) {
- $text .= $this->closeList( $pref2{$npl-1} );
- --$npl;
+ # Close any lists still open after the last line.
+ while ( $prefixLength ) {
+ $output .= $this->closeList( $pref2{$prefixLength-1} );
+ --$prefixLength;
}
if ( "" != $this->mLastSection ) {
- if ( "p" != $this->mLastSection ) {
- $text .= "</" . $this->mLastSection . ">";
- }
+ $output .= "</" . $this->mLastSection . ">";
$this->mLastSection = "";
}
+
wfProfileOut( $fname );
- return $text;
+ return $output;
}
- /* private */ function replaceVariables( $text )
+ # Look up the current value of one built-in magic variable.
+ # $index is a MAG_* identifier; identifiers not listed below yield NULL.
+ # Date and time values are computed at call time via date() and
+ # wfTimestampNow().
+ function getVariableValue( $index ) {
+ global $wgLang, $wgSitename, $wgServer;
+
+ $value = NULL;
+ switch ( $index ) {
+ case MAG_CURRENTMONTH:
+ $value = date( "m" );
+ break;
+ case MAG_CURRENTMONTHNAME:
+ $value = $wgLang->getMonthName( date("n") );
+ break;
+ case MAG_CURRENTMONTHNAMEGEN:
+ $value = $wgLang->getMonthNameGen( date("n") );
+ break;
+ case MAG_CURRENTDAY:
+ $value = date("j");
+ break;
+ case MAG_PAGENAME:
+ $value = $this->mTitle->getText();
+ break;
+ case MAG_NAMESPACE:
+ # Uses the language object's namespace text rather than
+ # Namespace::getCanonicalName() (patch by Dori).
+ $value = $wgLang->getNsText($this->mTitle->getNamespace());
+ break;
+ case MAG_CURRENTDAYNAME:
+ # date("w") is 0-based; getWeekdayName() is called with 1..7.
+ $value = $wgLang->getWeekdayName( date("w")+1 );
+ break;
+ case MAG_CURRENTYEAR:
+ $value = date( "Y" );
+ break;
+ case MAG_CURRENTTIME:
+ $value = $wgLang->time( wfTimestampNow(), false );
+ break;
+ case MAG_NUMBEROFARTICLES:
+ $value = wfNumberOfArticles();
+ break;
+ case MAG_SITENAME:
+ $value = $wgSitename;
+ break;
+ case MAG_SERVER:
+ $value = $wgServer;
+ break;
+ }
+ return $value;
+ }
+
+ # (Re)build $this->mVariables: for every id in $wgVariableIDs, compute the
+ # variable's current value and let its MagicWord register that value in
+ # the array via addToArray().
+ function initialiseVariables()
+ {
+ global $wgVariableIDs;
+
+ $this->mVariables = array();
+ foreach ( $wgVariableIDs as $variableId ) {
+ $magicWord =& MagicWord::get( $variableId );
+ $currentValue = $this->getVariableValue( $variableId );
+ $magicWord->addToArray( $this->mVariables, $currentValue );
+ }
+ }
+
+ # Expand {{...}} constructs in $text.
+ #
+ # Every match of an optional leading newline, "{{", a run of legal title
+ # characters, an optional "|..." argument part and "}}" is handed to the
+ # wfBraceSubstitution callback, which reaches this parser through
+ # $GLOBALS['wgCurParser'].
+ #
+ # $text: text to expand.
+ # $args: arguments supplied by the including page; they are pushed on
+ # $this->mArgStack for the duration of the call.
+ # Returns the text with the callback's replacements applied.
+ /* private */ function replaceVariables( $text, $args = array() )
{
- global $wgLang, $wgCurOut;
- $fname = "OutputPage::replaceVariables";
+ $fname = "Parser::replaceVariables";
wfProfileIn( $fname );
- $magic = array();
+ if ( !$this->mVariables ) {
+ $this->initialiseVariables();
+ }
+ $titleChars = Title::legalChars();
+ $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
+
+ # This function is called recursively. To keep track of arguments we need a stack:
+ array_push( $this->mArgStack, $args );
- # Basic variables
- # See Language.php for the definition of each magic word
- # As with sigs, this uses the server's local time -- ensure
- # this is appropriate for your audience!
+ # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
+ $GLOBALS['wgCurParser'] =& $this;
+ $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
- $magic[MAG_CURRENTMONTH] = date( "m" );
- $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
- $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
- $magic[MAG_CURRENTDAY] = date("j");
- $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
- $magic[MAG_CURRENTYEAR] = date( "Y" );
- $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
-
- $this->mOutput->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
+ array_pop( $this->mArgStack );
+
+ # Balance the wfProfileIn() above; the function previously returned
+ # without closing its profiling section.
+ wfProfileOut( $fname );
+ return $text;
+ }
- $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
- if ( $mw->match( $text ) ) {
- $v = wfNumberOfArticles();
- $text = $mw->replace( $v, $text );
- if( $mw->getWasModified() ) { $this->mOutput->mContainsOldMagic++; }
+ # Resolve a single {{...}} match to its replacement text.
+ #
+ # $matches is the match array for the pattern built in replaceVariables():
+ # [0] whole match, [1] optional leading newline, [2] the part before the
+ # first "|", [3] the "|..." argument part (empty when absent).
+ #
+ # Candidates are tried in this order, the first success wins:
+ # SUBST handling, MSGNW/MSG/INT, NS, LOCALURL/LOCALURLE, built-in
+ # variables, arguments passed by the caller, and finally loading a page
+ # from the database (template namespace by default).
+ #
+ # Returns the replacement text, or $matches[0] unchanged when nothing
+ # matched. For HTML output the replacement is parsed recursively and
+ # returned as a strip-state placeholder (see insertStripItem below).
+ #
+ # NOTE(review): $fname is assigned but no wfProfileIn()/wfProfileOut()
+ # call appears in this function.
+ function braceSubstitution( $matches )
+ {
+ global $wgLinkCache, $wgLang;
+ $fname = "Parser::braceSubstitution";
+ $found = false;
+ $nowiki = false;
+ $title = NULL;
+
+ # $newline is an optional newline character before the braces
+ # $part1 is the bit before the first |, and must contain only title characters
+ # $args is a list of arguments, starting from index 0, not including $part1
+
+ $newline = $matches[1];
+ $part1 = $matches[2];
+ # If the third subpattern matched anything, it will start with |
+ if ( $matches[3] !== "" ) {
+ $args = explode( "|", substr( $matches[3], 1 ) );
+ } else {
+ $args = array();
+ }
+ $argc = count( $args );
+
+ # SUBST
+ # In the OT_WIKI (pre-save) pass only SUBST-prefixed constructs are
+ # expanded; in every other pass a SUBST-prefixed construct is left as is.
+ $mwSubst =& MagicWord::get( MAG_SUBST );
+ if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
+ if ( $this->mOutputType != OT_WIKI ) {
+ # Invalid SUBST not replaced at PST time
+ # Return without further processing
+ $text = $matches[0];
+ $found = true;
+ }
+ } elseif ( $this->mOutputType == OT_WIKI ) {
+ # SUBST not found in PST pass, do nothing
+ $text = $matches[0];
+ $found = true;
}
- # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
- # The callbacks are at the bottom of this file
- $wgCurOut = $this;
- $mw =& MagicWord::get( MAG_MSG );
- $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
- if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
+ # MSG, MSGNW and INT
+ if ( !$found ) {
+ # Check for MSGNW:
+ $mwMsgnw =& MagicWord::get( MAG_MSGNW );
+ if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
+ $nowiki = true;
+ } else {
+ # Remove obsolete MSG:
+ $mwMsg =& MagicWord::get( MAG_MSG );
+ $mwMsg->matchStartAndRemove( $part1 );
+ }
- $mw =& MagicWord::get( MAG_MSGNW );
- $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
- if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
+ # Check if it is an internal message
+ $mwInt =& MagicWord::get( MAG_INT );
+ if ( $mwInt->matchStartAndRemove( $part1 ) ) {
+ # Internal messages count against the inclusion limit too.
+ if ( $this->incrementIncludeCount( "int:$part1" ) ) {
+ $text = wfMsgReal( $part1, $args, true );
+ $found = true;
+ }
+ }
+ }
- wfProfileOut( $fname );
- return $text;
+ # NS
+ if ( !$found ) {
+ # Check for NS: (namespace expansion)
+ $mwNs = MagicWord::get( MAG_NS );
+ if ( $mwNs->matchStartAndRemove( $part1 ) ) {
+ # A non-zero number is used as a namespace index directly;
+ # anything else is looked up as a canonical namespace name.
+ if ( intval( $part1 ) ) {
+ $text = $wgLang->getNsText( intval( $part1 ) );
+ $found = true;
+ } else {
+ $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
+ if ( !is_null( $index ) ) {
+ $text = $wgLang->getNsText( $index );
+ $found = true;
+ }
+ }
+ }
+ }
+
+ # LOCALURL and LOCALURLE
+ if ( !$found ) {
+ $mwLocal = MagicWord::get( MAG_LOCALURL );
+ $mwLocalE = MagicWord::get( MAG_LOCALURLE );
+
+ # $func holds the name of the Title method to call, or '' when
+ # neither magic word matched.
+ if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
+ $func = 'getLocalURL';
+ } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
+ $func = 'escapeLocalURL';
+ } else {
+ $func = '';
+ }
+
+ if ( $func !== '' ) {
+ $title = Title::newFromText( $part1 );
+ if ( !is_null( $title ) ) {
+ if ( $argc > 0 ) {
+ $text = $title->$func( $args[0] );
+ } else {
+ $text = $title->$func();
+ }
+ $found = true;
+ }
+ }
+ }
+
+ # Internal variables
+ if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
+ $text = $this->mVariables[$part1];
+ $found = true;
+ $this->mOutput->mContainsOldMagic = true;
+ }
+
+ # Arguments input from the caller
+ # The top of mArgStack holds the arguments of the replaceVariables()
+ # call currently in progress.
+ $inputArgs = end( $this->mArgStack );
+ if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
+ $text = $inputArgs[$part1];
+ $found = true;
+ }
+
+ # Load from database
+ if ( !$found ) {
+ $title = Title::newFromText( $part1, NS_TEMPLATE );
+ if ( !is_null( $title ) && !$title->isExternal() ) {
+ # Check for excessive inclusion
+ $dbk = $title->getPrefixedDBkey();
+ if ( $this->incrementIncludeCount( $dbk ) ) {
+ $article = new Article( $title );
+ $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
+ if ( $articleContent !== false ) {
+ $found = true;
+ $text = $articleContent;
+
+ }
+ }
+
+ # If the title is valid but undisplayable, make a link to it
+ if ( $this->mOutputType == OT_HTML && !$found ) {
+ $text = "[[" . $title->getPrefixedText() . "]]";
+ $found = true;
+ }
+ }
+ }
+
+ # Recursive parsing, escaping and link table handling
+ # Only for HTML output
+ if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
+ $text = wfEscapeWikiText( $text );
+ } elseif ( $this->mOutputType == OT_HTML && $found ) {
+ # Clean up argument array
+ # Positional arguments are numbered from 1; "name=value" arguments
+ # are stored under their trimmed name.
+ $assocArgs = array();
+ $index = 1;
+ foreach( $args as $arg ) {
+ $eqpos = strpos( $arg, "=" );
+ if ( $eqpos === false ) {
+ $assocArgs[$index++] = $arg;
+ } else {
+ $name = trim( substr( $arg, 0, $eqpos ) );
+ $value = trim( substr( $arg, $eqpos+1 ) );
+ # NOTE(review): trim() returns a string, so the two
+ # comparisons with false below look like they can never
+ # trigger - confirm the intended handling of empty
+ # names and values.
+ if ( $value === false ) {
+ $value = "";
+ }
+ if ( $name !== false ) {
+ $assocArgs[$name] = $value;
+ }
+ }
+ }
+
+ # Do not enter included links in link table
+ if ( !is_null( $title ) ) {
+ $wgLinkCache->suspend();
+ }
+
+ # Run full parser on the included text
+ $text = $this->strip( $text, $this->mStripState );
+ $text = $this->internalParse( $text, (bool)$newline, $assocArgs, false );
+ if(!empty($newline)) $text = "\n".$text;
+
+ # Add the result to the strip state for re-inclusion after
+ # the rest of the processing
+ $text = $this->insertStripItem( $text, $this->mStripState );
+
+ # Resume the link cache and register the inclusion as a link
+ if ( !is_null( $title ) ) {
+ $wgLinkCache->resume();
+ $wgLinkCache->addLinkObj( $title );
+ }
+ }
+
+ if ( !$found ) {
+ return $matches[0];
+ } else {
+ return $text;
+ }
+ }
+
+ # Record one more inclusion of the entity keyed by $dbk and report
+ # whether that inclusion is still allowed, i.e. whether the running
+ # count has not exceeded MAX_INCLUDE_REPEAT.
+ function incrementIncludeCount( $dbk )
+ {
+ if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
+ $this->mIncludeCount[$dbk]++;
+ } else {
+ $this->mIncludeCount[$dbk] = 1;
+ }
+ return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
+
# Cleans up HTML, removes dangerous tags and attributes
+ #
+ # Only tags listed in the whitelists below survive, and only when
+ # $wgUserHtml is set; their attributes go through fixTagAttributes().
+ # Every other "<" is emitted as &lt; and stray ">" as &gt;, so
+ # unrecognised markup is shown as text instead of being interpreted.
+ # HTML comments are removed first.
+ #
+ # Without $wgUseTidy the function also enforces nesting: closing tags
+ # must match the innermost open tag, table cells and rows must be inside
+ # a table, non-nestable tags may not be reopened, and any tags still open
+ # at the end are closed. With $wgUseTidy only the whitelist and attribute
+ # filtering are applied here.
+ #
+ # $text: markup to clean. Returns the cleaned markup.
/* private */ function removeHTMLtags( $text )
{
- $fname = "OutputPage::removeHTMLtags";
+ global $wgUseTidy, $wgUserHtml;
+ $fname = "Parser::removeHTMLtags";
wfProfileIn( $fname );
- $htmlpairs = array( # Tags that must be closed
- "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
- "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
- "strike", "strong", "tt", "var", "div", "center",
- "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
- "ruby", "rt" , "rb" , "rp"
- );
- $htmlsingle = array(
- "br", "p", "hr", "li", "dt", "dd"
- );
- $htmlnest = array( # Tags that can be nested--??
- "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
- "dl", "font", "big", "small", "sub", "sup"
- );
- $tabletags = array( # Can only appear inside table
- "td", "th", "tr"
- );
+
+ if( $wgUserHtml ) {
+ $htmlpairs = array( # Tags that must be closed
+ "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
+ "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
+ "strike", "strong", "tt", "var", "div", "center",
+ "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
+ "ruby", "rt" , "rb" , "rp", "p"
+ );
+ $htmlsingle = array(
+ "br", "hr", "li", "dt", "dd"
+ );
+ $htmlnest = array( # Tags that can be nested--??
+ "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
+ "dl", "font", "big", "small", "sub", "sup"
+ );
+ $tabletags = array( # Can only appear inside table
+ "td", "th", "tr"
+ );
+ } else {
+ $htmlpairs = array();
+ $htmlsingle = array();
+ $htmlnest = array();
+ $tabletags = array();
+ }
$htmlsingle = array_merge( $tabletags, $htmlsingle );
$htmlelements = array_merge( $htmlsingle, $htmlpairs );
- $htmlattrs = $this->getHTMLattrs () ;
+ $htmlattrs = $this->getHTMLattrs () ;
# Remove HTML comments
- $text = preg_replace( "/<!--.*-->/sU", "", $text );
+ $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );
$bits = explode( "<", $text );
$text = array_shift( $bits );
- $tagstack = array(); $tablestack = array();
-
- foreach ( $bits as $x ) {
- $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
- preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
- $x, $regs );
- list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
- error_reporting( $prev );
-
- $badtag = 0 ;
- if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
- # Check our stack
- if ( $slash ) {
- # Closing a tag...
- if ( ! in_array( $t, $htmlsingle ) &&
- ( $ot = array_pop( $tagstack ) ) != $t ) {
- array_push( $tagstack, $ot );
- $badtag = 1;
+ if(!$wgUseTidy) {
+ $tagstack = array(); $tablestack = array();
+ foreach ( $bits as $x ) {
+ $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
+ preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
+ $x, $regs );
+ list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
+ error_reporting( $prev );
+
+ $badtag = 0 ;
+ if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+ # Check our stack
+ if ( $slash ) {
+ # Closing a tag...
+ # Compare the tag name with the innermost open tag ("" when
+ # nothing is open). The comparison must be made against the
+ # popped name itself, not against the boolean result of an
+ # "&&" expression, or every mismatched closing tag passes.
+ if ( ! in_array( $t, $htmlsingle ) &&
+ ( $ot = ( count( $tagstack ) ? array_pop( $tagstack ) : "" ) ) != $t ) {
+ # Mismatch: put the open tag back and reject this one.
+ if(!empty($ot)) array_push( $tagstack, $ot );
+ $badtag = 1;
+ } else {
+ if ( $t == "table" ) {
+ $tagstack = array_pop( $tablestack );
+ }
+ $newparams = "";
+ }
} else {
- if ( $t == "table" ) {
- $tagstack = array_pop( $tablestack );
+ # Keep track for later
+ if ( in_array( $t, $tabletags ) &&
+ ! in_array( "table", $tagstack ) ) {
+ $badtag = 1;
+ } else if ( in_array( $t, $tagstack ) &&
+ ! in_array ( $t , $htmlnest ) ) {
+ $badtag = 1 ;
+ } else if ( ! in_array( $t, $htmlsingle ) ) {
+ # A table starts a fresh tag stack; the outer one is
+ # restored when the table is closed.
+ if ( $t == "table" ) {
+ array_push( $tablestack, $tagstack );
+ $tagstack = array();
+ }
+ array_push( $tagstack, $t );
}
- $newparams = "";
+ # Strip non-approved attributes from the tag
+ $newparams = $this->fixTagAttributes($params);
+
}
- } else {
- # Keep track for later
- if ( in_array( $t, $tabletags ) &&
- ! in_array( "table", $tagstack ) ) {
- $badtag = 1;
- } else if ( in_array( $t, $tagstack ) &&
- ! in_array ( $t , $htmlnest ) ) {
- $badtag = 1 ;
- } else if ( ! in_array( $t, $htmlsingle ) ) {
- if ( $t == "table" ) {
- array_push( $tablestack, $tagstack );
- $tagstack = array();
- }
- array_push( $tagstack, $t );
+ if ( ! $badtag ) {
+ $rest = str_replace( ">", "&gt;", $rest );
+ $text .= "<$slash$t $newparams$brace$rest";
+ continue;
}
- # Strip non-approved attributes from the tag
- $newparams = $this->fixTagAttributes($params);
-
}
- if ( ! $badtag ) {
+ # Not an accepted tag: emit it as visible text.
+ $text .= "&lt;" . str_replace( ">", "&gt;", $x);
+ }
+ # Close off any remaining tags
+ while ( $t = array_pop( $tagstack ) ) {
+ $text .= "</$t>\n";
+ if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
+ }
+ } else {
+ # this might be possible using tidy itself
+ foreach ( $bits as $x ) {
+ preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
+ $x, $regs );
+ @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
+ if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+ $newparams = $this->fixTagAttributes($params);
$rest = str_replace( ">", "&gt;", $rest );
$text .= "<$slash$t $newparams$brace$rest";
- continue;
+ } else {
+ $text .= "&lt;" . str_replace( ">", "&gt;", $x);
}
- }
- $text .= "&lt;" . str_replace( ">", "&gt;", $x);
- }
- # Close off any remaining tags
- while ( $t = array_pop( $tagstack ) ) {
- $text .= "</$t>\n";
- if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
+ }
}
wfProfileOut( $fname );
return $text;
}
-/*
- *
+
+/*
+ *
* This function accomplishes several tasks:
* 1) Auto-number headings if that option is enabled
* 2) Add an [edit] link to sections for logged in users who have enabled the option
* It loops through all headlines, collects the necessary data, then splits up the
* string and re-inserts the newly formatted headlines.
*
- * */
- /* private */ function formatHeadings( $text )
+ */
+
+ /* private */ function formatHeadings( $text, $isMain=true )
{
- $nh=$this->mOptions->getNumberHeadings();
- $st=$this->mOptions->getShowToc();
- if(!$this->mTitle->userCanEdit()) {
- $es=0;
- $esr=0;
+ global $wgInputEncoding,$wgRequest,$wgOut;
+
+ $startsection=$wgRequest->getVal('section');
+ if($startsection) { $startsection--;}
+ $doNumberHeadings = $this->mOptions->getNumberHeadings();
+ $doShowToc = $this->mOptions->getShowToc();
+ if( !$this->mTitle->userCanEdit() ) {
+ $showEditLink = 0;
+ $rightClickHack = 0;
} else {
- $es=$this->mOptions->getEditSection();
- $esr=$this->mOptions->getEditSectionOnRightClick();
+ $showEditLink = $this->mOptions->getEditSection();
+ $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
}
# Inhibit editsection links if requested in the page
$esw =& MagicWord::get( MAG_NOEDITSECTION );
- if ($esw->matchAndRemove( $text )) {
- $es=0;
+ if( $esw->matchAndRemove( $text ) ) {
+ $showEditLink = 0;
}
- # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
+ # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
# do not add TOC
$mw =& MagicWord::get( MAG_NOTOC );
- if ($mw->matchAndRemove( $text ))
- {
- $st = 0;
+ if( $mw->matchAndRemove( $text ) ) {
+ $doShowToc = 0;
}
# never add the TOC to the Main Page. This is an entry page that should not
# be more than 1-2 screens large anyway
- if($this->mTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
+ if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
+ $doShowToc = 0;
+ }
+
+ # Get all headlines for numbering them and adding funky stuff like [edit]
+ # links - this is for later, but we need the number of headlines right now
+ $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
+
+ # if there are fewer than 4 headlines in the article, do not show TOC
+ if( $numMatches < 4 ) {
+ $doShowToc = 0;
+ }
+
+ # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
+ # override above conditions and always show TOC
+ $mw =& MagicWord::get( MAG_FORCETOC );
+ if ($mw->matchAndRemove( $text ) ) {
+ $doShowToc = 1;
+ }
+
# We need this to perform operations on the HTML
$sk =& $this->mOptions->getSkin();
- # Get all headlines for numbering them and adding funky stuff like [edit]
- # links
- preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
-
# headline counter
- $c=0;
+ $headlineCount = 0;
# Ugh .. the TOC should have neat indentation levels which can be
# passed to the skin functions. These are determined here
$toc = "";
$full = "";
$head = array();
- foreach($matches[3] as $headline) {
- if($level) { $prevlevel=$level;}
- $level=$matches[1][$c];
- if(($nh||$st) && $prevlevel && $level>$prevlevel) {
-
- $h[$level]=0; // reset when we enter a new level
- $toc.=$sk->tocIndent($level-$prevlevel);
- $toclevel+=$level-$prevlevel;
-
- }
- if(($nh||$st) && $level<$prevlevel) {
- $h[$level+1]=0; // reset when we step back a level
- $toc.=$sk->tocUnindent($prevlevel-$level);
- $toclevel-=$prevlevel-$level;
-
+ $sublevelCount = array();
+ $level = 0;
+ $prevlevel = 0;
+ foreach( $matches[3] as $headline ) {
+ $numbering = "";
+ if( $level ) {
+ $prevlevel = $level;
}
- $h[$level]++; // count number of headlines for each level
-
- if($nh||$st) {
- for($i=1;$i<=$level;$i++) {
- if($h[$i]) {
- if($dot) {$numbering.=".";}
- $numbering.=$h[$i];
- $dot=1;
+ $level = $matches[1][$headlineCount];
+ if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
+ # reset when we enter a new level
+ $sublevelCount[$level] = 0;
+ $toc .= $sk->tocIndent( $level - $prevlevel );
+ $toclevel += $level - $prevlevel;
+ }
+ if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
+ # reset when we step back a level
+ $sublevelCount[$level+1]=0;
+ $toc .= $sk->tocUnindent( $prevlevel - $level );
+ $toclevel -= $prevlevel - $level;
+ }
+ # count number of headlines for each level
+ @$sublevelCount[$level]++;
+ if( $doNumberHeadings || $doShowToc ) {
+ $dot = 0;
+ for( $i = 1; $i <= $level; $i++ ) {
+ if( !empty( $sublevelCount[$i] ) ) {
+ if( $dot ) {
+ $numbering .= ".";
+ }
+ $numbering .= $sublevelCount[$i];
+ $dot = 1;
}
}
}
- // The canonized header is a version of the header text safe to use for links
- // Avoid insertion of weird stuff like <math> by expanding the relevant sections
- $canonized_headline=Parser::unstrip( $headline, $this->mStripState );
- $canonized_headline=preg_replace("/<.*?>/","",$canonized_headline); // strip out HTML
+ # The canonized header is a version of the header text safe to use for links
+ # Avoid insertion of weird stuff like <math> by expanding the relevant sections
+ $canonized_headline = $this->unstrip( $headline, $this->mStripState );
+
+ # strip out HTML
+ $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
$tocline = trim( $canonized_headline );
- $canonized_headline=str_replace('"',"",$canonized_headline);
- $canonized_headline=str_replace(" ","_",trim($canonized_headline));
- $refer[$c]=$canonized_headline;
- $refers[$canonized_headline]++; // count how many in assoc. array so we can track dupes in anchors
- $refcount[$c]=$refers[$canonized_headline];
+ $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
+ $refer[$headlineCount] = $canonized_headline;
- // Prepend the number to the heading text
-
- if($nh||$st) {
- $tocline=$numbering ." ". $tocline;
-
- // Don't number the heading if it is the only one (looks silly)
- if($nh && count($matches[3]) > 1) {
- $headline=$numbering . " " . $headline; // the two are different if the line contains a link
+ # count how many in assoc. array so we can track dupes in anchors
+ @$refers[$canonized_headline]++;
+ $refcount[$headlineCount]=$refers[$canonized_headline];
+
+ # Prepend the number to the heading text
+
+ if( $doNumberHeadings || $doShowToc ) {
+ $tocline = $numbering . " " . $tocline;
+
+ # Don't number the heading if it is the only one (looks silly)
+ if( $doNumberHeadings && count( $matches[3] ) > 1) {
+ # the two are different if the line contains a link
+ $headline=$numbering . " " . $headline;
}
}
-
- // Create the anchor for linking from the TOC to the section
- $anchor=$canonized_headline;
- if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
- if($st) {
- $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
+
+ # Create the anchor for linking from the TOC to the section
+ $anchor = $canonized_headline;
+ if($refcount[$headlineCount] > 1 ) {
+ $anchor .= "_" . $refcount[$headlineCount];
}
- if($es) {
- $head[$c].=$sk->editSectionLink($c+1);
+ if( $doShowToc ) {
+ $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
}
-
-
- // the headline might have a link
- if(preg_match("/(.*)<a(.*)/",$headline, $headlinematches))
- {
- // if so give an anchor name to the already existent link
- $headline = $headlinematches[1]
- ."<a name=\"".$anchor."\" ".$headlinematches[2];
- } else {
- // else create an anchor link for the headline
- $headline = "<a name=\"".$anchor."\">"
- .$headline
- ."</a>";
+ if( $showEditLink ) {
+ if ( empty( $head[$headlineCount] ) ) {
+ $head[$headlineCount] = "";
+ }
+ $head[$headlineCount] .= $sk->editSectionLink($startsection+$headlineCount+1);
}
-
- // give headline the correct <h#> tag
- $head[$c].="<h".$level.$matches[2][$c] .$headline."</h".$level.">";
-
- // Add the edit section link
-
- if($esr) {
- $head[$c]=$sk->editSectionScript($c+1,$head[$c]);
+
+ # Add the edit section span
+ if( $rightClickHack ) {
+ $headline = $sk->editSectionScript($startsection+$headlineCount+1,$headline);
}
-
- $numbering="";
- $c++;
- $dot=0;
- }
- if($st) {
- $toclines=$c;
- $toc.=$sk->tocUnindent($toclevel);
- $toc=$sk->tocTable($toc);
+ # give headline the correct <h#> tag
+ @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
+
+ $headlineCount++;
}
- // split up and insert constructed headlines
-
- $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
- $i=0;
+ if( $doShowToc ) {
+ $toclines = $headlineCount;
+ $toc .= $sk->tocUnindent( $toclevel );
+ $toc = $sk->tocTable( $toc );
+ }
- foreach($blocks as $block) {
- if(($es) && $c>0 && $i==0) {
- # This is the [edit] link that appears for the top block of text when
+ # split up and insert constructed headlines
+
+ $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
+ $i = 0;
+
+ foreach( $blocks as $block ) {
+ if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
+ # This is the [edit] link that appears for the top block of text when
# section editing is enabled
- $full.=$sk->editSectionLink(0);
+
+ # Disabled because it broke block formatting
+ # For example, a bullet point in the top line
+ # $full .= $sk->editSectionLink(0);
}
- $full.=$block;
- if($st && $toclines>3 && !$i) {
- # Let's add a top anchor just in case we want to link to the top of the page
- $full="<a name=\"top\"></a>".$full.$toc;
+ $full .= $block;
+ if( $doShowToc && !$i) {
+ # Top anchor now in skin
+ $full = $full.$toc;
}
+
+ # If a page is viewed in collapsed mode, a TOC generated
+ # from the wikisource is stored in the title object.
+ # This TOC is now fetched and inserted here if it exists.
+ $collapsedtoc=$wgOut->getToc();
+ if ($collapsedtoc && !$i && $isMain) {
+ $full = $full.$collapsedtoc;
+ }
if( !empty( $head[$i] ) ) {
$full .= $head[$i];
}
$i++;
}
-
+
return $full;
}
+	/* Generates an HTML-formatted table of contents, linking to the individual
+	 * sections, directly from the wikitext source. Used for collapsing long pages.
+	 *
+	 * Headings are recognised at the start of a line in both syntaxes:
+	 * wiki markup ("==foo==") and HTML ("<h2>foo</h2>").
+	 *
+	 * Uses the globals $wgUser (for the skin), $wgParser, $wgTitle, $wgOut
+	 * (for its parser options) and $wgInputEncoding.
+	 *
+	 * $text   - wikitext source to scan for headings
+	 * returns - the TOC as built by the skin, or "" if there are no headings
+	 */
+	/* static */ function getTocFromSource( $text ) {
+
+		global $wgUser,$wgInputEncoding,$wgTitle,$wgOut,$wgParser;
+		$sk = $wgUser->getSkin();
+
+		# Strip with the output type temporarily switched to OT_WIKI, then
+		# restore whatever output type the parser had before.
+		$striparray=array();
+		$oldtype=$wgParser->mOutputType;
+		$wgParser->mOutputType=OT_WIKI;
+		$text=$wgParser->strip($text, $striparray, true);
+		$wgParser->mOutputType=$oldtype;
+
+		$numMatches = preg_match_all( "/^(=+)(.*?)=+|^<h([1-6]).*?>(.*?)<\/h[1-6].*?>/mi",$text,$matches);
+
+		# no headings: text cannot be collapsed
+		if( $numMatches == 0 ) {
+			return "";
+		}
+
+		# The two alternatives of the pattern capture into different groups:
+		# 1 (run of "=") and 2 (text) for wiki headings, 3 (digit) and 4 (text)
+		# for HTML headings. Fold them into one level list and one text list,
+		# otherwise HTML headings end up with an empty level and empty text.
+		$levels = array();
+		$titles = array();
+		for( $n = 0; $n < $numMatches; $n++ ) {
+			if( $matches[1][$n] != "" ) {
+				# wikisource headings need to be converted into numbers
+				# =foo= equals <h1>foo</h1>, ==foo== equals <h2>foo</h2> etc.
+				$levels[$n] = strlen( $matches[1][$n] );
+				$titles[$n] = $matches[2][$n];
+			} else {
+				$levels[$n] = intval( $matches[3][$n] );
+				$titles[$n] = $matches[4][$n];
+			}
+		}
+
+		# We combine the headlines into a bundle and convert them to HTML
+		# in order to make stripping out the wiki markup easier.
+		$combined=implode("!@@@!",$titles);
+		$myout=$wgParser->parse($combined,$wgTitle,$wgOut->mParserOptions);
+		$combined_html=$myout->getText();
+		$headlines=explode("!@@@!",$combined_html);
+
+		# headline counter
+		$headlineCount = 0;
+		$toclevel = 0;
+		$toc = "";
+		$sublevelCount = array();
+		# number of times each canonized headline has been seen so far
+		$refers = array();
+		$level = 0;
+		$prevlevel = 0;
+		foreach( $headlines as $headline ) {
+			# Stop if parsing produced more fragments than there are headings;
+			# there is no level information for the surplus.
+			if( !isset( $levels[$headlineCount] ) ) {
+				break;
+			}
+			$headline=trim($headline);
+			$numbering = "";
+			if( $level ) {
+				$prevlevel = $level;
+			}
+			$level = $levels[$headlineCount];
+
+			if( $prevlevel && $level > $prevlevel ) {
+				# reset when we enter a new level
+				$sublevelCount[$level] = 0;
+				$toc .= $sk->tocIndent( $level - $prevlevel );
+				$toclevel += $level - $prevlevel;
+			}
+			if( $level < $prevlevel ) {
+				# reset when we step back a level
+				$sublevelCount[$level+1]=0;
+				$toc .= $sk->tocUnindent( $prevlevel - $level );
+				$toclevel -= $prevlevel - $level;
+			}
+			# count number of headlines for each level
+			if( !isset( $sublevelCount[$level] ) ) {
+				$sublevelCount[$level] = 0;
+			}
+			$sublevelCount[$level]++;
+
+			# Build the dotted section number from the counters of all
+			# levels up to the current one, skipping levels never used.
+			$dot = 0;
+			for( $i = 1; $i <= $level; $i++ ) {
+				if( !empty( $sublevelCount[$i] ) ) {
+					if( $dot ) {
+						$numbering .= ".";
+					}
+					$numbering .= $sublevelCount[$i];
+					$dot = 1;
+				}
+			}
+
+			# The canonized header is a version of the header text safe to use for links
+			# Avoid insertion of weird stuff like <math> by expanding the relevant sections
+			$state=array();
+			$canonized_headline = Parser::unstrip( $headline, $state);
+
+			# strip out HTML
+			$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
+			$tocline = trim( $canonized_headline );
+			$canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
+
+			# count how many in assoc. array so we can track dupes in anchors
+			if( !isset( $refers[$canonized_headline] ) ) {
+				$refers[$canonized_headline] = 0;
+			}
+			$refers[$canonized_headline]++;
+			$dupes = $refers[$canonized_headline];
+			$tocline = $numbering . " " . $tocline;
+
+			# Create the anchor for linking from the TOC to the section;
+			# repeated headlines get a "_<count>" suffix from the second on.
+			$anchor = trim($canonized_headline);
+
+			if( $dupes > 1 ) {
+				$anchor .= "_" . $dupes;
+			}
+			# The counter is advanced first, so the skin receives a 1-based index.
+			$headlineCount++;
+			$toc .= $sk->tocLine($anchor,$tocline,$toclevel,$headlineCount);
+		}
+		$toc .= $sk->tocUnindent( $toclevel );
+		$toc = $sk->tocTable( $toc );
+		return $toc;
+
+	}
/* private */ function doMagicISBN( &$tokenizer )
{
global $wgLang;
}
$num = str_replace( "-", "", $isbn );
$num = str_replace( " ", "", $num );
-
+
if ( "" == $num ) {
$text = "ISBN $blank$x";
} else {
$rfc .= $x{0};
$x = substr( $x, 1 );
}
-
+
if ( "" == $rfc ) {
$text .= "RFC $blank$x";
} else {
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
$this->mOptions = $options;
- $this->mTitle = $title;
+ $this->mTitle =& $title;
+ $this->mOutputType = OT_WIKI;
+
if ( $clearState ) {
$this->clearState();
}
-
+
$stripState = false;
- $text = str_replace("\r\n", "\n", $text);
+ $pairs = array(
+ "\r\n" => "\n",
+ );
+ $text = str_replace(array_keys($pairs), array_values($pairs), $text);
+ // now with regexes
+ $pairs = array(
+ "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
+ "/<br *?>/i" => "<br/>",
+ );
+ $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
$text = $this->strip( $text, $stripState, false );
$text = $this->pstPass2( $text, $user );
$text = $this->unstrip( $text, $stripState );
/* private */ function pstPass2( $text, &$user )
{
- global $wgLang, $wgLocaltimezone;
+ global $wgLang, $wgLocaltimezone, $wgCurParser;
+
+ # Variable replacement
+ # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
+ $text = $this->replaceVariables( $text );
# Signatures
#
" (" . date( "T" ) . ")";
if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
+ $text = preg_replace( "/~~~~~/", $d, $text );
$text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
Namespace::getUser() ) . ":$n|$k]] $d", $text );
$text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
} else {
$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
}
-
- # {{SUBST:xxx}} variables
- #
+
+ /*
$mw =& MagicWord::get( MAG_SUBST );
- $text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );
+ $wgCurParser = $this->fork();
+ $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
+ $this->merge( $wgCurParser );
+ */
# Trim trailing whitespace
- # MAG_END (__END__) tag allows for trailing
+ # MAG_END (__END__) tag allows for trailing
# whitespace to be deliberately included
$text = rtrim( $text );
$mw =& MagicWord::get( MAG_END );
return $text;
}
+	# Prepare the parser for an external caller: store the title (by
+	# reference), the options and the output type - values which are usually
+	# set up in parse() - and reset the parser state unless told not to.
+	function startExternalParse( &$title, $options, $outputType, $clearState = true )
+	{
+		$this->mOutputType = $outputType;
+		$this->mOptions = $options;
+		$this->mTitle =& $title;
+
+		if ( !$clearState ) {
+			return;
+		}
+		$this->clearState();
+	}
+
+	# Run variable replacement over a message text. The parser is set up with
+	# the global $wgTitle, the given options and output type OT_MSG, and its
+	# state is cleared first. A call made while another call is still in
+	# progress returns the text unchanged.
+	function transformMsg( $text, $options ) {
+		global $wgTitle;
+		static $busy = false;
+
+		# Guard against infinite recursion: only the outermost call does work
+		if ( !$busy ) {
+			$busy = true;
+
+			$this->mTitle = $wgTitle;
+			$this->mOptions = $options;
+			$this->mOutputType = OT_MSG;
+			$this->clearState();
+			$text = $this->replaceVariables( $text );
+
+			$busy = false;
+		}
+		return $text;
+	}
}
class ParserOutput
function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
+
+	# Fold the data of another output object into this one: its language
+	# links and category links are appended to ours, and the "contains old
+	# magic" flag ends up set if it is set on either object.
+	#
+	# $other - object exposing mLanguageLinks, mCategoryLinks and
+	#          mContainsOldMagic (presumably another ParserOutput)
+	function merge( $other ) {
+		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
+		# The second operand must be $other's category links; merging in
+		# $this->mLanguageLinks here would add language links to the category
+		# list and silently drop $other's categories.
+		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
+		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
+	}
+
}
class ParserOptions
var $mDateFormat; # Date format index
var $mEditSection; # Create "edit section" links
var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
- var $mPrintable; # Generate printable output
var $mNumberHeadings; # Automatically number headings
var $mShowToc; # Show table of contents
function getDateFormat() { return $this->mDateFormat; }
function getEditSection() { return $this->mEditSection; }
function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
- function getPrintable() { return $this->mPrintable; }
function getNumberHeadings() { return $this->mNumberHeadings; }
function getShowToc() { return $this->mShowToc; }
function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
- function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }
- /* static */ function newFromUser( &$user )
+ /* static */ function newFromUser( &$user )
{
$popts = new ParserOptions;
- $popts->initialiseFromUser( &$user );
+ $popts->initialiseFromUser( $user );
return $popts;
}
- function initialiseFromUser( &$userInput )
+ function initialiseFromUser( &$userInput )
{
global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
-
+
if ( !$userInput ) {
$user = new User;
+ $user->setLoaded( true );
} else {
$user =& $userInput;
}
$this->mDateFormat = $user->getOption( "date" );
$this->mEditSection = $user->getOption( "editsection" );
$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
- $this->mPrintable = false;
$this->mNumberHeadings = $user->getOption( "numberheadings" );
$this->mShowToc = $user->getOption( "showtoc" );
}
}
-
-# Regex callbacks, used in OutputPage::replaceVariables
-
-# Just get rid of the dangerous stuff
-# Necessary because replaceVariables is called after removeHTMLtags,
-# and message text can come from any user
-function wfReplaceMsgVar( $matches ) {
- global $wgCurOut, $wgLinkCache;
- $text = $wgCurOut->removeHTMLtags( wfMsg( $matches[1] ) );
- $wgLinkCache->suspend();
- $text = $wgCurOut->replaceInternalLinks( $text );
- $wgLinkCache->resume();
- $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
- return $text;
-}
-# Effective <nowiki></nowiki>
-# Not real <nowiki> because this is called after nowiki sections are processed
-function wfReplaceMsgnwVar( $matches ) {
- global $wgCurOut, $wgLinkCache;
- $text = wfEscapeWikiText( wfMsg( $matches[1] ) );
- $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
- return $text;
+# Regex callback, used in Parser::replaceVariables: passes the match array
+# on to braceSubstitution() of the parser held in the global $wgCurParser
+# and returns whatever that method returns.
+function wfBraceSubstitution( $matches )
+{
+	global $wgCurParser;
+	$replacement = $wgCurParser->braceSubstitution( $matches );
+	return $replacement;
}
-
-
?>