<?php
-include_once('Tokenizer.php');
+// require_once('Tokenizer.php');
if( $GLOBALS['wgUseWikiHiero'] ){
- include_once('wikihiero.php');
+ require_once('extensions/wikihiero/wikihiero.php');
+}
+if( $GLOBALS['wgUseTimeline'] ){
+ require_once('extensions/timeline/Timeline.php');
}
# PHP Parser
define( "OT_WIKI", 2 );
define( "OT_MSG", 3 );
+# string parameter for extractTags which will cause it
+# to strip HTML comments in addition to regular
+# <XML>-style tags. This should not be anything we
+# may want to use in wikisyntax
+define( "STRIP_COMMENTS", "HTMLCommentStrip" );
+
# prefix for escaping, used in two functions at least
define( "UNIQ_PREFIX", "NaodW29");
#
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
+ global $wgUseTidy;
$fname = "Parser::parse";
wfProfileIn( $fname );
$text = $this->internalParse( $text, $linestart );
$text = $this->unstrip( $text, $this->mStripState );
# Clean up special characters, only run once, next-to-last before doBlockLevels
- $fixtags = array(
- "/<hr *>/i" => '<hr/>',
- "/<br *>/i" => '<br/>',
- "/<center *>/i"=>'<div class="center">',
- "/<\\/center *>/i" => '</div>',
- # Clean up spare ampersands; note that we probably ought to be
- # more careful about named entities.
- '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&'
- );
- $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
-
+ if(!$wgUseTidy) {
+ $fixtags = array(
+ # french spaces, last one Guillemet-left
+ "/ (\\?|:|!|\\302\\273)/i"=>" \\1",
+ # french spaces, Guillemet-right
+ "/(\\302\\253) /i"=>"\\1 ",
+ "/<hr *>/i" => '<hr/>',
+ "/<br *>/i" => '<br/>',
+ "/<center *>/i"=>'<div class="center">',
+ "/<\\/center *>/i" => '</div>',
+ # Clean up spare ampersands; note that we probably ought to be
+ # more careful about named entities.
+ '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&'
+ );
+ $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+ } else {
+ $fixtags = array(
+ # french spaces, last one Guillemet-left
+ "/ (\\?|:|!|\\302\\273)/i"=>" \\1",
+ # french spaces, Guillemet-right
+ "/(\\302\\253) /i"=>"\\1 ",
+ "/<center *>/i"=>'<div class="center">',
+ "/<\\/center *>/i" => '</div>'
+ );
+ $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+ }
# only once and last
$text = $this->doBlockLevels( $text, $linestart );
-
+ if($wgUseTidy) {
+ $text = $this->tidy($text);
+ }
$this->mOutput->setText( $text );
wfProfileOut( $fname );
return $this->mOutput;
# If $content is already set, the additional entries will be appended
+ # If $tag is set to STRIP_COMMENTS, the function will extract
+ # <!-- HTML comments -->
+
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
if ( !$content ) {
$stripped = "";
while ( "" != $text ) {
- $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
+ if($tag==STRIP_COMMENTS) {
+ $p = preg_split( "/<!--/i", $text, 2 );
+ } else {
+ $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
+ }
$stripped .= $p[0];
if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
$text = "";
} else {
- $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
+ if($tag==STRIP_COMMENTS) {
+ $q = preg_split( "/-->/i", $p[1], 2 );
+ } else {
+ $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
+ }
$marker = $rnd . sprintf("%08X", $n++);
$content[$marker] = $q[0];
$stripped .= $marker;
return $stripped;
}
- # Strips <nowiki>, <pre> and <math>
+ # Strips and renders <nowiki>, <pre>, <math>, <hiero>
+ # If $render is set, performs necessary rendering operations on plugins
# Returns the text, and fills an array with data needed in unstrip()
# If the $state is already a valid strip state, it adds to the state
- #
- function strip( $text, &$state )
+
+ # When $stripcomments is set, HTML comments <!-- like this -->
+ # will be stripped in addition to other tags. This is important
+ # for section editing, where these comments cause confusion when
+ # counting the sections in the wikisource
+ function strip( $text, &$state, $stripcomments = false )
{
$render = ($this->mOutputType == OT_HTML);
$nowiki_content = array();
$hiero_content = array();
+ $timeline_content = array();
$math_content = array();
$pre_content = array();
- $item_content = array();
+ $comment_content = array();
# Replace any instances of the placeholders
$uniq_prefix = UNIQ_PREFIX;
}
}
- if( $GLOBALS['wgUseWikiHiero'] ){
- $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
- foreach( $hiero_content as $marker => $content ){
- if( $render ){
- $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
- } else {
- $hiero_content[$marker] = "<hiero>$content</hiero>";
- }
+ $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
+ foreach( $hiero_content as $marker => $content ){
+ if( $render && $GLOBALS['wgUseWikiHiero']){
+ $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
+ } else {
+ $hiero_content[$marker] = "<hiero>$content</hiero>";
+ }
+ }
+
+ $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
+ foreach( $timeline_content as $marker => $content ){
+ if( $render && $GLOBALS['wgUseTimeline']){
+ $timeline_content[$marker] = renderTimeline( $content );
+ } else {
+ $timeline_content[$marker] = "<timeline>$content</timeline>";
}
}
- if( $this->mOptions->getUseTeX() ){
- $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
- foreach( $math_content as $marker => $content ){
- if( $render ){
+ $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
+ foreach( $math_content as $marker => $content ){
+ if( $render ) {
+ if( $this->mOptions->getUseTeX() ) {
$math_content[$marker] = renderMath( $content );
} else {
- $math_content[$marker] = "<math>$content</math>";
+ # TeX rendering is off: put the original tag back, with a proper closing tag
+ $math_content[$marker] = "<math>$content</math>";
}
+ } else {
+ $math_content[$marker] = "<math>$content</math>";
}
}
$pre_content[$marker] = "<pre>$content</pre>";
}
}
+ if($stripcomments) {
+ $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
+ foreach( $comment_content as $marker => $content ){
+ $comment_content[$marker] = "<!--$content-->";
+ }
+ }
# Merge state with the pre-existing state, if there is one
if ( $state ) {
$state['nowiki'] = $state['nowiki'] + $nowiki_content;
$state['hiero'] = $state['hiero'] + $hiero_content;
+ $state['timeline'] = $state['timeline'] + $timeline_content;
$state['math'] = $state['math'] + $math_content;
$state['pre'] = $state['pre'] + $pre_content;
+ $state['comment'] = $state['comment'] + $comment_content;
} else {
$state = array(
'nowiki' => $nowiki_content,
'hiero' => $hiero_content,
+ 'timeline' => $timeline_content,
'math' => $math_content,
'pre' => $pre_content,
- 'item' => $item_content
+ 'comment' => $comment_content
);
}
return $text;
'nowiki' => array(),
'hiero' => array(),
'math' => array(),
- 'pre' => array(),
- 'item' => array()
+ 'pre' => array()
);
}
$state['item'][$rnd] = $text;
return $rnd;
}
+ # This method generates the list of subcategories and pages for a category
function categoryMagic ()
{
global $wgLang , $wgUser ;
- if ( !$this->mOptions->getUseCategoryMagic() ) return ;
- $id = $this->mTitle->getArticleID() ;
- $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
- $ti = $this->mTitle->getText() ;
- $ti = explode ( ":" , $ti , 2 ) ;
- if ( $cat != $ti[0] ) return "" ;
- $r = '<br style="clear:both;"/>\n';
+ if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
- $articles = array() ;
- $parents = array () ;
- $children = array() ;
+ $cns = Namespace::getCategory() ;
+ if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
+
+ $r = "<br style=\"clear:both;\"/>\n";
-# $sk =& $this->mGetSkin();
$sk =& $wgUser->getSkin() ;
+ $articles = array() ;
+ $children = array() ;
$data = array () ;
- $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
- $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
-
- $res = wfQuery ( $sql1, DB_READ ) ;
- while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
+ $id = $this->mTitle->getArticleID() ;
- $res = wfQuery ( $sql2, DB_READ ) ;
+ # FIXME: add limits
+ $t = wfStrencode( $this->mTitle->getDBKey() );
+ $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
+ $res = wfQuery ( $sql, DB_READ ) ;
while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
-
+ # For all pages that link to this category
foreach ( $data AS $x )
{
$t = $wgLang->getNsText ( $x->cur_namespace ) ;
if ( $t != "" ) $t .= ":" ;
$t .= $x->cur_title ;
- $y = explode ( ":" , $t , 2 ) ;
- if ( count ( $y ) == 2 && $y[0] == $cat ) {
- array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
+ if ( $x->cur_namespace == $cns ) {
+ array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
} else {
- array_push ( $articles , $sk->makeLink ( $t ) ) ;
+ array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
}
}
wfFreeResult ( $res ) ;
- # Children
- if ( count ( $children ) > 0 )
- {
- asort ( $children ) ;
+ # Showing subcategories
+ if ( count ( $children ) > 0 ) {
$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
$r .= implode ( ", " , $children ) ;
}
- # Articles
- if ( count ( $articles ) > 0 )
- {
- asort ( $articles ) ;
- $h = wfMsg( "category_header", $ti[1] );
+ # Showing pages in this category
+ if ( count ( $articles ) > 0 ) {
+ $ti = $this->mTitle->getText() ;
+ $h = wfMsg( "category_header", $ti );
$r .= "<h2>{$h}</h2>\n" ;
$r .= implode ( ", " , $articles ) ;
}
return trim ( $t ) ;
}
+ /*
+  * Interface with HTML Tidy, used if $wgUseTidy = true.
+  *
+  * Wraps $text in a minimal XHTML 1.0 Transitional document, pipes it
+  * through the external program named by $wgTidyBin (configuration file
+  * $wgTidyConf, extra options $wgTidyOpts) and returns whatever the
+  * program writes to its standard output.
+  *
+  * If nothing comes back (the process could not be started or printed
+  * nothing), returns the 'seriousxhtmlerrors' message as a heading
+  * followed by the escaped, wrapped input in a <pre> block.
+  */
+ function tidy ( $text ) {
+ 	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
+ 	global $wgInputEncoding, $wgOutputEncoding;
+
+ 	# Build the option string in a local: appending to the global would
+ 	# add another encoding switch on every call made during one request.
+ 	$opts = $wgTidyOpts;
+ 	switch(strtoupper($wgOutputEncoding)) {
+ 		case 'ISO-8859-1':
+ 			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
+ 			break;
+ 		case 'UTF-8':
+ 			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
+ 			break;
+ 		default:
+ 			$opts .= ' -raw';
+ 	}
+
+ 	# The fragment is embedded in a complete document before it is sent
+ 	$text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
+' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
+'<head><title>test</title></head><body>'.$text.'</body></html>';
+
+ 	$descriptorspec = array(
+ 		0 => array("pipe", "r"),              # child stdin: we write the document
+ 		1 => array("pipe", "w"),              # child stdout: we read the result
+ 		2 => array("file", "/dev/null", "a")  # child stderr: discarded
+ 	);
+
+ 	$cleansource = '';
+ 	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
+ 	if (is_resource($process)) {
+ 		fwrite($pipes[0], $text);
+ 		fclose($pipes[0]);
+ 		while (!feof($pipes[1])) {
+ 			$chunk = fgets($pipes[1], 1024);
+ 			if ($chunk === false) {
+ 				# Read error or end of stream: stop instead of spinning on feof()
+ 				break;
+ 			}
+ 			$cleansource .= $chunk;
+ 		}
+ 		fclose($pipes[1]);
+ 		proc_close($process);
+ 	}
+
+ 	if( $cleansource == '' && $text != '') {
+ 		return '<h2>'.wfMsg('seriousxhtmlerrors').'</h2><pre>'.htmlspecialchars($text).'</pre>';
+ 	} else {
+ 		return $cleansource;
+ 	}
+ }
+
+
function doTableStuff ( $t )
{
$t = explode ( "\n" , $t ) ;
$ltr = array () ; # tr attributes
foreach ( $t AS $k => $x )
{
- $x = rtrim ( $x ) ;
+ $x = trim ( $x ) ;
$fc = substr ( $x , 0 , 1 ) ;
if ( "{|" == substr ( $x , 0 , 2 ) )
{
return $t ;
}
- function internalParse( $text, $linestart, $args = array() )
+ # Strips and parses $text as a non-main fragment, then stores the
+ # parsed result in the strip state.
+ # A newline is put in front of the result when $linestart is set.
+ # Returns whatever insertStripItem() returns for the stored text.
+ function stripParse( $text, $linestart, $args )
+ {
+ 	$stripped = $this->strip( $text, $this->mStripState );
+ 	$parsed = $this->internalParse( $stripped, $linestart, $args, false );
+ 	$item = $linestart ? "\n" . $parsed : $parsed;
+ 	return $this->insertStripItem( $item, $this->mStripState );
+ }
+
+ function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
$fname = "Parser::internalParse";
wfProfileIn( $fname );
$text = $this->removeHTMLtags( $text );
$text = $this->replaceVariables( $text, $args );
- # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
+ $text = preg_replace( "/(^|\n)-----*/", "\\1<hr/>", $text );
$text = $this->doHeadings( $text );
if($this->mOptions->getUseDynamicDates()) {
global $wgDateFormatter;
$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
}
+ $text = $this->doAllQuotes( $text );
$text = $this->replaceExternalLinks( $text );
- $text = $this->doTokenizedParser ( $text );
+ $text = $this->replaceInternalLinks ( $text );
+ $text = $this->replaceInternalLinks ( $text );
+ //$text = $this->doTokenizedParser ( $text );
$text = $this->doTableStuff ( $text ) ;
- $text = $this->formatHeadings( $text );
+ $text = $this->magicISBN( $text );
+ $text = $this->magicRFC( $text );
+ $text = $this->formatHeadings( $text, $isMain );
$sk =& $this->mOptions->getSkin();
$text = $sk->transformContent( $text );
- $text .= $this->categoryMagic () ;
+ if ( !isset ( $this->categoryMagicDone ) ) {
+ $text .= $this->categoryMagic () ;
+ $this->categoryMagicDone = true ;
+ }
wfProfileOut( $fname );
return $text;
return $text;
}
+ # Applies doQuotes() to every line of $text separately, so that quote
+ # markup left open on one line cannot leak into the following lines.
+ #
+ # Lines are split on "\n" and joined with "\n" again: the text comes
+ # back with exactly the line breaks it had (a "\r" in front of a "\n"
+ # stays part of its line) and nothing is appended after the last line.
+ /* private */ function doAllQuotes( $text )
+ {
+ 	$lines = explode( "\n", $text );
+ 	foreach ( $lines as $i => $line ) {
+ 		$lines[$i] = $this->doQuotes ( "", $line, "" );
+ 	}
+ 	return implode( "\n", $lines );
+ }
+
+ # Recursive worker behind doAllQuotes(): rewrites apostrophe markup in
+ # $text as <em> (two apostrophes) and <strong> (three apostrophes).
+ #
+ # $pre  - text already consumed that belongs inside the outer one of
+ #         two nested spans; "" when no nested span is pending
+ # $text - the text that still has to be scanned
+ # $mode - which spans are currently open:
+ #           ""          none
+ #           "em"        <em>
+ #           "strong"    <strong>
+ #           "emstrong"  <em> outside, <strong> inside
+ #           "strongem"  <strong> outside, <em> inside
+ #           "both"      both opened with no text in between; the
+ #                       nesting order is decided by the marker that
+ #                       closes first
+ #
+ # Returns the converted text. Spans still open when the text runs out
+ # are closed; an empty span produces no tags at all.
+ /* private */ function doQuotes( $pre, $text, $mode )
+ {
+ 	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
+ 		# No marker left: emit the rest and close what is open
+ 		$boldText = ($text == "") ? "" : "<strong>{$text}</strong>";
+ 		$italicText = ($text == "") ? "" : "<em>{$text}</em>";
+ 		switch ( $mode ) {
+ 			case "":
+ 				return $pre . $text;
+ 			case "em":
+ 				return $pre . $italicText;
+ 			case "strong":
+ 				return $pre . $boldText;
+ 			case "strongem":
+ 				return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$italicText}</strong>";
+ 			default:
+ 				return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$boldText}</em>";
+ 		}
+ 	}
+
+ 	# $head is the text in front of the first marker, $rest what follows it
+ 	$head = $m[1];
+ 	$rest = $m[2];
+ 	$headIsEmpty = ($head == "");
+ 	$boldHead = $headIsEmpty ? "" : "<strong>{$head}</strong>";
+ 	$italicHead = $headIsEmpty ? "" : "<em>{$head}</em>";
+
+ 	if ( substr ($rest, 0, 1) == "'" ) {
+ 		# A third apostrophe follows: this marker toggles <strong>
+ 		$rest = substr ($rest, 1);
+ 		switch ( $mode ) {
+ 			case "em":
+ 				return $this->doQuotes ( $head, $rest, $headIsEmpty ? "both" : "emstrong" );
+ 			case "strong":
+ 				return $boldHead . $this->doQuotes ( "", $rest, "" );
+ 			case "emstrong":
+ 			case "both":
+ 				return $this->doQuotes ( "", $pre.$boldHead.$rest, "em" );
+ 			case "strongem":
+ 				return "<strong>{$pre}{$italicHead}</strong>" . $this->doQuotes ( "", $rest, "em" );
+ 			default:
+ 				return $head . $this->doQuotes ( "", $rest, "strong" );
+ 		}
+ 	}
+
+ 	# Plain two-apostrophe marker: toggles <em>
+ 	switch ( $mode ) {
+ 		case "strong":
+ 			return $this->doQuotes ( $head, $rest, $headIsEmpty ? "both" : "strongem" );
+ 		case "em":
+ 			return $italicHead . $this->doQuotes ( "", $rest, "" );
+ 		case "emstrong":
+ 			return "<em>{$pre}{$boldHead}</em>" . $this->doQuotes ( "", $rest, "strong" );
+ 		case "strongem":
+ 		case "both":
+ 			return $this->doQuotes ( "", $pre.$italicHead.$rest, "strong" );
+ 		default:
+ 			return $head . $this->doQuotes ( "", $rest, "em" );
+ 	}
+ }
+
# Note: we have to do external links before the internal ones,
# and otherwise take great care in the order of things here, so
# that we don't end up interpreting some URLs twice.
return $s;
}
- /* private */ function handle3Quotes( &$state, $token )
- {
- if ( $state["strong"] !== false ) {
- if ( $state["em"] !== false && $state["em"] > $state["strong"] )
- {
- # ''' lala ''lala '''
- $s = "</em></strong><em>";
- } else {
- $s = "</strong>";
- }
- $state["strong"] = FALSE;
- } else {
- $s = "<strong>";
- $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
- }
- return $s;
- }
- /* private */ function handle2Quotes( &$state, $token )
- {
- if ( $state["em"] !== false ) {
- if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
- {
- # ''lala'''lala'' ....'''
- $s = "</strong></em><strong>";
- } else {
- $s = "</em>";
- }
- $state["em"] = FALSE;
- } else {
- $s = "<em>";
- $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
-
- }
- return $s;
- }
-
- /* private */ function handle5Quotes( &$state, $token )
- {
- $s = "";
- if ( $state["em"] !== false && $state["strong"] !== false ) {
- if ( $state["em"] < $state["strong"] ) {
- $s .= "</strong></em>";
- } else {
- $s .= "</em></strong>";
- }
- $state["strong"] = $state["em"] = FALSE;
- } elseif ( $state["em"] !== false ) {
- $s .= "</em><strong>";
- $state["em"] = FALSE;
- $state["strong"] = $token["pos"];
- } elseif ( $state["strong"] !== false ) {
- $s .= "</strong><em>";
- $state["strong"] = FALSE;
- $state["em"] = $token["pos"];
- } else { # not $em and not $strong
- $s .= "<strong><em>";
- $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
- }
- return $s;
- }
-
- /* private */ function doTokenizedParser( $str )
- {
- global $wgLang; # for language specific parser hook
-
- $tokenizer=Tokenizer::newFromString( $str );
- $tokenStack = array();
-
- $s="";
- $state["em"] = FALSE;
- $state["strong"] = FALSE;
- $tagIsOpen = FALSE;
- $threeopen = false;
-
- # The tokenizer splits the text into tokens and returns them one by one.
- # Every call to the tokenizer returns a new token.
- while ( $token = $tokenizer->nextToken() )
- {
- switch ( $token["type"] )
- {
- case "text":
- # simple text with no further markup
- $txt = $token["text"];
- break;
- case "[[[":
- # remember the tag opened with 3 [
- $threeopen = true;
- case "[[":
- # link opening tag.
- # FIXME : Treat orphaned open tags (stack not empty when text is over)
- $tagIsOpen = TRUE;
- array_push( $tokenStack, $token );
- $txt="";
- break;
-
- case "]]]":
- case "]]":
- # link close tag.
- # get text from stack, glue it together, and call the code to handle a
- # link
-
- if ( count( $tokenStack ) == 0 )
- {
- # stack empty. Found a ]] without an opening [[
- $txt = "]]";
- } else {
- $linkText = "";
- $lastToken = array_pop( $tokenStack );
- while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
- {
- if( !empty( $lastToken["text"] ) ) {
- $linkText = $lastToken["text"] . $linkText;
- }
- $lastToken = array_pop( $tokenStack );
- }
-
- $txt = $linkText ."]]";
-
- if( isset( $lastToken["text"] ) ) {
- $prefix = $lastToken["text"];
- } else {
- $prefix = "";
- }
- $nextToken = $tokenizer->previewToken();
- if ( $nextToken["type"] == "text" )
- {
- # Preview just looks at it. Now we have to fetch it.
- $nextToken = $tokenizer->nextToken();
- $txt .= $nextToken["text"];
- }
- $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
-
- # did the tag start with 3 [ ?
- if($threeopen) {
- # show the first as text
- $txt = "[".$txt;
- $threeopen=false;
- }
-
- }
- $tagIsOpen = (count( $tokenStack ) != 0);
- break;
- case "----":
- $txt = "\n<hr />\n";
- break;
- case "'''":
- # This and the three next ones handle quotes
- $txt = $this->handle3Quotes( $state, $token );
- break;
- case "''":
- $txt = $this->handle2Quotes( $state, $token );
- break;
- case "'''''":
- $txt = $this->handle5Quotes( $state, $token );
- break;
- case "":
- # empty token
- $txt="";
- break;
- case "RFC ":
- if ( $tagIsOpen ) {
- $txt = "RFC ";
- } else {
- $txt = $this->doMagicRFC( $tokenizer );
- }
- break;
- case "ISBN ":
- if ( $tagIsOpen ) {
- $txt = "ISBN ";
- } else {
- $txt = $this->doMagicISBN( $tokenizer );
- }
- break;
- default:
- # Call language specific Hook.
- $txt = $wgLang->processToken( $token, $tokenStack );
- if ( NULL == $txt ) {
- # An unkown token. Highlight.
- $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
- $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
- }
- break;
- }
- # If we're parsing the interior of a link, don't append the interior to $s,
- # but push it to the stack so it can be processed when a ]] token is found.
- if ( $tagIsOpen && $txt != "" ) {
- $token["type"] = "text";
- $token["text"] = $txt;
- array_push( $tokenStack, $token );
- } else {
- $s .= $txt;
- }
- } #end while
- if ( count( $tokenStack ) != 0 )
- {
- # still objects on stack. opened [[ tag without closing ]] tag.
- $txt = "";
- while ( $lastToken = array_pop( $tokenStack ) )
- {
- if ( $lastToken["type"] == "text" )
- {
- $txt = $lastToken["text"] . $txt;
- } else {
- $txt = $lastToken["type"] . $txt;
- }
- }
- $s .= $txt;
- }
- return $s;
- }
-
- /* private */ function handleInternalLink( $line, $prefix )
+ /* private */ function replaceInternalLinks( $s )
{
global $wgLang, $wgLinkCache;
global $wgNamespacesWithSubpages, $wgLanguageCode;
- static $fname = "Parser::handleInternalLink" ;
+ static $fname = "Parser::replaceInternalLink" ;
wfProfileIn( $fname );
wfProfileIn( "$fname-setup" );
static $tc = FALSE;
- if ( !$tc ) { $tc = Title::legalChars() . "#"; }
+ # the % is needed to support urlencoded titles as well
+ if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
$sk =& $this->mOptions->getSkin();
+ $a = explode( "[[", " " . $s );
+ $s = array_shift( $a );
+ $s = substr( $s, 1 );
+
# Match a link having the form [[namespace:link|alternate]]trail
static $e1 = FALSE;
if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
if ( !$image ) { $image = Namespace::getImage(); }
if ( !$special ) { $special = Namespace::getSpecial(); }
if ( !$media ) { $media = Namespace::getMedia(); }
- if ( !$category ) { $category = wfMsg ( "category" ) ; }
+ if ( !$category ) { $category = Namespace::getCategory(); }
$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
+ if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
+ $new_prefix = $m[2];
+ $s = $m[1];
+ } else {
+ $new_prefix="";
+ }
+
wfProfileOut( "$fname-setup" );
- $s = "";
- if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
- $text = $m[2];
- $trail = $m[3];
- } else { # Invalid form; output directly
- $s .= $prefix . "[[" . $line ;
- return $s;
- }
+ foreach ( $a as $line ) {
+ $prefix = $new_prefix;
- /* Valid link forms:
- Foobar -- normal
- :Foobar -- override special treatment of prefix (images, language links)
- /Foobar -- convert to CurrentPage/Foobar
- /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
- */
- $c = substr($m[1],0,1);
- $noforce = ($c != ":");
- if( $c == "/" ) { # subpage
- if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
- $m[1]=substr($m[1],1,strlen($m[1])-2);
- $noslash=$m[1];
- } else {
- $noslash=substr($m[1],1);
+ if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
+ $text = $m[2];
+ # fix up urlencoded title texts
+ if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
+ $trail = $m[3];
+ } else { # Invalid form; output directly
+ $s .= $prefix . "[[" . $line ;
+ wfProfileOut( $fname );
+ continue;
}
- if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
- $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
- if( "" == $text ) {
- $text= $m[1];
- } # this might be changed for ugliness reasons
+
+ /* Valid link forms:
+ Foobar -- normal
+ :Foobar -- override special treatment of prefix (images, language links)
+ /Foobar -- convert to CurrentPage/Foobar
+ /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
+ */
+ $c = substr($m[1],0,1);
+ $noforce = ($c != ":");
+ if( $c == "/" ) { # subpage
+ if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
+ $m[1]=substr($m[1],1,strlen($m[1])-2);
+ $noslash=$m[1];
+ } else {
+ $noslash=substr($m[1],1);
+ }
+ if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
+ $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
+ if( "" == $text ) {
+ $text= $m[1];
+ } # this might be changed for ugliness reasons
+ } else {
+ $link = $noslash; # no subpage allowed, use standard link
+ }
+ } elseif( $noforce ) { # no subpage
+ $link = $m[1];
} else {
- $link = $noslash; # no subpage allowed, use standard link
+ $link = substr( $m[1], 1 );
}
- } elseif( $noforce ) { # no subpage
- $link = $m[1];
- } else {
- $link = substr( $m[1], 1 );
- }
- if( "" == $text )
+ $wasblank = ( "" == $text );
+ if( $wasblank )
$text = $link;
- $nt = Title::newFromText( $link );
- if( !$nt ) {
- $s .= $prefix . "[[" . $line;
- return $s;
- }
- $ns = $nt->getNamespace();
- $iw = $nt->getInterWiki();
- if( $noforce ) {
- if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
- array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
- return (trim($s) == '')? '': $s;
+ $nt = Title::newFromText( $link );
+ if( !$nt ) {
+ $s .= $prefix . "[[" . $line;
+ wfProfileOut( $fname );
+ continue;
+ }
+ $ns = $nt->getNamespace();
+ $iw = $nt->getInterWiki();
+ if( $noforce ) {
+ if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
+ array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
+ # Interlanguage link: it was recorded in mLanguageLinks above and leaves
+ # no inline output, so only the text around it is kept, and only when
+ # that text is not blank. Appending $s to itself here would duplicate
+ # everything generated so far.
+ $prefixAndTrail = $prefix . $trail ;
+ # NOTE(review): wfProfileOut( $fname ) is also called once more after the
+ # loop, so the profiler in/out calls look unbalanced - confirm.
+ wfProfileOut( $fname );
+ $s .= (trim($prefixAndTrail) == '')? '': $prefixAndTrail;
+ continue;
+ }
+ if ( $ns == $image ) {
+ $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
+ $wgLinkCache->addImageLinkObj( $nt );
+ wfProfileOut( $fname );
+ continue;
+ }
+ if ( $ns == $category ) {
+ $t = $nt->getText() ;
+ $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
+
+ $wgLinkCache->suspend(); # Don't save in links/brokenlinks
+ $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
+ $wgLinkCache->resume();
+
+ $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
+ $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
+ $this->mOutput->mCategoryLinks[] = $t ;
+ $s .= $prefix . $trail ;
+ wfProfileOut( $fname );
+ continue;
+ }
+ }
+ if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
+ ( strpos( $link, "#" ) == FALSE ) ) {
+ # Self-links are handled specially; generally de-link and change to bold.
+ $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
+ wfProfileOut( $fname );
+ continue;
}
- if( $ns == $image ) {
- $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
- $wgLinkCache->addImageLinkObj( $nt );
- return $s;
- }
- }
- if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
- ( strpos( $link, "#" ) == FALSE ) ) {
- $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
- return $s;
- }
-
- # Category feature
- $catns = strtoupper ( $nt->getDBkey () ) ;
- $catns = explode ( ":" , $catns ) ;
- if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
- else $catns = "" ;
- if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
- $t = explode ( ":" , $nt->getText() ) ;
- array_shift ( $t ) ;
- $t = implode ( ":" , $t ) ;
- $t = $wgLang->ucFirst ( $t ) ;
- $nnt = Title::newFromText ( $category.":".$t ) ;
- $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
- $this->mOutput->mCategoryLinks[] = $t ;
- $s .= $prefix . $trail ;
- return $s ;
- }
-
- if( $ns == $media ) {
- $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
- $wgLinkCache->addImageLinkObj( $nt );
- return $s;
- } elseif( $ns == $special ) {
- $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
- return $s;
- }
- $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
+ if( $ns == $media ) {
+ $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
+ $wgLinkCache->addImageLinkObj( $nt );
+ wfProfileOut( $fname );
+ continue;
+ } elseif( $ns == $special ) {
+ $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
+ wfProfileOut( $fname );
+ continue;
+ }
+ $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
+ }
wfProfileOut( $fname );
return $s;
}
return $text."\n";
}
- /* private */ function doBlockLevels( $text, $linestart )
- {
+ /* private */ function doBlockLevels( $text, $linestart ) {
$fname = "Parser::doBlockLevels";
wfProfileIn( $fname );
+
# Parsing through the text line by line. The main thing
# happening here is handling of block-level elements p, pre,
# and making lists from lines starting with * # : etc.
#
- $a = explode( "\n", $text );
+ $textLines = explode( "\n", $text );
- $lastPref = $text = $lastLine = '';
+ $lastPrefix = $output = $lastLine = '';
$this->mDTopen = $inBlockElem = false;
- $npl = 0;
- $pstack = false;
-
- if ( ! $linestart ) { $text .= array_shift( $a ); }
- foreach ( $a as $t ) {
- $oLine = $t;
- $preCloseMatch = preg_match("/<\\/pre/i", $t );
- $preOpenMatch = preg_match("/<pre/i", $t );
+ $prefixLength = 0;
+ $paragraphStack = false;
+
+ if ( !$linestart ) {
+ $output .= array_shift( $textLines );
+ }
+ foreach ( $textLines as $oLine ) {
+ $lastPrefixLength = strlen( $lastPrefix );
+ $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
+ $preOpenMatch = preg_match("/<pre/i", $oLine );
if (!$this->mInPre) {
- $this->mInPre = ($preOpenMatch)? true : false;
+ $this->mInPre = !empty($preOpenMatch);
}
if ( !$this->mInPre ) {
- $opl = strlen( $lastPref );
- $npl = strspn( $t, "*#:;" );
- $pref = substr( $t, 0, $npl );
+ # Multiple prefixes may abut each other for nested lists.
+ $prefixLength = strspn( $oLine, "*#:;" );
+ $pref = substr( $oLine, 0, $prefixLength );
+
+ # eh?
$pref2 = str_replace( ";", ":", $pref );
- $t = substr( $t, $npl );
- // list generation
- if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
- $text .= $this->nextItem( substr( $pref, -1 ) );
- if ( $pstack ) { $pstack = false; }
-
- if ( ";" == substr( $pref, -1 ) ) {
- $cpos = strpos( $t, ":" );
- if ( false !== $cpos ) {
- $term = substr( $t, 0, $cpos );
- $text .= $term . $this->nextItem( ":" );
- $t = substr( $t, $cpos + 1 );
- }
- }
- } else if (0 != $npl || 0 != $opl) {
- $cpl = $this->getCommon( $pref, $lastPref );
- if ( $pstack ) { $pstack = false; }
+ $t = substr( $oLine, $prefixLength );
+ } else {
+ # Don't interpret any other prefixes in preformatted text
+ $prefixLength = 0;
+ $pref = $pref2 = '';
+ $t = $oLine;
+ }
- while ( $cpl < $opl ) {
- $text .= $this->closeList( $lastPref{$opl-1} );
- --$opl;
+ # List generation
+ if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
+ # Same as the last item, so no need to deal with nesting or opening stuff
+ $output .= $this->nextItem( substr( $pref, -1 ) );
+ $paragraphStack = false;
+
+ if ( ";" == substr( $pref, -1 ) ) {
+ # The one nasty exception: definition lists work like this:
+ # ; title : definition text
+ # So we check for : in the remainder text to split up the
+ # title and definition, without b0rking links.
+ # FIXME: This is not foolproof. Something better in Tokenizer might help.
+ if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
+ $term = $match[1];
+ $output .= $term . $this->nextItem( ":" );
+ $t = $match[2];
}
- if ( $npl <= $cpl && $cpl > 0 ) {
- $text .= $this->nextItem( $pref{$cpl-1} );
- }
- while ( $npl > $cpl ) {
- $char = substr( $pref, $cpl, 1 );
- $text .= $this->openList( $char );
-
- if ( ";" == $char ) {
- $cpos = strpos( $t, ":" );
- if ( ! ( false === $cpos ) ) {
- $term = substr( $t, 0, $cpos );
- $text .= $term . $this->nextItem( ":" );
- $t = substr( $t, $cpos + 1 );
- }
+ }
+ } elseif( $prefixLength || $lastPrefixLength ) {
+ # Either open or close a level...
+ $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
+ $paragraphStack = false;
+
+ while( $commonPrefixLength < $lastPrefixLength ) {
+ $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
+ --$lastPrefixLength;
+ }
+ if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
+ $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
+ }
+ while ( $prefixLength > $commonPrefixLength ) {
+ $char = substr( $pref, $commonPrefixLength, 1 );
+ $output .= $this->openList( $char );
+
+ if ( ";" == $char ) {
+ # FIXME: This is dupe of code above
+ if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
+ $term = $match[1];
+ $output .= $term . $this->nextItem( ":" );
+ $t = $match[2];
}
- ++$cpl;
}
- $lastPref = $pref2;
+ ++$commonPrefixLength;
}
+ $lastPrefix = $pref2;
}
- if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
+ if( 0 == $prefixLength ) {
+ # No prefix (not in list)--go to paragraph mode
$uniq_prefix = UNIQ_PREFIX;
// XXX: use a stack for nestable elements like span, table and div
- $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
+ $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
$closematch = preg_match(
"/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
- "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
+ "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
if ( $openmatch or $closematch ) {
- if ( $pstack ) { $pstack = false; }
- $text .= $this->closeParagraph();
+ $paragraphStack = false;
+ $output .= $this->closeParagraph();
if($preOpenMatch and !$preCloseMatch) {
$this->mInPre = true;
}
} else {
$inBlockElem = true;
}
- } else if ( !$inBlockElem ) {
- if ( " " == $t{0} ) {
+ } else if ( !$inBlockElem && !$this->mInPre ) {
+ if ( " " == $t{0} and trim($t) != '' ) {
// pre
if ($this->mLastSection != 'pre') {
- $pstack = false;
- $text .= $this->closeParagraph().'<pre>';
+ $paragraphStack = false;
+ $output .= $this->closeParagraph().'<pre>';
$this->mLastSection = 'pre';
}
} else {
// paragraph
if ( '' == trim($t) ) {
- if ( $pstack ) {
- $text .= $pstack.'<br/>';
- $pstack = false;
+ if ( $paragraphStack ) {
+ $output .= $paragraphStack.'<br/>';
+ $paragraphStack = false;
$this->mLastSection = 'p';
} else {
if ($this->mLastSection != 'p' ) {
- $text .= $this->closeParagraph();
+ $output .= $this->closeParagraph();
$this->mLastSection = '';
- $pstack = "<p>";
+ $paragraphStack = "<p>";
} else {
- $pstack = '</p><p>';
+ $paragraphStack = '</p><p>';
}
}
} else {
- if ( $pstack ) {
- $text .= $pstack;
- $pstack = false;
+ if ( $paragraphStack ) {
+ $output .= $paragraphStack;
+ $paragraphStack = false;
$this->mLastSection = 'p';
} else if ($this->mLastSection != 'p') {
- $text .= $this->closeParagraph().'<p>';
+ $output .= $this->closeParagraph().'<p>';
$this->mLastSection = 'p';
}
}
}
}
}
- if ($pstack === false) {
- $text .= $t."\n";
+ if ($paragraphStack === false) {
+ $output .= $t."\n";
}
}
- while ( $npl ) {
- $text .= $this->closeList( $pref2{$npl-1} );
- --$npl;
+ while ( $prefixLength ) {
+ $output .= $this->closeList( $pref2{$prefixLength-1} );
+ --$prefixLength;
}
if ( "" != $this->mLastSection ) {
- $text .= "</" . $this->mLastSection . ">";
+ $output .= "</" . $this->mLastSection . ">";
$this->mLastSection = "";
}
wfProfileOut( $fname );
- return $text;
+ return $output;
}
function getVariableValue( $index ) {
return $wgLang->getMonthNameGen( date("n") );
case MAG_CURRENTDAY:
return date("j");
+ case MAG_PAGENAME:
+ return $this->mTitle->getText();
+ case MAG_NAMESPACE:
+ # return Namespace::getCanonicalName($this->mTitle->getNamespace());
+ return $wgLang->getNsText($this->mTitle->getNamespace()); // Patch by Dori
case MAG_CURRENTDAYNAME:
return $wgLang->getWeekdayName( date("w")+1 );
case MAG_CURRENTYEAR:
$this->initialiseVariables();
}
$titleChars = Title::legalChars();
- $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
# This function is called recursively. To keep track of arguments we need a stack:
array_push( $this->mArgStack, $args );
# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
$GLOBALS['wgCurParser'] =& $this;
+
+ # Argument substitution
+ if ( $this->mOutputType == OT_HTML ) {
+ $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
+ }
+ # Double brace substitution
+ $regex = "/(\\n?){{([$titleChars]*)(\\|.*?|)}}/s";
$text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
array_pop( $this->mArgStack );
+ wfProfileOut( $fname );
return $text;
}
$fname = "Parser::braceSubstitution";
$found = false;
$nowiki = false;
+ $noparse = false;
+
$title = NULL;
# $newline is an optional newline character before the braces
$args = array();
}
$argc = count( $args );
+
+ # {{{}}}
+ if ( strpos( $matches[0], "{{{" ) !== false ) {
+ $text = $matches[0];
+ $found = true;
+ $noparse = true;
+ }
# SUBST
- $mwSubst =& MagicWord::get( MAG_SUBST );
- if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
- if ( $this->mOutputType != OT_WIKI ) {
- # Invalid SUBST not replaced at PST time
- # Return without further processing
+ if ( !$found ) {
+ $mwSubst =& MagicWord::get( MAG_SUBST );
+ if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
+ if ( $this->mOutputType != OT_WIKI ) {
+ # Invalid SUBST not replaced at PST time
+ # Return without further processing
+ $text = $matches[0];
+ $found = true;
+ $noparse= true;
+ }
+ } elseif ( $this->mOutputType == OT_WIKI ) {
+ # SUBST not found in PST pass, do nothing
$text = $matches[0];
$found = true;
}
- } elseif ( $this->mOutputType == OT_WIKI ) {
- # SUBST not found in PST pass, do nothing
- $text = $matches[0];
- $found = true;
}
# MSG, MSGNW and INT
$found = true;
$this->mOutput->mContainsOldMagic = true;
}
-
+/*
# Arguments input from the caller
$inputArgs = end( $this->mArgStack );
if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
$text = $inputArgs[$part1];
$found = true;
}
-
+*/
# Load from database
if ( !$found ) {
$title = Title::newFromText( $part1, NS_TEMPLATE );
# Only for HTML output
if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
$text = wfEscapeWikiText( $text );
- } elseif ( $this->mOutputType == OT_HTML && $found ) {
+ } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
# Clean up argument array
$assocArgs = array();
$index = 1;
}
# Run full parser on the included text
- $text = $this->strip( $text, $this->mStripState );
- $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
-
- # Add the result to the strip state for re-inclusion after
- # the rest of the processing
- $text = $this->insertStripItem( $text, $this->mStripState );
+ $text = $this->stripParse( $text, (bool)$newline, $assocArgs );
# Resume the link cache and register the inclusion as a link
if ( !is_null( $title ) ) {
if ( !$found ) {
return $matches[0];
} else {
- return $newline . $text;
+ return $text;
+ }
+ }
+
+    # Triple brace replacement -- used for template arguments
+    #
+    # Callback for the {{{name}}} pattern: $matches[0] is the whole match,
+    # $matches[1] the optional newline captured in front of the braces and
+    # $matches[2] the argument name. The name is looked up in the argument set
+    # on top of $this->mArgStack; a known argument is replaced by its value run
+    # through stripParse(), an unknown one is returned exactly as written.
+    function argSubstitution( $matches )
+    {
+        $callerArgs = end( $this->mArgStack );
+        $argName = trim( $matches[2] );
+
+        if ( !array_key_exists( $argName, $callerArgs ) ) {
+            # Not supplied by the caller: leave the braces in the text
+            return $matches[0];
         }
+
+        $hadNewline = (bool)$matches[1];
+        return $this->stripParse( $callerArgs[$argName], $hadNewline, array() );
     }
# Returns true if the function is allowed to include this entity
# Cleans up HTML, removes dangerous tags and attributes
/* private */ function removeHTMLtags( $text )
{
+ global $wgUseTidy, $wgUserHtml;
$fname = "Parser::removeHTMLtags";
wfProfileIn( $fname );
- $htmlpairs = array( # Tags that must be closed
- "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
- "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
- "strike", "strong", "tt", "var", "div", "center",
- "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
- "ruby", "rt" , "rb" , "rp", "p"
- );
- $htmlsingle = array(
- "br", "hr", "li", "dt", "dd"
- );
- $htmlnest = array( # Tags that can be nested--??
- "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
- "dl", "font", "big", "small", "sub", "sup"
- );
- $tabletags = array( # Can only appear inside table
- "td", "th", "tr"
- );
+
+ if( $wgUserHtml ) {
+ $htmlpairs = array( # Tags that must be closed
+ "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
+ "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
+ "strike", "strong", "tt", "var", "div", "center",
+ "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
+ "ruby", "rt" , "rb" , "rp", "p"
+ );
+ $htmlsingle = array(
+ "br", "hr", "li", "dt", "dd"
+ );
+ $htmlnest = array( # Tags that can be nested--??
+ "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
+ "dl", "font", "big", "small", "sub", "sup"
+ );
+ $tabletags = array( # Can only appear inside table
+ "td", "th", "tr"
+ );
+ } else {
+ $htmlpairs = array();
+ $htmlsingle = array();
+ $htmlnest = array();
+ $tabletags = array();
+ }
$htmlsingle = array_merge( $tabletags, $htmlsingle );
$htmlelements = array_merge( $htmlsingle, $htmlpairs );
$htmlattrs = $this->getHTMLattrs () ;
# Remove HTML comments
- $text = preg_replace( "/<!--.*-->/sU", "", $text );
+ $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );
$bits = explode( "<", $text );
$text = array_shift( $bits );
- $tagstack = array(); $tablestack = array();
-
- foreach ( $bits as $x ) {
- $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
- preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
- $x, $regs );
- list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
- error_reporting( $prev );
-
- $badtag = 0 ;
- if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
- # Check our stack
- if ( $slash ) {
- # Closing a tag...
- if ( ! in_array( $t, $htmlsingle ) &&
- ( $ot = array_pop( $tagstack ) ) != $t ) {
- array_push( $tagstack, $ot );
- $badtag = 1;
+ if(!$wgUseTidy) {
+ $tagstack = array(); $tablestack = array();
+ foreach ( $bits as $x ) {
+ $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
+ preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
+ $x, $regs );
+ list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
+ error_reporting( $prev );
+
+ $badtag = 0 ;
+ if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+ # Check our stack
+ if ( $slash ) {
+ # Closing a tag...
+ if ( ! in_array( $t, $htmlsingle ) &&
+ ( count($tagstack) && $ot = array_pop( $tagstack ) ) != $t ) {
+ if(!empty($ot)) array_push( $tagstack, $ot );
+ $badtag = 1;
+ } else {
+ if ( $t == "table" ) {
+ $tagstack = array_pop( $tablestack );
+ }
+ $newparams = "";
+ }
} else {
- if ( $t == "table" ) {
- $tagstack = array_pop( $tablestack );
+ # Keep track for later
+ if ( in_array( $t, $tabletags ) &&
+ ! in_array( "table", $tagstack ) ) {
+ $badtag = 1;
+ } else if ( in_array( $t, $tagstack ) &&
+ ! in_array ( $t , $htmlnest ) ) {
+ $badtag = 1 ;
+ } else if ( ! in_array( $t, $htmlsingle ) ) {
+ if ( $t == "table" ) {
+ array_push( $tablestack, $tagstack );
+ $tagstack = array();
+ }
+ array_push( $tagstack, $t );
}
- $newparams = "";
+ # Strip non-approved attributes from the tag
+ $newparams = $this->fixTagAttributes($params);
+
}
- } else {
- # Keep track for later
- if ( in_array( $t, $tabletags ) &&
- ! in_array( "table", $tagstack ) ) {
- $badtag = 1;
- } else if ( in_array( $t, $tagstack ) &&
- ! in_array ( $t , $htmlnest ) ) {
- $badtag = 1 ;
- } else if ( ! in_array( $t, $htmlsingle ) ) {
- if ( $t == "table" ) {
- array_push( $tablestack, $tagstack );
- $tagstack = array();
- }
- array_push( $tagstack, $t );
+ if ( ! $badtag ) {
+ $rest = str_replace( ">", ">", $rest );
+ $text .= "<$slash$t $newparams$brace$rest";
+ continue;
}
- # Strip non-approved attributes from the tag
- $newparams = $this->fixTagAttributes($params);
-
}
- if ( ! $badtag ) {
+ $text .= "<" . str_replace( ">", ">", $x);
+ }
+ # Close off any remaining tags
+ while ( $t = array_pop( $tagstack ) ) {
+ $text .= "</$t>\n";
+ if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
+ }
+ } else {
+ # this might be possible using tidy itself
+ foreach ( $bits as $x ) {
+ preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
+ $x, $regs );
+ @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
+ if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+ $newparams = $this->fixTagAttributes($params);
$rest = str_replace( ">", ">", $rest );
$text .= "<$slash$t $newparams$brace$rest";
- continue;
+ } else {
+ $text .= "<" . str_replace( ">", ">", $x);
}
- }
- $text .= "<" . str_replace( ">", ">", $x);
- }
- # Close off any remaining tags
- while ( $t = array_pop( $tagstack ) ) {
- $text .= "</$t>\n";
- if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
+ }
}
wfProfileOut( $fname );
return $text;
}
+
/*
*
* This function accomplishes several tasks:
*
*/
- /* private */ function formatHeadings( $text )
+ /* private */ function formatHeadings( $text, $isMain=true )
{
+ global $wgInputEncoding;
+
$doNumberHeadings = $this->mOptions->getNumberHeadings();
$doShowToc = $this->mOptions->getShowToc();
if( !$this->mTitle->userCanEdit() ) {
# strip out HTML
$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
$tocline = trim( $canonized_headline );
- $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
+ $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
+ # strip out urlencoded (inserted for french spaces, e.g. first space in 'something : something')
+ $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
$refer[$headlineCount] = $canonized_headline;
# count how many in assoc. array so we can track dupes in anchors
# $full .= $sk->editSectionLink(0);
}
$full .= $block;
- if( $doShowToc && !$i) {
+ if( $doShowToc && !$i && $isMain) {
# Top anchor now in skin
$full = $full.$toc;
}
return $full;
}
- /* private */ function doMagicISBN( &$tokenizer )
+ /* private */ function magicISBN( $text )
{
global $wgLang;
- # Check whether next token is a text token
- # If yes, fetch it and convert the text into a
- # Special::BookSources link
- $token = $tokenizer->previewToken();
- while ( $token["type"] == "" )
- {
- $tokenizer->nextToken();
- $token = $tokenizer->previewToken();
- }
- if ( $token["type"] == "text" )
- {
- $token = $tokenizer->nextToken();
- $x = $token["text"];
- $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ $a = split( "ISBN ", " $text" );
+ if ( count ( $a ) < 2 ) return $text;
+ $text = substr( array_shift( $a ), 1);
+ $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ foreach ( $a as $x ) {
$isbn = $blank = "" ;
while ( " " == $x{0} ) {
$blank .= " ";
$num = str_replace( " ", "", $num );
if ( "" == $num ) {
- $text = "ISBN $blank$x";
+ $text .= "ISBN $blank$x";
} else {
$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
- $text = "<a href=\"" .
+ $text .= "<a href=\"" .
$titleObj->escapeLocalUrl( "isbn={$num}" ) .
"\" class=\"internal\">ISBN $isbn</a>";
$text .= $x;
}
- } else {
- $text = "ISBN ";
}
return $text;
}
- /* private */ function doMagicRFC( &$tokenizer )
+ /* private */ function magicRFC( $text )
{
global $wgLang;
- # Check whether next token is a text token
- # If yes, fetch it and convert the text into a
- # link to an RFC source
- $token = $tokenizer->previewToken();
- while ( $token["type"] == "" )
- {
- $tokenizer->nextToken();
- $token = $tokenizer->previewToken();
- }
- if ( $token["type"] == "text" )
- {
- $token = $tokenizer->nextToken();
- $x = $token["text"];
- $valid = "0123456789";
+ $a = split( "RFC ", " $text" );
+ if ( count ( $a ) < 2 ) return $text;
+ $text = substr( array_shift( $a ), 1);
+ $valid = "0123456789";
+ foreach ( $a as $x ) {
$rfc = $blank = "" ;
while ( " " == $x{0} ) {
$blank .= " ";
$url = str_replace( "$1", $rfc, $url);
$sk =& $this->mOptions->getSkin();
$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
- $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
+ $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
- } else {
- $text = "RFC ";
}
return $text;
}
class ParserOutput
{
var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
+ var $mTouched; # Used for caching
function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
$containsOldMagic = false )
$this->mLanguageLinks = $languageLinks;
$this->mCategoryLinks = $categoryLinks;
$this->mContainsOldMagic = $containsOldMagic;
+ $this->mTouched = "";
}
function getText() { return $this->mText; }
function getLanguageLinks() { return $this->mLanguageLinks; }
function getCategoryLinks() { return $this->mCategoryLinks; }
+ function getTouched() { return $this->mTouched; }
function containsOldMagic() { return $this->mContainsOldMagic; }
function setText( $text ) { return wfSetVar( $this->mText, $text ); }
function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
+ function setTouched( $t ) { return wfSetVar( $this->mTouched, $t ); }
function merge( $other ) {
$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
+ # Factory: creates a new ParserOptions object, fills it in from $user via
+ # initialiseFromUser() and returns it.
 /* static */ function newFromUser( &$user )
 {
 $popts = new ParserOptions;
- $popts->initialiseFromUser( &$user );
+ # $user is already a by-reference parameter of this function, so it is
+ # handed on as-is instead of using call-time pass-by-reference.
+ $popts->initialiseFromUser( $user );
 return $popts;
 }
+# Callback for the double-brace pattern. $matches[0] is the whole match and
+# $matches[2] the part between the braces up to the first "|". Inner or chained
+# brace items found in $matches[2] are expanded here through
+# wfInternalBraceSubstitution(); anything else is handed on to
+# $wgCurParser->braceSubstitution().
 function wfBraceSubstitution( $matches )
 {
     global $wgCurParser;
+    $titleChars = Title::legalChars();
+
+    # not really nested stuff, just multiple includes separated by titlechars
+    if ( preg_match( "/^([^}{]*)}}([^}{]*{{)(.*)$/s", $matches[2], $parts ) ) {
+        # Expand the first item, then keep expanding the next "{{" item in the
+        # remaining string until the pattern no longer matches.
+        $expanded = wfInternalBraceSubstitution( $parts[1] ) . $parts[2] . $parts[3];
+        while ( preg_match( "/^([^}{]*){{([$titleChars]*?)(}}[^}{]*{{.*)?$/s", $expanded, $parts ) ) {
+            $replacement = wfInternalBraceSubstitution( $parts[2] );
+            if ( empty( $parts[3] ) ) {
+                $tail = '';
+            } else {
+                # Drop the "}}" that closed the item just expanded
+                $tail = preg_replace( "/^}}/", '', $parts[3] );
+            }
+            $expanded = $parts[1] . $replacement . $tail;
+        }
+        return $expanded;
+    }
+
+    # Double brace substitution, expand bar in {{foo{{bar}}}}
+    # At most 30 inner items are expanded per match.
+    for ( $pass = 0;
+          preg_match( "/{{([$titleChars]*?)}}/", $matches[2], $inner ) && $pass < 30;
+          $pass++ ) {
+        $replacement = wfInternalBraceSubstitution( $inner[1] );
+        # Patch both the full match and the title part
+        foreach ( array( 0, 2 ) as $slot ) {
+            $matches[$slot] = str_replace( $inner[0], $replacement, $matches[$slot] );
+        }
+    }
+
     return $wgCurParser->braceSubstitution( $matches );
 }
+# Plain-function trampoline used as the preg_replace_callback() target for
+# {{{argument}}} matches: forwards $matches to argSubstitution() on the parser
+# currently stored in the $wgCurParser global and returns its result.
+function wfArgSubstitution( $matches )
+{
+    return $GLOBALS['wgCurParser']->argSubstitution( $matches );
+}
+
+# XXX: i don't think this is the most elegant way to do it..
+#
+# Expands one brace item that wfBraceSubstitution() found inside, or chained
+# after, another one. $part1 is the text between the braces; arguments are not
+# handled here. Checks are made in this order: SUBST, MSG/MSGNW/INT, NS,
+# LOCALURL/LOCALURLE, internal variables, then the template page itself.
+#
+# Returns the replacement text, or '' when nothing could be substituted.
+#
+# NOTE(review): the '' fallback replaces reads of "$matches[0]". No $matches
+# exists in this function's scope, so those reads always produced null (plus
+# an undefined-variable warning). Handing back the original "{{...}}" text is
+# presumably what was meant, but wfBraceSubstitution() re-scans the result for
+# "{{" and would keep matching the same item; changing the fallback needs a
+# matching change in that caller.
+function wfInternalBraceSubstitution( $part1 ) {
+    global $wgLang, $wgCurParser;
+
+    # Result used whenever the item is left unexpanded
+    $unexpanded = '';
+
+    # SUBST
+    $mwSubst =& MagicWord::get( MAG_SUBST );
+    if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
+        if ( $wgCurParser->mOutputType != OT_WIKI ) {
+            # Invalid SUBST not replaced at PST time
+            # Return without further processing
+            return $unexpanded;
+        }
+    } elseif ( $wgCurParser->mOutputType == OT_WIKI ) {
+        # SUBST not found in PST pass, do nothing
+        return $unexpanded;
+    }
+
+    # MSG, MSGNW and INT
+    # The MSGNW: and obsolete MSG: prefixes are only stripped from $part1 here
+    $mwMsgnw =& MagicWord::get( MAG_MSGNW );
+    if ( !$mwMsgnw->matchStartAndRemove( $part1 ) ) {
+        # Remove obsolete MSG:
+        $mwMsg =& MagicWord::get( MAG_MSG );
+        $mwMsg->matchStartAndRemove( $part1 );
+    }
+
+    # Check if it is an internal message
+    $mwInt =& MagicWord::get( MAG_INT );
+    if ( $mwInt->matchStartAndRemove( $part1 ) ) {
+        if ( $wgCurParser->incrementIncludeCount( "int:$part1" ) ) {
+            return wfMsgReal( $part1, array(), true );
+        }
+    }
+
+    # NS
+    # Check for NS: (namespace expansion), by number or by canonical name
+    $mwNs = MagicWord::get( MAG_NS );
+    if ( $mwNs->matchStartAndRemove( $part1 ) ) {
+        if ( intval( $part1 ) ) {
+            return $wgLang->getNsText( intval( $part1 ) );
+        }
+        $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
+        if ( !is_null( $index ) ) {
+            return $wgLang->getNsText( $index );
+        }
+    }
+
+    # LOCALURL and LOCALURLE
+    $mwLocal = MagicWord::get( MAG_LOCALURL );
+    $mwLocalE = MagicWord::get( MAG_LOCALURLE );
+
+    if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
+        $func = 'getLocalURL';
+    } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
+        $func = 'escapeLocalURL';
+    } else {
+        $func = '';
+    }
+
+    if ( $func !== '' ) {
+        $title = Title::newFromText( $part1 );
+        if ( !is_null( $title ) ) {
+            return $title->$func();
+        }
+    }
+
+    # Internal variables
+    if ( array_key_exists( $part1, $wgCurParser->mVariables ) ) {
+        $wgCurParser->mOutput->mContainsOldMagic = true;
+        return $wgCurParser->mVariables[$part1];
+    }
+
+    # Load from database
+    $title = Title::newFromText( $part1, NS_TEMPLATE );
+    if ( !is_null( $title ) && !$title->isExternal() ) {
+        # Check for excessive inclusion
+        $dbk = $title->getPrefixedDBkey();
+        if ( $wgCurParser->incrementIncludeCount( $dbk ) ) {
+            $article = new Article( $title );
+            $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
+            if ( $articleContent !== false ) {
+                return $articleContent;
+            }
+        }
+
+        # If the title is valid but undisplayable, make a link to it
+        if ( $wgCurParser->mOutputType == OT_HTML ) {
+            return "[[" . $title->getPrefixedText() . "]]";
+        }
+    }
+
+    return $unexpanded;
+}
?>