include_once('wikihiero.php');
}
-# PHP Parser
-#
+# PHP Parser
+#
# Processes wiki markup
#
-# There are two main entry points into the Parser class: parse() and preSaveTransform().
+# There are two main entry points into the Parser class: parse() and preSaveTransform().
# The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
#
-# Globals used:
+# Globals used:
# objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
#
# NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
#
# settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
-# $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
+# $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
# $wgLocaltimezone
#
# * only within ParserOptions
# Variable substitution O(N^2) attack
#-----------------------------------------
# Without countermeasures, it would be possible to attack the parser by saving a page
-# filled with a large number of inclusions of large pages. The size of the generated
-# page would be proportional to the square of the input size. Hence, we limit the number
+# filled with a large number of inclusions of large pages. The size of the generated
+# page would be proportional to the square of the input size. Hence, we limit the number
# of inclusions of any given page, thus bringing any attack back to O(N).
#
-define( "MAX_INCLUDE_REPEAT", 5 );
-# Recursion depth of variable/inclusion evaluation
-define( "MAX_INCLUDE_PASSES", 3 );
+define( "MAX_INCLUDE_REPEAT", 5 );
# Allowed values for $mOutputType
define( "OT_HTML", 1 );
class Parser
{
# Cleared with clearState():
- var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array();
- var $mVariables, $mIncludeCount;
+ var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
+ var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
# Temporary:
var $mOptions, $mTitle, $mOutputType;
$this->mVariables = false;
$this->mIncludeCount = array();
$this->mStripState = array();
+ $this->mArgStack = array();
}
-
+
# First pass--just handle <nowiki> sections, pass the rest off
- # to doWikiPass2() which does all the real work.
+ # to internalParse() which does all the real work.
#
# Returns a ParserOutput
#
if ( $clearState ) {
$this->clearState();
}
-
+
$this->mOptions = $options;
$this->mTitle =& $title;
$this->mOutputType = OT_HTML;
-
+
$stripState = NULL;
$text = $this->strip( $text, $this->mStripState );
- $text = $this->doWikiPass2( $text, $linestart );
+ $text = $this->internalParse( $text, $linestart );
$text = $this->unstrip( $text, $this->mStripState );
-
+ # Clean up special characters, only run once, next-to-last before doBlockLevels
+ $fixtags = array(
+ "/<hr *>/i" => '<hr/>',
+ "/<br *>/i" => '<br/>',
+ "/<center *>/i"=>'<div class="center">',
+ "/<\\/center *>/i" => '</div>',
+ # Clean up spare ampersands; note that we probably ought to be
+ # more careful about named entities.
+ '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&'
+ );
+ $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+
+ # only once and last
+ $text = $this->doBlockLevels( $text, $linestart );
+
$this->mOutput->setText( $text );
wfProfileOut( $fname );
return $this->mOutput;
return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
}
- # Replaces all occurences of <$tag>content</$tag> in the text
+ # Replaces all occurrences of <$tag>content</$tag> in the text
# with a random marker and returns the new text. The output parameter
# $content will be an associative array filled with data of the form
# $unique_marker => content.
while ( "" != $text ) {
$p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
$stripped .= $p[0];
- if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
- $text = "";
+ if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
+ $text = "";
} else {
$q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
$marker = $rnd . sprintf("%08X", $n++);
}
}
return $stripped;
- }
+ }
# Strips <nowiki>, <pre> and <math>
# Returns the text, and fills an array with data needed in unstrip()
function strip( $text, &$state )
{
$render = ($this->mOutputType == OT_HTML);
- if ( $state ) {
- $nowiki_content = $state['nowiki'];
- $hiero_content = $state['hiero'];
- $math_content = $state['math'];
- $pre_content = $state['pre'];
- $item_content = $state['item'];
- } else {
- $nowiki_content = array();
- $hiero_content = array();
- $math_content = array();
- $pre_content = array();
- $item_content = array();
- }
+ $nowiki_content = array();
+ $hiero_content = array();
+ $math_content = array();
+ $pre_content = array();
+ $item_content = array();
# Replace any instances of the placeholders
$uniq_prefix = UNIQ_PREFIX;
- $text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
+ #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
foreach( $nowiki_content as $marker => $content ){
$pre_content[$marker] = "<pre>$content</pre>";
}
}
-
- $state = array(
- 'nowiki' => $nowiki_content,
- 'hiero' => $hiero_content,
- 'math' => $math_content,
- 'pre' => $pre_content,
- 'item' => $item_content
- );
+
+ # Merge state with the pre-existing state, if there is one
+ if ( $state ) {
+ $state['nowiki'] = $state['nowiki'] + $nowiki_content;
+ $state['hiero'] = $state['hiero'] + $hiero_content;
+ $state['math'] = $state['math'] + $math_content;
+ $state['pre'] = $state['pre'] + $pre_content;
+ } else {
+ $state = array(
+ 'nowiki' => $nowiki_content,
+ 'hiero' => $hiero_content,
+ 'math' => $math_content,
+ 'pre' => $pre_content,
+ 'item' => $item_content
+ );
+ }
return $text;
}
function unstrip( $text, &$state )
{
# Must expand in reverse order, otherwise nested tags will be corrupted
- /*
- $dicts = array( 'item', 'pre', 'math', 'hiero', 'nowiki' );
- foreach ( $dicts as $dictName ) {
- $content_dict = $state[$dictName];
- foreach( $content_dict as $marker => $content ){
- $text = str_replace( $marker, $content, $text );
- }
- }*/
-
$contentDict = end( $state );
- for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
+ for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
$text = str_replace( key( $contentDict ), $content, $text );
}
return $text;
}
-
+
# Add an item to the strip state
# Returns the unique tag which must be inserted into the stripped text
# The tag will be replaced with the original text in unstrip()
{
$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
if ( !$state ) {
- $state = array(
+ $state = array(
'nowiki' => array(),
'hiero' => array(),
'math' => array(),
$state['item'][$rnd] = $text;
return $rnd;
}
-
+
+ # This method generates the list of subcategories and pages for a category
function categoryMagic ()
{
global $wgLang , $wgUser ;
- if ( !$this->mOptions->getUseCategoryMagic() ) return ;
- $id = $this->mTitle->getArticleID() ;
- $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
- $ti = $this->mTitle->getText() ;
- $ti = explode ( ":" , $ti , 2 ) ;
- if ( $cat != $ti[0] ) return "" ;
- $r = '<br style="clear:both;"/>\n';
+ if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
- $articles = array() ;
- $parents = array () ;
- $children = array() ;
+ $cns = Namespace::getCategory() ;
+ if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
+
+ $r = "<br style=\"clear:both;\"/>\n";
-# $sk =& $this->mGetSkin();
$sk =& $wgUser->getSkin() ;
+ $articles = array() ;
+ $children = array() ;
$data = array () ;
- $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
- $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
+ $id = $this->mTitle->getArticleID() ;
- $res = wfQuery ( $sql1, DB_READ ) ;
+ # For existing categories
+ $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
+ $res = wfQuery ( $sql, DB_READ ) ;
while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
- $res = wfQuery ( $sql2, DB_READ ) ;
+ # For non-existing categories
+ $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
+ $res = wfQuery ( $sql, DB_READ ) ;
while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
-
+ # For all pages that link to this category
foreach ( $data AS $x )
{
$t = $wgLang->getNsText ( $x->cur_namespace ) ;
if ( $t != "" ) $t .= ":" ;
$t .= $x->cur_title ;
- $y = explode ( ":" , $t , 2 ) ;
- if ( count ( $y ) == 2 && $y[0] == $cat ) {
- array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
+ if ( $x->cur_namespace == $cns ) {
+ array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
} else {
- array_push ( $articles , $sk->makeLink ( $t ) ) ;
+ array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
}
}
wfFreeResult ( $res ) ;
- # Children
+ # Showing subcategories
if ( count ( $children ) > 0 )
{
asort ( $children ) ;
$r .= implode ( ", " , $children ) ;
}
- # Articles
+ # Showing pages in this category
if ( count ( $articles ) > 0 )
{
+ $ti = $this->mTitle->getText() ;
asort ( $articles ) ;
- $h = wfMsg( "category_header", $ti[1] );
+ $h = wfMsg( "category_header", $ti );
$r .= "<h2>{$h}</h2>\n" ;
$r .= implode ( ", " , $articles ) ;
}
{
if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
$htmlattrs = $this->getHTMLattrs() ;
-
+
# Strip non-approved attributes from the tag
$t = preg_replace(
"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
# Strip javascript "expression" from stylesheets. Brute force approach:
# If anything offensive is found, all attributes of the HTML tag are dropped
- if( preg_match(
+ if( preg_match(
"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
wfMungeToUtf8( $t ) ) )
{
$ltr = array () ; # tr attributes
foreach ( $t AS $k => $x )
{
- $x = rtrim ( $x ) ;
+ $x = trim ( $x ) ;
$fc = substr ( $x , 0 , 1 ) ;
if ( "{|" == substr ( $x , 0 , 2 ) )
{
$t[$k] = $z ;
}
/* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
- {
+ {
$z = trim ( substr ( $x , 2 ) ) ;
$t[$k] = "<caption>{$z}</caption>\n" ;
}*/
{
$z = "" ;
if ( $fc != "+" )
- {
+ {
$tra = array_pop ( $ltr ) ;
if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
array_push ( $tr , true ) ;
return $t ;
}
- # Well, OK, it's actually about 14 passes. But since all the
- # hard lifting is done inside PHP's regex code, it probably
- # wouldn't speed things up much to add a real parser.
- #
- function doWikiPass2( $text, $linestart )
+ function internalParse( $text, $linestart, $args = array() )
{
- $fname = "Parser::doWikiPass2";
+ $fname = "Parser::internalParse";
wfProfileIn( $fname );
-
+
$text = $this->removeHTMLtags( $text );
- $text = $this->replaceVariables( $text );
+ $text = $this->replaceVariables( $text, $args );
# $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
$text = $this->doHeadings( $text );
-
if($this->mOptions->getUseDynamicDates()) {
global $wgDateFormatter;
$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
}
-
$text = $this->replaceExternalLinks( $text );
$text = $this->doTokenizedParser ( $text );
-
$text = $this->doTableStuff ( $text ) ;
-
$text = $this->formatHeadings( $text );
-
$sk =& $this->mOptions->getSkin();
$text = $sk->transformContent( $text );
- $fixtags = array(
- "/<hr *>/i" => '<hr/>',
- "/<br *>/i" => '<br/>',
- "/<center *>/i"=>'<span style="text-align:center;">',
- "/<\\/center *>/i" => '</span>'
- );
- $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
-
- # Clean up spare ampersands; note that we probably ought to be
- # more careful about named entities.
- $text = preg_replace(
- '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/',
- '&',
- $text );
- $text .= $this->categoryMagic () ;
-
- # needs to be called last
- $text = $this->doBlockLevels( $text, $linestart );
+ if ( !isset ( $this->categoryMagicDone ) ) {
+ $text .= $this->categoryMagic () ;
+ $this->categoryMagicDone = true ;
+ }
wfProfileOut( $fname );
return $text;
wfProfileOut( $fname );
return $text;
}
-
+
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
-
- # this is the list of separators that should be ignored if they
+
+ # this is the list of separators that should be ignored if they
# are the last character of a URL but that should be included
# if they occur within the URL, e.g. "go to www.foo.com, where .."
# in this case, the last comma should not become part of the URL,
# but in "www.foo.com/123,2342,32.htm" it should.
- $sep = ",;\.:";
+ $sep = ",;\.:";
$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
$images = "gif|png|jpg|jpeg";
# that the content of the string should be inserted there).
$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
"((?i){$images})([^{$uc}]|$)/";
-
+
$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
$sk =& $this->mOptions->getSkin();
} else if ( preg_match( $e2, $line, $m ) ) {
$link = "{$protocol}:{$m[1]}";
$text = $m[2];
- $trail = $m[3];
+ $trail = $m[3];
} else {
$s .= "[{$protocol}:" . $line;
continue;
$state["strong"] = FALSE;
} else {
$s = "<strong>";
- $state["strong"] = $token["pos"];
+ $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
}
return $s;
}
$state["em"] = FALSE;
} else {
$s = "<em>";
- $state["em"] = $token["pos"];
+ $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
+
}
return $s;
}
-
+
/* private */ function handle5Quotes( &$state, $token )
{
$s = "";
$state["em"] = $token["pos"];
} else { # not $em and not $strong
$s .= "<strong><em>";
- $state["strong"] = $state["em"] = $token["pos"];
+ $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
}
return $s;
}
$tokenizer=Tokenizer::newFromString( $str );
$tokenStack = array();
-
+
$s="";
$state["em"] = FALSE;
$state["strong"] = FALSE;
$tagIsOpen = FALSE;
$threeopen = false;
-
+
# The tokenizer splits the text into tokens and returns them one by one.
# Every call to the tokenizer returns a new token.
while ( $token = $tokenizer->nextToken() )
array_push( $tokenStack, $token );
$txt="";
break;
-
+
case "]]]":
case "]]":
# link close tag.
# get text from stack, glue it together, and call the code to handle a
# link
-
+
if ( count( $tokenStack ) == 0 )
{
# stack empty. Found a ]] without an opening [[
}
$lastToken = array_pop( $tokenStack );
}
-
+
$txt = $linkText ."]]";
-
+
if( isset( $lastToken["text"] ) ) {
$prefix = $lastToken["text"];
} else {
$prefix = "";
}
$nextToken = $tokenizer->previewToken();
- if ( $nextToken["type"] == "text" )
+ if ( $nextToken["type"] == "text" )
{
# Preview just looks at it. Now we have to fetch it.
$nextToken = $tokenizer->nextToken();
$txt .= $nextToken["text"];
}
- $fakestate = $this->mStripState;
- $txt = $this->handleInternalLink( $this->unstrip($txt,$fakestate), $prefix );
+ $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
- # did the tag start with 3 [ ?
+ # did the tag start with 3 [ ?
if($threeopen) {
# show the first as text
$txt = "[".$txt;
$threeopen=false;
}
-
+
}
$tagIsOpen = (count( $tokenStack ) != 0);
break;
$txt = $lastToken["text"] . $txt;
} else {
$txt = $lastToken["type"] . $txt;
- }
+ }
}
$s .= $txt;
}
#$e2 = "/^(.*)\\b(\\w+)\$/suD";
#$e2 = "/^(.*\\s)(\\S+)\$/suD";
static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
-
+
# Special and Media are pseudo-namespaces; no pages actually exist in them
static $image = FALSE;
if ( !$image ) { $image = Namespace::getImage(); }
if ( !$special ) { $special = Namespace::getSpecial(); }
if ( !$media ) { $media = Namespace::getMedia(); }
- if ( !$category ) { $category = wfMsg ( "category" ) ; }
-
+ if ( !$category ) { $category = Namespace::getCategory(); ; }
+
$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
wfProfileOut( "$fname-setup" );
$s = "";
-
+
if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
$text = $m[2];
- $trail = $m[3];
+ $trail = $m[3];
} else { # Invalid form; output directly
$s .= $prefix . "[[" . $line ;
return $s;
}
-
+
/* Valid link forms:
Foobar -- normal
:Foobar -- override special treatment of prefix (images, language links)
$noforce = ($c != ":");
if( $c == "/" ) { # subpage
if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
- $m[1]=substr($m[1],1,strlen($m[1])-2);
+ $m[1]=substr($m[1],1,strlen($m[1])-2);
$noslash=$m[1];
} else {
$noslash=substr($m[1],1);
if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
if( "" == $text ) {
- $text= $m[1];
+ $text= $m[1];
} # this might be changed for ugliness reasons
} else {
$link = $noslash; # no subpage allowed, use standard link
$wgLinkCache->addImageLinkObj( $nt );
return $s;
}
+ if ( $ns == $category ) {
+ $t = $nt->getText() ;
+ $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
+ $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
+ $this->mOutput->mCategoryLinks[] = $t ;
+ $s .= $prefix . $trail ;
+ return $s ;
+ }
}
if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
( strpos( $link, "#" ) == FALSE ) ) {
return $s;
}
- # Category feature
- $catns = strtoupper ( $nt->getDBkey () ) ;
- $catns = explode ( ":" , $catns ) ;
- if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
- else $catns = "" ;
- if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
- $t = explode ( ":" , $nt->getText() ) ;
- array_shift ( $t ) ;
- $t = implode ( ":" , $t ) ;
- $t = $wgLang->ucFirst ( $t ) ;
- $nnt = Title::newFromText ( $category.":".$t ) ;
- $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
- $this->mOutput->mCategoryLinks[] = $t ;
- $s .= $prefix . $trail ;
- return $s ;
- }
-
if( $ns == $media ) {
$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
$wgLinkCache->addImageLinkObj( $nt );
if ( '' != $this->mLastSection ) {
$result = "</" . $this->mLastSection . ">\n";
}
+ $this->mInPre = false;
$this->mLastSection = "";
return $result;
}
# and making lists from lines starting with * # : etc.
#
$a = explode( "\n", $text );
+
$lastPref = $text = $lastLine = '';
$this->mDTopen = $inBlockElem = false;
+ $npl = 0;
+ $pstack = false;
if ( ! $linestart ) { $text .= array_shift( $a ); }
foreach ( $a as $t ) {
- if ( "" != $text ) { $text .= "\n"; }
-
$oLine = $t;
$opl = strlen( $lastPref );
- $npl = strspn( $t, "*#:;" );
- $pref = substr( $t, 0, $npl );
- $pref2 = str_replace( ";", ":", $pref );
- $t = substr( $t, $npl );
+ $preCloseMatch = preg_match("/<\\/pre/i", $t );
+ $preOpenMatch = preg_match("/<pre/i", $t );
+ if (!$this->mInPre) {
+ $this->mInPre = !empty($preOpenMatch);
+ }
+ if ( !$this->mInPre ) {
+ $npl = strspn( $t, "*#:;" );
+ $pref = substr( $t, 0, $npl );
+ $pref2 = str_replace( ";", ":", $pref );
+ $t = substr( $t, $npl );
+ } else {
+ $npl = 0;
+ $pref = $pref2 = '';
+ }
+ // list generation
if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
$text .= $this->nextItem( substr( $pref, -1 ) );
+ if ( $pstack ) { $pstack = false; }
if ( ";" == substr( $pref, -1 ) ) {
$cpos = strpos( $t, ":" );
- if ( ! ( false === $cpos ) ) {
+ if ( false !== $cpos ) {
$term = substr( $t, 0, $cpos );
$text .= $term . $this->nextItem( ":" );
$t = substr( $t, $cpos + 1 );
}
} else if (0 != $npl || 0 != $opl) {
$cpl = $this->getCommon( $pref, $lastPref );
+ if ( $pstack ) { $pstack = false; }
while ( $cpl < $opl ) {
$text .= $this->closeList( $lastPref{$opl-1} );
}
$lastPref = $pref2;
}
- if ( 0 == $npl ) { # No prefix--go to paragraph mode
+ if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
$uniq_prefix = UNIQ_PREFIX;
// XXX: use a stack for nestable elements like span, table and div
- $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p)/i", $t );
- $closematch = preg_match(
+ $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
+ $closematch = preg_match(
"/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
- "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre)/i", $t );
+ "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
if ( $openmatch or $closematch ) {
+ if ( $pstack ) { $pstack = false; }
$text .= $this->closeParagraph();
+ if($preOpenMatch and !$preCloseMatch) {
+ $this->mInPre = true;
+ }
if ( $closematch ) {
$inBlockElem = false;
} else {
}
} else if ( !$inBlockElem ) {
if ( " " == $t{0} ) {
- $newSection = "pre";
+ // pre
if ($this->mLastSection != 'pre') {
- $text .= $this->closeParagraph();
- $text .= "<" . $newSection . ">";
- $this->mLastSection = $newSection;
+ $pstack = false;
+ $text .= $this->closeParagraph().'<pre>';
+ $this->mLastSection = 'pre';
}
- } else {
- $newSection = "p";
+ } else {
+ // paragraph
if ( '' == trim($t) ) {
- if ( '' == trim($lastLine) ) {
- $text .= $this->closeParagraph();
- $text .= "<" . $newSection . "><br/>";
- $this->mLastSection = $newSection;
+ if ( $pstack ) {
+ $text .= $pstack.'<br/>';
+ $pstack = false;
+ $this->mLastSection = 'p';
} else {
- $t = '';
+ if ($this->mLastSection != 'p' ) {
+ $text .= $this->closeParagraph();
+ $this->mLastSection = '';
+ $pstack = "<p>";
+ } else {
+ $pstack = '</p><p>';
+ }
}
- } else if ($this->mLastSection != $newSection) {
- $text .= $this->closeParagraph();
- $text .= "<" . $newSection . ">";
- $this->mLastSection = $newSection;
- }
+ } else {
+ if ( $pstack ) {
+ $text .= $pstack;
+ $pstack = false;
+ $this->mLastSection = 'p';
+ } else if ($this->mLastSection != 'p') {
+ $text .= $this->closeParagraph().'<p>';
+ $this->mLastSection = 'p';
+ }
+ }
}
-
- }
+ }
+ }
+ if ($pstack === false) {
+ $text .= $t."\n";
}
- $lastLine = $t;
- $text .= $t;
}
while ( $npl ) {
$text .= $this->closeList( $pref2{$npl-1} );
$text .= "</" . $this->mLastSection . ">";
$this->mLastSection = "";
}
+
wfProfileOut( $fname );
return $text;
}
}
}
- /* private */ function replaceVariables( $text )
+ # Expands {{...}} constructs in $text by handing every regex match to the
+ # global callback wfBraceSubstitution.
+ # $args is pushed onto $this->mArgStack for the duration of the call and popped
+ # afterwards, so each recursive invocation sees its own argument list on top
+ # of the stack.
+ # Returns the text with every matched construct replaced by the callback's output.
+ /* private */ function replaceVariables( $text, $args = array() )
{
- global $wgLang, $wgCurParser;
- global $wgScript, $wgArticlePath;
+ global $wgLang, $wgScript, $wgArticlePath;
$fname = "Parser::replaceVariables";
wfProfileIn( $fname );
-
+
$bail = false;
if ( !$this->mVariables ) {
$this->initialiseVariables();
}
$titleChars = Title::legalChars();
- $regex = "/{{([$titleChars\\|]*?)}}/s";
-
- # "Recursive" variable expansion: run it through a couple of passes
- for ( $i=0; $i<MAX_INCLUDE_REPEAT && !$bail; $i++ ) {
- $oldText = $text;
-
- # It's impossible to rebind a global in PHP
- # Instead, we run the substitution on a copy, then merge the changed fields back in
- $wgCurParser = $this->fork();
-
- $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
- if ( $oldText == $text ) {
- $bail = true;
- }
- $this->merge( $wgCurParser );
- }
+ $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
- return $text;
- }
+ # This function is called recursively. To keep track of arguments we need a stack:
+ array_push( $this->mArgStack, $args );
- # Returns a copy of this object except with various variables cleared
- # This copy can be re-merged with the parent after operations on the copy
- function fork()
- {
- $copy = $this;
- $copy->mOutput = new ParserOutput;
- return $copy;
- }
+ # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
+ $GLOBALS['wgCurParser'] =& $this;
+ $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
- # Merges a copy split off with fork()
- function merge( &$copy )
- {
- # Output objects
- $this->mOutput->merge( $copy->mOutput );
-
- # Include throttling arrays
- foreach( $copy->mIncludeCount as $dbk => $count ) {
- if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
- $this->mIncludeCount[$dbk] += $count;
- } else {
- $this->mIncludeCount[$dbk] = $count;
- }
- }
+ array_pop( $this->mArgStack );
- # Strip states
- foreach( $copy->mStripState as $dictName => $contentDict ) {
- $this->mStripState[$dictName] += $contentDict;
- }
+ # Close the profiling section opened by wfProfileIn() above so that
+ # every entry into this function is balanced by an exit
+ wfProfileOut( $fname );
+ return $text;
}
function braceSubstitution( $matches )
$fname = "Parser::braceSubstitution";
$found = false;
$nowiki = false;
-
- $text = $matches[1];
+ $title = NULL;
+
+ # $newline is an optional newline character before the braces
+ # $part1 is the bit before the first |, and must contain only title characters
+ # $args is a list of arguments, starting from index 0, not including $part1
+
+ $newline = $matches[1];
+ $part1 = $matches[2];
+ # If the third subpattern matched anything, it will start with |
+ if ( $matches[3] !== "" ) {
+ $args = explode( "|", substr( $matches[3], 1 ) );
+ } else {
+ $args = array();
+ }
+ $argc = count( $args );
# SUBST
$mwSubst =& MagicWord::get( MAG_SUBST );
- if ( $mwSubst->matchStartAndRemove( $text ) ) {
+ if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
if ( $this->mOutputType != OT_WIKI ) {
# Invalid SUBST not replaced at PST time
# Return without further processing
$text = $matches[0];
$found = true;
}
-
+
# MSG, MSGNW and INT
if ( !$found ) {
# Check for MSGNW:
$mwMsgnw =& MagicWord::get( MAG_MSGNW );
- if ( $mwMsgnw->matchStartAndRemove( $text ) ) {
+ if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
$nowiki = true;
} else {
# Remove obsolete MSG:
$mwMsg =& MagicWord::get( MAG_MSG );
- $mwMsg->matchStartAndRemove( $text );
+ $mwMsg->matchStartAndRemove( $part1 );
}
-
+
# Check if it is an internal message
$mwInt =& MagicWord::get( MAG_INT );
- if ( $mwInt->matchStartAndRemove( $text ) ) {
- $text = wfMsg( $text );
- $found = true;
+ if ( $mwInt->matchStartAndRemove( $part1 ) ) {
+ if ( $this->incrementIncludeCount( "int:$part1" ) ) {
+ $text = wfMsgReal( $part1, $args, true );
+ $found = true;
+ }
}
}
-
+
# NS
if ( !$found ) {
# Check for NS: (namespace expansion)
$mwNs = MagicWord::get( MAG_NS );
- if ( $mwNs->matchStartAndRemove( $text ) ) {
- if ( intval( $text ) ) {
- $text = $wgLang->getNsText( intval( $text ) );
+ if ( $mwNs->matchStartAndRemove( $part1 ) ) {
+ if ( intval( $part1 ) ) {
+ $text = $wgLang->getNsText( intval( $part1 ) );
$found = true;
} else {
- $index = Namespace::getCanonicalIndex( strtolower( $text ) );
+ $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
if ( !is_null( $index ) ) {
$text = $wgLang->getNsText( $index );
$found = true;
}
}
}
-
+
# LOCALURL and LOCALURLE
if ( !$found ) {
$mwLocal = MagicWord::get( MAG_LOCALURL );
$mwLocalE = MagicWord::get( MAG_LOCALURLE );
- if ( $mwLocal->matchStartAndRemove( $text ) ) {
+ if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
$func = 'getLocalURL';
- } elseif ( $mwLocalE->matchStartAndRemove( $text ) ) {
+ } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
$func = 'escapeLocalURL';
} else {
$func = '';
}
-
+
if ( $func !== '' ) {
- $args = explode( "|", $text );
- $n = count( $args );
- if ( $n > 0 ) {
- $title = Title::newFromText( $args[0] );
- if ( !is_null( $title ) ) {
- if ( $n > 1 ) {
- $text = $title->$func( $args[1] );
- } else {
- $text = $title->$func();
- }
- $found = true;
+ $title = Title::newFromText( $part1 );
+ if ( !is_null( $title ) ) {
+ if ( $argc > 0 ) {
+ $text = $title->$func( $args[0] );
+ } else {
+ $text = $title->$func();
}
+ $found = true;
}
- }
+ }
}
-
- # Check for a match against internal variables
- if ( !$found && array_key_exists( $text, $this->mVariables ) ) {
- $text = $this->mVariables[$text];
+
+ # Internal variables
+ if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
+ $text = $this->mVariables[$part1];
$found = true;
$this->mOutput->mContainsOldMagic = true;
- }
-
+ }
+
+ # Arguments input from the caller
+ $inputArgs = end( $this->mArgStack );
+ if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
+ $text = $inputArgs[$part1];
+ $found = true;
+ }
+
# Load from database
if ( !$found ) {
- $title = Title::newFromText( $text, NS_TEMPLATE );
- if ( is_object( $title ) && !$title->isExternal() ) {
+ $title = Title::newFromText( $part1, NS_TEMPLATE );
+ if ( !is_null( $title ) && !$title->isExternal() ) {
# Check for excessive inclusion
$dbk = $title->getPrefixedDBkey();
- if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
- $this->mIncludeCount[$dbk] = 0;
- }
- if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
+ if ( $this->incrementIncludeCount( $dbk ) ) {
$article = new Article( $title );
$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
if ( $articleContent !== false ) {
$found = true;
$text = $articleContent;
-
- # Escaping and link table handling
- # Not required for preSaveTransform()
- if ( $this->mOutputType == OT_HTML ) {
- if ( $nowiki ) {
- $text = wfEscapeWikiText( $text );
- } else {
- $text = $this->removeHTMLtags( $text );
- }
- # Do not enter included links in link table
- $wgLinkCache->suspend();
-
- # Run full parser on the included text
- $text = $this->strip( $text, $this->mStripState );
- $text = $this->doWikiPass2( $text, true );
-
- # Add the result to the strip state for re-inclusion after
- # the rest of the processing
- $text = $this->insertStripItem( $text, $this->mStripState );
-
- # Resume the link cache and register the inclusion as a link
- $wgLinkCache->resume();
- $wgLinkCache->addLinkObj( $title );
- }
- }
- }
+ }
+ }
# If the title is valid but undisplayable, make a link to it
if ( $this->mOutputType == OT_HTML && !$found ) {
}
}
+ # Recursive parsing, escaping and link table handling
+ # Only for HTML output
+ if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
+ $text = wfEscapeWikiText( $text );
+ } elseif ( $this->mOutputType == OT_HTML && $found ) {
+ # Clean up argument array
+ $assocArgs = array();
+ $index = 1;
+ foreach( $args as $arg ) {
+ $eqpos = strpos( $arg, "=" );
+ if ( $eqpos === false ) {
+ $assocArgs[$index++] = $arg;
+ } else {
+ $name = trim( substr( $arg, 0, $eqpos ) );
+ $value = trim( substr( $arg, $eqpos+1 ) );
+ if ( $value === false ) {
+ $value = "";
+ }
+ if ( $name !== false ) {
+ $assocArgs[$name] = $value;
+ }
+ }
+ }
+
+ # Do not enter included links in link table
+ if ( !is_null( $title ) ) {
+ $wgLinkCache->suspend();
+ }
+
+ # Run full parser on the included text
+ $text = $this->strip( $text, $this->mStripState );
+ $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
+
+ # Add the result to the strip state for re-inclusion after
+ # the rest of the processing
+ $text = $this->insertStripItem( $text, $this->mStripState );
+
+ # Resume the link cache and register the inclusion as a link
+ if ( !is_null( $title ) ) {
+ $wgLinkCache->resume();
+ $wgLinkCache->addLinkObj( $title );
+ }
+ }
+
if ( !$found ) {
return $matches[0];
} else {
- return $text;
+ return $newline . $text;
}
}
+ # Records one more inclusion of the entity keyed by $dbk and reports whether
+ # the running total is still within MAX_INCLUDE_REPEAT.
+ # Returns true when the caller may include the entity, false once the limit is exceeded.
+ function incrementIncludeCount( $dbk )
+ {
+ 	# An entity that has not been seen yet starts from zero
+ 	$seen = array_key_exists( $dbk, $this->mIncludeCount ) ? $this->mIncludeCount[$dbk] : 0;
+ 	$this->mIncludeCount[$dbk] = $seen + 1;
+ 	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
+ }
+
+
# Cleans up HTML, removes dangerous tags and attributes
/* private */ function removeHTMLtags( $text )
{
$fname = "Parser::removeHTMLtags";
wfProfileIn( $fname );
$htmlpairs = array( # Tags that must be closed
- "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
+ "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
"strike", "strong", "tt", "var", "div", "center",
"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
$htmlsingle = array_merge( $tabletags, $htmlsingle );
$htmlelements = array_merge( $htmlsingle, $htmlpairs );
- $htmlattrs = $this->getHTMLattrs () ;
+ $htmlattrs = $this->getHTMLattrs () ;
# Remove HTML comments
$text = preg_replace( "/<!--.*-->/sU", "", $text );
}
# Strip non-approved attributes from the tag
$newparams = $this->fixTagAttributes($params);
-
+
}
if ( ! $badtag ) {
$rest = str_replace( ">", ">", $rest );
return $text;
}
-/*
- *
+/*
+ *
* This function accomplishes several tasks:
* 1) Auto-number headings if that option is enabled
* 2) Add an [edit] link to sections for logged in users who have enabled the option
*
* It loops through all headlines, collects the necessary data, then splits up the
* string and re-inserts the newly formatted headlines.
- *
+ *
*/
/* private */ function formatHeadings( $text )
if( $esw->matchAndRemove( $text ) ) {
$showEditLink = 0;
}
- # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
+ # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
# do not add TOC
$mw =& MagicWord::get( MAG_NOTOC );
if( $mw->matchAndRemove( $text ) ) {
$prevlevel = $level;
}
$level = $matches[1][$headlineCount];
- if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
+ if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
# reset when we enter a new level
$sublevelCount[$level] = 0;
$toc .= $sk->tocIndent( $level - $prevlevel );
$toclevel += $level - $prevlevel;
- }
+ }
if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
# reset when we step back a level
$sublevelCount[$level+1]=0;
$numbering .= ".";
}
$numbering .= $sublevelCount[$i];
- $dot = 1;
+ $dot = 1;
}
}
}
# The canonized header is a version of the header text safe to use for links
# Avoid insertion of weird stuff like <math> by expanding the relevant sections
- $canonized_headline = Parser::unstrip( $headline, $this->mStripState );
-
+ $canonized_headline = $this->unstrip( $headline, $this->mStripState );
+
# strip out HTML
$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
- $tocline = trim( $canonized_headline );
- $canonized_headline = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
+ $tocline = trim( $canonized_headline );
+ $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
$refer[$headlineCount] = $canonized_headline;
-
+
# count how many in assoc. array so we can track dupes in anchors
@$refers[$canonized_headline]++;
$refcount[$headlineCount]=$refers[$canonized_headline];
# Prepend the number to the heading text
-
+
if( $doNumberHeadings || $doShowToc ) {
$tocline = $numbering . " " . $tocline;
-
+
# Don't number the heading if it is the only one (looks silly)
if( $doNumberHeadings && count( $matches[3] ) > 1) {
# the two are different if the line contains a link
$headline=$numbering . " " . $headline;
}
}
-
+
# Create the anchor for linking from the TOC to the section
$anchor = $canonized_headline;
if($refcount[$headlineCount] > 1 ) {
}
$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
}
-
+
# Add the edit section span
if( $rightClickHack ) {
- $headline = $sk->editSectionScript($headlineCount+1,$headline);
+ $headline = $sk->editSectionScript($headlineCount+1,$headline);
}
# give headline the correct <h#> tag
@$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
-
+
$headlineCount++;
- }
+ }
if( $doShowToc ) {
$toclines = $headlineCount;
}
# split up and insert constructed headlines
-
+
$blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
$i = 0;
foreach( $blocks as $block ) {
if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
- # This is the [edit] link that appears for the top block of text when
+ # This is the [edit] link that appears for the top block of text when
# section editing is enabled
- $full .= $sk->editSectionLink(0);
+
+ # Disabled because it broke block formatting
+ # For example, a bullet point in the top line
+ # $full .= $sk->editSectionLink(0);
}
$full .= $block;
if( $doShowToc && !$i) {
}
$i++;
}
-
+
return $full;
}
}
$num = str_replace( "-", "", $isbn );
$num = str_replace( " ", "", $num );
-
+
if ( "" == $num ) {
$text = "ISBN $blank$x";
} else {
$rfc .= $x{0};
$x = substr( $x, 1 );
}
-
+
if ( "" == $rfc ) {
$text .= "RFC $blank$x";
} else {
$this->mOptions = $options;
$this->mTitle =& $title;
$this->mOutputType = OT_WIKI;
-
+
if ( $clearState ) {
$this->clearState();
}
-
+
$stripState = false;
$pairs = array(
"\r\n" => "\n",
} else {
$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
}
-
+
/*
$mw =& MagicWord::get( MAG_SUBST );
$wgCurParser = $this->fork();
$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
$this->merge( $wgCurParser );
*/
-
+
# Trim trailing whitespace
- # MAG_END (__END__) tag allows for trailing
+ # MAG_END (__END__) tag allows for trailing
# whitespace to be deliberately included
$text = rtrim( $text );
$mw =& MagicWord::get( MAG_END );
# Set up some variables which are usually set up in parse()
# so that an external function can call some class members with confidence
- function startExternalParse( &$title, $options, $outputType, $clearState = true )
+ function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
$this->mTitle =& $title;
$this->mOptions = $options;
function transformMsg( $text, $options ) {
global $wgTitle;
static $executing = false;
-
+
# Guard against infinite recursion
if ( $executing ) {
return $text;
$this->mOutputType = OT_MSG;
$this->clearState();
$text = $this->replaceVariables( $text );
-
+
$executing = false;
return $text;
}
# Setter for the heading auto-numbering option. Hands the mNumberHeadings member and
# the new value $x to wfSetVar() and returns whatever wfSetVar() returns
# (presumably the previous setting -- confirm against wfSetVar).
function setNumberHeadings( $x )
{
	return wfSetVar( $this->mNumberHeadings, $x );
}
# Setter for the table-of-contents option. Hands the mShowToc member and the new
# value $x to wfSetVar() and returns whatever wfSetVar() returns
# (presumably the previous setting -- confirm against wfSetVar).
function setShowToc( $x )
{
	return wfSetVar( $this->mShowToc, $x );
}
# Factory: creates a fresh ParserOptions object, initialises it from the given
# user via initialiseFromUser(), and returns the new object.
- /* static */ function newFromUser( &$user )
+ /* static */ function newFromUser( &$user )
{
$popts = new ParserOptions;
# The call no longer uses call-time pass-by-reference (&$user at the call site);
# initialiseFromUser() declares its parameter by reference, so the argument is
# still passed by reference.
- $popts->initialiseFromUser( &$user );
+ $popts->initialiseFromUser( $user );
return $popts;
}
- function initialiseFromUser( &$userInput )
+ function initialiseFromUser( &$userInput )
{
global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
-
+
if ( !$userInput ) {
$user = new User;
$user->setLoaded( true );