includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36 define( "MAX_INCLUDE_REPEAT", 5 );
  37
  38 # Recursion depth of variable/inclusion evaluation
  39 define( "MAX_INCLUDE_PASSES", 3 );
  40
  41 # Allowed values for $mOutputType
  42 define( "OT_HTML", 1 );
  43 define( "OT_WIKI", 2 );
  44 define( "OT_MSG", 3 );
  45
  46 class Parser
  47 {
  48         # Cleared with clearState():
  49         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array();
  50         var $mVariables, $mIncludeCount;
  51
  52         # Temporary:
  53         var $mOptions, $mTitle, $mOutputType;
  54
  55         function Parser()
  56         {
  57                 $this->clearState();
  58         }
  59
  60         function clearState()
  61         {
  62                 $this->mOutput = new ParserOutput;
  63                 $this->mAutonumber = 0;
  64                 $this->mLastSection = "";
  65                 $this->mDTopen = false;
  66                 $this->mVariables = false;
  67                 $this->mIncludeCount = array();
  68                 $this->mStripState = array();
  69         }
  70
  71         # First pass--just handle <nowiki> sections, pass the rest off
  72         # to doWikiPass2() which does all the real work.
  73         #
  74         # Returns a ParserOutput
  75         #
  76         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  77         {
  78                 $fname = "Parser::parse";
  79                 wfProfileIn( $fname );
  80
  81                 if ( $clearState ) {
  82                         $this->clearState();
  83                 }
  84
  85                 $this->mOptions = $options;
  86                 $this->mTitle =& $title;
  87                 $this->mOutputType = OT_HTML;
  88
  89                 $stripState = NULL;
  90                 $text = $this->strip( $text, $this->mStripState );
  91                 $text = $this->doWikiPass2( $text, $linestart );
  92                 $text = $this->unstrip( $text, $this->mStripState );
  93
  94                 $this->mOutput->setText( $text );
  95                 wfProfileOut( $fname );
  96                 return $this->mOutput;
  97         }
  98
  99         /* static */ function getRandomString()
 100         {
 101                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 102         }
 103
        # Replaces all occurrences of <$tag>content</$tag> in the text
        # with a random marker and returns the new text. The output parameter
        # $content will be an associative array filled with data of the form
        # $unique_marker => content.
 108
 109         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 110                 $result = array();
 111                 $rnd = $uniq_prefix . Parser::getRandomString();
 112                 $content = array( );
 113                 $n = 1;
 114                 $stripped = "";
 115
 116                 while ( "" != $text ) {
 117                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 118                         $stripped .= $p[0];
 119                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 120                                 $text = "";
 121                         } else {
 122                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 123                                 $marker = $rnd . sprintf("%08X", $n++);
 124                                 $content[$marker] = $q[0];
 125                                 $stripped .= $marker;
 126                                 $text = $q[1];
 127                         }
 128                 }
 129                 return $stripped;
 130         }
 131
 132         # Strips <nowiki>, <pre> and <math>
 133         # Returns the text, and fills an array with data needed in unstrip()
 134         #
 135         function strip( $text, &$state )
 136         {
 137                 $render = ($this->mOutputType == OT_HTML);
 138                 $nowiki_content = array();
 139                 $hiero_content = array();
 140                 $math_content = array();
 141                 $pre_content = array();
 142
 143                 # Replace any instances of the placeholders
 144                 $uniq_prefix = "NaodW29";
 145                 $text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 146
 147                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 148                 foreach( $nowiki_content as $marker => $content ){
 149                         if( $render ){
 150                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 151                         } else {
 152                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 153                         }
 154                 }
 155
 156                 if( $GLOBALS['wgUseWikiHiero'] ){
 157                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 158                         foreach( $hiero_content as $marker => $content ){
 159                                 if( $render ){
 160                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 161                                 } else {
 162                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 163                                 }
 164                         }
 165                 }
 166
 167                 if( $this->mOptions->getUseTeX() ){
 168                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 169                         foreach( $math_content as $marker => $content ){
 170                                 if( $render ){
 171                                         $math_content[$marker] = renderMath( $content );
 172                                 } else {
 173                                         $math_content[$marker] = "<math>$content</math>";
 174                                 }
 175                         }
 176                 }
 177
 178                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 179                 foreach( $pre_content as $marker => $content ){
 180                         if( $render ){
 181                                 $pre_content[$marker] = "\n<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 182                         } else {
 183                                 $pre_content[$marker] = "\n<pre>$content</pre>";
 184                         }
 185                 }
 186
 187                 # Must expand in reverse order, otherwise nested tags will be corrupted
 188                 $state = array( $pre_content, $math_content, $hiero_content, $nowiki_content );
 189                 return $text;
 190         }
 191
 192         function unstrip( $text, &$state )
 193         {
 194                 foreach( $state as $content_dict ){
 195                         foreach( $content_dict as $marker => $content ){
 196                                 $text = str_replace( $marker, $content, $text );
 197                         }
 198                 }
 199                 return $text;
 200         }
 201
 202         function categoryMagic ()
 203         {
 204                 global $wgLang , $wgUser ;
 205                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 206                 $id = $this->mTitle->getArticleID() ;
 207                 $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
 208                 $ti = $this->mTitle->getText() ;
 209                 $ti = explode ( ":" , $ti , 2 ) ;
 210                 if ( $cat != $ti[0] ) return "" ;
 211                 $r = "<br break='all' />\n" ;
 212
 213                 $articles = array() ;
 214                 $parents = array () ;
 215                 $children = array() ;
 216
 217
 218 #               $sk =& $this->mGetSkin();
 219                 $sk =& $wgUser->getSkin() ;
 220
 221                 $data = array () ;
 222                 $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 223                 $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 224
 225                 $res = wfQuery ( $sql1, DB_READ ) ;
 226                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 227
 228                 $res = wfQuery ( $sql2, DB_READ ) ;
 229                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 230
 231
 232                 foreach ( $data AS $x )
 233                 {
 234                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 235                         if ( $t != "" ) $t .= ":" ;
 236                         $t .= $x->cur_title ;
 237
 238                         $y = explode ( ":" , $t , 2 ) ;
 239                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 240                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 241                         } else {
 242                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 243                         }
 244                 }
 245                 wfFreeResult ( $res ) ;
 246
 247                 # Children
 248                 if ( count ( $children ) > 0 )
 249                 {
 250                         asort ( $children ) ;
 251                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 252                         $r .= implode ( ", " , $children ) ;
 253                 }
 254
 255                 # Articles
 256                 if ( count ( $articles ) > 0 )
 257                 {
 258                         asort ( $articles ) ;
 259                         $h =  wfMsg( "category_header", $ti[1] );
 260                         $r .= "<h2>{$h}</h2>\n" ;
 261                         $r .= implode ( ", " , $articles ) ;
 262                 }
 263
 264
 265                 return $r ;
 266         }
 267
 268         function getHTMLattrs ()
 269         {
 270                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 271                                 "title", "align", "lang", "dir", "width", "height",
 272                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 273                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 274                                 /* FONT */ "type", "start", "value", "compact",
 275                                 /* For various lists, mostly deprecated but safe */
 276                                 "summary", "width", "border", "frame", "rules",
 277                                 "cellspacing", "cellpadding", "valign", "char",
 278                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 279                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 280                                 "id", "class", "name", "style" /* For CSS */
 281                                 );
 282                 return $htmlattrs ;
 283         }
 284
 285         function fixTagAttributes ( $t )
 286         {
 287                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 288                 $htmlattrs = $this->getHTMLattrs() ;
 289
 290                 # Strip non-approved attributes from the tag
 291                 $t = preg_replace(
 292                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 293                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 294                         $t);
 295                 # Strip javascript "expression" from stylesheets. Brute force approach:
 296                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 297
 298                 if( preg_match(
 299                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 300                         wfMungeToUtf8( $t ) ) )
 301                 {
 302                         $t="";
 303                 }
 304
 305                 return trim ( $t ) ;
 306         }
 307
 308         function doTableStuff ( $t )
 309         {
 310                 $t = explode ( "\n" , $t ) ;
 311                 $td = array () ; # Is currently a td tag open?
 312                         $ltd = array () ; # Was it TD or TH?
 313                         $tr = array () ; # Is currently a tr tag open?
 314                         $ltr = array () ; # tr attributes
 315                         foreach ( $t AS $k => $x )
 316                         {
 317                                 $x = rtrim ( $x ) ;
 318                                 $fc = substr ( $x , 0 , 1 ) ;
 319                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 320                                 {
 321                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 322                                         array_push ( $td , false ) ;
 323                                         array_push ( $ltd , "" ) ;
 324                                         array_push ( $tr , false ) ;
 325                                         array_push ( $ltr , "" ) ;
 326                                 }
 327                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 328                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 329                                 {
 330                                         $z = "</table>\n" ;
 331                                         $l = array_pop ( $ltd ) ;
 332                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 333                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 334                                         array_pop ( $ltr ) ;
 335                                         $t[$k] = $z ;
 336                                 }
 337                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 338                                                 {
 339                                                 $z = trim ( substr ( $x , 2 ) ) ;
 340                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 341                                                 }*/
 342                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 343                                 {
 344                                         $x = substr ( $x , 1 ) ;
 345                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 346                                         $z = "" ;
 347                                         $l = array_pop ( $ltd ) ;
 348                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 349                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 350                                         array_pop ( $ltr ) ;
 351                                         $t[$k] = $z ;
 352                                         array_push ( $tr , false ) ;
 353                                         array_push ( $td , false ) ;
 354                                         array_push ( $ltd , "" ) ;
 355                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 356                                 }
 357                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 358                                 {
 359                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 360                                         {
 361                                                 $fc = "+" ;
 362                                                 $x = substr ( $x , 1 ) ;
 363                                         }
 364                                         $after = substr ( $x , 1 ) ;
 365                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 366                                         $after = explode ( "||" , $after ) ;
 367                                         $t[$k] = "" ;
 368                                         foreach ( $after AS $theline )
 369                                         {
 370                                                 $z = "" ;
 371                                                 if ( $fc != "+" )
 372                                                 {
 373                                                         $tra = array_pop ( $ltr ) ;
 374                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 375                                                         array_push ( $tr , true ) ;
 376                                                         array_push ( $ltr , "" ) ;
 377                                                 }
 378
 379                                                 $l = array_pop ( $ltd ) ;
 380                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 381                                                 if ( $fc == "|" ) $l = "td" ;
 382                                                 else if ( $fc == "!" ) $l = "th" ;
 383                                                 else if ( $fc == "+" ) $l = "caption" ;
 384                                                 else $l = "" ;
 385                                                 array_push ( $ltd , $l ) ;
 386                                                 $y = explode ( "|" , $theline , 2 ) ;
 387                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 388                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 389                                                 $t[$k] .= $y ;
 390                                                 array_push ( $td , true ) ;
 391                                         }
 392                                 }
 393                         }
 394
 395                 # Closing open td, tr && table
 396                 while ( count ( $td ) > 0 )
 397                 {
 398                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 399                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 400                         $t[] = "</table>" ;
 401                 }
 402
 403                 $t = implode ( "\n" , $t ) ;
 404                 #               $t = $this->removeHTMLtags( $t );
 405                 return $t ;
 406         }
 407
 408         # Well, OK, it's actually about 14 passes.  But since all the
 409         # hard lifting is done inside PHP's regex code, it probably
 410         # wouldn't speed things up much to add a real parser.
 411         #
 412         function doWikiPass2( $text, $linestart )
 413         {
 414                 $fname = "Parser::doWikiPass2";
 415                 wfProfileIn( $fname );
 416
 417                 $text = $this->removeHTMLtags( $text );
 418                 $text = $this->replaceVariables( $text );
 419
 420                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 421
 422                 $text = $this->doHeadings( $text );
 423
 424                 if($this->mOptions->getUseDynamicDates()) {
 425                         global $wgDateFormatter;
 426                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 427                 }
 428
 429                 $text = $this->replaceExternalLinks( $text );
 430                 $text = $this->doTokenizedParser ( $text );
 431                 $text = $this->doTableStuff ( $text ) ;
 432
 433                 $text = $this->formatHeadings( $text );
 434
 435                 $sk =& $this->mOptions->getSkin();
 436                 $text = $sk->transformContent( $text );
 437                 $fixtags = array(
 438                         "/<hr *>/i" => '<hr/>',
 439                         "/<br *>/i" => '<br/>',
 440                         "/<center *>/i"=>'<span style="text-align:center;">',
 441                         "/<\\/center *>/i" => '</span>'
 442                 );
 443                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 444                 $text = $this->doBlockLevels( $text, $linestart );
 445                 $text .= $this->categoryMagic () ;
 446
 447                 wfProfileOut( $fname );
 448                 return $text;
 449         }
 450
 451
 452         /* private */ function doHeadings( $text )
 453         {
 454                 for ( $i = 6; $i >= 1; --$i ) {
 455                         $h = substr( "======", 0, $i );
 456                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 457                           "<h{$i}>\\1</h{$i}>\\2", $text );
 458                 }
 459                 return $text;
 460         }
 461
 462         # Note: we have to do external links before the internal ones,
 463         # and otherwise take great care in the order of things here, so
 464         # that we don't end up interpreting some URLs twice.
 465
 466         /* private */ function replaceExternalLinks( $text )
 467         {
 468                 $fname = "Parser::replaceExternalLinks";
 469                 wfProfileIn( $fname );
 470                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 471                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 472                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 473                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 474                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 475                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 476                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 477                 wfProfileOut( $fname );
 478                 return $text;
 479         }
 480
 481         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 482         {
 483                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 484                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 485
 486                 # this is  the list of separators that should be ignored if they
 487                 # are the last character of an URL but that should be included
 488                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 489                 # in this case, the last comma should not become part of the URL,
 490                 # but in "www.foo.com/123,2342,32.htm" it should.
 491                 $sep = ",;\.:";
 492                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 493                 $images = "gif|png|jpg|jpeg";
 494
 495                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 496                 # they are interpreted as part of the string (used to tell PHP
 497                 # that the content of the string should be inserted there).
 498                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 499                   "((?i){$images})([^{$uc}]|$)/";
 500
 501                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 502                 $sk =& $this->mOptions->getSkin();
 503
 504                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 505                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 506                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 507                 }
 508                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 509                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 510                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 511                   "</a>\\5", $s );
 512                 $s = str_replace( $unique, $protocol, $s );
 513
 514                 $a = explode( "[{$protocol}:", " " . $s );
 515                 $s = array_shift( $a );
 516                 $s = substr( $s, 1 );
 517
 518                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 519                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 520
 521                 foreach ( $a as $line ) {
 522                         if ( preg_match( $e1, $line, $m ) ) {
 523                                 $link = "{$protocol}:{$m[1]}";
 524                                 $trail = $m[2];
 525                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 526                                 else { $text = wfEscapeHTML( $link ); }
 527                         } else if ( preg_match( $e2, $line, $m ) ) {
 528                                 $link = "{$protocol}:{$m[1]}";
 529                                 $text = $m[2];
 530                                 $trail = $m[3];
 531                         } else {
 532                                 $s .= "[{$protocol}:" . $line;
 533                                 continue;
 534                         }
 535                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 536                                 $paren = "";
 537                         } else {
 538                                 # Expand the URL for printable version
 539                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 540                         }
 541                         $la = $sk->getExternalLinkAttributes( $link, $text );
 542                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 543
 544                 }
 545                 return $s;
 546         }
 547
 548         /* private */ function handle3Quotes( &$state, $token )
 549         {
 550                 if ( $state["strong"] ) {
 551                         if ( $state["em"] && $state["em"] > $state["strong"] )
 552                         {
 553                                 # ''' lala ''lala '''
 554                                 $s = "</em></strong><em>";
 555                         } else {
 556                                 $s = "</strong>";
 557                         }
 558                         $state["strong"] = FALSE;
 559                 } else {
 560                         $s = "<strong>";
 561                         $state["strong"] = $token["pos"];
 562                 }
 563                 return $s;
 564         }
 565
 566         /* private */ function handle2Quotes( &$state, $token )
 567         {
 568                 if ( $state["em"] ) {
 569                         if ( $state["strong"] && $state["strong"] > $state["em"] )
 570                         {
 571                                 # ''lala'''lala'' ....'''
 572                                 $s = "</strong></em><strong>";
 573                         } else {
 574                                 $s = "</em>";
 575                         }
 576                         $state["em"] = FALSE;
 577                 } else {
 578                         $s = "<em>";
 579                         $state["em"] = $token["pos"];
 580                 }
 581                 return $s;
 582         }
 583
 584         /* private */ function handle5Quotes( &$state, $token )
 585         {
 586                 $s = "";
 587                 if ( $state["em"] && $state["strong"] ) {
 588                         if ( $state["em"] < $state["strong"] ) {
 589                                 $s .= "</strong></em>";
 590                         } else {
 591                                 $s .= "</em></strong>";
 592                         }
 593                         $state["strong"] = $state["em"] = FALSE;
 594                 } elseif ( $state["em"] ) {
 595                         $s .= "</em><strong>";
 596                         $state["em"] = FALSE;
 597                         $state["strong"] = $token["pos"];
 598                 } elseif ( $state["strong"] ) {
 599                         $s .= "</strong><em>";
 600                         $state["strong"] = FALSE;
 601                         $state["em"] = $token["pos"];
 602                 } else { # not $em and not $strong
 603                         $s .= "<strong><em>";
 604                         $state["strong"] = $state["em"] = $token["pos"];
 605                 }
 606                 return $s;
 607         }
 608
 609         /* private */ function doTokenizedParser( $str )
 610         {
 611                 global $wgLang; # for language specific parser hook
 612
 613                 $tokenizer=Tokenizer::newFromString( $str );
 614                 $tokenStack = array();
 615
 616                 $s="";
 617                 $state["em"]      = FALSE;
 618                 $state["strong"]  = FALSE;
 619                 $tagIsOpen = FALSE;
 620                 $threeopen = false;
 621
 622                 # The tokenizer splits the text into tokens and returns them one by one.
 623                 # Every call to the tokenizer returns a new token.
 624                 while ( $token = $tokenizer->nextToken() )
 625                 {
 626                         $threeopen = false;
 627                         switch ( $token["type"] )
 628                         {
 629                                 case "text":
 630                                         # simple text with no further markup
 631                                         $txt = $token["text"];
 632                                         break;
 633                                 case "[[[":
 634                                         # remember the tag opened with 3 [
 635                                         $threeopen = true;
 636                                 case "[[":
 637                                         # link opening tag.
 638                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 639                                         $tagIsOpen = TRUE;
 640                                         array_push( $tokenStack, $token );
 641                                         $txt="";
 642                                         break;
 643
 644                                 case "]]]":
 645                                 case "]]":
 646                                         # link close tag.
 647                                         # get text from stack, glue it together, and call the code to handle a
 648                                         # link
 649
 650                                         if ( count( $tokenStack ) == 0 )
 651                                         {
 652                                                 # stack empty. Found a ]] without an opening [[
 653                                                 $txt = "]]";
 654                                         } else {
 655                                                 $linkText = "";
 656                                                 $lastToken = array_pop( $tokenStack );
 657                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 658                                                 {
 659                                                         if( !empty( $lastToken["text"] ) ) {
 660                                                                 $linkText = $lastToken["text"] . $linkText;
 661                                                         }
 662                                                         $lastToken = array_pop( $tokenStack );
 663                                                 }
 664
 665                                                 $txt = $linkText ."]]";
 666
 667                                                 if( isset( $lastToken["text"] ) ) {
 668                                                         $prefix = $lastToken["text"];
 669                                                 } else {
 670                                                         $prefix = "";
 671                                                 }
 672                                                 $nextToken = $tokenizer->previewToken();
 673                                                 if ( $nextToken["type"] == "text" )
 674                                                 {
 675                                                         # Preview just looks at it. Now we have to fetch it.
 676                                                         $nextToken = $tokenizer->nextToken();
 677                                                         $txt .= $nextToken["text"];
 678                                                 }
 679                                                 $txt = $this->handleInternalLink( $txt, $prefix );
 680
 681                                                 # did the tag start with 3 [ ?
 682                                                 if($threeopen) {
 683                                                         # show the first as text
 684                                                         $txt = "[".$txt;
 685                                                         $threeopen=false;
 686                                                 }
 687
 688                                         }
 689                                         $tagIsOpen = (count( $tokenStack ) != 0);
 690                                         break;
 691                                 case "----":
 692                                         $txt = "\n<hr />\n";
 693                                         break;
 694                                 case "'''":
 695                                         # This and the three next ones handle quotes
 696                                         $txt = $this->handle3Quotes( $state, $token );
 697                                         break;
 698                                 case "''":
 699                                         $txt = $this->handle2Quotes( $state, $token );
 700                                         break;
 701                                 case "'''''":
 702                                         $txt = $this->handle5Quotes( $state, $token );
 703                                         break;
 704                                 case "":
 705                                         # empty token
 706                                         $txt="";
 707                                         break;
 708                                 case "RFC ":
 709                                         if ( $tagIsOpen ) {
 710                                                 $txt = "RFC ";
 711                                         } else {
 712                                                 $txt = $this->doMagicRFC( $tokenizer );
 713                                         }
 714                                         break;
 715                                 case "ISBN ":
 716                                         if ( $tagIsOpen ) {
 717                                                 $txt = "ISBN ";
 718                                         } else {
 719                                                 $txt = $this->doMagicISBN( $tokenizer );
 720                                         }
 721                                         break;
 722                                 default:
 723                                         # Call language specific Hook.
 724                                         $txt = $wgLang->processToken( $token, $tokenStack );
 725                                         if ( NULL == $txt ) {
 726                                                 # An unkown token. Highlight.
 727                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 728                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 729                                         }
 730                                         break;
 731                         }
 732                         # If we're parsing the interior of a link, don't append the interior to $s,
 733                         # but push it to the stack so it can be processed when a ]] token is found.
 734                         if ( $tagIsOpen  && $txt != "" ) {
 735                                 $token["type"] = "text";
 736                                 $token["text"] = $txt;
 737                                 array_push( $tokenStack, $token );
 738                         } else {
 739                                 $s .= $txt;
 740                         }
 741                 } #end while
 742                 if ( count( $tokenStack ) != 0 )
 743                 {
 744                         # still objects on stack. opened [[ tag without closing ]] tag.
 745                         $txt = "";
 746                         while ( $lastToken = array_pop( $tokenStack ) )
 747                         {
 748                                 if ( $lastToken["type"] == "text" )
 749                                 {
 750                                         $txt = $lastToken["text"] . $txt;
 751                                 } else {
 752                                         $txt = $lastToken["type"] . $txt;
 753                                 }
 754                         }
 755                         $s .= $txt;
 756                 }
 757                 return $s;
 758         }
 759
 760         /* private */ function handleInternalLink( $line, $prefix )
 761         {
 762                 global $wgLang, $wgLinkCache;
 763                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 764                 static $fname = "Parser::handleInternalLink" ;
 765                 wfProfileIn( $fname );
 766
 767                 wfProfileIn( "$fname-setup" );
 768                 static $tc = FALSE;
 769                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 770                 $sk =& $this->mOptions->getSkin();
 771
 772                 # Match a link having the form [[namespace:link|alternate]]trail
 773                 static $e1 = FALSE;
 774                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 775                 # Match the end of a line for a word that's not followed by whitespace,
 776                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 777                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 778                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 779                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 780
 781
 782                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 783                 static $image = FALSE;
 784                 static $special = FALSE;
 785                 static $media = FALSE;
 786                 static $category = FALSE;
 787                 if ( !$image ) { $image = Namespace::getImage(); }
 788                 if ( !$special ) { $special = Namespace::getSpecial(); }
 789                 if ( !$media ) { $media = Namespace::getMedia(); }
 790                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 791
 792                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 793
 794                 wfProfileOut( "$fname-setup" );
 795                 $s = "";
 796
 797                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 798                         $text = $m[2];
 799                         $trail = $m[3];
 800                 } else { # Invalid form; output directly
 801                         $s .= $prefix . "[[" . $line ;
 802                         return $s;
 803                 }
 804
 805                 /* Valid link forms:
 806                 Foobar -- normal
 807                 :Foobar -- override special treatment of prefix (images, language links)
 808                 /Foobar -- convert to CurrentPage/Foobar
 809                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 810                 */
 811                 $c = substr($m[1],0,1);
 812                 $noforce = ($c != ":");
 813                 if( $c == "/" ) { # subpage
 814                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 815                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 816                                 $noslash=$m[1];
 817                         } else {
 818                                 $noslash=substr($m[1],1);
 819                         }
 820                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 821                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 822                                 if( "" == $text ) {
 823                                         $text= $m[1];
 824                                 } # this might be changed for ugliness reasons
 825                         } else {
 826                                 $link = $noslash; # no subpage allowed, use standard link
 827                         }
 828                 } elseif( $noforce ) { # no subpage
 829                         $link = $m[1];
 830                 } else {
 831                         $link = substr( $m[1], 1 );
 832                 }
 833                 if( "" == $text )
 834                         $text = $link;
 835
 836                 $nt = Title::newFromText( $link );
 837                 if( !$nt ) {
 838                         $s .= $prefix . "[[" . $line;
 839                         return $s;
 840                 }
 841                 $ns = $nt->getNamespace();
 842                 $iw = $nt->getInterWiki();
 843                 if( $noforce ) {
 844                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 845                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 846                                 $s .= $prefix . $trail;
 847                                 return $s;
 848                         }
 849                         if( $ns == $image ) {
 850                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 851                                 $wgLinkCache->addImageLinkObj( $nt );
 852                                 return $s;
 853                         }
 854                 }
 855                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 856                     ( strpos( $link, "#" ) == FALSE ) ) {
 857                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 858                         return $s;
 859                 }
 860
 861                 # Category feature
 862                 $catns = strtoupper ( $nt->getDBkey () ) ;
 863                 $catns = explode ( ":" , $catns ) ;
 864                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 865                 else $catns = "" ;
 866                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 867                         $t = explode ( ":" , $nt->getText() ) ;
 868                         array_shift ( $t ) ;
 869                         $t = implode ( ":" , $t ) ;
 870                         $t = $wgLang->ucFirst ( $t ) ;
 871                         $nnt = Title::newFromText ( $category.":".$t ) ;
 872                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 873                         $this->mOutput->mCategoryLinks[] = $t ;
 874                         $s .= $prefix . $trail ;
 875                         return $s ;
 876                 }
 877
 878                 if( $ns == $media ) {
 879                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 880                         $wgLinkCache->addImageLinkObj( $nt );
 881                         return $s;
 882                 } elseif( $ns == $special ) {
 883                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 884                         return $s;
 885                 }
 886                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 887
 888                 wfProfileOut( $fname );
 889                 return $s;
 890         }
 891
 892         # Some functions here used by doBlockLevels()
 893         #
 894         /* private */ function closeParagraph()
 895         {
 896                 $result = "";
 897                 if ( '' != $this->mLastSection ) {
 898                         $result = "</" . $this->mLastSection  . ">";
 899                 }
 900                 $this->mLastSection = "";
 901                 return $result."\n";
 902         }
 903         # getCommon() returns the length of the longest common substring
 904         # of both arguments, starting at the beginning of both.
 905         #
 906         /* private */ function getCommon( $st1, $st2 )
 907         {
 908                 $fl = strlen( $st1 );
 909                 $shorter = strlen( $st2 );
 910                 if ( $fl < $shorter ) { $shorter = $fl; }
 911
 912                 for ( $i = 0; $i < $shorter; ++$i ) {
 913                         if ( $st1{$i} != $st2{$i} ) { break; }
 914                 }
 915                 return $i;
 916         }
 917         # These next three functions open, continue, and close the list
 918         # element appropriate to the prefix character passed into them.
 919         #
 920         /* private */ function openList( $char )
 921     {
 922                 $result = $this->closeParagraph();
 923
 924                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 925                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 926                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 927                 else if ( ";" == $char ) {
 928                         $result .= "<dl><dt>";
 929                         $this->mDTopen = true;
 930                 }
 931                 else { $result = "<!-- ERR 1 -->"; }
 932
 933                 return $result;
 934         }
 935
 936         /* private */ function nextItem( $char )
 937         {
 938                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 939                 else if ( ":" == $char || ";" == $char ) {
 940                         $close = "</dd>";
 941                         if ( $this->mDTopen ) { $close = "</dt>"; }
 942                         if ( ";" == $char ) {
 943                                 $this->mDTopen = true;
 944                                 return $close . "<dt>";
 945                         } else {
 946                                 $this->mDTopen = false;
 947                                 return $close . "<dd>";
 948                         }
 949                 }
 950                 return "<!-- ERR 2 -->";
 951         }
 952
 953         /* private */function closeList( $char )
 954         {
 955                 if ( "*" == $char ) { $text = "</li></ul>"; }
 956                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 957                 else if ( ":" == $char ) {
 958                         if ( $this->mDTopen ) {
 959                                 $this->mDTopen = false;
 960                                 $text = "</dt></dl>";
 961                         } else {
 962                                 $text = "</dd></dl>";
 963                         }
 964                 }
 965                 else {  return "<!-- ERR 3 -->"; }
 966                 return $text."\n";
 967         }
 968
 969         /* private */ function doBlockLevels( $text, $linestart )
 970         {
 971                 $fname = "Parser::doBlockLevels";
 972                 wfProfileIn( $fname );
 973                 # Parsing through the text line by line.  The main thing
 974                 # happening here is handling of block-level elements p, pre,
 975                 # and making lists from lines starting with * # : etc.
 976                 #
 977                 $a = explode( "\n", $text );
 978                 $a[0] = "\n".$a[0];
 979                 $lastPref = $text = '';
 980                 $this->mDTopen = $inBlockElem = false;
 981
 982                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 983                 foreach ( $a as $t ) {
 984                         if ( "" != $text ) { $text .= "\n"; }
 985
 986                         $oLine = $t;
 987                         $opl = strlen( $lastPref );
 988                         $npl = strspn( $t, "*#:;" );
 989                         $pref = substr( $t, 0, $npl );
 990                         $pref2 = str_replace( ";", ":", $pref );
 991                         $t = substr( $t, $npl );
 992
 993                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 994                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 995
 996                                 if ( ";" == substr( $pref, -1 ) ) {
 997                                         $cpos = strpos( $t, ":" );
 998                                         if ( ! ( false === $cpos ) ) {
 999                                                 $term = substr( $t, 0, $cpos );
1000                                                 $text .= $term . $this->nextItem( ":" );
1001                                                 $t = substr( $t, $cpos + 1 );
1002                                         }
1003                                 }
1004                         } else if (0 != $npl || 0 != $opl) {
1005                                 $cpl = $this->getCommon( $pref, $lastPref );
1006
1007                                 while ( $cpl < $opl ) {
1008                                         $text .= $this->closeList( $lastPref{$opl-1} );
1009                                         --$opl;
1010                                 }
1011                                 if ( $npl <= $cpl && $cpl > 0 ) {
1012                                         $text .= $this->nextItem( $pref{$cpl-1} );
1013                                 }
1014                                 while ( $npl > $cpl ) {
1015                                         $char = substr( $pref, $cpl, 1 );
1016                                         $text .= $this->openList( $char );
1017
1018                                         if ( ";" == $char ) {
1019                                                 $cpos = strpos( $t, ":" );
1020                                                 if ( ! ( false === $cpos ) ) {
1021                                                         $term = substr( $t, 0, $cpos );
1022                                                         $text .= $term . $this->nextItem( ":" );
1023                                                         $t = substr( $t, $cpos + 1 );
1024                                                 }
1025                                         }
1026                                         ++$cpl;
1027                                 }
1028                                 $lastPref = $pref2;
1029                         }
1030                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
1031                                 if ( preg_match(
1032                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre)/i", $t ) ) {
1033                                         $text .= $this->closeParagraph();
1034                                         $inBlockElem = true;
1035                                 } else if ( preg_match("/(<hr|<\\/td)/i", $t ) ) {
1036                                         $text .= $this->closeParagraph();
1037                                         $inBlockElem = false;
1038                                 }
1039                                 if ( ! $inBlockElem ) {
1040                                         if ( " " == $t{0} ) {
1041                                                 $newSection = "pre";
1042                                                 $text .= $this->closeParagraph();
1043                                                 # $t = wfEscapeHTML( $t );
1044                                         }
1045                                         else { $newSection = "p"; }
1046
1047                                         if ( '' == trim( $oLine ) ) {
1048                                                 if ( $this->mLastSection != 'p') {
1049                                                         $text .= $this->closeParagraph();
1050                                                         $text .= "<" . $newSection . ">";
1051                                                         $this->mLastSection = $newSection;
1052                                                 } else if ( $this->mLastSection == 'p') {
1053                                                         $text .= '<br />';
1054                                                 }
1055                                         } else if ( $this->mLastSection == $newSection and $newSection != 'p' ) {
1056                                                 $text .= $this->closeParagraph();
1057                                                 $text .= "<" . $newSection . ">";
1058                                                 $this->mLastSection = $newSection;
1059                                         }
1060                                 }
1061                                 if ( $inBlockElem &&
1062                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|<\\/p<\\/div|<\\/pre)/i", $t ) ) {
1063                                         $inBlockElem = false;
1064                                 }
1065                         }
1066                         $text .= $t;
1067                 }
1068                 while ( $npl ) {
1069                         $text .= $this->closeList( $pref2{$npl-1} );
1070                         --$npl;
1071                 }
1072                 if ( "" != $this->mLastSection ) {
1073                         $text .= "</" . $this->mLastSection . ">";
1074                         $this->mLastSection = "";
1075                 }
1076                 wfProfileOut( $fname );
1077                 return $text;
1078         }
1079
1080         function getVariableValue( $index ) {
1081                 global $wgLang, $wgSitename, $wgServer;
1082
1083                 switch ( $index ) {
1084                         case MAG_CURRENTMONTH:
1085                                 return date( "m" );
1086                         case MAG_CURRENTMONTHNAME:
1087                                 return $wgLang->getMonthName( date("n") );
1088                         case MAG_CURRENTMONTHNAMEGEN:
1089                                 return $wgLang->getMonthNameGen( date("n") );
1090                         case MAG_CURRENTDAY:
1091                                 return date("j");
1092                         case MAG_CURRENTDAYNAME:
1093                                 return $wgLang->getWeekdayName( date("w")+1 );
1094                         case MAG_CURRENTYEAR:
1095                                 return date( "Y" );
1096                         case MAG_CURRENTTIME:
1097                                 return $wgLang->time( wfTimestampNow(), false );
1098                         case MAG_NUMBEROFARTICLES:
1099                                 return wfNumberOfArticles();
1100                         case MAG_SITENAME:
1101                                 return $wgSitename;
1102                         case MAG_SERVER:
1103                                 return $wgServer;
1104                         default:
1105                                 return NULL;
1106                 }
1107         }
1108
1109         function initialiseVariables()
1110         {
1111                 global $wgVariableIDs;
1112                 $this->mVariables = array();
1113                 foreach ( $wgVariableIDs as $id ) {
1114                         $mw =& MagicWord::get( $id );
1115                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1116                 }
1117         }
1118
1119         /* private */ function replaceVariables( $text )
1120         {
1121                 global $wgLang, $wgCurParser;
1122                 global $wgScript, $wgArticlePath;
1123
1124                 $fname = "Parser::replaceVariables";
1125                 wfProfileIn( $fname );
1126
1127                 $bail = false;
1128                 if ( !$this->mVariables ) {
1129                         $this->initialiseVariables();
1130                 }
1131                 $titleChars = Title::legalChars();
1132                 $regex = "/{{([$titleChars\\|]*?)}}/s";
1133
1134                 # "Recursive" variable expansion: run it through a couple of passes
1135                 for ( $i=0; $i<MAX_INCLUDE_REPEAT && !$bail; $i++ ) {
1136                         $oldText = $text;
1137
1138                         # It's impossible to rebind a global in PHP
1139                         # Instead, we run the substitution on a copy, then merge the changed fields back in
1140                         $wgCurParser = $this->fork();
1141
1142                         $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1143                         if ( $oldText == $text ) {
1144                                 $bail = true;
1145                         }
1146                         $this->merge( $wgCurParser );
1147                 }
1148
1149                 return $text;
1150         }
1151
1152         # Returns a copy of this object except with various variables cleared
1153         # This copy can be re-merged with the parent after operations on the copy
1154         function fork()
1155         {
1156                 $copy = $this;
1157                 $copy->mOutput = new ParserOutput;
1158                 return $copy;
1159         }
1160
1161         # Merges a copy split off with fork()
1162         function merge( &$copy )
1163         {
1164                 $this->mOutput->merge( $copy->mOutput );
1165
1166                 # Merge include throttling arrays
1167                 foreach( $copy->mIncludeCount as $dbk => $count ) {
1168                         if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
1169                                 $this->mIncludeCount[$dbk] += $count;
1170                         } else {
1171                                 $this->mIncludeCount[$dbk] = $count;
1172                         }
1173                 }
1174         }
1175
1176         function braceSubstitution( $matches )
1177         {
1178                 global $wgLinkCache, $wgLang;
1179                 $fname = "Parser::braceSubstitution";
1180                 $found = false;
1181                 $nowiki = false;
1182
1183                 $text = $matches[1];
1184
1185                 # SUBST
1186                 $mwSubst =& MagicWord::get( MAG_SUBST );
1187                 if ( $mwSubst->matchStartAndRemove( $text ) ) {
1188                         if ( $this->mOutputType != OT_WIKI ) {
1189                                 # Invalid SUBST not replaced at PST time
1190                                 # Return without further processing
1191                                 $text = $matches[0];
1192                                 $found = true;
1193                         }
1194                 } elseif ( $this->mOutputType == OT_WIKI ) {
1195                         # SUBST not found in PST pass, do nothing
1196                         $text = $matches[0];
1197                         $found = true;
1198                 }
1199
1200                 # MSG, MSGNW and INT
1201                 if ( !$found ) {
1202                         # Check for MSGNW:
1203                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1204                         if ( $mwMsgnw->matchStartAndRemove( $text ) ) {
1205                                 $nowiki = true;
1206                         } else {
1207                                 # Remove obsolete MSG:
1208                                 $mwMsg =& MagicWord::get( MAG_MSG );
1209                                 $mwMsg->matchStartAndRemove( $text );
1210                         }
1211
1212                         # Check if it is an internal message
1213                         $mwInt =& MagicWord::get( MAG_INT );
1214                         if ( $mwInt->matchStartAndRemove( $text ) ) {
1215                                 $text = wfMsg( $text );
1216                                 $found = true;
1217                         }
1218                 }
1219
1220                 # NS
1221                 if ( !$found ) {
1222                         # Check for NS: (namespace expansion)
1223                         $mwNs = MagicWord::get( MAG_NS );
1224                         if ( $mwNs->matchStartAndRemove( $text ) ) {
1225                                 if ( intval( $text ) ) {
1226                                         $text = $wgLang->getNsText( intval( $text ) );
1227                                         $found = true;
1228                                 } else {
1229                                         $index = Namespace::getCanonicalIndex( strtolower( $text ) );
1230                                         if ( !is_null( $index ) ) {
1231                                                 $text = $wgLang->getNsText( $index );
1232                                                 $found = true;
1233                                         }
1234                                 }
1235                         }
1236                 }
1237
1238                 # LOCALURL and LOCALURLE
1239                 if ( !$found ) {
1240                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1241                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1242
1243                         if ( $mwLocal->matchStartAndRemove( $text ) ) {
1244                                 $func = 'getLocalURL';
1245                         } elseif ( $mwLocalE->matchStartAndRemove( $text ) ) {
1246                                 $func = 'escapeLocalURL';
1247                         } else {
1248                                 $func = '';
1249                         }
1250
1251                         if ( $func !== '' ) {
1252                                 $args = explode( "|", $text );
1253                                 $n = count( $args );
1254                                 if ( $n > 0 ) {
1255                                         $title = Title::newFromText( $args[0] );
1256                                         if ( !is_null( $title ) ) {
1257                                                 if ( $n > 1 ) {
1258                                                         $text = $title->$func( $args[1] );
1259                                                 } else {
1260                                                         $text = $title->$func();
1261                                                 }
1262                                                 $found = true;
1263                                         }
1264                                 }
1265                         }
1266                 }
1267
1268                 # Check for a match against internal variables
1269                 if ( !$found && array_key_exists( $text, $this->mVariables ) ) {
1270                         $text = $this->mVariables[$text];
1271                         $found = true;
1272                         $this->mOutput->mContainsOldMagic = true;
1273                 }
1274
1275                 # Load from database
1276                 if ( !$found ) {
1277                         $title = Title::newFromText( $text, NS_TEMPLATE );
1278                         if ( !is_null( $text ) && !$title->isExternal() ) {
1279                                 # Check for excessive inclusion
1280                                 $dbk = $title->getPrefixedDBkey();
1281                                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1282                                         $this->mIncludeCount[$dbk] = 0;
1283                                 }
1284                                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1285                                         $article = new Article( $title );
1286                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1287                                         if ( $articleContent !== false ) {
1288                                                 $found = true;
1289                                                 $text = $articleContent;
1290
1291                                                 # Escaping and link table handling
1292                                                 # Not required for preSaveTransform()
1293                                                 if ( $this->mOutputType == OT_HTML ) {
1294                                                         if ( $nowiki ) {
1295                                                                 $text = wfEscapeWikiText( $text );
1296                                                         } else {
1297                                                                 $text = $this->removeHTMLtags( $text );
1298                                                         }
1299                                                         $wgLinkCache->suspend();
1300                                                         $text = $this->doTokenizedParser( $text );
1301                                                         $wgLinkCache->resume();
1302                                                         $wgLinkCache->addLinkObj( $title );
1303
1304                                                 }
1305                                         }
1306                                 }
1307
1308                                 # If the title is valid but undisplayable, make a link to it
1309                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1310                                         $text = "[[" . $title->getPrefixedText() . "]]";
1311                                         $found = true;
1312                                 }
1313                         }
1314                 }
1315
1316                 if ( !$found ) {
1317                         return $matches[0];
1318                 } else {
1319                         return $text;
1320                 }
1321         }
1322
# Cleans up HTML, removes dangerous tags and attributes.
#
# The text is split on "<". Every chunk that parses as a whitelisted tag is
# re-emitted with its attributes passed through fixTagAttributes(); every other
# chunk has its "<" and ">" escaped so it shows up as literal text. A stack of
# open paired tags is maintained so that mismatched closing tags are escaped
# and anything left open is closed at the end of the text.
#
# $text: wiki text that may contain HTML
# Returns the sanitised text.
/* private */ function removeHTMLtags( $text )
{
        $fname = "Parser::removeHTMLtags";
        wfProfileIn( $fname );
        $htmlpairs = array( # Tags that must be closed
                "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
                "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
                "strike", "strong", "tt", "var", "div", "center",
                "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
                "ruby", "rt" , "rb" , "rp", "p"
        );
        $htmlsingle = array( # Tags that are not tracked on the stack
                "br", "hr", "li", "dt", "dd"
        );
        $htmlnest = array( # Tags that may be opened while already open
                "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
                "dl", "font", "big", "small", "sub", "sup"
        );
        $tabletags = array( # Can only appear inside table
                "td", "th", "tr"
        );

        # Table cell/row tags are treated like single tags: never pushed on the stack
        $htmlsingle = array_merge( $tabletags, $htmlsingle );
        $htmlelements = array_merge( $htmlsingle, $htmlpairs );

        # NOTE(review): the result is not used in this function; the call is kept
        # only because getHTMLattrs() is not visible from here - confirm it can go.
        $htmlattrs = $this->getHTMLattrs();

        # Remove HTML comments
        $text = preg_replace( "/<!--.*-->/sU", "", $text );

        $bits = explode( "<", $text );
        $text = array_shift( $bits );
        # $tablestack keeps the outer tag stacks while we are inside a table
        $tagstack = array();
        $tablestack = array();

        foreach ( $bits as $x ) {
                # $x is whatever followed one "<". If it does not even look like
                # a tag, escape it and move on (previously this case was handled
                # by silencing the notices from list() on an empty match array).
                if ( !preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/", $x, $regs ) ) {
                        $text .= "&lt;" . str_replace( ">", "&gt;", $x );
                        continue;
                }
                list( , $slash, $t, $params, $brace, $rest ) = $regs;
                $t = strtolower( $t );

                $badtag = 0;
                if ( in_array( $t, $htmlelements ) ) {
                        if ( $slash ) {
                                # Closing a tag: paired tags must match the top of the stack
                                if ( in_array( $t, $htmlsingle ) ) {
                                        $closesCleanly = true;
                                } else {
                                        $ot = array_pop( $tagstack );
                                        $closesCleanly = ( $ot == $t );
                                        if ( !$closesCleanly && !is_null( $ot ) ) {
                                                # Put the wrongly popped tag back; nothing
                                                # is pushed when the stack was empty
                                                array_push( $tagstack, $ot );
                                        }
                                }
                                if ( $closesCleanly ) {
                                        if ( $t == "table" ) {
                                                # Leaving a table: restore the enclosing stack
                                                $tagstack = array_pop( $tablestack );
                                        }
                                        $newparams = "";
                                } else {
                                        $badtag = 1;
                                }
                        } else {
                                # Opening a tag: keep track for later
                                if ( in_array( $t, $tabletags ) &&
                                  ! in_array( "table", $tagstack ) ) {
                                        # Cell/row tag outside of any table
                                        $badtag = 1;
                                } else if ( in_array( $t, $tagstack ) &&
                                  ! in_array( $t, $htmlnest ) ) {
                                        # Already open and not allowed to nest
                                        $badtag = 1;
                                } else if ( ! in_array( $t, $htmlsingle ) ) {
                                        if ( $t == "table" ) {
                                                # A table starts with a fresh stack of its own
                                                array_push( $tablestack, $tagstack );
                                                $tagstack = array();
                                        }
                                        array_push( $tagstack, $t );
                                }
                                # Strip non-approved attributes from the tag
                                $newparams = $this->fixTagAttributes( $params );
                        }
                        if ( ! $badtag ) {
                                $rest = str_replace( ">", "&gt;", $rest );
                                $text .= "<$slash$t $newparams$brace$rest";
                                continue;
                        }
                }
                # Unknown or misplaced tag: show it as literal text
                $text .= "&lt;" . str_replace( ">", "&gt;", $x );
        }
        # Close off any remaining tags
        while ( $t = array_pop( $tagstack ) ) {
                $text .= "</$t>\n";
                if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
        }
        wfProfileOut( $fname );
        return $text;
}
1415
/*
 * Post-processes the headings (<h1> to <h6>) found in already rendered HTML.
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections if the title is editable and the option is set
 * 3) Add a table of contents after the first block of text if the option is set
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text: rendered HTML
 * Returns the HTML with the rebuilt headlines (and TOC, if any) inserted.
 */

/* private */ function formatHeadings( $text )
{
        $doNumberHeadings = $this->mOptions->getNumberHeadings();
        $doShowToc = $this->mOptions->getShowToc();
        if( !$this->mTitle->userCanEdit() ) {
                $showEditLink = 0;
                $rightClickHack = 0;
        } else {
                $showEditLink = $this->mOptions->getEditSection();
                $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
        }

        # Inhibit editsection links if requested in the page
        $esw =& MagicWord::get( MAG_NOEDITSECTION );
        if( $esw->matchAndRemove( $text ) ) {
                $showEditLink = 0;
        }
        # If the NOTOC magic word occurs in the HTML, do not add a TOC
        $mw =& MagicWord::get( MAG_NOTOC );
        if( $mw->matchAndRemove( $text ) ) {
                $doShowToc = 0;
        }

        # Never add the TOC to the Main Page. This is an entry page that should not
        # be more than 1-2 screens large anyway
        if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
                $doShowToc = 0;
        }

        # Collect all headlines: $matches[1] = level digit, $matches[2] = rest of the
        # opening tag including its closing bracket, $matches[3] = headline content.
        # The pattern is assembled from two pieces so the source never contains
        # a question mark directly followed by a closing angle bracket.
        $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

        # If there are fewer than 4 headlines in the article, do not show a TOC
        if( $numMatches < 4 ) {
                $doShowToc = 0;
        }

        # If the FORCETOC magic word occurs in the HTML, override the conditions
        # above and always show the TOC
        $mw =& MagicWord::get( MAG_FORCETOC );
        if ($mw->matchAndRemove( $text ) ) {
                $doShowToc = 1;
        }

        # We need this to perform operations on the HTML
        $sk =& $this->mOptions->getSkin();

        # Numbering data is needed for numbered headings as well as for the TOC
        $needNumbering = ( $doNumberHeadings || $doShowToc );

        $headlineCount = 0;          # index of the headline being processed
        $toclevel = 0;               # current TOC indentation depth
        $toc = "";                   # TOC markup collected so far
        $full = "";                  # final output
        $head = array();             # rebuilt headline HTML, by headline index
        $sublevelCount = array();    # headlines seen so far at each level
        $refers = array();           # how often each anchor name has been used
        $level = 0;
        $prevlevel = 0;
        foreach( $matches[3] as $headline ) {
                $numbering = "";
                if( $level ) {
                        $prevlevel = $level;
                }
                $level = $matches[1][$headlineCount];
                if( $needNumbering && $prevlevel && $level > $prevlevel ) {
                        # reset when we enter a new level
                        $sublevelCount[$level] = 0;
                        $toc .= $sk->tocIndent( $level - $prevlevel );
                        $toclevel += $level - $prevlevel;
                }
                if( $needNumbering && $level < $prevlevel ) {
                        # reset when we step back a level
                        $sublevelCount[$level+1] = 0;
                        $toc .= $sk->tocUnindent( $prevlevel - $level );
                        $toclevel -= $prevlevel - $level;
                }
                # count number of headlines for each level
                if( !isset( $sublevelCount[$level] ) ) {
                        $sublevelCount[$level] = 0;
                }
                $sublevelCount[$level]++;
                if( $needNumbering ) {
                        # Join the non-empty counters of all levels up to this one: "2.1.3"
                        $dot = 0;
                        for( $i = 1; $i <= $level; $i++ ) {
                                if( !empty( $sublevelCount[$i] ) ) {
                                        if( $dot ) {
                                                $numbering .= ".";
                                        }
                                        $numbering .= $sublevelCount[$i];
                                        $dot = 1;
                                }
                        }
                }

                # The canonized header is a version of the header text safe to use for links
                # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                $canonized_headline = Parser::unstrip( $headline, $this->mStripState );

                # strip out HTML
                $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                $tocline = trim( $canonized_headline );
                $canonized_headline = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));

                # Track how often this anchor name occurred so duplicates can be told apart
                if( !isset( $refers[$canonized_headline] ) ) {
                        $refers[$canonized_headline] = 0;
                }
                $refers[$canonized_headline]++;
                $occurrence = $refers[$canonized_headline];

                # Prepend the number to the heading text
                if( $needNumbering ) {
                        $tocline = $numbering . " " . $tocline;

                        # Don't number the heading if it is the only one (looks silly)
                        if( $doNumberHeadings && count( $matches[3] ) > 1) {
                                # the two are different if the line contains a link
                                $headline = $numbering . " " . $headline;
                        }
                }

                # Create the anchor for linking from the TOC to the section;
                # repeated names get "_2", "_3", ... appended
                $anchor = $canonized_headline;
                if( $occurrence > 1 ) {
                        $anchor .= "_" . $occurrence;
                }
                if( $doShowToc ) {
                        $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                }

                # The [edit] link, if any, precedes the anchor and the heading tag
                $headHtml = "";
                if( $showEditLink ) {
                        $headHtml .= $sk->editSectionLink($headlineCount+1);
                }

                # Add the edit section span
                if( $rightClickHack ) {
                        $headline = $sk->editSectionScript($headlineCount+1,$headline);
                }

                # give headline the correct <h#> tag
                $head[$headlineCount] = $headHtml . "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

                $headlineCount++;
        }

        if( $doShowToc ) {
                # Close whatever indentation is still open and wrap the TOC
                $toc .= $sk->tocUnindent( $toclevel );
                $toc = $sk->tocTable( $toc );
        }

        # split up and insert constructed headlines:
        # block 0, headline 0, block 1, headline 1, ...
        $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
        $i = 0;

        foreach( $blocks as $block ) {
                if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
                        # This is the [edit] link that appears for the top block of text when
                        # section editing is enabled
                        $full .= $sk->editSectionLink(0);
                }
                $full .= $block;
                if( $doShowToc && !$i) {
                        # Let's add a top anchor just in case we want to link to the top of the page;
                        # the TOC goes right after the first block
                        $full = "<a name=\"top\"></a>".$full.$toc;
                }

                if( !empty( $head[$i] ) ) {
                        $full .= $head[$i];
                }
                $i++;
        }

        return $full;
}
1607
# Builds the replacement for an "ISBN" magic token.
#
# Tokens with an empty type are skipped. If the next token is a text token it
# is consumed: leading blanks are split off, then the longest run of digits,
# dashes and upper-case letters is taken as the ISBN. When that run contains
# anything besides dashes, a link to Special:Booksources is returned, followed
# by the rest of the token text. Otherwise the text is returned unchanged
# behind the word "ISBN ", including any dashes that were read.
#
# $tokenizer: object providing previewToken() and nextToken(); each token is
#             an array with at least the keys "type" and "text"
# Returns the HTML/text that replaces the magic token.
/* private */ function doMagicISBN( &$tokenizer )
{
        # Check whether next token is a text token
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" ) {
                return "ISBN ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        # strspn() measures the prefixes without ever indexing past the end of
        # the string. The casts cover PHP versions where substr() returns false
        # instead of "" at the end of the string.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        $isbnLen = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $isbnLen );
        $x = (string)substr( $x, $isbnLen );

        # The number passed to Booksources has no dashes
        $num = str_replace( "-", "", $isbn );

        if ( $num === "" ) {
                # Nothing usable: give back everything that was read, dashes included
                return "ISBN $blank$isbn$x";
        }

        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
        $text = "<a href=\"" .
                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>";
        return $text . $x;
}
# Builds the replacement for an "RFC" magic token.
#
# Tokens with an empty type are skipped. If the next token is a text token it
# is consumed: leading blanks are split off, then the leading run of digits is
# taken as the RFC number. With a number, an external link is returned whose
# URL is the "rfcurl" message with "$1" replaced by the number, followed by
# the rest of the token text. Without one, the text is returned unchanged
# behind the word "RFC ".
#
# $tokenizer: object providing previewToken() and nextToken(); each token is
#             an array with at least the keys "type" and "text"
# Returns the HTML/text that replaces the magic token.
/* private */ function doMagicRFC( &$tokenizer )
{
        # Check whether next token is a text token
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" ) {
                return "RFC ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];
        $valid = "0123456789";

        # strspn() measures the prefixes without ever indexing past the end of
        # the string. The casts cover PHP versions where substr() returns false
        # instead of "" at the end of the string.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        $rfcLen = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $rfcLen );
        $x = (string)substr( $x, $rfcLen );

        if ( $rfc === "" ) {
                # No number: plain assignment (this used to append to an undefined variable)
                return "RFC $blank$x";
        }

        $url = wfMsg( "rfcurl" );
        $url = str_replace( '$1', $rfc, $url );
        $sk =& $this->mOptions->getSkin();
        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
        return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1696
# Transforms wiki markup before it is saved; the result is wiki markup again
# (output type OT_WIKI), not HTML.
#
# $text:       wiki markup as submitted
# $title:      title of the page, stored by reference in mTitle
# $user:       user object handed on to pstPass2()
# $options:    parser options, stored in mOptions
# $clearState: when true, clearState() is called before the transformation
# Returns the transformed wiki markup.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOutputType = OT_WIKI;
        $this->mTitle =& $title;
        $this->mOptions = $options;

        if ( $clearState ) {
                $this->clearState();
        }

        # strip() is given this variable to work with and unstrip() gets it back
        $placeholders = false;

        # Normalise line endings, then strip, transform and unstrip
        $normalized = str_replace("\r\n", "\n", $text);
        $stripped = $this->strip( $normalized, $placeholders, false );
        $transformed = $this->pstPass2( $stripped, $user );
        return $this->unstrip( $transformed, $placeholders );
}
1714
# Second pass of the pre-save transformation: variable replacement, signature
# expansion, context link ("pipe trick") expansion and trailing whitespace
# removal.
#
# $text: wiki markup
# $user: user object; getName() and getOption( "nickname" ) supply the signature
# Returns the transformed wiki markup.
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }

        # Note: this is an ugly timezone hack for the European wikis.
        # The TZ environment variable is switched only while the timestamp is built.
        # NOTE(review): if TZ was not set before, it is "restored" to an empty
        # value rather than removed - confirm whether that matters.
        if(isset($wgLocaltimezone)) {
                $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
        }
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

        # The tilde runs are literal strings, so plain string replacement is
        # used: with preg_replace() a "$1" or "\1" in the name or nickname would
        # be interpreted as a backreference. Longest run first.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]]

        # The context is the parenthesised suffix of the current page title, if any
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }
        # Most specific pattern first
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1785
# Primes the member variables that are usually set up in parse(), so that an
# external function can call individual class members with confidence.
#
# $title:      title object, stored by reference in mTitle
# $options:    parser options, stored in mOptions
# $outputType: one of the OT_* constants, stored in mOutputType
# $clearState: when true (the default), clearState() is called afterwards
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1797
# Runs variable replacement over a message text (output type OT_MSG).
#
# The global $wgTitle is used as the title, and the parser state is always
# cleared. A static flag guards against infinite recursion: a call made while
# another call is still running returns $text untouched.
#
# $text:    message text
# $options: parser options, stored in mOptions
# Returns the text after replaceVariables(), or the unchanged text for a
# nested call.
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $executing = false;

        if ( !$executing ) {
                $executing = true;

                $this->mOutputType = OT_MSG;
                $this->mOptions = $options;
                $this->mTitle = $wgTitle;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $executing = false;
        }

        return $text;
}
1817 }
1818
# Holds the result of a parse: the output text, the language links and
# category links collected along the way, and a flag telling whether old-style
# magic variables were found.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor; every argument is optional and is stored as given.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        # (presumably the previous value - confirm against wfSetVar)
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Folds the link lists and the old-magic flag of another ParserOutput
        # into this one. The text is left alone.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links (the language
                # links of this object used to be appended here by mistake)
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1848
# Bundle of settings that control a parse. Values are filled in from site-wide
# globals and from a user's preferences by initialiseFromUser(), and can be read
# or overridden individually through the get*/set* methods below.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors: each returns the corresponding member unchanged
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators: each delegates to wfSetVar() (wfSetRef() for the skin) and
        # returns whatever that helper returns.
        # NOTE(review): presumably the previous value -- confirm against the helpers.
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Factory: build a new ParserOptions and initialise it from $user.
        #   $user - user object (taken by reference); a false-ish value makes
        #           initialiseFromUser() fall back to a fresh User object
        # Returns the initialised ParserOptions.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # No '&' at the call site: initialiseFromUser() already declares its
                # parameter by reference, and call-time pass-by-reference is a fatal
                # error from PHP 5.4 onwards.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Populate every option of this object.
        #   $userInput - user object (taken by reference) supplying the skin and the
        #                per-user options; if it evaluates to false, a new User
        #                marked as loaded via setLoaded( true ) is used instead
        # Site-wide switches are copied from the $wg* globals; the skin and the
        # remaining options are read from the user object.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                # Settings taken from global configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Settings taken from the user; the skin is bound by reference
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
1921
# Regex callback used by Parser::replaceVariables.
# Forwards the match array to the braceSubstitution() method of the parser
# held in the global $wgCurParser and hands back that method's result.
#   $matches - match array supplied by the regex engine
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
1928
1929 ?>