includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36 define( "MAX_INCLUDE_REPEAT", 5 );
  37
  38 # Recursion depth of variable/inclusion evaluation
  39 define( "MAX_INCLUDE_PASSES", 3 );
  40
  41 # Allowed values for $mOutputType
  42 define( "OT_HTML", 1 );
  43 define( "OT_WIKI", 2 );
  44 define( "OT_MSG", 3 );
  45
  46 # prefix for escaping, used in two functions at least
  47 define( "UNIQ_PREFIX", "NaodW29");
  48
  49 class Parser
  50 {
  51         # Cleared with clearState():
  52         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array();
  53         var $mVariables, $mIncludeCount;
  54
  55         # Temporary:
  56         var $mOptions, $mTitle, $mOutputType;
  57
  58         function Parser()
  59         {
  60                 $this->clearState();
  61         }
  62
  63         function clearState()
  64         {
  65                 $this->mOutput = new ParserOutput;
  66                 $this->mAutonumber = 0;
  67                 $this->mLastSection = "";
  68                 $this->mDTopen = false;
  69                 $this->mVariables = false;
  70                 $this->mIncludeCount = array();
  71                 $this->mStripState = array();
  72         }
  73
  74         # First pass--just handle <nowiki> sections, pass the rest off
  75         # to doWikiPass2() which does all the real work.
  76         #
  77         # Returns a ParserOutput
  78         #
  79         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  80         {
  81                 $fname = "Parser::parse";
  82                 wfProfileIn( $fname );
  83
  84                 if ( $clearState ) {
  85                         $this->clearState();
  86                 }
  87
  88                 $this->mOptions = $options;
  89                 $this->mTitle =& $title;
  90                 $this->mOutputType = OT_HTML;
  91
  92                 $stripState = NULL;
  93                 $text = $this->strip( $text, $this->mStripState );
  94                 $text = $this->doWikiPass2( $text, $linestart );
  95                 $text = $this->unstrip( $text, $this->mStripState );
  96
  97                 $this->mOutput->setText( $text );
  98                 wfProfileOut( $fname );
  99                 return $this->mOutput;
 100         }
 101
 102         /* static */ function getRandomString()
 103         {
 104                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 105         }
 106
        # Replaces all occurrences of <$tag>content</$tag> in the text
        # with a random marker and returns the new text. The output parameter
        # $content will be an associative array filled with data of the form
        # $unique_marker => content.
 111
 112         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 113                 $result = array();
 114                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 115                 $content = array( );
 116                 $n = 1;
 117                 $stripped = "";
 118
 119                 while ( "" != $text ) {
 120                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 121                         $stripped .= $p[0];
 122                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 123                                 $text = "";
 124                         } else {
 125                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 126                                 $marker = $rnd . sprintf("%08X", $n++);
 127                                 $content[$marker] = $q[0];
 128                                 $stripped .= $marker;
 129                                 $text = $q[1];
 130                         }
 131                 }
 132                 return $stripped;
 133         }
 134
 135         # Strips <nowiki>, <pre> and <math>
 136         # Returns the text, and fills an array with data needed in unstrip()
 137         #
 138         function strip( $text, &$state )
 139         {
 140                 $render = ($this->mOutputType == OT_HTML);
 141                 $nowiki_content = array();
 142                 $hiero_content = array();
 143                 $math_content = array();
 144                 $pre_content = array();
 145
 146                 # Replace any instances of the placeholders
 147                 $uniq_prefix = UNIQ_PREFIX;
 148                 $text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 149
 150                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 151                 foreach( $nowiki_content as $marker => $content ){
 152                         if( $render ){
 153                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 154                         } else {
 155                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 156                         }
 157                 }
 158
 159                 if( $GLOBALS['wgUseWikiHiero'] ){
 160                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 161                         foreach( $hiero_content as $marker => $content ){
 162                                 if( $render ){
 163                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 164                                 } else {
 165                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 166                                 }
 167                         }
 168                 }
 169
 170                 if( $this->mOptions->getUseTeX() ){
 171                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 172                         foreach( $math_content as $marker => $content ){
 173                                 if( $render ){
 174                                         $math_content[$marker] = renderMath( $content );
 175                                 } else {
 176                                         $math_content[$marker] = "<math>$content</math>";
 177                                 }
 178                         }
 179                 }
 180
 181                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 182                 foreach( $pre_content as $marker => $content ){
 183                         if( $render ){
 184                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 185                         } else {
 186                                 $pre_content[$marker] = "<pre>$content</pre>";
 187                         }
 188                 }
 189
 190                 # Must expand in reverse order, otherwise nested tags will be corrupted
 191                 $state = array( $pre_content, $math_content, $hiero_content, $nowiki_content );
 192                 return $text;
 193         }
 194
 195         function unstrip( $text, &$state )
 196         {
 197                 foreach( $state as $content_dict ){
 198                         foreach( $content_dict as $marker => $content ){
 199                                 $text = str_replace( $marker, $content, $text );
 200                         }
 201                 }
 202                 return $text;
 203         }
 204
 205         function categoryMagic ()
 206         {
 207                 global $wgLang , $wgUser ;
 208                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 209                 $id = $this->mTitle->getArticleID() ;
 210                 $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
 211                 $ti = $this->mTitle->getText() ;
 212                 $ti = explode ( ":" , $ti , 2 ) ;
 213                 if ( $cat != $ti[0] ) return "" ;
 214                 $r = "<br break='all' />\n" ;
 215
 216                 $articles = array() ;
 217                 $parents = array () ;
 218                 $children = array() ;
 219
 220
 221 #               $sk =& $this->mGetSkin();
 222                 $sk =& $wgUser->getSkin() ;
 223
 224                 $data = array () ;
 225                 $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 226                 $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 227
 228                 $res = wfQuery ( $sql1, DB_READ ) ;
 229                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 230
 231                 $res = wfQuery ( $sql2, DB_READ ) ;
 232                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 233
 234
 235                 foreach ( $data AS $x )
 236                 {
 237                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 238                         if ( $t != "" ) $t .= ":" ;
 239                         $t .= $x->cur_title ;
 240
 241                         $y = explode ( ":" , $t , 2 ) ;
 242                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 243                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 244                         } else {
 245                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 246                         }
 247                 }
 248                 wfFreeResult ( $res ) ;
 249
 250                 # Children
 251                 if ( count ( $children ) > 0 )
 252                 {
 253                         asort ( $children ) ;
 254                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 255                         $r .= implode ( ", " , $children ) ;
 256                 }
 257
 258                 # Articles
 259                 if ( count ( $articles ) > 0 )
 260                 {
 261                         asort ( $articles ) ;
 262                         $h =  wfMsg( "category_header", $ti[1] );
 263                         $r .= "<h2>{$h}</h2>\n" ;
 264                         $r .= implode ( ", " , $articles ) ;
 265                 }
 266
 267
 268                 return $r ;
 269         }
 270
 271         function getHTMLattrs ()
 272         {
 273                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 274                                 "title", "align", "lang", "dir", "width", "height",
 275                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 276                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 277                                 /* FONT */ "type", "start", "value", "compact",
 278                                 /* For various lists, mostly deprecated but safe */
 279                                 "summary", "width", "border", "frame", "rules",
 280                                 "cellspacing", "cellpadding", "valign", "char",
 281                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 282                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 283                                 "id", "class", "name", "style" /* For CSS */
 284                                 );
 285                 return $htmlattrs ;
 286         }
 287
 288         function fixTagAttributes ( $t )
 289         {
 290                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 291                 $htmlattrs = $this->getHTMLattrs() ;
 292
 293                 # Strip non-approved attributes from the tag
 294                 $t = preg_replace(
 295                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 296                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 297                         $t);
 298                 # Strip javascript "expression" from stylesheets. Brute force approach:
 299                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 300
 301                 if( preg_match(
 302                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 303                         wfMungeToUtf8( $t ) ) )
 304                 {
 305                         $t="";
 306                 }
 307
 308                 return trim ( $t ) ;
 309         }
 310
 311         function doTableStuff ( $t )
 312         {
 313                 $t = explode ( "\n" , $t ) ;
 314                 $td = array () ; # Is currently a td tag open?
 315                         $ltd = array () ; # Was it TD or TH?
 316                         $tr = array () ; # Is currently a tr tag open?
 317                         $ltr = array () ; # tr attributes
 318                         foreach ( $t AS $k => $x )
 319                         {
 320                                 $x = rtrim ( $x ) ;
 321                                 $fc = substr ( $x , 0 , 1 ) ;
 322                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 323                                 {
 324                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 325                                         array_push ( $td , false ) ;
 326                                         array_push ( $ltd , "" ) ;
 327                                         array_push ( $tr , false ) ;
 328                                         array_push ( $ltr , "" ) ;
 329                                 }
 330                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 331                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 332                                 {
 333                                         $z = "</table>\n" ;
 334                                         $l = array_pop ( $ltd ) ;
 335                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 336                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 337                                         array_pop ( $ltr ) ;
 338                                         $t[$k] = $z ;
 339                                 }
 340                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 341                                                 {
 342                                                 $z = trim ( substr ( $x , 2 ) ) ;
 343                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 344                                                 }*/
 345                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 346                                 {
 347                                         $x = substr ( $x , 1 ) ;
 348                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 349                                         $z = "" ;
 350                                         $l = array_pop ( $ltd ) ;
 351                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 352                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 353                                         array_pop ( $ltr ) ;
 354                                         $t[$k] = $z ;
 355                                         array_push ( $tr , false ) ;
 356                                         array_push ( $td , false ) ;
 357                                         array_push ( $ltd , "" ) ;
 358                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 359                                 }
 360                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 361                                 {
 362                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 363                                         {
 364                                                 $fc = "+" ;
 365                                                 $x = substr ( $x , 1 ) ;
 366                                         }
 367                                         $after = substr ( $x , 1 ) ;
 368                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 369                                         $after = explode ( "||" , $after ) ;
 370                                         $t[$k] = "" ;
 371                                         foreach ( $after AS $theline )
 372                                         {
 373                                                 $z = "" ;
 374                                                 if ( $fc != "+" )
 375                                                 {
 376                                                         $tra = array_pop ( $ltr ) ;
 377                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 378                                                         array_push ( $tr , true ) ;
 379                                                         array_push ( $ltr , "" ) ;
 380                                                 }
 381
 382                                                 $l = array_pop ( $ltd ) ;
 383                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 384                                                 if ( $fc == "|" ) $l = "td" ;
 385                                                 else if ( $fc == "!" ) $l = "th" ;
 386                                                 else if ( $fc == "+" ) $l = "caption" ;
 387                                                 else $l = "" ;
 388                                                 array_push ( $ltd , $l ) ;
 389                                                 $y = explode ( "|" , $theline , 2 ) ;
 390                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 391                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 392                                                 $t[$k] .= $y ;
 393                                                 array_push ( $td , true ) ;
 394                                         }
 395                                 }
 396                         }
 397
 398                 # Closing open td, tr && table
 399                 while ( count ( $td ) > 0 )
 400                 {
 401                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 402                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 403                         $t[] = "</table>" ;
 404                 }
 405
 406                 $t = implode ( "\n" , $t ) ;
 407                 #               $t = $this->removeHTMLtags( $t );
 408                 return $t ;
 409         }
 410
 411         # Well, OK, it's actually about 14 passes.  But since all the
 412         # hard lifting is done inside PHP's regex code, it probably
 413         # wouldn't speed things up much to add a real parser.
 414         #
 415         function doWikiPass2( $text, $linestart )
 416         {
 417                 $fname = "Parser::doWikiPass2";
 418                 wfProfileIn( $fname );
 419
 420                 $text = $this->removeHTMLtags( $text );
 421                 $text = $this->replaceVariables( $text );
 422
 423                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 424
 425                 $text = $this->doHeadings( $text );
 426
 427                 if($this->mOptions->getUseDynamicDates()) {
 428                         global $wgDateFormatter;
 429                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 430                 }
 431
 432                 $text = $this->replaceExternalLinks( $text );
 433                 $text = $this->doTokenizedParser ( $text );
 434
 435                 $text = $this->doTableStuff ( $text ) ;
 436
 437                 $text = $this->formatHeadings( $text );
 438
 439                 $sk =& $this->mOptions->getSkin();
 440                 $text = $sk->transformContent( $text );
 441                 $fixtags = array(
 442                         "/<hr *>/i" => '<hr/>',
 443                         "/<br *>/i" => '<br/>',
 444                         "/<center *>/i"=>'<span style="text-align:center;">',
 445                         "/<\\/center *>/i" => '</span>'
 446                 );
 447                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 448
 449                 $text .= $this->categoryMagic () ;
 450
 451                 # needs to be called last
 452                 $text = $this->doBlockLevels( $text, $linestart );
 453
 454                 wfProfileOut( $fname );
 455                 return $text;
 456         }
 457
 458
 459         /* private */ function doHeadings( $text )
 460         {
 461                 for ( $i = 6; $i >= 1; --$i ) {
 462                         $h = substr( "======", 0, $i );
 463                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 464                           "<h{$i}>\\1</h{$i}>\\2", $text );
 465                 }
 466                 return $text;
 467         }
 468
 469         # Note: we have to do external links before the internal ones,
 470         # and otherwise take great care in the order of things here, so
 471         # that we don't end up interpreting some URLs twice.
 472
 473         /* private */ function replaceExternalLinks( $text )
 474         {
 475                 $fname = "Parser::replaceExternalLinks";
 476                 wfProfileIn( $fname );
 477                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 478                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 479                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 480                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 481                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 482                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 483                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 484                 wfProfileOut( $fname );
 485                 return $text;
 486         }
 487
 488         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 489         {
 490                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 491                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 492
 493                 # this is  the list of separators that should be ignored if they
 494                 # are the last character of an URL but that should be included
 495                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 496                 # in this case, the last comma should not become part of the URL,
 497                 # but in "www.foo.com/123,2342,32.htm" it should.
 498                 $sep = ",;\.:";
 499                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 500                 $images = "gif|png|jpg|jpeg";
 501
 502                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 503                 # they are interpreted as part of the string (used to tell PHP
 504                 # that the content of the string should be inserted there).
 505                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 506                   "((?i){$images})([^{$uc}]|$)/";
 507
 508                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 509                 $sk =& $this->mOptions->getSkin();
 510
 511                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 512                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 513                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 514                 }
 515                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 516                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 517                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 518                   "</a>\\5", $s );
 519                 $s = str_replace( $unique, $protocol, $s );
 520
 521                 $a = explode( "[{$protocol}:", " " . $s );
 522                 $s = array_shift( $a );
 523                 $s = substr( $s, 1 );
 524
 525                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 526                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 527
 528                 foreach ( $a as $line ) {
 529                         if ( preg_match( $e1, $line, $m ) ) {
 530                                 $link = "{$protocol}:{$m[1]}";
 531                                 $trail = $m[2];
 532                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 533                                 else { $text = wfEscapeHTML( $link ); }
 534                         } else if ( preg_match( $e2, $line, $m ) ) {
 535                                 $link = "{$protocol}:{$m[1]}";
 536                                 $text = $m[2];
 537                                 $trail = $m[3];
 538                         } else {
 539                                 $s .= "[{$protocol}:" . $line;
 540                                 continue;
 541                         }
 542                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 543                                 $paren = "";
 544                         } else {
 545                                 # Expand the URL for printable version
 546                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 547                         }
 548                         $la = $sk->getExternalLinkAttributes( $link, $text );
 549                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 550
 551                 }
 552                 return $s;
 553         }
 554
 555         /* private */ function handle3Quotes( &$state, $token )
 556         {
 557                 if ( $state["strong"] !== false ) {
 558                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 559                         {
 560                                 # ''' lala ''lala '''
 561                                 $s = "</em></strong><em>";
 562                         } else {
 563                                 $s = "</strong>";
 564                         }
 565                         $state["strong"] = FALSE;
 566                 } else {
 567                         $s = "<strong>";
 568                         $state["strong"] = $token["pos"];
 569                 }
 570                 return $s;
 571         }
 572
 573         /* private */ function handle2Quotes( &$state, $token )
 574         {
 575                 if ( $state["em"] !== false ) {
 576                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 577                         {
 578                                 # ''lala'''lala'' ....'''
 579                                 $s = "</strong></em><strong>";
 580                         } else {
 581                                 $s = "</em>";
 582                         }
 583                         $state["em"] = FALSE;
 584                 } else {
 585                         $s = "<em>";
 586                         $state["em"] = $token["pos"];
 587                 }
 588                 return $s;
 589         }
 590
 591         /* private */ function handle5Quotes( &$state, $token )
 592         {
 593                 $s = "";
 594                 if ( $state["em"] !== false && $state["strong"] ) {
 595                         if ( $state["em"] < $state["strong"] ) {
 596                                 $s .= "</strong></em>";
 597                         } else {
 598                                 $s .= "</em></strong>";
 599                         }
 600                         $state["strong"] = $state["em"] = FALSE;
 601                 } elseif ( $state["em"] !== false ) {
 602                         $s .= "</em><strong>";
 603                         $state["em"] = FALSE;
 604                         $state["strong"] = $token["pos"];
 605                 } elseif ( $state["strong"] !== false ) {
 606                         $s .= "</strong><em>";
 607                         $state["strong"] = FALSE;
 608                         $state["em"] = $token["pos"];
 609                 } else { # not $em and not $strong
 610                         $s .= "<strong><em>";
 611                         $state["strong"] = $state["em"] = $token["pos"];
 612                 }
 613                 return $s;
 614         }
 615
 616         /* private */ function doTokenizedParser( $str )
 617         {
 618                 global $wgLang; # for language specific parser hook
 619
 620                 $tokenizer=Tokenizer::newFromString( $str );
 621                 $tokenStack = array();
 622
 623                 $s="";
 624                 $state["em"]      = FALSE;
 625                 $state["strong"]  = FALSE;
 626                 $tagIsOpen = FALSE;
 627                 $threeopen = false;
 628
 629                 # The tokenizer splits the text into tokens and returns them one by one.
 630                 # Every call to the tokenizer returns a new token.
 631                 while ( $token = $tokenizer->nextToken() )
 632                 {
 633                         switch ( $token["type"] )
 634                         {
 635                                 case "text":
 636                                         # simple text with no further markup
 637                                         $txt = $token["text"];
 638                                         break;
 639                                 case "[[[":
 640                                         # remember the tag opened with 3 [
 641                                         $threeopen = true;
 642                                 case "[[":
 643                                         # link opening tag.
 644                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 645                                         $tagIsOpen = TRUE;
 646                                         array_push( $tokenStack, $token );
 647                                         $txt="";
 648                                         break;
 649
 650                                 case "]]]":
 651                                 case "]]":
 652                                         # link close tag.
 653                                         # get text from stack, glue it together, and call the code to handle a
 654                                         # link
 655
 656                                         if ( count( $tokenStack ) == 0 )
 657                                         {
 658                                                 # stack empty. Found a ]] without an opening [[
 659                                                 $txt = "]]";
 660                                         } else {
 661                                                 $linkText = "";
 662                                                 $lastToken = array_pop( $tokenStack );
 663                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 664                                                 {
 665                                                         if( !empty( $lastToken["text"] ) ) {
 666                                                                 $linkText = $lastToken["text"] . $linkText;
 667                                                         }
 668                                                         $lastToken = array_pop( $tokenStack );
 669                                                 }
 670
 671                                                 $txt = $linkText ."]]";
 672
 673                                                 if( isset( $lastToken["text"] ) ) {
 674                                                         $prefix = $lastToken["text"];
 675                                                 } else {
 676                                                         $prefix = "";
 677                                                 }
 678                                                 $nextToken = $tokenizer->previewToken();
 679                                                 if ( $nextToken["type"] == "text" )
 680                                                 {
 681                                                         # Preview just looks at it. Now we have to fetch it.
 682                                                         $nextToken = $tokenizer->nextToken();
 683                                                         $txt .= $nextToken["text"];
 684                                                 }
 685                                                 $fakestate = $this->mStripState;
 686                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$fakestate), $prefix );
 687
 688                                                 # did the tag start with 3 [ ?
 689                                                 if($threeopen) {
 690                                                         # show the first as text
 691                                                         $txt = "[".$txt;
 692                                                         $threeopen=false;
 693                                                 }
 694
 695                                         }
 696                                         $tagIsOpen = (count( $tokenStack ) != 0);
 697                                         break;
 698                                 case "----":
 699                                         $txt = "\n<hr />\n";
 700                                         break;
 701                                 case "'''":
 702                                         # This and the three next ones handle quotes
 703                                         $txt = $this->handle3Quotes( $state, $token );
 704                                         break;
 705                                 case "''":
 706                                         $txt = $this->handle2Quotes( $state, $token );
 707                                         break;
 708                                 case "'''''":
 709                                         $txt = $this->handle5Quotes( $state, $token );
 710                                         break;
 711                                 case "":
 712                                         # empty token
 713                                         $txt="";
 714                                         break;
 715                                 case "RFC ":
 716                                         if ( $tagIsOpen ) {
 717                                                 $txt = "RFC ";
 718                                         } else {
 719                                                 $txt = $this->doMagicRFC( $tokenizer );
 720                                         }
 721                                         break;
 722                                 case "ISBN ":
 723                                         if ( $tagIsOpen ) {
 724                                                 $txt = "ISBN ";
 725                                         } else {
 726                                                 $txt = $this->doMagicISBN( $tokenizer );
 727                                         }
 728                                         break;
 729                                 default:
 730                                         # Call language specific Hook.
 731                                         $txt = $wgLang->processToken( $token, $tokenStack );
 732                                         if ( NULL == $txt ) {
 733                                                 # An unkown token. Highlight.
 734                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 735                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 736                                         }
 737                                         break;
 738                         }
 739                         # If we're parsing the interior of a link, don't append the interior to $s,
 740                         # but push it to the stack so it can be processed when a ]] token is found.
 741                         if ( $tagIsOpen  && $txt != "" ) {
 742                                 $token["type"] = "text";
 743                                 $token["text"] = $txt;
 744                                 array_push( $tokenStack, $token );
 745                         } else {
 746                                 $s .= $txt;
 747                         }
 748                 } #end while
 749                 if ( count( $tokenStack ) != 0 )
 750                 {
 751                         # still objects on stack. opened [[ tag without closing ]] tag.
 752                         $txt = "";
 753                         while ( $lastToken = array_pop( $tokenStack ) )
 754                         {
 755                                 if ( $lastToken["type"] == "text" )
 756                                 {
 757                                         $txt = $lastToken["text"] . $txt;
 758                                 } else {
 759                                         $txt = $lastToken["type"] . $txt;
 760                                 }
 761                         }
 762                         $s .= $txt;
 763                 }
 764                 return $s;
 765         }
 766
 767         /* private */ function handleInternalLink( $line, $prefix )
 768         {
 769                 global $wgLang, $wgLinkCache;
 770                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 771                 static $fname = "Parser::handleInternalLink" ;
 772                 wfProfileIn( $fname );
 773
 774                 wfProfileIn( "$fname-setup" );
 775                 static $tc = FALSE;
 776                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 777                 $sk =& $this->mOptions->getSkin();
 778
 779                 # Match a link having the form [[namespace:link|alternate]]trail
 780                 static $e1 = FALSE;
 781                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 782                 # Match the end of a line for a word that's not followed by whitespace,
 783                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 784                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 785                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 786                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 787
 788
 789                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 790                 static $image = FALSE;
 791                 static $special = FALSE;
 792                 static $media = FALSE;
 793                 static $category = FALSE;
 794                 if ( !$image ) { $image = Namespace::getImage(); }
 795                 if ( !$special ) { $special = Namespace::getSpecial(); }
 796                 if ( !$media ) { $media = Namespace::getMedia(); }
 797                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 798
 799                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 800
 801                 wfProfileOut( "$fname-setup" );
 802                 $s = "";
 803
 804                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 805                         $text = $m[2];
 806                         $trail = $m[3];
 807                 } else { # Invalid form; output directly
 808                         $s .= $prefix . "[[" . $line ;
 809                         return $s;
 810                 }
 811
 812                 /* Valid link forms:
 813                 Foobar -- normal
 814                 :Foobar -- override special treatment of prefix (images, language links)
 815                 /Foobar -- convert to CurrentPage/Foobar
 816                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 817                 */
 818                 $c = substr($m[1],0,1);
 819                 $noforce = ($c != ":");
 820                 if( $c == "/" ) { # subpage
 821                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 822                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 823                                 $noslash=$m[1];
 824                         } else {
 825                                 $noslash=substr($m[1],1);
 826                         }
 827                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 828                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 829                                 if( "" == $text ) {
 830                                         $text= $m[1];
 831                                 } # this might be changed for ugliness reasons
 832                         } else {
 833                                 $link = $noslash; # no subpage allowed, use standard link
 834                         }
 835                 } elseif( $noforce ) { # no subpage
 836                         $link = $m[1];
 837                 } else {
 838                         $link = substr( $m[1], 1 );
 839                 }
 840                 if( "" == $text )
 841                         $text = $link;
 842
 843                 $nt = Title::newFromText( $link );
 844                 if( !$nt ) {
 845                         $s .= $prefix . "[[" . $line;
 846                         return $s;
 847                 }
 848                 $ns = $nt->getNamespace();
 849                 $iw = $nt->getInterWiki();
 850                 if( $noforce ) {
 851                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 852                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 853                                 return (trim($s) == '')? '': $s;
 854                         }
 855                         if( $ns == $image ) {
 856                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 857                                 $wgLinkCache->addImageLinkObj( $nt );
 858                                 return $s;
 859                         }
 860                 }
 861                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 862                     ( strpos( $link, "#" ) == FALSE ) ) {
 863                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 864                         return $s;
 865                 }
 866
 867                 # Category feature
 868                 $catns = strtoupper ( $nt->getDBkey () ) ;
 869                 $catns = explode ( ":" , $catns ) ;
 870                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 871                 else $catns = "" ;
 872                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 873                         $t = explode ( ":" , $nt->getText() ) ;
 874                         array_shift ( $t ) ;
 875                         $t = implode ( ":" , $t ) ;
 876                         $t = $wgLang->ucFirst ( $t ) ;
 877                         $nnt = Title::newFromText ( $category.":".$t ) ;
 878                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 879                         $this->mOutput->mCategoryLinks[] = $t ;
 880                         $s .= $prefix . $trail ;
 881                         return $s ;
 882                 }
 883
 884                 if( $ns == $media ) {
 885                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 886                         $wgLinkCache->addImageLinkObj( $nt );
 887                         return $s;
 888                 } elseif( $ns == $special ) {
 889                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 890                         return $s;
 891                 }
 892                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 893
 894                 wfProfileOut( $fname );
 895                 return $s;
 896         }
 897
 898         # Some functions here used by doBlockLevels()
 899         #
 900         /* private */ function closeParagraph()
 901         {
 902                 $result = "";
 903                 if ( '' != $this->mLastSection ) {
 904                         $result = "</" . $this->mLastSection  . ">\n";
 905                 }
 906                 $this->mLastSection = "";
 907                 return $result;
 908         }
 909         # getCommon() returns the length of the longest common substring
 910         # of both arguments, starting at the beginning of both.
 911         #
 912         /* private */ function getCommon( $st1, $st2 )
 913         {
 914                 $fl = strlen( $st1 );
 915                 $shorter = strlen( $st2 );
 916                 if ( $fl < $shorter ) { $shorter = $fl; }
 917
 918                 for ( $i = 0; $i < $shorter; ++$i ) {
 919                         if ( $st1{$i} != $st2{$i} ) { break; }
 920                 }
 921                 return $i;
 922         }
 923         # These next three functions open, continue, and close the list
 924         # element appropriate to the prefix character passed into them.
 925         #
 926         /* private */ function openList( $char )
 927     {
 928                 $result = $this->closeParagraph();
 929
 930                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 931                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 932                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 933                 else if ( ";" == $char ) {
 934                         $result .= "<dl><dt>";
 935                         $this->mDTopen = true;
 936                 }
 937                 else { $result = "<!-- ERR 1 -->"; }
 938
 939                 return $result;
 940         }
 941
 942         /* private */ function nextItem( $char )
 943         {
 944                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 945                 else if ( ":" == $char || ";" == $char ) {
 946                         $close = "</dd>";
 947                         if ( $this->mDTopen ) { $close = "</dt>"; }
 948                         if ( ";" == $char ) {
 949                                 $this->mDTopen = true;
 950                                 return $close . "<dt>";
 951                         } else {
 952                                 $this->mDTopen = false;
 953                                 return $close . "<dd>";
 954                         }
 955                 }
 956                 return "<!-- ERR 2 -->";
 957         }
 958
 959         /* private */function closeList( $char )
 960         {
 961                 if ( "*" == $char ) { $text = "</li></ul>"; }
 962                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 963                 else if ( ":" == $char ) {
 964                         if ( $this->mDTopen ) {
 965                                 $this->mDTopen = false;
 966                                 $text = "</dt></dl>";
 967                         } else {
 968                                 $text = "</dd></dl>";
 969                         }
 970                 }
 971                 else {  return "<!-- ERR 3 -->"; }
 972                 return $text."\n";
 973         }
 974
 975         /* private */ function doBlockLevels( $text, $linestart )
 976         {
 977                 $fname = "Parser::doBlockLevels";
 978                 wfProfileIn( $fname );
 979                 # Parsing through the text line by line.  The main thing
 980                 # happening here is handling of block-level elements p, pre,
 981                 # and making lists from lines starting with * # : etc.
 982                 #
 983                 $a = explode( "\n", $text );
 984                 $lastPref = $text = $lastLine = '';
 985                 $this->mDTopen = $inBlockElem = false;
 986
 987                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 988                 foreach ( $a as $t ) {
 989                         if ( "" != $text ) { $text .= "\n"; }
 990
 991                         $oLine = $t;
 992                         $opl = strlen( $lastPref );
 993                         $npl = strspn( $t, "*#:;" );
 994                         $pref = substr( $t, 0, $npl );
 995                         $pref2 = str_replace( ";", ":", $pref );
 996                         $t = substr( $t, $npl );
 997
 998                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 999                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1000
1001                                 if ( ";" == substr( $pref, -1 ) ) {
1002                                         $cpos = strpos( $t, ":" );
1003                                         if ( ! ( false === $cpos ) ) {
1004                                                 $term = substr( $t, 0, $cpos );
1005                                                 $text .= $term . $this->nextItem( ":" );
1006                                                 $t = substr( $t, $cpos + 1 );
1007                                         }
1008                                 }
1009                         } else if (0 != $npl || 0 != $opl) {
1010                                 $cpl = $this->getCommon( $pref, $lastPref );
1011
1012                                 while ( $cpl < $opl ) {
1013                                         $text .= $this->closeList( $lastPref{$opl-1} );
1014                                         --$opl;
1015                                 }
1016                                 if ( $npl <= $cpl && $cpl > 0 ) {
1017                                         $text .= $this->nextItem( $pref{$cpl-1} );
1018                                 }
1019                                 while ( $npl > $cpl ) {
1020                                         $char = substr( $pref, $cpl, 1 );
1021                                         $text .= $this->openList( $char );
1022
1023                                         if ( ";" == $char ) {
1024                                                 $cpos = strpos( $t, ":" );
1025                                                 if ( ! ( false === $cpos ) ) {
1026                                                         $term = substr( $t, 0, $cpos );
1027                                                         $text .= $term . $this->nextItem( ":" );
1028                                                         $t = substr( $t, $cpos + 1 );
1029                                                 }
1030                                         }
1031                                         ++$cpl;
1032                                 }
1033                                 $lastPref = $pref2;
1034                         }
1035                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
1036                                 $uniq_prefix = UNIQ_PREFIX;
1037                                 // XXX: use a stack for nestable elements like span, table and div
1038                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div)/i", $t );
1039                                 $closematch = preg_match(
1040                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1041                                         "<\\/p|<\\/div|<hr|<\\/td|".$uniq_prefix."-pre)/i", $t );
1042                                 if ( $openmatch or $closematch ) {
1043                                         $text .= $this->closeParagraph();
1044                                         if ( !$closematch  ) {
1045                                                 $inBlockElem = true;
1046                                         } else {
1047                                                 $inBlockElem = false;
1048                                         }
1049                                 } else if ( !$inBlockElem ) {
1050                                         if ( " " == $t{0} ) {
1051                                                 $newSection = "pre";
1052                                                 $text .= $this->closeParagraph();
1053                                                 $text .= "<" . $newSection . ">";
1054                                                 $this->mLastSection = $newSection;
1055                                         } else {
1056                                                 $newSection = "p";
1057                                                 if ( '' == $oLine ) {
1058                                                         if ( '' == $lastLine ) {
1059                                                                 $text .= $this->closeParagraph();
1060                                                                 $text .= "<" . $newSection . "><br/>";
1061                                                                 $this->mLastSection = $newSection;
1062                                                         } else {
1063                                                                 $t = '';
1064                                                         }
1065                                                 } else {
1066                                                         $text .= $this->closeParagraph();
1067                                                         $text .= "<" . $newSection . ">";
1068                                                         $this->mLastSection = $newSection;
1069                                                 }
1070                                         }
1071
1072                                 }
1073                         }
1074                         $lastLine = $t;
1075                         $text .= $t;
1076                 }
1077                 while ( $npl ) {
1078                         $text .= $this->closeList( $pref2{$npl-1} );
1079                         --$npl;
1080                 }
1081                 if ( "" != $this->mLastSection ) {
1082                         $text .= "</" . $this->mLastSection . ">";
1083                         $this->mLastSection = "";
1084                 }
1085                 wfProfileOut( $fname );
1086                 return $text;
1087         }
1088
1089         function getVariableValue( $index ) {
1090                 global $wgLang, $wgSitename, $wgServer;
1091
1092                 switch ( $index ) {
1093                         case MAG_CURRENTMONTH:
1094                                 return date( "m" );
1095                         case MAG_CURRENTMONTHNAME:
1096                                 return $wgLang->getMonthName( date("n") );
1097                         case MAG_CURRENTMONTHNAMEGEN:
1098                                 return $wgLang->getMonthNameGen( date("n") );
1099                         case MAG_CURRENTDAY:
1100                                 return date("j");
1101                         case MAG_CURRENTDAYNAME:
1102                                 return $wgLang->getWeekdayName( date("w")+1 );
1103                         case MAG_CURRENTYEAR:
1104                                 return date( "Y" );
1105                         case MAG_CURRENTTIME:
1106                                 return $wgLang->time( wfTimestampNow(), false );
1107                         case MAG_NUMBEROFARTICLES:
1108                                 return wfNumberOfArticles();
1109                         case MAG_SITENAME:
1110                                 return $wgSitename;
1111                         case MAG_SERVER:
1112                                 return $wgServer;
1113                         default:
1114                                 return NULL;
1115                 }
1116         }
1117
1118         function initialiseVariables()
1119         {
1120                 global $wgVariableIDs;
1121                 $this->mVariables = array();
1122                 foreach ( $wgVariableIDs as $id ) {
1123                         $mw =& MagicWord::get( $id );
1124                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1125                 }
1126         }
1127
1128         /* private */ function replaceVariables( $text )
1129         {
1130                 global $wgLang, $wgCurParser;
1131                 global $wgScript, $wgArticlePath;
1132
1133                 $fname = "Parser::replaceVariables";
1134                 wfProfileIn( $fname );
1135
1136                 $bail = false;
1137                 if ( !$this->mVariables ) {
1138                         $this->initialiseVariables();
1139                 }
1140                 $titleChars = Title::legalChars();
1141                 $regex = "/{{([$titleChars\\|]*?)}}/s";
1142
1143                 # "Recursive" variable expansion: run it through a couple of passes
1144                 for ( $i=0; $i<MAX_INCLUDE_REPEAT && !$bail; $i++ ) {
1145                         $oldText = $text;
1146
1147                         # It's impossible to rebind a global in PHP
1148                         # Instead, we run the substitution on a copy, then merge the changed fields back in
1149                         $wgCurParser = $this->fork();
1150
1151                         $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1152                         if ( $oldText == $text ) {
1153                                 $bail = true;
1154                         }
1155                         $this->merge( $wgCurParser );
1156                 }
1157
1158                 return $text;
1159         }
1160
1161         # Returns a copy of this object except with various variables cleared
1162         # This copy can be re-merged with the parent after operations on the copy
1163         function fork()
1164         {
1165                 $copy = $this;
1166                 $copy->mOutput = new ParserOutput;
1167                 return $copy;
1168         }
1169
1170         # Merges a copy split off with fork()
1171         function merge( &$copy )
1172         {
1173                 $this->mOutput->merge( $copy->mOutput );
1174
1175                 # Merge include throttling arrays
1176                 foreach( $copy->mIncludeCount as $dbk => $count ) {
1177                         if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
1178                                 $this->mIncludeCount[$dbk] += $count;
1179                         } else {
1180                                 $this->mIncludeCount[$dbk] = $count;
1181                         }
1182                 }
1183         }
1184
1185         function braceSubstitution( $matches )
1186         {
1187                 global $wgLinkCache, $wgLang;
1188                 $fname = "Parser::braceSubstitution";
1189                 $found = false;
1190                 $nowiki = false;
1191
1192                 $text = $matches[1];
1193
1194                 # SUBST
1195                 $mwSubst =& MagicWord::get( MAG_SUBST );
1196                 if ( $mwSubst->matchStartAndRemove( $text ) ) {
1197                         if ( $this->mOutputType != OT_WIKI ) {
1198                                 # Invalid SUBST not replaced at PST time
1199                                 # Return without further processing
1200                                 $text = $matches[0];
1201                                 $found = true;
1202                         }
1203                 } elseif ( $this->mOutputType == OT_WIKI ) {
1204                         # SUBST not found in PST pass, do nothing
1205                         $text = $matches[0];
1206                         $found = true;
1207                 }
1208
1209                 # MSG, MSGNW and INT
1210                 if ( !$found ) {
1211                         # Check for MSGNW:
1212                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1213                         if ( $mwMsgnw->matchStartAndRemove( $text ) ) {
1214                                 $nowiki = true;
1215                         } else {
1216                                 # Remove obsolete MSG:
1217                                 $mwMsg =& MagicWord::get( MAG_MSG );
1218                                 $mwMsg->matchStartAndRemove( $text );
1219                         }
1220
1221                         # Check if it is an internal message
1222                         $mwInt =& MagicWord::get( MAG_INT );
1223                         if ( $mwInt->matchStartAndRemove( $text ) ) {
1224                                 $text = wfMsg( $text );
1225                                 $found = true;
1226                         }
1227                 }
1228
1229                 # NS
1230                 if ( !$found ) {
1231                         # Check for NS: (namespace expansion)
1232                         $mwNs = MagicWord::get( MAG_NS );
1233                         if ( $mwNs->matchStartAndRemove( $text ) ) {
1234                                 if ( intval( $text ) ) {
1235                                         $text = $wgLang->getNsText( intval( $text ) );
1236                                         $found = true;
1237                                 } else {
1238                                         $index = Namespace::getCanonicalIndex( strtolower( $text ) );
1239                                         if ( !is_null( $index ) ) {
1240                                                 $text = $wgLang->getNsText( $index );
1241                                                 $found = true;
1242                                         }
1243                                 }
1244                         }
1245                 }
1246
1247                 # LOCALURL and LOCALURLE
1248                 if ( !$found ) {
1249                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1250                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1251
1252                         if ( $mwLocal->matchStartAndRemove( $text ) ) {
1253                                 $func = 'getLocalURL';
1254                         } elseif ( $mwLocalE->matchStartAndRemove( $text ) ) {
1255                                 $func = 'escapeLocalURL';
1256                         } else {
1257                                 $func = '';
1258                         }
1259
1260                         if ( $func !== '' ) {
1261                                 $args = explode( "|", $text );
1262                                 $n = count( $args );
1263                                 if ( $n > 0 ) {
1264                                         $title = Title::newFromText( $args[0] );
1265                                         if ( !is_null( $title ) ) {
1266                                                 if ( $n > 1 ) {
1267                                                         $text = $title->$func( $args[1] );
1268                                                 } else {
1269                                                         $text = $title->$func();
1270                                                 }
1271                                                 $found = true;
1272                                         }
1273                                 }
1274                         }
1275                 }
1276
1277                 # Check for a match against internal variables
1278                 if ( !$found && array_key_exists( $text, $this->mVariables ) ) {
1279                         $text = $this->mVariables[$text];
1280                         $found = true;
1281                         $this->mOutput->mContainsOldMagic = true;
1282                 }
1283
1284                 # Load from database
1285                 if ( !$found ) {
1286                         $title = Title::newFromText( $text, NS_TEMPLATE );
1287                         if ( is_object( $title ) && !$title->isExternal() ) {
1288                                 # Check for excessive inclusion
1289                                 $dbk = $title->getPrefixedDBkey();
1290                                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1291                                         $this->mIncludeCount[$dbk] = 0;
1292                                 }
1293                                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1294                                         $article = new Article( $title );
1295                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1296                                         if ( $articleContent !== false ) {
1297                                                 $found = true;
1298                                                 $text = $articleContent;
1299
1300                                                 # Escaping and link table handling
1301                                                 # Not required for preSaveTransform()
1302                                                 if ( $this->mOutputType == OT_HTML ) {
1303                                                         if ( $nowiki ) {
1304                                                                 $text = wfEscapeWikiText( $text );
1305                                                         } else {
1306                                                                 $text = $this->removeHTMLtags( $text );
1307                                                         }
1308                                                         $wgLinkCache->suspend();
1309                                                         $text = $this->doTokenizedParser( $text );
1310                                                         $wgLinkCache->resume();
1311                                                         $wgLinkCache->addLinkObj( $title );
1312
1313                                                 }
1314                                         }
1315                                 }
1316
1317                                 # If the title is valid but undisplayable, make a link to it
1318                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1319                                         $text = "[[" . $title->getPrefixedText() . "]]";
1320                                         $found = true;
1321                                 }
1322                         }
1323                 }
1324
1325                 if ( !$found ) {
1326                         return $matches[0];
1327                 } else {
1328                         return $text;
1329                 }
1330         }
1331
1332         # Cleans up HTML, removes dangerous tags and attributes
1333         /* private */ function removeHTMLtags( $text )
1334         {
1335                 $fname = "Parser::removeHTMLtags";
1336                 wfProfileIn( $fname );
1337                 $htmlpairs = array( # Tags that must be closed
1338                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1339                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1340                         "strike", "strong", "tt", "var", "div", "center",
1341                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1342                         "ruby", "rt" , "rb" , "rp", "p"
1343                 );
1344                 $htmlsingle = array(
1345                         "br", "hr", "li", "dt", "dd"
1346                 );
1347                 $htmlnest = array( # Tags that can be nested--??
1348                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1349                         "dl", "font", "big", "small", "sub", "sup"
1350                 );
1351                 $tabletags = array( # Can only appear inside table
1352                         "td", "th", "tr"
1353                 );
1354
1355                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1356                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1357
1358                 $htmlattrs = $this->getHTMLattrs () ;
1359
1360                 # Remove HTML comments
1361                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1362
1363                 $bits = explode( "<", $text );
1364                 $text = array_shift( $bits );
1365                 $tagstack = array(); $tablestack = array();
1366
1367                 foreach ( $bits as $x ) {
1368                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1369                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1370                           $x, $regs );
1371                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1372                         error_reporting( $prev );
1373
1374                         $badtag = 0 ;
1375                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1376                                 # Check our stack
1377                                 if ( $slash ) {
1378                                         # Closing a tag...
1379                                         if ( ! in_array( $t, $htmlsingle ) &&
1380                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1381                                                 array_push( $tagstack, $ot );
1382                                                 $badtag = 1;
1383                                         } else {
1384                                                 if ( $t == "table" ) {
1385                                                         $tagstack = array_pop( $tablestack );
1386                                                 }
1387                                                 $newparams = "";
1388                                         }
1389                                 } else {
1390                                         # Keep track for later
1391                                         if ( in_array( $t, $tabletags ) &&
1392                                           ! in_array( "table", $tagstack ) ) {
1393                                                 $badtag = 1;
1394                                         } else if ( in_array( $t, $tagstack ) &&
1395                                           ! in_array ( $t , $htmlnest ) ) {
1396                                                 $badtag = 1 ;
1397                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1398                                                 if ( $t == "table" ) {
1399                                                         array_push( $tablestack, $tagstack );
1400                                                         $tagstack = array();
1401                                                 }
1402                                                 array_push( $tagstack, $t );
1403                                         }
1404                                         # Strip non-approved attributes from the tag
1405                                         $newparams = $this->fixTagAttributes($params);
1406
1407                                 }
1408                                 if ( ! $badtag ) {
1409                                         $rest = str_replace( ">", "&gt;", $rest );
1410                                         $text .= "<$slash$t $newparams$brace$rest";
1411                                         continue;
1412                                 }
1413                         }
1414                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1415                 }
1416                 # Close off any remaining tags
1417                 while ( $t = array_pop( $tagstack ) ) {
1418                         $text .= "</$t>\n";
1419                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1420                 }
1421                 wfProfileOut( $fname );
1422                 return $text;
1423         }
1424
/*
 * Rewrites the headlines (<h1>..<h6>) found in already rendered HTML.
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections if the title can be edited and the
 *    edit-section option is enabled
 * 3) Add a table of contents after the first block of text if enabled
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text: HTML to process
 * Returns the HTML with the reformatted headlines (and the TOC, if shown).
 */

        /* private */ function formatHeadings( $text )
        {
                $doNumberHeadings = $this->mOptions->getNumberHeadings();
                $doShowToc = $this->mOptions->getShowToc();
                if( !$this->mTitle->userCanEdit() ) {
                        $showEditLink = 0;
                        $rightClickHack = 0;
                } else {
                        $showEditLink = $this->mOptions->getEditSection();
                        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
                }

                # Inhibit editsection links if requested in the page
                $esw =& MagicWord::get( MAG_NOEDITSECTION );
                if( $esw->matchAndRemove( $text ) ) {
                        $showEditLink = 0;
                }
                # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
                # do not add TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if( $mw->matchAndRemove( $text ) ) {
                        $doShowToc = 0;
                }

                # never add the TOC to the Main Page. This is an entry page that should not
                # be more than 1-2 screens large anyway
                if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
                        $doShowToc = 0;
                }

                # Get all headlines for numbering them and adding funky stuff like [edit]
                # links - this is for later, but we need the number of headlines right now
                # $matches[1] = level digit, $matches[2] = rest of the opening tag
                # including ">", $matches[3] = headline content
                $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

                # if there are fewer than 4 headlines in the article, do not show TOC
                if( $numMatches < 4 ) {
                        $doShowToc = 0;
                }

                # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
                # override above conditions and always show TOC
                $mw =& MagicWord::get( MAG_FORCETOC );
                if ($mw->matchAndRemove( $text ) ) {
                        $doShowToc = 1;
                }

                # We need this to perform operations on the HTML
                $sk =& $this->mOptions->getSkin();

                # headline counter
                $headlineCount = 0;

                # Ugh .. the TOC should have neat indentation levels which can be
                # passed to the skin functions. These are determined here
                $toclevel = 0;
                $toc = "";
                $full = "";
                $head = array();           # rebuilt headline HTML, by headline index
                $sublevelCount = array();  # headlines seen so far, by heading level
                $refers = array();         # times each canonized headline was seen
                $level = 0;
                $prevlevel = 0;
                foreach( $matches[3] as $headline ) {
                        $numbering = "";
                        if( $level ) {
                                $prevlevel = $level;
                        }
                        $level = $matches[1][$headlineCount];
                        if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
                                # reset when we enter a new level
                                $sublevelCount[$level] = 0;
                                $toc .= $sk->tocIndent( $level - $prevlevel );
                                $toclevel += $level - $prevlevel;
                        }
                        if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
                                # reset when we step back a level
                                $sublevelCount[$level+1]=0;
                                $toc .= $sk->tocUnindent( $prevlevel - $level );
                                $toclevel -= $prevlevel - $level;
                        }
                        # count number of headlines for each level
                        if( !isset( $sublevelCount[$level] ) ) {
                                $sublevelCount[$level] = 0;
                        }
                        $sublevelCount[$level]++;
                        if( $doNumberHeadings || $doShowToc ) {
                                # Join the non-empty counters of all levels up to this one: "2.1.3"
                                $dot = 0;
                                for( $i = 1; $i <= $level; $i++ ) {
                                        if( !empty( $sublevelCount[$i] ) ) {
                                                if( $dot ) {
                                                        $numbering .= ".";
                                                }
                                                $numbering .= $sublevelCount[$i];
                                                $dot = 1;
                                        }
                                }
                        }

                        # The canonized header is a version of the header text safe to use for links
                        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                        $canonized_headline = Parser::unstrip( $headline, $this->mStripState );

                        # strip out HTML
                        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                        $tocline = trim( $canonized_headline );
                        $canonized_headline = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));

                        # count how many times this anchor text occurred so dupes can be told apart
                        if( !isset( $refers[$canonized_headline] ) ) {
                                $refers[$canonized_headline] = 0;
                        }
                        $refers[$canonized_headline]++;
                        $dupeIndex = $refers[$canonized_headline];

                        # Prepend the number to the heading text

                        if( $doNumberHeadings || $doShowToc ) {
                                $tocline = $numbering . " " . $tocline;

                                # Don't number the heading if it is the only one (looks silly)
                                if( $doNumberHeadings && count( $matches[3] ) > 1) {
                                        # the two are different if the line contains a link
                                        $headline=$numbering . " " . $headline;
                                }
                        }

                        # Create the anchor for linking from the TOC to the section
                        $anchor = $canonized_headline;
                        if( $dupeIndex > 1 ) {
                                $anchor .= "_" . $dupeIndex;
                        }
                        if( $doShowToc ) {
                                $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                        }

                        # Each headline index is visited exactly once, so start from empty
                        $head[$headlineCount] = "";
                        if( $showEditLink ) {
                                $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
                        }

                        # Add the edit section span
                        if( $rightClickHack ) {
                                $headline = $sk->editSectionScript($headlineCount+1,$headline);
                        }

                        # A double quote survives canonization (it can also come out of
                        # html_entity_decode), so encode it or it would end the name
                        # attribute early. Browsers decode it back to the same anchor.
                        $anchorAttr = str_replace( '"', '&quot;', $anchor );

                        # give headline the correct <h#> tag
                        $head[$headlineCount] .= "<a name=\"$anchorAttr\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

                        $headlineCount++;
                }

                if( $doShowToc ) {
                        $toc .= $sk->tocUnindent( $toclevel );
                        $toc = $sk->tocTable( $toc );
                }

                # split up and insert constructed headlines

                $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
                $i = 0;

                foreach( $blocks as $block ) {
                        if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
                                # This is the [edit] link that appears for the top block of text when
                                # section editing is enabled
                                $full .= $sk->editSectionLink(0);
                        }
                        $full .= $block;
                        if( $doShowToc && !$i) {
                                # Top anchor now in skin
                                $full = $full.$toc;
                        }

                        if( !empty( $head[$i] ) ) {
                                $full .= $head[$i];
                        }
                        $i++;
                }

                return $full;
        }
1616
1617         /* private */ function doMagicISBN( &$tokenizer )
1618         {
1619                 global $wgLang;
1620
1621                 # Check whether next token is a text token
1622                 # If yes, fetch it and convert the text into a
1623                 # Special::BookSources link
1624                 $token = $tokenizer->previewToken();
1625                 while ( $token["type"] == "" )
1626                 {
1627                         $tokenizer->nextToken();
1628                         $token = $tokenizer->previewToken();
1629                 }
1630                 if ( $token["type"] == "text" )
1631                 {
1632                         $token = $tokenizer->nextToken();
1633                         $x = $token["text"];
1634                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1635
1636                         $isbn = $blank = "" ;
1637                         while ( " " == $x{0} ) {
1638                                 $blank .= " ";
1639                                 $x = substr( $x, 1 );
1640                         }
1641                         while ( strstr( $valid, $x{0} ) != false ) {
1642                                 $isbn .= $x{0};
1643                                 $x = substr( $x, 1 );
1644                         }
1645                         $num = str_replace( "-", "", $isbn );
1646                         $num = str_replace( " ", "", $num );
1647
1648                         if ( "" == $num ) {
1649                                 $text = "ISBN $blank$x";
1650                         } else {
1651                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1652                                 $text = "<a href=\"" .
1653                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1654                                         "\" class=\"internal\">ISBN $isbn</a>";
1655                                 $text .= $x;
1656                         }
1657                 } else {
1658                         $text = "ISBN ";
1659                 }
1660                 return $text;
1661         }
1662         /* private */ function doMagicRFC( &$tokenizer )
1663         {
1664                 global $wgLang;
1665
1666                 # Check whether next token is a text token
1667                 # If yes, fetch it and convert the text into a
1668                 # link to an RFC source
1669                 $token = $tokenizer->previewToken();
1670                 while ( $token["type"] == "" )
1671                 {
1672                         $tokenizer->nextToken();
1673                         $token = $tokenizer->previewToken();
1674                 }
1675                 if ( $token["type"] == "text" )
1676                 {
1677                         $token = $tokenizer->nextToken();
1678                         $x = $token["text"];
1679                         $valid = "0123456789";
1680
1681                         $rfc = $blank = "" ;
1682                         while ( " " == $x{0} ) {
1683                                 $blank .= " ";
1684                                 $x = substr( $x, 1 );
1685                         }
1686                         while ( strstr( $valid, $x{0} ) != false ) {
1687                                 $rfc .= $x{0};
1688                                 $x = substr( $x, 1 );
1689                         }
1690
1691                         if ( "" == $rfc ) {
1692                                 $text .= "RFC $blank$x";
1693                         } else {
1694                                 $url = wfmsg( "rfcurl" );
1695                                 $url = str_replace( "$1", $rfc, $url);
1696                                 $sk =& $this->mOptions->getSkin();
1697                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1698                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1699                         }
1700                 } else {
1701                         $text = "RFC ";
1702                 }
1703                 return $text;
1704         }
1705
1706         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1707         {
1708                 $this->mOptions = $options;
1709                 $this->mTitle =& $title;
1710                 $this->mOutputType = OT_WIKI;
1711
1712                 if ( $clearState ) {
1713                         $this->clearState();
1714                 }
1715
1716                 $stripState = false;
1717                 $text = str_replace("\r\n", "\n", $text);
1718                 $text = $this->strip( $text, $stripState, false );
1719                 $text = $this->pstPass2( $text, $user );
1720                 $text = $this->unstrip( $text, $stripState );
1721                 return $text;
1722         }
1723
1724         /* private */ function pstPass2( $text, &$user )
1725         {
1726                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1727
1728                 # Variable replacement
1729                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1730                 $text = $this->replaceVariables( $text );
1731
1732                 # Signatures
1733                 #
1734                 $n = $user->getName();
1735                 $k = $user->getOption( "nickname" );
1736                 if ( "" == $k ) { $k = $n; }
1737                 if(isset($wgLocaltimezone)) {
1738                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1739                 }
1740                 /* Note: this is an ugly timezone hack for the European wikis */
1741                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1742                   " (" . date( "T" ) . ")";
1743                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1744
1745                 $text = preg_replace( "/~~~~~/", $d, $text );
1746                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1747                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1748                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1749                   Namespace::getUser() ) . ":$n|$k]]", $text );
1750
1751                 # Context links: [[|name]] and [[name (context)|]]
1752                 #
1753                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1754                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1755                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1756                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1757
1758                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1759                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1760                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1761                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1762                                                                                                                 # [[ns:page (cont)|]]
1763                 $context = "";
1764                 $t = $this->mTitle->getText();
1765                 if ( preg_match( $conpat, $t, $m ) ) {
1766                         $context = $m[2];
1767                 }
1768                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1769                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1770                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1771
1772                 if ( "" == $context ) {
1773                         $text = preg_replace( $p2, "[[\\1]]", $text );
1774                 } else {
1775                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1776                 }
1777
1778                 /*
1779                 $mw =& MagicWord::get( MAG_SUBST );
1780                 $wgCurParser = $this->fork();
1781                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1782                 $this->merge( $wgCurParser );
1783                 */
1784
1785                 # Trim trailing whitespace
1786                 # MAG_END (__END__) tag allows for trailing
1787                 # whitespace to be deliberately included
1788                 $text = rtrim( $text );
1789                 $mw =& MagicWord::get( MAG_END );
1790                 $mw->matchAndRemove( $text );
1791
1792                 return $text;
1793         }
1794
1795         # Set up some variables which are usually set up in parse()
1796         # so that an external function can call some class members with confidence
1797         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1798         {
1799                 $this->mTitle =& $title;
1800                 $this->mOptions = $options;
1801                 $this->mOutputType = $outputType;
1802                 if ( $clearState ) {
1803                         $this->clearState();
1804                 }
1805         }
1806
1807         function transformMsg( $text, $options ) {
1808                 global $wgTitle;
1809                 static $executing = false;
1810
1811                 # Guard against infinite recursion
1812                 if ( $executing ) {
1813                         return $text;
1814                 }
1815                 $executing = true;
1816
1817                 $this->mTitle = $wgTitle;
1818                 $this->mOptions = $options;
1819                 $this->mOutputType = OT_MSG;
1820                 $this->clearState();
1821                 $text = $this->replaceVariables( $text );
1822
1823                 $executing = false;
1824                 return $text;
1825         }
1826 }
1827
# Result holder: output text plus the language links, category links and
# "contains old magic" flag collected alongside it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # $text:             output text
        # $languageLinks:    array of language links
        # $categoryLinks:    array of category links
        # $containsOldMagic: value of the "contains old magic" flag
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Folds the link lists and the old-magic flag of another ParserOutput
        # into this one. The text is left untouched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Take the category links from $other; merging our own language
                # links in here was a copy-paste slip.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1857
# Bundle of settings that control a parser run. Populated from site-wide
# globals and per-user preferences by initialiseFromUser(), or set one by one
# through the set*() methods.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators. Each one delegates to wfSetVar() (wfSetRef() for the skin)
        # and returns that helper's result (presumably the previous value --
        # confirm against the helpers' definitions).
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Factory: build a ParserOptions initialised from the given user.
        #
        # $user - user object (taken by reference); a false-ish value makes
        #         initialiseFromUser() fall back to a freshly created User.
        #
        # Returns the new ParserOptions object.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() already declares its parameter as a
                # reference, so no "&" is needed (or allowed, on newer PHP
                # versions) at the call site.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill in every option from the site globals and the user's preferences.
        #
        # $userInput - user object (by reference). When it evaluates to false
        #              a new User is created and flagged as loaded via
        #              setLoaded( true ), and that object is queried instead.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user settings
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
1930
# Regex callback used in Parser::replaceVariables.
#
# A plain function cannot carry an object with it, so the match array is
# forwarded to the braceSubstitution() method of the parser held in the
# global $wgCurParser, and that method's result is returned.
function wfBraceSubstitution( $matches )
{
        # Bind a local name to the global parser object
        $parser =& $GLOBALS['wgCurParser'];

        $replacement = $parser->braceSubstitution( $matches );
        return $replacement;
}
1937
1938 ?>