includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  # Upper bound on inclusions of any single page; this is the
  # countermeasure against the O(N^2) inclusion attack described above.
  define( 'MAX_INCLUDE_REPEAT', 5 );

  # Recursion depth of variable/inclusion evaluation
  define( 'MAX_INCLUDE_PASSES', 3 );

  # Allowed values for $mOutputType
  define( 'OT_HTML', 1 );
  define( 'OT_WIKI', 2 );
  define( 'OT_MSG', 3 );

  # Prefix of the placeholder markers that strip() leaves in the text
  define( 'UNIQ_PREFIX', 'NaodW29' );
  48
  49 class Parser
  50 {
  51         # Cleared with clearState():
  52         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array();
  53         var $mVariables, $mIncludeCount;
  54
  55         # Temporary:
  56         var $mOptions, $mTitle, $mOutputType;
  57
  # Constructor: a new parser starts out in its cleared state.
  function Parser()
  {
          $this->clearState();
  }
  62
  # Resets all per-parse state to its starting values.
  function clearState()
  {
          # Counters and flags
          $this->mAutonumber = 0;
          $this->mDTopen = false;
          $this->mLastSection = "";
          $this->mVariables = false;

          # Collections
          $this->mIncludeCount = array();
          $this->mStripState = array();

          # Fresh output container
          $this->mOutput = new ParserOutput();
  }
  73
  74         # First pass--strip() sets aside <nowiki>, <hiero>, <math> and <pre> sections;
  75         # the rest is passed to doWikiPass2(), which does all the real work.
  76         #
  77         # Returns a ParserOutput
  78         #
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
          # Renders wiki markup to HTML.
          #
          # $text       - wiki markup to render
          # $title      - title object of the page; kept by reference in $this->mTitle
          # $options    - parser options; kept in $this->mOptions
          # $linestart  - handed through unchanged to doWikiPass2()
          # $clearState - when true, clearState() is run before parsing
          #
          # Returns $this->mOutput with the rendered text set on it.
          $fname = "Parser::parse";
          wfProfileIn( $fname );

          if ( $clearState ) {
                  $this->clearState();
          }

          $this->mOptions = $options;
          $this->mTitle =& $title;
          $this->mOutputType = OT_HTML;

          # strip() swaps protected sections for markers so that the main
          # pass cannot touch them; unstrip() puts the prepared content back.
          $text = $this->strip( $text, $this->mStripState );
          $text = $this->doWikiPass2( $text, $linestart );
          $text = $this->unstrip( $text, $this->mStripState );

          $this->mOutput->setText( $text );
          wfProfileOut( $fname );
          return $this->mOutput;
  }
 101
 # Returns a random lowercase hexadecimal string, built by joining the
 # hex forms of two mt_rand() draws from the range 0..0x7fffffff.
 /* static */ function getRandomString()
 {
         $parts = array();
         for ( $i = 0; $i < 2; $i++ ) {
                 $parts[] = dechex( mt_rand( 0, 0x7fffffff ) );
         }
         return implode( "", $parts );
 }
 106
 107         # Replaces all occurrences of <$tag>content</$tag> in the text
 108         # with a random marker and returns the new text. The output parameter
 109         # $content will be an associative array filled with data of the form
 110         # $unique_marker => content.
 111
 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
         # $tag         - tag name to look for; inserted verbatim into a regex
         # $text        - text to scan
         # $content     - output: reset, then filled with marker => inner text
         # $uniq_prefix - leading part of every generated marker
         #
         # Returns $text with each <$tag>...</$tag> replaced by its marker.
         # Markers share one random stem and end in an 8-digit hex counter.
         $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
         $content = array( );
         $n = 1;
         $stripped = "";

         while ( "" != $text ) {
                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
                 $stripped .= $p[0];
                 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                         # No further opening tag, or nothing follows it
                         $text = "";
                 } else {
                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
                         $marker = $rnd . sprintf("%08X", $n++);
                         $content[$marker] = $q[0];
                         $stripped .= $marker;
                         # An unclosed tag takes the whole remainder as its content,
                         # in which case there is no second piece to continue with.
                         $text = isset( $q[1] ) ? $q[1] : "";
                 }
         }
         return $stripped;
 }
 134
 135         # Strips <nowiki>, <hiero>, <math> and <pre> (<hiero> and <math> only when enabled).
 136         # Returns the text, and fills an array with data needed in unstrip()
 137         #
 function strip( $text, &$state )
 {
         # Sets aside the sections that the main pass must not touch and
         # leaves a unique marker in place of each one.
         #
         # $text  - wiki markup
         # $state - output: four marker => content arrays for unstrip(),
         #          ordered pre, math, hiero, nowiki
         #
         # Returns $text with every extracted section replaced by its marker.
         # What is stored per marker depends on the output type: prepared
         # content for OT_HTML, otherwise the original tag with its content.
         $forHtml = ( OT_HTML == $this->mOutputType );
         $prefix = UNIQ_PREFIX;

         # Replace any instances of the placeholders
         $text = str_replace( $prefix, wfHtmlEscapeFirst( $prefix ), $text );

         # <nowiki>
         $found = array();
         $nowiki = array();
         $text = Parser::extractTags("nowiki", $text, $found, $prefix);
         foreach( $found as $marker => $inner ){
                 if( !$forHtml ){
                         $nowiki[$marker] = "<nowiki>" . $inner . "</nowiki>";
                 } else {
                         $nowiki[$marker] = wfEscapeHTMLTagsOnly( $inner );
                 }
         }

         # <hiero>, only when the extension is switched on
         $hiero = array();
         if( $GLOBALS['wgUseWikiHiero'] ){
                 $text = Parser::extractTags("hiero", $text, $found, $prefix);
                 foreach( $found as $marker => $inner ){
                         if( !$forHtml ){
                                 $hiero[$marker] = "<hiero>" . $inner . "</hiero>";
                         } else {
                                 $hiero[$marker] = WikiHiero( $inner, WH_MODE_HTML);
                         }
                 }
         }

         # <math>, only when the options enable TeX
         $math = array();
         if( $this->mOptions->getUseTeX() ){
                 $text = Parser::extractTags("math", $text, $found, $prefix);
                 foreach( $found as $marker => $inner ){
                         if( !$forHtml ){
                                 $math[$marker] = "<math>" . $inner . "</math>";
                         } else {
                                 $math[$marker] = renderMath( $inner );
                         }
                 }
         }

         # <pre>
         $pre = array();
         $text = Parser::extractTags("pre", $text, $found, $prefix);
         foreach( $found as $marker => $inner ){
                 if( !$forHtml ){
                         $pre[$marker] = "<pre>" . $inner . "</pre>";
                 } else {
                         $pre[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $inner ) . "</pre>";
                 }
         }

         # Must expand in reverse order, otherwise nested tags will be corrupted
         $state = array( $pre, $math, $hiero, $nowiki );
         return $text;
 }
 194
 # Puts stripped content back into $text. $state is a list of
 # marker => content arrays; they are applied one after another, and
 # within each array the markers are replaced in array order.
 function unstrip( $text, &$state )
 {
         foreach( $state as $replacements ){
                 $markers = array_keys( $replacements );
                 $contents = array_values( $replacements );
                 $text = str_replace( $markers, $contents, $text );
         }
         return $text;
 }
 204
 # Builds the HTML listing that doWikiPass2() appends to the page text.
 #
 # Returns "" when category magic is switched off in the options or when
 # the part of the title text before the first ":" is not the localized
 # "category" word. Otherwise returns a line break followed by, where
 # non-empty, a "subcategories" section and an articles section, each a
 # sorted, comma-separated list of links.
 function categoryMagic ()
 {
         global $wgLang ;
         if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
         # The id is interpolated into SQL below, so force it to an integer
         $id = intval ( $this->mTitle->getArticleID() ) ;
         $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
         $ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
         if ( $cat != $ti[0] ) return "" ;
         # A title consisting of the category word alone has no second part
         $catname = isset ( $ti[1] ) ? $ti[1] : "" ;
         $r = "<br clear='all' />\n" ;

         $articles = array() ;
         $children = array() ;

         # Take the skin from the parser options, as the other methods of
         # this class do, instead of reaching for the global user object.
         $sk =& $this->mOptions->getSkin() ;

         # Pages linking here, through existing links and through broken links
         $queries = array (
                 "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id",
                 "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id"
         ) ;
         $data = array () ;
         foreach ( $queries AS $sql )
         {
                 $res = wfQuery ( $sql, DB_READ ) ;
                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
                 # Free every result set, not only the last one
                 wfFreeResult ( $res ) ;
         }

         foreach ( $data AS $x )
         {
                 $t = $wgLang->getNsText ( $x->cur_namespace ) ;
                 if ( $t != "" ) $t .= ":" ;
                 $t .= $x->cur_title ;

                 # Pages whose own prefix is the category word are subcategories
                 $y = explode ( ":" , $t , 2 ) ;
                 if ( count ( $y ) == 2 && $y[0] == $cat ) {
                         array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
                 } else {
                         array_push ( $articles , $sk->makeLink ( $t ) ) ;
                 }
         }

         # Children
         if ( count ( $children ) > 0 )
         {
                 asort ( $children ) ;
                 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
                 $r .= implode ( ", " , $children ) ;
         }

         # Articles
         if ( count ( $articles ) > 0 )
         {
                 asort ( $articles ) ;
                 $h =  wfMsg( "category_header", $catname );
                 $r .= "<h2>{$h}</h2>\n" ;
                 $r .= implode ( ", " , $articles ) ;
         }

         return $r ;
 }
 270
 # Returns the list of attribute names that fixTagAttributes() lets
 # through. Each name appears exactly once.
 function getHTMLattrs ()
 {
         $htmlattrs = array( # Allowed attributes--no scripting, etc.
                         "title", "align", "lang", "dir", "width", "height",
                         "bgcolor", "clear", /* BR */ "noshade", /* HR */
                         "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
                         /* FONT */ "type", "start", "value", "compact",
                         /* For various lists, mostly deprecated but safe */
                         "summary", "border", "frame", "rules",
                         "cellspacing", "cellpadding", "valign", "char",
                         "charoff", "colgroup", "col", "span", "abbr", "axis",
                         "headers", "scope", "rowspan", "colspan", /* Tables */
                         "id", "class", "name", "style" /* For CSS */
                         );
         return $htmlattrs ;
 }
 287
 # Filters the attribute text of an HTML tag.
 #
 # $t - raw attribute text, e.g. 'border=1 onclick="x"'
 #
 # Returns the text with every attribute whose name is not listed by
 # getHTMLattrs() removed, trimmed; or "" when $t is blank or when the
 # surviving text carries a suspicious style attribute.
 function fixTagAttributes ( $t )
 {
         if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

         # Strip non-approved attributes from the tag. A callback is used
         # instead of the /e modifier so that attribute text is only ever
         # handled as data and never evaluated as PHP code.
         $t = preg_replace_callback(
                 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
                 array( $this, 'fixTagAttributesCallback' ),
                 $t);
         # Strip javascript "expression" from stylesheets. Brute force approach:
         # If anything offensive is found, all attributes of the HTML tag are dropped

         if( preg_match(
                 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
                 wfMungeToUtf8( $t ) ) )
         {
                 $t="";
         }

         return trim ( $t ) ;
 }

 # preg_replace_callback() helper for fixTagAttributes().
 # $m[1] is the attribute name and $m[3], when present, its value
 # (including any surrounding double quotes). Returns the attribute
 # unchanged if its name is approved, otherwise "".
 /* private */ function fixTagAttributesCallback ( $m )
 {
         if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() ) ) return "" ;
         if ( isset ( $m[3] ) && $m[3] !== "" ) return $m[1] . "=" . $m[3] ;
         return $m[1] ;
 }
 310
 # Converts wiki table markup into HTML tables, one input line at a time.
 #
 #   {| attrs      opens a table
 #   |}            closes the current table
 #   |- attrs      starts a new row (any number of dashes); attrs go on the <tr>
 #   |+ text       caption
 #   | a || b      data cells        ! a !! b    header cells
 #   attrs | text  inside a cell puts attrs on the cell tag
 #
 # The four arrays below are stacks with one entry per currently open
 # table. Lines outside any table are left untouched.
 # Returns the converted text; tables still open at the end are closed.
 function doTableStuff ( $t )
 {
         $t = explode ( "\n" , $t ) ;
         $td = array () ; # Is currently a td tag open?
         $ltd = array () ; # Was it TD or TH?
         $tr = array () ; # Is currently a tr tag open?
         $ltr = array () ; # tr attributes
         foreach ( $t AS $k => $x )
         {
                 $x = rtrim ( $x ) ;
                 $fc = substr ( $x , 0 , 1 ) ;
                 if ( "{|" == substr ( $x , 0 , 2 ) )
                 {
                         # Everything after the two-character "{|" is attribute text;
                         # fixTagAttributes() trims it, so a separating space is optional.
                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $tr , false ) ;
                         array_push ( $ltr , "" ) ;
                 }
                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
                 else if ( "|}" == substr ( $x , 0 , 2 ) )
                 {
                         $z = "</table>\n" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                 }
                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
                                 {
                                 $z = trim ( substr ( $x , 2 ) ) ;
                                 $t[$k] = "<caption>{$z}</caption>\n" ;
                                 }*/
                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
                 {
                         $x = substr ( $x , 1 ) ;
                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
                         $z = "" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                         # The <tr> itself is written by the first cell of the row,
                         # so only remember its attributes here.
                         array_push ( $tr , false ) ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
                 }
                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
                 {
                         if ( "|+" == substr ( $x , 0 , 2 ) )
                         {
                                 $fc = "+" ;
                                 $x = substr ( $x , 1 ) ;
                         }
                         $after = substr ( $x , 1 ) ;
                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
                         $after = explode ( "||" , $after ) ;
                         $t[$k] = "" ;
                         foreach ( $after AS $theline )
                         {
                                 $z = "" ;
                                 if ( $fc != "+" )
                                 {
                                         # Open the row if this is its first cell
                                         $tra = array_pop ( $ltr ) ;
                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                                         array_push ( $tr , true ) ;
                                         array_push ( $ltr , "" ) ;
                                 }

                                 # Close the previous cell, then open the new one
                                 $l = array_pop ( $ltd ) ;
                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                                 if ( $fc == "|" ) $l = "td" ;
                                 else if ( $fc == "!" ) $l = "th" ;
                                 else if ( $fc == "+" ) $l = "caption" ;
                                 else $l = "" ;
                                 array_push ( $ltd , $l ) ;
                                 $y = explode ( "|" , $theline , 2 ) ;
                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                                 $t[$k] .= $y ;
                                 array_push ( $td , true ) ;
                         }
                 }
         }

         # Closing open td, tr && table
         while ( count ( $td ) > 0 )
         {
                 # Close the cell with the tag that opened it (td, th or caption)
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
                 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
                 array_pop ( $ltr ) ;
                 $t[] = "</table>" ;
         }

         $t = implode ( "\n" , $t ) ;
         #               $t = $this->removeHTMLtags( $t );
         return $t ;
 }
 410
 411         # Well, OK, it's actually about 14 passes.  But since all the
 412         # hard lifting is done inside PHP's regex code, it probably
 413         # wouldn't speed things up much to add a real parser.
 414         #
 function doWikiPass2( $text, $linestart )
 {
         # Runs the transformation steps below over $text, in this fixed
         # order, and returns the result. $linestart is only handed on to
         # doBlockLevels().
         $fname = "Parser::doWikiPass2";
         wfProfileIn( $fname );

         $text = $this->removeHTMLtags( $text );
         $text = $this->replaceVariables( $text );

         # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );

         $text = $this->doHeadings( $text );

         if ( $this->mOptions->getUseDynamicDates() ) {
                 global $wgDateFormatter;
                 $dateFormat = $this->mOptions->getDateFormat();
                 $text = $wgDateFormatter->reformat( $dateFormat, $text );
         }

         $text = $this->replaceExternalLinks( $text );
         $text = $this->doBlockLevels( $text, $linestart );
         $text = $this->doTokenizedParser ( $text );
         $text = $this->doTableStuff ( $text ) ;

         $text = $this->formatHeadings( $text );

         $skin =& $this->mOptions->getSkin();
         $text = $skin->transformContent( $text );

         # Tag clean-up: pattern and replacement lists are paired by position
         $patterns = array(
                 "/<hr *>/i",
                 "/<br *>/i",
                 "/<center *>/i",
                 "/<\\/center *>/i"
         );
         $replacements = array(
                 '<hr/>',
                 '<br/>',
                 '<span style="text-align:center;">',
                 '</span>'
         );
         $text = preg_replace( $patterns, $replacements, $text );

         $text .= $this->categoryMagic () ;

         wfProfileOut( $fname );
         return $text;
 }
 454
 455
 # Turns lines wrapped in one to six "=" signs into <h1>..<h6> elements.
 # Longer runs are handled first so that a six-sign heading is not
 # mistaken for a shorter one.
 /* private */ function doHeadings( $text )
 {
         $level = 6;
         while ( $level >= 1 ) {
                 $marks = str_repeat( "=", $level );
                 $pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
                 $replacement = "<h{$level}>\\1</h{$level}>\\2";
                 $text = preg_replace( $pattern, $replacement, $text );
                 --$level;
         }
         return $text;
 }
 465
 466         # Note: we have to do external links before the internal ones,
 467         # and otherwise take great care in the order of things here, so
 468         # that we don't end up interpreting some URLs twice.
 469
 # Runs subReplaceExternalLinks() over $text once per supported
 # protocol, in the order listed, and returns the result.
 /* private */ function replaceExternalLinks( $text )
 {
         $fname = "Parser::replaceExternalLinks";
         wfProfileIn( $fname );

         # protocol => $autonumber flag handed to subReplaceExternalLinks()
         $protocols = array(
                 "http" => true,
                 "https" => true,
                 "ftp" => false,
                 "irc" => false,
                 "gopher" => false,
                 "news" => false,
                 "mailto" => false
         );
         foreach ( $protocols as $protocol => $autonumber ) {
                 $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
         }

         wfProfileOut( $fname );
         return $text;
 }
 484
 485         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 486         {
                     # Turns URLs of one protocol in $s into HTML, in two stages:
                     #   1. free-standing URLs (not preceded by "[") become <a> links;
                     #      when $autonumber is set and the options allow external
                     #      images, URLs ending in an image extension are first
                     #      turned into image markup by the skin;
                     #   2. bracketed forms "[url]" and "[url text]" become links;
                     #      "[url]" is labelled "[n]" when $autonumber is set,
                     #      otherwise with the escaped URL.
                     # $protocol is the scheme name without the colon.
                     # Returns the transformed text.

                     # Stand-in for the protocol inside stage-1 output; swapped back
                     # by the str_replace() below. Presumably this keeps the second
                     # regex from re-matching URLs the image pass already emitted.
 487                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
                     # Characters allowed inside a URL
 488                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 489
 490                 # this is  the list of separators that should be ignored if they
 491                 # are the last character of an URL but that should be included
 492                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 493                 # in this case, the last comma should not become part of the URL,
 494                 # but in "www.foo.com/123,2342,32.htm" it should.
 495                 $sep = ",;\.:";
                     # Characters allowed in the file-name part of an image URL
 496                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 497                 $images = "gif|png|jpg|jpeg";
 498
 499                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 500                 # they are interpreted as part of the string (used to tell PHP
 501                 # that the content of the string should be inserted there).
                     # $e1: free-standing image URL; $e2: any free-standing URL
 502                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 503                   "((?i){$images})([^{$uc}]|$)/";
 504
 505                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 506                 $sk =& $this->mOptions->getSkin();
 507
 508                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 509                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 510                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 511                 }
 512                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 513                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 514                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 515                   "</a>\\5", $s );
 516                 $s = str_replace( $unique, $protocol, $s );
 517
                     # Stage 2: split at every "[protocol:". The space prepended here
                     # keeps the first piece non-empty and is cut off again below.
 518                 $a = explode( "[{$protocol}:", " " . $s );
 519                 $s = array_shift( $a );
 520                 $s = substr( $s, 1 );
 521
                     # From here on: $e1 matches "url]rest", $e2 matches "url text]rest"
 522                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 523                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 524
 525                 foreach ( $a as $line ) {
 526                         if ( preg_match( $e1, $line, $m ) ) {
 527                                 $link = "{$protocol}:{$m[1]}";
 528                                 $trail = $m[2];
 529                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 530                                 else { $text = wfEscapeHTML( $link ); }
 531                         } else if ( preg_match( $e2, $line, $m ) ) {
 532                                 $link = "{$protocol}:{$m[1]}";
 533                                 $text = $m[2];
 534                                 $trail = $m[3];
 535                         } else {
                                     # Not a well-formed bracketed link: put the piece back as it was
 536                                 $s .= "[{$protocol}:" . $line;
 537                                 continue;
 538                         }
                             # No URL expansion when the label already is the URL, or is the
                             # URL without its "protocol://" prefix and optional trailing slash
 539                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 540                                 $paren = "";
 541                         } else {
 542                                 # Expand the URL for printable version
 543                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 544                         }
 545                         $la = $sk->getExternalLinkAttributes( $link, $text );
 546                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 547
 548                 }
 549                 return $s;
 550         }
 551
 # Handles a ''' (bold) token. $state["strong"] and $state["em"] hold the
 # position at which each style was opened, or false when it is closed.
 # Updates $state and returns the HTML to emit.
 /* private */ function handle3Quotes( &$state, $token )
 {
         if ( $state["strong"] === false ) {
                 # Bold is not open yet: open it and remember where
                 $state["strong"] = $token["pos"];
                 return "<strong>";
         }

         # Bold is open: close it. If italics were opened after the bold
         # (''' lala ''lala '''), close and reopen them to keep the tags nested.
         $emInside = ( $state["em"] !== false && $state["em"] > $state["strong"] );
         $state["strong"] = FALSE;
         if ( $emInside ) {
                 return "</em></strong><em>";
         }
         return "</strong>";
 }
 569
 # Handles a '' (italic) token. $state["em"] and $state["strong"] hold the
 # position at which each style was opened, or false when it is closed.
 # Updates $state and returns the HTML to emit.
 /* private */ function handle2Quotes( &$state, $token )
 {
         if ( $state["em"] === false ) {
                 # Italics are not open yet: open them and remember where
                 $state["em"] = $token["pos"];
                 return "<em>";
         }

         # Italics are open: close them. If bold was opened after the italics
         # (''lala'''lala'' ....'''), close and reopen it to keep the tags nested.
         $strongInside = ( $state["strong"] !== false && $state["strong"] > $state["em"] );
         $state["em"] = FALSE;
         if ( $strongInside ) {
                 return "</strong></em><strong>";
         }
         return "</em>";
 }
 587
 # Handles a ''''' token, which toggles bold and italics together.
 #
 # $state["em"] and $state["strong"] hold the position at which each
 # style was opened, or false when it is closed. Position 0 is a valid
 # open position, so both are compared against false explicitly.
 # Updates $state and returns the HTML to emit:
 #   both open    - close both, innermost (the later-opened one) first
 #   only italics - close italics, open bold
 #   only bold    - close bold, open italics
 #   neither      - open bold, then italics
 /* private */ function handle5Quotes( &$state, $token )
 {
         $emOpen = ( $state["em"] !== false );
         $strongOpen = ( $state["strong"] !== false );

         $s = "";
         if ( $emOpen && $strongOpen ) {
                 if ( $state["em"] < $state["strong"] ) {
                         $s .= "</strong></em>";
                 } else {
                         $s .= "</em></strong>";
                 }
                 $state["strong"] = $state["em"] = FALSE;
         } elseif ( $emOpen ) {
                 $s .= "</em><strong>";
                 $state["em"] = FALSE;
                 $state["strong"] = $token["pos"];
         } elseif ( $strongOpen ) {
                 $s .= "</strong><em>";
                 $state["strong"] = FALSE;
                 $state["em"] = $token["pos"];
         } else { # not $em and not $strong
                 $s .= "<strong><em>";
                 $state["strong"] = $state["em"] = $token["pos"];
         }
         return $s;
 }
 612
 613         /* private */ function doTokenizedParser( $str )
 614         {
 615                 global $wgLang; # for language specific parser hook
 616
 617                 $tokenizer=Tokenizer::newFromString( $str );
 618                 $tokenStack = array();
 619
 620                 $s="";
 621                 $state["em"]      = FALSE;
 622                 $state["strong"]  = FALSE;
 623                 $tagIsOpen = FALSE;
 624                 $threeopen = false;
 625
 626                 # The tokenizer splits the text into tokens and returns them one by one.
 627                 # Every call to the tokenizer returns a new token.
 628                 while ( $token = $tokenizer->nextToken() )
 629                 {
 630                         switch ( $token["type"] )
 631                         {
 632                                 case "text":
 633                                         # simple text with no further markup
 634                                         $txt = $token["text"];
 635                                         break;
 636                                 case "[[[":
 637                                         # remember the tag opened with 3 [
 638                                         $threeopen = true;
 639                                 case "[[":
 640                                         # link opening tag.
 641                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 642                                         $tagIsOpen = TRUE;
 643                                         array_push( $tokenStack, $token );
 644                                         $txt="";
 645                                         break;
 646
 647                                 case "]]]":
 648                                 case "]]":
 649                                         # link close tag.
 650                                         # get text from stack, glue it together, and call the code to handle a
 651                                         # link
 652
 653                                         if ( count( $tokenStack ) == 0 )
 654                                         {
 655                                                 # stack empty. Found a ]] without an opening [[
 656                                                 $txt = "]]";
 657                                         } else {
 658                                                 $linkText = "";
 659                                                 $lastToken = array_pop( $tokenStack );
 660                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 661                                                 {
 662                                                         if( !empty( $lastToken["text"] ) ) {
 663                                                                 $linkText = $lastToken["text"] . $linkText;
 664                                                         }
 665                                                         $lastToken = array_pop( $tokenStack );
 666                                                 }
 667
 668                                                 $txt = $linkText ."]]";
 669
 670                                                 if( isset( $lastToken["text"] ) ) {
 671                                                         $prefix = $lastToken["text"];
 672                                                 } else {
 673                                                         $prefix = "";
 674                                                 }
 675                                                 $nextToken = $tokenizer->previewToken();
 676                                                 if ( $nextToken["type"] == "text" )
 677                                                 {
 678                                                         # Preview just looks at it. Now we have to fetch it.
 679                                                         $nextToken = $tokenizer->nextToken();
 680                                                         $txt .= $nextToken["text"];
 681                                                 }
 682                                                 $fakestate = $this->mStripState;
 683                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$fakestate), $prefix );
 684
 685                                                 # did the tag start with 3 [ ?
 686                                                 if($threeopen) {
 687                                                         # show the first as text
 688                                                         $txt = "[".$txt;
 689                                                         $threeopen=false;
 690                                                 }
 691
 692                                         }
 693                                         $tagIsOpen = (count( $tokenStack ) != 0);
 694                                         break;
 695                                 case "----":
 696                                         $txt = "\n<hr />\n";
 697                                         break;
 698                                 case "'''":
 699                                         # This and the three next ones handle quotes
 700                                         $txt = $this->handle3Quotes( $state, $token );
 701                                         break;
 702                                 case "''":
 703                                         $txt = $this->handle2Quotes( $state, $token );
 704                                         break;
 705                                 case "'''''":
 706                                         $txt = $this->handle5Quotes( $state, $token );
 707                                         break;
 708                                 case "":
 709                                         # empty token
 710                                         $txt="";
 711                                         break;
 712                                 case "RFC ":
 713                                         if ( $tagIsOpen ) {
 714                                                 $txt = "RFC ";
 715                                         } else {
 716                                                 $txt = $this->doMagicRFC( $tokenizer );
 717                                         }
 718                                         break;
 719                                 case "ISBN ":
 720                                         if ( $tagIsOpen ) {
 721                                                 $txt = "ISBN ";
 722                                         } else {
 723                                                 $txt = $this->doMagicISBN( $tokenizer );
 724                                         }
 725                                         break;
 726                                 default:
 727                                         # Call language specific Hook.
 728                                         $txt = $wgLang->processToken( $token, $tokenStack );
 729                                         if ( NULL == $txt ) {
 730                                                 # An unkown token. Highlight.
 731                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 732                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 733                                         }
 734                                         break;
 735                         }
 736                         # If we're parsing the interior of a link, don't append the interior to $s,
 737                         # but push it to the stack so it can be processed when a ]] token is found.
 738                         if ( $tagIsOpen  && $txt != "" ) {
 739                                 $token["type"] = "text";
 740                                 $token["text"] = $txt;
 741                                 array_push( $tokenStack, $token );
 742                         } else {
 743                                 $s .= $txt;
 744                         }
 745                 } #end while
 746                 if ( count( $tokenStack ) != 0 )
 747                 {
 748                         # still objects on stack. opened [[ tag without closing ]] tag.
 749                         $txt = "";
 750                         while ( $lastToken = array_pop( $tokenStack ) )
 751                         {
 752                                 if ( $lastToken["type"] == "text" )
 753                                 {
 754                                         $txt = $lastToken["text"] . $txt;
 755                                 } else {
 756                                         $txt = $lastToken["type"] . $txt;
 757                                 }
 758                         }
 759                         $s .= $txt;
 760                 }
 761                 return $s;
 762         }
 763
 764         /* private */ function handleInternalLink( $line, $prefix )
 765         {
 766                 global $wgLang, $wgLinkCache;
 767                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 768                 static $fname = "Parser::handleInternalLink" ;
 769                 wfProfileIn( $fname );
 770
 771                 wfProfileIn( "$fname-setup" );
 772                 static $tc = FALSE;
 773                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 774                 $sk =& $this->mOptions->getSkin();
 775
 776                 # Match a link having the form [[namespace:link|alternate]]trail
 777                 static $e1 = FALSE;
 778                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 779                 # Match the end of a line for a word that's not followed by whitespace,
 780                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 781                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 782                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 783                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 784
 785
 786                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 787                 static $image = FALSE;
 788                 static $special = FALSE;
 789                 static $media = FALSE;
 790                 static $category = FALSE;
 791                 if ( !$image ) { $image = Namespace::getImage(); }
 792                 if ( !$special ) { $special = Namespace::getSpecial(); }
 793                 if ( !$media ) { $media = Namespace::getMedia(); }
 794                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 795
 796                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 797
 798                 wfProfileOut( "$fname-setup" );
 799                 $s = "";
 800
 801                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 802                         $text = $m[2];
 803                         $trail = $m[3];
 804                 } else { # Invalid form; output directly
 805                         $s .= $prefix . "[[" . $line ;
 806                         return $s;
 807                 }
 808
 809                 /* Valid link forms:
 810                 Foobar -- normal
 811                 :Foobar -- override special treatment of prefix (images, language links)
 812                 /Foobar -- convert to CurrentPage/Foobar
 813                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 814                 */
 815                 $c = substr($m[1],0,1);
 816                 $noforce = ($c != ":");
 817                 if( $c == "/" ) { # subpage
 818                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 819                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 820                                 $noslash=$m[1];
 821                         } else {
 822                                 $noslash=substr($m[1],1);
 823                         }
 824                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 825                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 826                                 if( "" == $text ) {
 827                                         $text= $m[1];
 828                                 } # this might be changed for ugliness reasons
 829                         } else {
 830                                 $link = $noslash; # no subpage allowed, use standard link
 831                         }
 832                 } elseif( $noforce ) { # no subpage
 833                         $link = $m[1];
 834                 } else {
 835                         $link = substr( $m[1], 1 );
 836                 }
 837                 if( "" == $text )
 838                         $text = $link;
 839
 840                 $nt = Title::newFromText( $link );
 841                 if( !$nt ) {
 842                         $s .= $prefix . "[[" . $line;
 843                         return $s;
 844                 }
 845                 $ns = $nt->getNamespace();
 846                 $iw = $nt->getInterWiki();
 847                 if( $noforce ) {
 848                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 849                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 850                                 return (trim($s) == '')? '': $s;
 851                         }
 852                         if( $ns == $image ) {
 853                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 854                                 $wgLinkCache->addImageLinkObj( $nt );
 855                                 return $s;
 856                         }
 857                 }
 858                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 859                     ( strpos( $link, "#" ) == FALSE ) ) {
 860                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 861                         return $s;
 862                 }
 863
 864                 # Category feature
 865                 $catns = strtoupper ( $nt->getDBkey () ) ;
 866                 $catns = explode ( ":" , $catns ) ;
 867                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 868                 else $catns = "" ;
 869                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 870                         $t = explode ( ":" , $nt->getText() ) ;
 871                         array_shift ( $t ) ;
 872                         $t = implode ( ":" , $t ) ;
 873                         $t = $wgLang->ucFirst ( $t ) ;
 874                         $nnt = Title::newFromText ( $category.":".$t ) ;
 875                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 876                         $this->mOutput->mCategoryLinks[] = $t ;
 877                         $s .= $prefix . $trail ;
 878                         return $s ;
 879                 }
 880
 881                 if( $ns == $media ) {
 882                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 883                         $wgLinkCache->addImageLinkObj( $nt );
 884                         return $s;
 885                 } elseif( $ns == $special ) {
 886                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 887                         return $s;
 888                 }
 889                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 890
 891                 wfProfileOut( $fname );
 892                 return $s;
 893         }
 894
 895         # Some functions here used by doBlockLevels()
 896         #
 897         /* private */ function closeParagraph()
 898         {
 899                 $result = "";
 900                 if ( '' != $this->mLastSection ) {
 901                         $result = "</" . $this->mLastSection  . ">";
 902                 }
 903                 $this->mLastSection = "";
 904                 return $result."\n";
 905         }
 906         # getCommon() returns the length of the longest common substring
 907         # of both arguments, starting at the beginning of both.
 908         #
 909         /* private */ function getCommon( $st1, $st2 )
 910         {
 911                 $fl = strlen( $st1 );
 912                 $shorter = strlen( $st2 );
 913                 if ( $fl < $shorter ) { $shorter = $fl; }
 914
 915                 for ( $i = 0; $i < $shorter; ++$i ) {
 916                         if ( $st1{$i} != $st2{$i} ) { break; }
 917                 }
 918                 return $i;
 919         }
 920         # These next three functions open, continue, and close the list
 921         # element appropriate to the prefix character passed into them.
 922         #
 923         /* private */ function openList( $char )
 924     {
 925                 $result = $this->closeParagraph();
 926
 927                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 928                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 929                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 930                 else if ( ";" == $char ) {
 931                         $result .= "<dl><dt>";
 932                         $this->mDTopen = true;
 933                 }
 934                 else { $result = "<!-- ERR 1 -->"; }
 935
 936                 return $result;
 937         }
 938
 939         /* private */ function nextItem( $char )
 940         {
 941                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 942                 else if ( ":" == $char || ";" == $char ) {
 943                         $close = "</dd>";
 944                         if ( $this->mDTopen ) { $close = "</dt>"; }
 945                         if ( ";" == $char ) {
 946                                 $this->mDTopen = true;
 947                                 return $close . "<dt>";
 948                         } else {
 949                                 $this->mDTopen = false;
 950                                 return $close . "<dd>";
 951                         }
 952                 }
 953                 return "<!-- ERR 2 -->";
 954         }
 955
 956         /* private */function closeList( $char )
 957         {
 958                 if ( "*" == $char ) { $text = "</li></ul>"; }
 959                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 960                 else if ( ":" == $char ) {
 961                         if ( $this->mDTopen ) {
 962                                 $this->mDTopen = false;
 963                                 $text = "</dt></dl>";
 964                         } else {
 965                                 $text = "</dd></dl>";
 966                         }
 967                 }
 968                 else {  return "<!-- ERR 3 -->"; }
 969                 return $text."\n";
 970         }
 971
 972         /* private */ function doBlockLevels( $text, $linestart )
 973         {
 974                 $fname = "Parser::doBlockLevels";
 975                 wfProfileIn( $fname );
 976                 # Parsing through the text line by line.  The main thing
 977                 # happening here is handling of block-level elements p, pre,
 978                 # and making lists from lines starting with * # : etc.
 979                 #
 980                 $a = explode( "\n", $text );
 981                 $lastPref = $text = '';
 982                 $this->mDTopen = $inBlockElem = false;
 983
 984                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 985                 foreach ( $a as $t ) {
 986                         if ( "" != $text ) { $text .= "\n"; }
 987
 988                         $oLine = $t;
 989                         $opl = strlen( $lastPref );
 990                         $npl = strspn( $t, "*#:;" );
 991                         $pref = substr( $t, 0, $npl );
 992                         $pref2 = str_replace( ";", ":", $pref );
 993                         $t = substr( $t, $npl );
 994
 995                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 996                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 997
 998                                 if ( ";" == substr( $pref, -1 ) ) {
 999                                         $cpos = strpos( $t, ":" );
1000                                         if ( ! ( false === $cpos ) ) {
1001                                                 $term = substr( $t, 0, $cpos );
1002                                                 $text .= $term . $this->nextItem( ":" );
1003                                                 $t = substr( $t, $cpos + 1 );
1004                                         }
1005                                 }
1006                         } else if (0 != $npl || 0 != $opl) {
1007                                 $cpl = $this->getCommon( $pref, $lastPref );
1008
1009                                 while ( $cpl < $opl ) {
1010                                         $text .= $this->closeList( $lastPref{$opl-1} );
1011                                         --$opl;
1012                                 }
1013                                 if ( $npl <= $cpl && $cpl > 0 ) {
1014                                         $text .= $this->nextItem( $pref{$cpl-1} );
1015                                 }
1016                                 while ( $npl > $cpl ) {
1017                                         $char = substr( $pref, $cpl, 1 );
1018                                         $text .= $this->openList( $char );
1019
1020                                         if ( ";" == $char ) {
1021                                                 $cpos = strpos( $t, ":" );
1022                                                 if ( ! ( false === $cpos ) ) {
1023                                                         $term = substr( $t, 0, $cpos );
1024                                                         $text .= $term . $this->nextItem( ":" );
1025                                                         $t = substr( $t, $cpos + 1 );
1026                                                 }
1027                                         }
1028                                         ++$cpl;
1029                                 }
1030                                 $lastPref = $pref2;
1031                         }
1032                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
1033                                 $uniq_prefix = UNIQ_PREFIX;
1034                                 if ( preg_match(
1035                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div)/i", $t ) ) {
1036                                         $text .= $this->closeParagraph();
1037                                         $inBlockElem = true;
1038                                 } else if ( preg_match("/(<hr|<\\/td|".$uniq_prefix."-pre)/i", $t ) ) {
1039                                         $text .= $this->closeParagraph();
1040                                         $inBlockElem = false;
1041                                 }
1042                                 if ( ! $inBlockElem ) {
1043                                         if ( " " == $t{0} ) {
1044                                                 $newSection = "pre";
1045                                                 $text .= $this->closeParagraph();
1046                                                 # $t = wfEscapeHTML( $t );
1047                                         }
1048                                         else { $newSection = "p"; }
1049
1050                                         if ( '' == trim( $oLine ) ) {
1051                                                 if ( $this->mLastSection != 'p') {
1052                                                         $text .= $this->closeParagraph();
1053                                                         $text .= "<" . $newSection . ">";
1054                                                         $this->mLastSection = $newSection;
1055                                                 } else if ( $this->mLastSection == 'p' and '' == $oLine) {
1056                                                         $text .= '<p>';
1057                                                 }
1058                                         } else if ( $this->mLastSection == $newSection and $newSection != 'p' ) {
1059                                                 $text .= $this->closeParagraph();
1060                                                 $text .= "<" . $newSection . ">";
1061                                                 $this->mLastSection = $newSection;
1062                                         }
1063                                 }
1064                                 if ( $inBlockElem &&
1065                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|<\\/p<\\/div)/i", $t ) ) {
1066                                         $inBlockElem = false;
1067                                 }
1068                         }
1069                         $text .= $t;
1070                 }
1071                 while ( $npl ) {
1072                         $text .= $this->closeList( $pref2{$npl-1} );
1073                         --$npl;
1074                 }
1075                 if ( "" != $this->mLastSection ) {
1076                         $text .= "</" . $this->mLastSection . ">";
1077                         $this->mLastSection = "";
1078                 }
1079                 wfProfileOut( $fname );
1080                 return $text;
1081         }
1082
1083         function getVariableValue( $index ) {
1084                 global $wgLang, $wgSitename, $wgServer;
1085
1086                 switch ( $index ) {
1087                         case MAG_CURRENTMONTH:
1088                                 return date( "m" );
1089                         case MAG_CURRENTMONTHNAME:
1090                                 return $wgLang->getMonthName( date("n") );
1091                         case MAG_CURRENTMONTHNAMEGEN:
1092                                 return $wgLang->getMonthNameGen( date("n") );
1093                         case MAG_CURRENTDAY:
1094                                 return date("j");
1095                         case MAG_CURRENTDAYNAME:
1096                                 return $wgLang->getWeekdayName( date("w")+1 );
1097                         case MAG_CURRENTYEAR:
1098                                 return date( "Y" );
1099                         case MAG_CURRENTTIME:
1100                                 return $wgLang->time( wfTimestampNow(), false );
1101                         case MAG_NUMBEROFARTICLES:
1102                                 return wfNumberOfArticles();
1103                         case MAG_SITENAME:
1104                                 return $wgSitename;
1105                         case MAG_SERVER:
1106                                 return $wgServer;
1107                         default:
1108                                 return NULL;
1109                 }
1110         }
1111
1112         function initialiseVariables()
1113         {
1114                 global $wgVariableIDs;
1115                 $this->mVariables = array();
1116                 foreach ( $wgVariableIDs as $id ) {
1117                         $mw =& MagicWord::get( $id );
1118                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1119                 }
1120         }
1121
1122         /* private */ function replaceVariables( $text )
1123         {
1124                 global $wgLang, $wgCurParser;
1125                 global $wgScript, $wgArticlePath;
1126
1127                 $fname = "Parser::replaceVariables";
1128                 wfProfileIn( $fname );
1129
1130                 $bail = false;
1131                 if ( !$this->mVariables ) {
1132                         $this->initialiseVariables();
1133                 }
1134                 $titleChars = Title::legalChars();
1135                 $regex = "/{{([$titleChars\\|]*?)}}/s";
1136
1137                 # "Recursive" variable expansion: run it through a couple of passes
1138                 for ( $i=0; $i<MAX_INCLUDE_REPEAT && !$bail; $i++ ) {
1139                         $oldText = $text;
1140
1141                         # It's impossible to rebind a global in PHP
1142                         # Instead, we run the substitution on a copy, then merge the changed fields back in
1143                         $wgCurParser = $this->fork();
1144
1145                         $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1146                         if ( $oldText == $text ) {
1147                                 $bail = true;
1148                         }
1149                         $this->merge( $wgCurParser );
1150                 }
1151
1152                 return $text;
1153         }
1154
1155         # Returns a copy of this object except with various variables cleared
1156         # This copy can be re-merged with the parent after operations on the copy
1157         function fork()
1158         {
1159                 $copy = $this;
1160                 $copy->mOutput = new ParserOutput;
1161                 return $copy;
1162         }
1163
1164         # Merges a copy split off with fork()
1165         function merge( &$copy )
1166         {
1167                 $this->mOutput->merge( $copy->mOutput );
1168
1169                 # Merge include throttling arrays
1170                 foreach( $copy->mIncludeCount as $dbk => $count ) {
1171                         if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
1172                                 $this->mIncludeCount[$dbk] += $count;
1173                         } else {
1174                                 $this->mIncludeCount[$dbk] = $count;
1175                         }
1176                 }
1177         }
1178
1179         function braceSubstitution( $matches )
1180         {
1181                 global $wgLinkCache, $wgLang;
1182                 $fname = "Parser::braceSubstitution";
1183                 $found = false;
1184                 $nowiki = false;
1185
1186                 $text = $matches[1];
1187
1188                 # SUBST
1189                 $mwSubst =& MagicWord::get( MAG_SUBST );
1190                 if ( $mwSubst->matchStartAndRemove( $text ) ) {
1191                         if ( $this->mOutputType != OT_WIKI ) {
1192                                 # Invalid SUBST not replaced at PST time
1193                                 # Return without further processing
1194                                 $text = $matches[0];
1195                                 $found = true;
1196                         }
1197                 } elseif ( $this->mOutputType == OT_WIKI ) {
1198                         # SUBST not found in PST pass, do nothing
1199                         $text = $matches[0];
1200                         $found = true;
1201                 }
1202
1203                 # MSG, MSGNW and INT
1204                 if ( !$found ) {
1205                         # Check for MSGNW:
1206                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1207                         if ( $mwMsgnw->matchStartAndRemove( $text ) ) {
1208                                 $nowiki = true;
1209                         } else {
1210                                 # Remove obsolete MSG:
1211                                 $mwMsg =& MagicWord::get( MAG_MSG );
1212                                 $mwMsg->matchStartAndRemove( $text );
1213                         }
1214
1215                         # Check if it is an internal message
1216                         $mwInt =& MagicWord::get( MAG_INT );
1217                         if ( $mwInt->matchStartAndRemove( $text ) ) {
1218                                 $text = wfMsg( $text );
1219                                 $found = true;
1220                         }
1221                 }
1222
1223                 # NS
1224                 if ( !$found ) {
1225                         # Check for NS: (namespace expansion)
1226                         $mwNs = MagicWord::get( MAG_NS );
1227                         if ( $mwNs->matchStartAndRemove( $text ) ) {
1228                                 if ( intval( $text ) ) {
1229                                         $text = $wgLang->getNsText( intval( $text ) );
1230                                         $found = true;
1231                                 } else {
1232                                         $index = Namespace::getCanonicalIndex( strtolower( $text ) );
1233                                         if ( !is_null( $index ) ) {
1234                                                 $text = $wgLang->getNsText( $index );
1235                                                 $found = true;
1236                                         }
1237                                 }
1238                         }
1239                 }
1240
1241                 # LOCALURL and LOCALURLE
1242                 if ( !$found ) {
1243                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1244                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1245
1246                         if ( $mwLocal->matchStartAndRemove( $text ) ) {
1247                                 $func = 'getLocalURL';
1248                         } elseif ( $mwLocalE->matchStartAndRemove( $text ) ) {
1249                                 $func = 'escapeLocalURL';
1250                         } else {
1251                                 $func = '';
1252                         }
1253
1254                         if ( $func !== '' ) {
1255                                 $args = explode( "|", $text );
1256                                 $n = count( $args );
1257                                 if ( $n > 0 ) {
1258                                         $title = Title::newFromText( $args[0] );
1259                                         if ( !is_null( $title ) ) {
1260                                                 if ( $n > 1 ) {
1261                                                         $text = $title->$func( $args[1] );
1262                                                 } else {
1263                                                         $text = $title->$func();
1264                                                 }
1265                                                 $found = true;
1266                                         }
1267                                 }
1268                         }
1269                 }
1270
1271                 # Check for a match against internal variables
1272                 if ( !$found && array_key_exists( $text, $this->mVariables ) ) {
1273                         $text = $this->mVariables[$text];
1274                         $found = true;
1275                         $this->mOutput->mContainsOldMagic = true;
1276                 }
1277
1278                 # Load from database
1279                 if ( !$found ) {
1280                         $title = Title::newFromText( $text, NS_TEMPLATE );
1281                         if ( is_object( $title ) && !$title->isExternal() ) {
1282                                 # Check for excessive inclusion
1283                                 $dbk = $title->getPrefixedDBkey();
1284                                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1285                                         $this->mIncludeCount[$dbk] = 0;
1286                                 }
1287                                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1288                                         $article = new Article( $title );
1289                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1290                                         if ( $articleContent !== false ) {
1291                                                 $found = true;
1292                                                 $text = $articleContent;
1293
1294                                                 # Escaping and link table handling
1295                                                 # Not required for preSaveTransform()
1296                                                 if ( $this->mOutputType == OT_HTML ) {
1297                                                         if ( $nowiki ) {
1298                                                                 $text = wfEscapeWikiText( $text );
1299                                                         } else {
1300                                                                 $text = $this->removeHTMLtags( $text );
1301                                                         }
1302                                                         $wgLinkCache->suspend();
1303                                                         $text = $this->doTokenizedParser( $text );
1304                                                         $wgLinkCache->resume();
1305                                                         $wgLinkCache->addLinkObj( $title );
1306
1307                                                 }
1308                                         }
1309                                 }
1310
1311                                 # If the title is valid but undisplayable, make a link to it
1312                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1313                                         $text = "[[" . $title->getPrefixedText() . "]]";
1314                                         $found = true;
1315                                 }
1316                         }
1317                 }
1318
1319                 if ( !$found ) {
1320                         return $matches[0];
1321                 } else {
1322                         return $text;
1323                 }
1324         }
1325
1326         # Cleans up HTML, removes dangerous tags and attributes
             #
             # The text is split on "<" and every fragment is examined as a candidate
             # tag. A fragment is emitted as a real tag only when its element name is in
             # the whitelist built below and it passes the nesting checks; the attributes
             # of an accepted opening tag are rewritten by fixTagAttributes(). A rejected
             # fragment is output with its "<" as "&lt;" and its ">" as "&gt;"; ">" in the
             # text that follows an accepted tag is escaped as well. HTML comments are
             # removed before anything else, and tags still open at the end are closed.
             #
             # $text: markup to clean
             # Returns the cleaned markup.
1327         /* private */ function removeHTMLtags( $text )
1328         {
1329                 $fname = "Parser::removeHTMLtags";
1330                 wfProfileIn( $fname );
1331                 $htmlpairs = array( # Tags that must be closed
1332                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1333                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1334                         "strike", "strong", "tt", "var", "div", "center",
1335                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1336                         "ruby", "rt" , "rb" , "rp", "p"
1337                 );
                     # Tags that are never pushed on the open-tag stack below, so a
                     # closing tag is not required for them
1338                 $htmlsingle = array(
1339                         "br", "hr", "li", "dt", "dd"
1340                 );
1341                 $htmlnest = array( # Tags that can be nested--??
1342                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1343                         "dl", "font", "big", "small", "sub", "sup"
1344                 );
1345                 $tabletags = array( # Can only appear inside table
1346                         "td", "th", "tr"
1347                 );
1348
                     # Table row/cell tags are treated like the single tags: they are not
                     # tracked on the stack either. $htmlelements is the full whitelist.
1349                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1350                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1351
                     # NOTE(review): $htmlattrs is not read again in this function
1352                 $htmlattrs = $this->getHTMLattrs () ;
1353
1354                 # Remove HTML comments
1355                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1356
                     # Everything before the first "<" is kept as is; each remaining
                     # element of $bits is the text that followed one "<".
1357                 $bits = explode( "<", $text );
1358                 $text = array_shift( $bits );
                     # $tagstack holds the currently open tags; $tablestack holds the
                     # stacks that were put aside when a <table> was opened.
1359                 $tagstack = array(); $tablestack = array();
1360
1361                 foreach ( $bits as $x ) {
                             # Notices and warnings are switched off around the match,
                             # presumably because a fragment that does not match leaves
                             # $regs without the elements list() asks for.
1362                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
                             # Captures: optional leading "/", tag name, attribute text,
                             # the closing ">" (optionally preceded by "/"), trailing text
1363                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1364                           $x, $regs );
1365                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1366                         error_reporting( $prev );
1367
1368                         $badtag = 0 ;
1369                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1370                                 # Check our stack
1371                                 if ( $slash ) {
1372                                         # Closing a tag...
                                             # A tracked tag must close the most recently
                                             # opened one; otherwise the popped entry is put
                                             # back and the closing tag is rejected.
1373                                         if ( ! in_array( $t, $htmlsingle ) &&
1374                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1375                                                 array_push( $tagstack, $ot );
1376                                                 $badtag = 1;
1377                                         } else {
1378                                                 if ( $t == "table" ) {
                                                             # Leaving a table: go back to the stack
                                                             # that was put aside when it was opened
1379                                                         $tagstack = array_pop( $tablestack );
1380                                                 }
1381                                                 $newparams = "";
1382                                         }
1383                                 } else {
1384                                         # Keep track for later
                                             # Rejected: a row/cell tag with no table open, or a
                                             # tag that is already open and is not in $htmlnest.
1385                                         if ( in_array( $t, $tabletags ) &&
1386                                           ! in_array( "table", $tagstack ) ) {
1387                                                 $badtag = 1;
1388                                         } else if ( in_array( $t, $tagstack ) &&
1389                                           ! in_array ( $t , $htmlnest ) ) {
1390                                                 $badtag = 1 ;
1391                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1392                                                 if ( $t == "table" ) {
                                                             # Entering a table: put the current stack
                                                             # aside and start an empty one for its content
1393                                                         array_push( $tablestack, $tagstack );
1394                                                         $tagstack = array();
1395                                                 }
1396                                                 array_push( $tagstack, $t );
1397                                         }
1398                                         # Strip non-approved attributes from the tag
1399                                         $newparams = $this->fixTagAttributes($params);
1400
1401                                 }
1402                                 if ( ! $badtag ) {
                                             # Accepted: emit the rebuilt tag followed by the
                                             # trailing text with any ">" escaped
1403                                         $rest = str_replace( ">", "&gt;", $rest );
1404                                         $text .= "<$slash$t $newparams$brace$rest";
1405                                         continue;
1406                                 }
1407                         }
                             # Not an accepted tag: output the whole fragment as plain text
1408                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1409                 }
1410                 # Close off any remaining tags
1411                 while ( $t = array_pop( $tagstack ) ) {
1412                         $text .= "</$t>\n";
1413                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1414                 }
1415                 wfProfileOut( $fname );
1416                 return $text;
1417         }
1418
1419 /*
1420  *
1421  * This function accomplishes several tasks:
1422  * 1) Auto-number headings if that option is enabled
1423  * 2) Add an [edit] link to sections for users who may edit the page and have enabled the option
1424  * 3) Add a Table of contents on the top for users who have enabled the option
1425  * 4) Auto-anchor headings
1426  *
1427  * It loops through all headlines, collects the necessary data, then splits up the
1428  * string and re-inserts the newly formatted headlines.
1429  *
1430  */
1431
1432         /* private */ function formatHeadings( $text )
1433         {
                     # $text: HTML in which the headings are already <h1>..<h6> elements
                     # Returns the text with every heading rebuilt (anchor, optional
                     # number, optional edit link) and, when enabled, the table of
                     # contents inserted after the first block of text.
1434                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1435                 $doShowToc = $this->mOptions->getShowToc();
                     # Edit links and the right-click script are only considered when
                     # the title reports that the user can edit it
1436                 if( !$this->mTitle->userCanEdit() ) {
1437                         $showEditLink = 0;
1438                         $rightClickHack = 0;
1439                 } else {
1440                         $showEditLink = $this->mOptions->getEditSection();
1441                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1442                 }
1443
1444                 # Inhibit editsection links if requested in the page
1445                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1446                 if( $esw->matchAndRemove( $text ) ) {
1447                         $showEditLink = 0;
1448                 }
1449                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1450                 # do not add TOC
1451                 $mw =& MagicWord::get( MAG_NOTOC );
1452                 if( $mw->matchAndRemove( $text ) ) {
1453                         $doShowToc = 0;
1454                 }
1455
1456                 # never add the TOC to the Main Page. This is an entry page that should not
1457                 # be more than 1-2 screens large anyway
1458                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1459                         $doShowToc = 0;
1460                 }
1461
1462                 # Get all headlines for numbering them and adding funky stuff like [edit]
1463                 # links - this is for later, but we need the number of headlines right now
                     # $matches[1] = heading level digit, $matches[2] = rest of the opening
                     # tag including its ">", $matches[3] = heading content
1464                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1465
1466                 # if there are fewer than 4 headlines in the article, do not show TOC
1467                 if( $numMatches < 4 ) {
1468                         $doShowToc = 0;
1469                 }
1470
1471                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1472                 # override above conditions and always show TOC
1473                 $mw =& MagicWord::get( MAG_FORCETOC );
1474                 if ($mw->matchAndRemove( $text ) ) {
1475                         $doShowToc = 1;
1476                 }
1477
1478
1479                 # We need this to perform operations on the HTML
1480                 $sk =& $this->mOptions->getSkin();
1481
1482                 # headline counter
1483                 $headlineCount = 0;
1484
1485                 # Ugh .. the TOC should have neat indentation levels which can be
1486                 # passed to the skin functions. These are determined here
1487                 $toclevel = 0;
1488                 $toc = "";
1489                 $full = "";
1490                 $head = array();
1491                 $sublevelCount = array();
1492                 $level = 0;
1493                 $prevlevel = 0;
                     # NOTE(review): $refer, $refers and $refcount used below are not
                     # initialised before this loop
1494                 foreach( $matches[3] as $headline ) {
1495                         $numbering = "";
1496                         if( $level ) {
1497                                 $prevlevel = $level;
1498                         }
1499                         $level = $matches[1][$headlineCount];
1500                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1501                                 # reset when we enter a new level
1502                                 $sublevelCount[$level] = 0;
1503                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1504                                 $toclevel += $level - $prevlevel;
1505                         }
1506                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1507                                 # reset when we step back a level
1508                                 $sublevelCount[$level+1]=0;
1509                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1510                                 $toclevel -= $prevlevel - $level;
1511                         }
1512                         # count number of headlines for each level
1513                         @$sublevelCount[$level]++;
                             # Build the dotted number from the counters of all levels up to
                             # this one, leaving out levels whose counter is empty
1514                         if( $doNumberHeadings || $doShowToc ) {
1515                                 $dot = 0;
1516                                 for( $i = 1; $i <= $level; $i++ ) {
1517                                         if( !empty( $sublevelCount[$i] ) ) {
1518                                                 if( $dot ) {
1519                                                         $numbering .= ".";
1520                                                 }
1521                                                 $numbering .= $sublevelCount[$i];
1522                                                 $dot = 1;
1523                                         }
1524                                 }
1525                         }
1526
1527                         # The canonized header is a version of the header text safe to use for links
1528                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1529                         $canonized_headline = Parser::unstrip( $headline, $this->mStripState );
1530
1531                         # strip out HTML
1532                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                             # $tocline keeps the readable text for the TOC entry; the anchor
                             # form has each run of the listed punctuation/space characters
                             # replaced by a single "_"
1533                         $tocline = trim( $canonized_headline );
1534                         $canonized_headline = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
1535                         $refer[$headlineCount] = $canonized_headline;
1536
1537                         # count how many in assoc. array so we can track dupes in anchors
1538                         @$refers[$canonized_headline]++;
1539                         $refcount[$headlineCount]=$refers[$canonized_headline];
1540
1541                         # Prepend the number to the heading text
1542
1543                         if( $doNumberHeadings || $doShowToc ) {
1544                                 $tocline = $numbering . " " . $tocline;
1545
1546                                 # Don't number the heading if it is the only one (looks silly)
1547                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1548                                         # the two are different if the line contains a link
1549                                         $headline=$numbering . " " . $headline;
1550                                 }
1551                         }
1552
1553                         # Create the anchor for linking from the TOC to the section
                             # The second and later headings with the same canonized text get
                             # "_<occurrence number>" appended
1554                         $anchor = $canonized_headline;
1555                         if($refcount[$headlineCount] > 1 ) {
1556                                 $anchor .= "_" . $refcount[$headlineCount];
1557                         }
1558                         if( $doShowToc ) {
1559                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1560                         }
                             # Headings are passed to the skin numbered from 1; 0 is used
                             # further down for the text before the first heading
1561                         if( $showEditLink ) {
1562                                 if ( empty( $head[$headlineCount] ) ) {
1563                                         $head[$headlineCount] = "";
1564                                 }
1565                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1566                         }
1567
1568                         # Add the edit section span
1569                         if( $rightClickHack ) {
1570                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1571                         }
1572
1573                         # give headline the correct <h#> tag
1574                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1575
1576                         $headlineCount++;
1577                 }
1578
1579                 if( $doShowToc ) {
                             # NOTE(review): $toclines is not read again in this function
1580                         $toclines = $headlineCount;
1581                         $toc .= $sk->tocUnindent( $toclevel );
1582                         $toc = $sk->tocTable( $toc );
1583                 }
1584
1585                 # split up and insert constructed headlines
1586
                     # The text is cut at every heading; block $i is the text before
                     # heading $i, so the rebuilt heading $head[$i] is appended after it.
1587                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1588                 $i = 0;
1589
1590                 foreach( $blocks as $block ) {
1591                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1592                             # This is the [edit] link that appears for the top block of text when
1593                                 # section editing is enabled
1594                                 $full .= $sk->editSectionLink(0);
1595                         }
1596                         $full .= $block;
                             # The TOC goes right after the first block of text
1597                         if( $doShowToc && !$i) {
1598                         # Top anchor now in skin
1599                                 $full = $full.$toc;
1600                         }
1601
1602                         if( !empty( $head[$i] ) ) {
1603                                 $full .= $head[$i];
1604                         }
1605                         $i++;
1606                 }
1607
1608                 return $full;
1609         }
1610
# Handle the text that follows an "ISBN" magic token.
#
# Tokens with an empty type are consumed first. If the next token is a text
# token it is consumed too: leading spaces are skipped and the following run of
# digits, dashes and upper-case letters is taken as the ISBN, which is turned
# into a link to Special:Booksources followed by the rest of the token text.
# If that run contains nothing but dashes (or is empty), the consumed text is
# handed back unchanged after "ISBN ". If the next token is not a text token,
# nothing more is consumed and "ISBN " is returned.
#
# $tokenizer: object providing previewToken() and nextToken()
# Returns the string that replaces the ISBN token.
/* private */ function doMagicISBN( &$tokenizer )
{
        # Skip over tokens whose type is empty
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" ) {
                return "ISBN ";
        }

        $token = $tokenizer->nextToken();
        $x = (string) $token["text"];
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        # strspn() measures the leading run without ever indexing past the end
        # of the string, so an exhausted or empty token raises no notice.
        # The (string) casts cover PHP versions where substr() returns false
        # once the offset reaches the end of the string.
        $spaces = strspn( $x, " " );
        $blank = str_repeat( " ", $spaces );
        $x = (string) substr( $x, $spaces );

        $length = strspn( $x, $valid );
        $isbn = (string) substr( $x, 0, $length );
        $x = (string) substr( $x, $length );

        $num = str_replace( "-", "", $isbn );
        if ( "" == $num ) {
                # Not an ISBN after all. $isbn can still hold dashes at this
                # point; put them back so no input text is lost.
                return "ISBN $blank$isbn$x";
        }

        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
        return "<a href=\"" . $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>" . $x;
}
# Handle the text that follows an "RFC" magic token.
#
# Tokens with an empty type are consumed first. If the next token is a text
# token it is consumed too: leading spaces are skipped and the following run of
# digits is taken as the RFC number, which is turned into an external link
# built from the "rfcurl" message ("$1" in the message is replaced by the
# number), followed by the rest of the token text. Without a number the
# consumed text is handed back unchanged after "RFC ". If the next token is not
# a text token, nothing more is consumed and "RFC " is returned.
#
# $tokenizer: object providing previewToken() and nextToken()
# Returns the string that replaces the RFC token.
/* private */ function doMagicRFC( &$tokenizer )
{
        # Skip over tokens whose type is empty
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" ) {
                return "RFC ";
        }

        $token = $tokenizer->nextToken();
        $x = (string) $token["text"];

        # strspn() measures the leading run without ever indexing past the end
        # of the string, so an exhausted or empty token raises no notice.
        # The (string) casts cover PHP versions where substr() returns false
        # once the offset reaches the end of the string.
        $spaces = strspn( $x, " " );
        $blank = str_repeat( " ", $spaces );
        $x = (string) substr( $x, $spaces );

        $length = strspn( $x, "0123456789" );
        $rfc = (string) substr( $x, 0, $length );
        $x = (string) substr( $x, $length );

        if ( "" == $rfc ) {
                # Plain assignment: there is no earlier value to append to
                return "RFC $blank$x";
        }

        $url = str_replace( "$1", $rfc, wfMsg( "rfcurl" ) );
        $sk =& $this->mOptions->getSkin();
        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
        return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1699
# Entry point producing altered wiki markup rather than HTML.
#
# Line endings are normalised to "\n", the text is passed through strip(),
# pstPass2() and unstrip(), and the result is returned.
#
# $text:       wiki markup
# $title:      stored (by reference) as the parser's current title
# $user:       handed on to pstPass2()
# $options:    stored as the parser's current options
# $clearState: when true, clearState() is called before the text is processed
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOutputType = OT_WIKI;
        $this->mTitle =& $title;
        $this->mOptions = $options;

        if ( $clearState ) {
                $this->clearState();
        }

        # Filled in by strip() and handed back to unstrip()
        $stripped = false;
        $wikitext = $this->strip( str_replace( "\r\n", "\n", $text ), $stripped, false );
        $wikitext = $this->pstPass2( $wikitext, $user );
        return $this->unstrip( $wikitext, $stripped );
}
1717
# Pre-save transformations applied to the text between strip() and unstrip():
#   1. variable replacement via replaceVariables()
#   2. signature expansion: ~~~~~ (date), ~~~~ (user link and date),
#      ~~~ (user link)
#   3. context links: [[|name]], [[name (context)|]], [[ns:name|]] and
#      [[ns:name (context)|]] get their missing half filled in
#   4. trailing whitespace is trimmed and the MAG_END magic word removed
#
# $text: wiki markup
# $user: object providing getName() and getOption( "nickname" )
# Returns the transformed wiki markup.
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone, $wgCurParser;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }
        if ( isset( $wgLocaltimezone ) ) {
                $oldtz = getenv( "TZ" ); putenv( "TZ=$wgLocaltimezone" );
        }
        /* Note: this is an ugly timezone hack for the European wikis */
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if ( isset( $wgLocaltimezone ) ) putenv( "TZ=$oldtz" );

        # The tilde runs are literal strings, so str_replace() is used: with
        # preg_replace() a "$1" or "\1" inside the user name, nickname or date
        # would be treated as a backreference and eaten from the signature.
        # Longest run first, so that ~~~~~ is not consumed by the shorter forms.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # If the current title itself looks like "name (context)", that context
        # is added to [[|page]] links
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        /*
        $mw =& MagicWord::get( MAG_SUBST );
        $wgCurParser = $this->fork();
        $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
        $this->merge( $wgCurParser );
        */

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1788
# Prime the members that parse() normally sets up (title, options, output
# type), so that code outside the class can call individual members safely.
#
# $title:      stored by reference as the current title
# $options:    stored as the current options
# $outputType: stored as the current output type
# $clearState: when true, clearState() is called afterwards
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1800
# Run variable replacement over a message text, with the output type set to
# OT_MSG and $wgTitle as the current title.
#
# A static flag marks the call as in progress; a nested call made while the
# flag is set gets its text back untouched, which prevents infinite recursion.
#
# $text:    message text
# $options: stored as the current options
# Returns the text after replaceVariables(), or the input text for a nested call.
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $busy = false;

        if ( !$busy ) {
                $busy = true;

                $this->mOutputType = OT_MSG;
                $this->mOptions = $options;
                $this->mTitle = $wgTitle;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $busy = false;
        }
        return $text;
}
1820 }
1821
# Holds the result of a parser run: the output text, the language links and
# category links that were collected, and a flag recording whether old-style
# magic variables were encountered.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # All arguments are optional and default to an empty result
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each hands back whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold another ParserOutput into this one: its language links and
        # category links are appended to the matching lists here, and the
        # old-magic flag is set if either object has it set. mText is not touched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links, not from
                # this object's own language links
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1851
# Bundle of settings that steer a parse. Part of it mirrors site-wide
# configuration globals, the rest comes from the preferences of a user object
# (see initialiseFromUser()).
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Read accessors, one per option
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Write accessors. Each one delegates to wfSetVar() (wfSetRef() for the
        # skin) and returns that helper's result (presumably the previous
        # value -- confirm against the helpers' definitions).
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Factory: build a ParserOptions object and fill it from $user.
        # Returns the new object.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() declares its parameter by reference, so
                # no call-time "&" is needed here (that syntax is rejected by
                # PHP 5.4 and later).
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Populate every option of this object.
        #   $userInput: user object whose getSkin()/getOption() supply the
        #               per-user settings. When it evaluates to false, a fresh
        #               User is created and flagged as loaded via
        #               setLoaded( true ), and that object is used instead.
        # The site-wide switches are copied from the corresponding $wg* globals.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                # Site-wide configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user settings
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
1924
# Regex callback for Parser::replaceVariables.
# A plain function is used as the callback, so it forwards the match array
# to braceSubstitution() on the parser held in the global $wgCurParser and
# returns that method's result.
function wfBraceSubstitution( $matches )
{
        # Call through $GLOBALS directly: no local copy of the parser is made.
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
1931
1932 ?>