includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36 define( "MAX_INCLUDE_REPEAT", 5 );
  37
  38 # Recursion depth of variable/inclusion evaluation
  39 define( "MAX_INCLUDE_PASSES", 3 );
  40
  41 # Allowed values for $mOutputType
  42 define( "OT_HTML", 1 );
  43 define( "OT_WIKI", 2 );
  44 define( "OT_MSG", 3 );
  45
  46 # prefix for escaping, used in two functions at least
  47 define( "UNIQ_PREFIX", "NaodW29");
  48
  49 class Parser
  50 {
  51         # Cleared with clearState():
  52         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array();
  53         var $mVariables, $mIncludeCount;
  54
  55         # Temporary:
  56         var $mOptions, $mTitle, $mOutputType;
  57
  58         function Parser()
  59         {
  60                 $this->clearState();
  61         }
  62
  63         function clearState()
  64         {
  65                 $this->mOutput = new ParserOutput;
  66                 $this->mAutonumber = 0;
  67                 $this->mLastSection = "";
  68                 $this->mDTopen = false;
  69                 $this->mVariables = false;
  70                 $this->mIncludeCount = array();
  71                 $this->mStripState = array();
  72         }
  73
  74         # First pass--just handle <nowiki> sections, pass the rest off
  75         # to doWikiPass2() which does all the real work.
  76         #
  77         # Returns a ParserOutput
  78         #
  79         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  80         {
  81                 $fname = "Parser::parse";
  82                 wfProfileIn( $fname );
  83
  84                 if ( $clearState ) {
  85                         $this->clearState();
  86                 }
  87
  88                 $this->mOptions = $options;
  89                 $this->mTitle =& $title;
  90                 $this->mOutputType = OT_HTML;
  91
  92                 $stripState = NULL;
  93                 $text = $this->strip( $text, $this->mStripState );
  94                 $text = $this->doWikiPass2( $text, $linestart );
  95                 $text = $this->unstrip( $text, $this->mStripState );
  96
  97                 $this->mOutput->setText( $text );
  98                 wfProfileOut( $fname );
  99                 return $this->mOutput;
 100         }
 101
 102         /* static */ function getRandomString()
 103         {
 104                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 105         }
 106
  107         # Replaces all occurrences of <$tag>content</$tag> in the text
 108         # with a random marker and returns the new text. the output parameter
 109         # $content will be an associative array filled with data on the form
 110         # $unique_marker => content.
 111
 112         # If $content is already set, the additional entries will be appended
 113
 114         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 115                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 116                 if ( !$content ) {
 117                         $content = array( );
 118                 }
 119                 $n = 1;
 120                 $stripped = "";
 121
 122                 while ( "" != $text ) {
 123                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 124                         $stripped .= $p[0];
 125                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 126                                 $text = "";
 127                         } else {
 128                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 129                                 $marker = $rnd . sprintf("%08X", $n++);
 130                                 $content[$marker] = $q[0];
 131                                 $stripped .= $marker;
 132                                 $text = $q[1];
 133                         }
 134                 }
 135                 return $stripped;
 136         }
 137
 138         # Strips <nowiki>, <pre> and <math>
 139         # Returns the text, and fills an array with data needed in unstrip()
 140         # If the $state is already a valid strip state, it adds to the state
 141         #
 142         function strip( $text, &$state )
 143         {
 144                 $render = ($this->mOutputType == OT_HTML);
 145                 if ( $state ) {
 146                         $nowiki_content = $state['nowiki'];
 147                         $hiero_content = $state['hiero'];
 148                         $math_content = $state['math'];
 149                         $pre_content = $state['pre'];
 150                         $item_content = $state['item'];
 151                 } else {
 152                         $nowiki_content = array();
 153                         $hiero_content = array();
 154                         $math_content = array();
 155                         $pre_content = array();
 156                         $item_content = array();
 157                 }
 158
 159                 # Replace any instances of the placeholders
 160                 $uniq_prefix = UNIQ_PREFIX;
 161                 $text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 162
 163                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 164                 foreach( $nowiki_content as $marker => $content ){
 165                         if( $render ){
 166                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 167                         } else {
 168                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 169                         }
 170                 }
 171
 172                 if( $GLOBALS['wgUseWikiHiero'] ){
 173                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 174                         foreach( $hiero_content as $marker => $content ){
 175                                 if( $render ){
 176                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 177                                 } else {
 178                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 179                                 }
 180                         }
 181                 }
 182
 183                 if( $this->mOptions->getUseTeX() ){
 184                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 185                         foreach( $math_content as $marker => $content ){
 186                                 if( $render ){
 187                                         $math_content[$marker] = renderMath( $content );
 188                                 } else {
 189                                         $math_content[$marker] = "<math>$content</math>";
 190                                 }
 191                         }
 192                 }
 193
 194                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 195                 foreach( $pre_content as $marker => $content ){
 196                         if( $render ){
 197                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 198                         } else {
 199                                 $pre_content[$marker] = "<pre>$content</pre>";
 200                         }
 201                 }
 202
 203                 $state = array(
 204                   'nowiki' => $nowiki_content,
 205                   'hiero' => $hiero_content,
 206                   'math' => $math_content,
 207                   'pre' => $pre_content,
 208                   'item' => $item_content
 209                 );
 210                 return $text;
 211         }
 212
 213         function unstrip( $text, &$state )
 214         {
 215                 # Must expand in reverse order, otherwise nested tags will be corrupted
 216                 /*
 217                 $dicts = array( 'item', 'pre', 'math', 'hiero', 'nowiki' );
 218                 foreach ( $dicts as $dictName ) {
 219                         $content_dict = $state[$dictName];
 220                         foreach( $content_dict as $marker => $content ){
 221                                 $text = str_replace( $marker, $content, $text );
 222                         }
 223                 }*/
 224
 225                 $contentDict = end( $state );
 226                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 227                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 228                                 $text = str_replace( key( $contentDict ), $content, $text );
 229                         }
 230                 }
 231
 232                 return $text;
 233         }
 234
 235         # Add an item to the strip state
 236         # Returns the unique tag which must be inserted into the stripped text
 237         # The tag will be replaced with the original text in unstrip()
 238
 239         function insertStripItem( $text, &$state )
 240         {
 241                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 242                 if ( !$state ) {
 243                         $state = array(
 244                           'nowiki' => array(),
 245                           'hiero' => array(),
 246                           'math' => array(),
 247                           'pre' => array(),
 248                           'item' => array()
 249                         );
 250                 }
 251                 $state['item'][$rnd] = $text;
 252                 return $rnd;
 253         }
 254
 255         function categoryMagic ()
 256         {
 257                 global $wgLang , $wgUser ;
 258                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 259                 $id = $this->mTitle->getArticleID() ;
 260                 $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
 261                 $ti = $this->mTitle->getText() ;
 262                 $ti = explode ( ":" , $ti , 2 ) ;
 263                 if ( $cat != $ti[0] ) return "" ;
 264                 $r = '<br style="clear:both;"/>\n';
 265
 266                 $articles = array() ;
 267                 $parents = array () ;
 268                 $children = array() ;
 269
 270
 271 #               $sk =& $this->mGetSkin();
 272                 $sk =& $wgUser->getSkin() ;
 273
 274                 $data = array () ;
 275                 $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 276                 $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 277
 278                 $res = wfQuery ( $sql1, DB_READ ) ;
 279                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 280
 281                 $res = wfQuery ( $sql2, DB_READ ) ;
 282                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 283
 284
 285                 foreach ( $data AS $x )
 286                 {
 287                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 288                         if ( $t != "" ) $t .= ":" ;
 289                         $t .= $x->cur_title ;
 290
 291                         $y = explode ( ":" , $t , 2 ) ;
 292                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 293                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 294                         } else {
 295                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 296                         }
 297                 }
 298                 wfFreeResult ( $res ) ;
 299
 300                 # Children
 301                 if ( count ( $children ) > 0 )
 302                 {
 303                         asort ( $children ) ;
 304                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 305                         $r .= implode ( ", " , $children ) ;
 306                 }
 307
 308                 # Articles
 309                 if ( count ( $articles ) > 0 )
 310                 {
 311                         asort ( $articles ) ;
 312                         $h =  wfMsg( "category_header", $ti[1] );
 313                         $r .= "<h2>{$h}</h2>\n" ;
 314                         $r .= implode ( ", " , $articles ) ;
 315                 }
 316
 317
 318                 return $r ;
 319         }
 320
 321         function getHTMLattrs ()
 322         {
 323                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 324                                 "title", "align", "lang", "dir", "width", "height",
 325                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 326                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 327                                 /* FONT */ "type", "start", "value", "compact",
 328                                 /* For various lists, mostly deprecated but safe */
 329                                 "summary", "width", "border", "frame", "rules",
 330                                 "cellspacing", "cellpadding", "valign", "char",
 331                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 332                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 333                                 "id", "class", "name", "style" /* For CSS */
 334                                 );
 335                 return $htmlattrs ;
 336         }
 337
 338         function fixTagAttributes ( $t )
 339         {
 340                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 341                 $htmlattrs = $this->getHTMLattrs() ;
 342
 343                 # Strip non-approved attributes from the tag
 344                 $t = preg_replace(
 345                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 346                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 347                         $t);
 348                 # Strip javascript "expression" from stylesheets. Brute force approach:
 349                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 350
 351                 if( preg_match(
 352                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 353                         wfMungeToUtf8( $t ) ) )
 354                 {
 355                         $t="";
 356                 }
 357
 358                 return trim ( $t ) ;
 359         }
 360
 361         function doTableStuff ( $t )
 362         {
 363                 $t = explode ( "\n" , $t ) ;
 364                 $td = array () ; # Is currently a td tag open?
 365                         $ltd = array () ; # Was it TD or TH?
 366                         $tr = array () ; # Is currently a tr tag open?
 367                         $ltr = array () ; # tr attributes
 368                         foreach ( $t AS $k => $x )
 369                         {
 370                                 $x = rtrim ( $x ) ;
 371                                 $fc = substr ( $x , 0 , 1 ) ;
 372                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 373                                 {
 374                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 375                                         array_push ( $td , false ) ;
 376                                         array_push ( $ltd , "" ) ;
 377                                         array_push ( $tr , false ) ;
 378                                         array_push ( $ltr , "" ) ;
 379                                 }
 380                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 381                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 382                                 {
 383                                         $z = "</table>\n" ;
 384                                         $l = array_pop ( $ltd ) ;
 385                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 386                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 387                                         array_pop ( $ltr ) ;
 388                                         $t[$k] = $z ;
 389                                 }
 390                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 391                                                 {
 392                                                 $z = trim ( substr ( $x , 2 ) ) ;
 393                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 394                                                 }*/
 395                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 396                                 {
 397                                         $x = substr ( $x , 1 ) ;
 398                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 399                                         $z = "" ;
 400                                         $l = array_pop ( $ltd ) ;
 401                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 402                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 403                                         array_pop ( $ltr ) ;
 404                                         $t[$k] = $z ;
 405                                         array_push ( $tr , false ) ;
 406                                         array_push ( $td , false ) ;
 407                                         array_push ( $ltd , "" ) ;
 408                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 409                                 }
 410                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 411                                 {
 412                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 413                                         {
 414                                                 $fc = "+" ;
 415                                                 $x = substr ( $x , 1 ) ;
 416                                         }
 417                                         $after = substr ( $x , 1 ) ;
 418                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 419                                         $after = explode ( "||" , $after ) ;
 420                                         $t[$k] = "" ;
 421                                         foreach ( $after AS $theline )
 422                                         {
 423                                                 $z = "" ;
 424                                                 if ( $fc != "+" )
 425                                                 {
 426                                                         $tra = array_pop ( $ltr ) ;
 427                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 428                                                         array_push ( $tr , true ) ;
 429                                                         array_push ( $ltr , "" ) ;
 430                                                 }
 431
 432                                                 $l = array_pop ( $ltd ) ;
 433                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 434                                                 if ( $fc == "|" ) $l = "td" ;
 435                                                 else if ( $fc == "!" ) $l = "th" ;
 436                                                 else if ( $fc == "+" ) $l = "caption" ;
 437                                                 else $l = "" ;
 438                                                 array_push ( $ltd , $l ) ;
 439                                                 $y = explode ( "|" , $theline , 2 ) ;
 440                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 441                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 442                                                 $t[$k] .= $y ;
 443                                                 array_push ( $td , true ) ;
 444                                         }
 445                                 }
 446                         }
 447
 448                 # Closing open td, tr && table
 449                 while ( count ( $td ) > 0 )
 450                 {
 451                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 452                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 453                         $t[] = "</table>" ;
 454                 }
 455
 456                 $t = implode ( "\n" , $t ) ;
 457                 #               $t = $this->removeHTMLtags( $t );
 458                 return $t ;
 459         }
 460
 461         # Well, OK, it's actually about 14 passes.  But since all the
 462         # hard lifting is done inside PHP's regex code, it probably
 463         # wouldn't speed things up much to add a real parser.
 464         #
 465         function doWikiPass2( $text, $linestart )
 466         {
 467                 $fname = "Parser::doWikiPass2";
 468                 wfProfileIn( $fname );
 469
 470                 $text = $this->removeHTMLtags( $text );
 471                 $text = $this->replaceVariables( $text );
 472
 473                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 474
 475                 $text = $this->doHeadings( $text );
 476
 477                 if($this->mOptions->getUseDynamicDates()) {
 478                         global $wgDateFormatter;
 479                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 480                 }
 481
 482                 $text = $this->replaceExternalLinks( $text );
 483                 $text = $this->doTokenizedParser ( $text );
 484
 485                 $text = $this->doTableStuff ( $text ) ;
 486
 487                 $text = $this->formatHeadings( $text );
 488
 489                 $sk =& $this->mOptions->getSkin();
 490                 $text = $sk->transformContent( $text );
 491                 $fixtags = array(
 492                         "/<hr *>/i" => '<hr/>',
 493                         "/<br *>/i" => '<br/>',
 494                         "/<center *>/i"=>'<span style="text-align:center;">',
 495                         "/<\\/center *>/i" => '</span>'
 496                 );
 497                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 498
 499                 # Clean up spare ampersands; note that we probably ought to be
 500                 # more careful about named entities.
 501                 $text = preg_replace(
 502                         '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/',
 503                         '&amp;',
 504                         $text );
 505
 506                 $text .= $this->categoryMagic () ;
 507
 508                 # needs to be called last
 509                 $text = $this->doBlockLevels( $text, $linestart );
 510
 511                 wfProfileOut( $fname );
 512                 return $text;
 513         }
 514
 515
 516         /* private */ function doHeadings( $text )
 517         {
 518                 for ( $i = 6; $i >= 1; --$i ) {
 519                         $h = substr( "======", 0, $i );
 520                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 521                           "<h{$i}>\\1</h{$i}>\\2", $text );
 522                 }
 523                 return $text;
 524         }
 525
 526         # Note: we have to do external links before the internal ones,
 527         # and otherwise take great care in the order of things here, so
 528         # that we don't end up interpreting some URLs twice.
 529
 530         /* private */ function replaceExternalLinks( $text )
 531         {
 532                 $fname = "Parser::replaceExternalLinks";
 533                 wfProfileIn( $fname );
 534                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 535                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 536                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 537                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 538                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 539                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 540                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 541                 wfProfileOut( $fname );
 542                 return $text;
 543         }
 544
 545         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 546         {
 547                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 548                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 549
 550                 # this is  the list of separators that should be ignored if they
 551                 # are the last character of an URL but that should be included
 552                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 553                 # in this case, the last comma should not become part of the URL,
 554                 # but in "www.foo.com/123,2342,32.htm" it should.
 555                 $sep = ",;\.:";
 556                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 557                 $images = "gif|png|jpg|jpeg";
 558
 559                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 560                 # they are interpreted as part of the string (used to tell PHP
 561                 # that the content of the string should be inserted there).
 562                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 563                   "((?i){$images})([^{$uc}]|$)/";
 564
 565                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 566                 $sk =& $this->mOptions->getSkin();
 567
 568                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 569                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 570                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 571                 }
 572                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 573                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 574                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 575                   "</a>\\5", $s );
 576                 $s = str_replace( $unique, $protocol, $s );
 577
 578                 $a = explode( "[{$protocol}:", " " . $s );
 579                 $s = array_shift( $a );
 580                 $s = substr( $s, 1 );
 581
 582                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 583                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 584
 585                 foreach ( $a as $line ) {
 586                         if ( preg_match( $e1, $line, $m ) ) {
 587                                 $link = "{$protocol}:{$m[1]}";
 588                                 $trail = $m[2];
 589                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 590                                 else { $text = wfEscapeHTML( $link ); }
 591                         } else if ( preg_match( $e2, $line, $m ) ) {
 592                                 $link = "{$protocol}:{$m[1]}";
 593                                 $text = $m[2];
 594                                 $trail = $m[3];
 595                         } else {
 596                                 $s .= "[{$protocol}:" . $line;
 597                                 continue;
 598                         }
 599                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 600                                 $paren = "";
 601                         } else {
 602                                 # Expand the URL for printable version
 603                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 604                         }
 605                         $la = $sk->getExternalLinkAttributes( $link, $text );
 606                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 607
 608                 }
 609                 return $s;
 610         }
 611
 612         /* private */ function handle3Quotes( &$state, $token )
 613         {
 614                 if ( $state["strong"] !== false ) {
 615                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 616                         {
 617                                 # ''' lala ''lala '''
 618                                 $s = "</em></strong><em>";
 619                         } else {
 620                                 $s = "</strong>";
 621                         }
 622                         $state["strong"] = FALSE;
 623                 } else {
 624                         $s = "<strong>";
 625                         $state["strong"] = $token["pos"];
 626                 }
 627                 return $s;
 628         }
 629
 630         /* private */ function handle2Quotes( &$state, $token )
 631         {
 632                 if ( $state["em"] !== false ) {
 633                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 634                         {
 635                                 # ''lala'''lala'' ....'''
 636                                 $s = "</strong></em><strong>";
 637                         } else {
 638                                 $s = "</em>";
 639                         }
 640                         $state["em"] = FALSE;
 641                 } else {
 642                         $s = "<em>";
 643                         $state["em"] = $token["pos"];
 644                 }
 645                 return $s;
 646         }
 647
 648         /* private */ function handle5Quotes( &$state, $token )
 649         {
 650                 $s = "";
 651                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 652                         if ( $state["em"] < $state["strong"] ) {
 653                                 $s .= "</strong></em>";
 654                         } else {
 655                                 $s .= "</em></strong>";
 656                         }
 657                         $state["strong"] = $state["em"] = FALSE;
 658                 } elseif ( $state["em"] !== false ) {
 659                         $s .= "</em><strong>";
 660                         $state["em"] = FALSE;
 661                         $state["strong"] = $token["pos"];
 662                 } elseif ( $state["strong"] !== false ) {
 663                         $s .= "</strong><em>";
 664                         $state["strong"] = FALSE;
 665                         $state["em"] = $token["pos"];
 666                 } else { # not $em and not $strong
 667                         $s .= "<strong><em>";
 668                         $state["strong"] = $state["em"] = $token["pos"];
 669                 }
 670                 return $s;
 671         }
 672
 673         /* private */ function doTokenizedParser( $str )
 674         {
 675                 global $wgLang; # for language specific parser hook
 676
 677                 $tokenizer=Tokenizer::newFromString( $str );
 678                 $tokenStack = array();
 679
 680                 $s="";
 681                 $state["em"]      = FALSE;
 682                 $state["strong"]  = FALSE;
 683                 $tagIsOpen = FALSE;
 684                 $threeopen = false;
 685
 686                 # The tokenizer splits the text into tokens and returns them one by one.
 687                 # Every call to the tokenizer returns a new token.
 688                 while ( $token = $tokenizer->nextToken() )
 689                 {
 690                         switch ( $token["type"] )
 691                         {
 692                                 case "text":
 693                                         # simple text with no further markup
 694                                         $txt = $token["text"];
 695                                         break;
 696                                 case "[[[":
 697                                         # remember the tag opened with 3 [
 698                                         $threeopen = true;
 699                                 case "[[":
 700                                         # link opening tag.
 701                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 702                                         $tagIsOpen = TRUE;
 703                                         array_push( $tokenStack, $token );
 704                                         $txt="";
 705                                         break;
 706
 707                                 case "]]]":
 708                                 case "]]":
 709                                         # link close tag.
 710                                         # get text from stack, glue it together, and call the code to handle a
 711                                         # link
 712
 713                                         if ( count( $tokenStack ) == 0 )
 714                                         {
 715                                                 # stack empty. Found a ]] without an opening [[
 716                                                 $txt = "]]";
 717                                         } else {
 718                                                 $linkText = "";
 719                                                 $lastToken = array_pop( $tokenStack );
 720                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 721                                                 {
 722                                                         if( !empty( $lastToken["text"] ) ) {
 723                                                                 $linkText = $lastToken["text"] . $linkText;
 724                                                         }
 725                                                         $lastToken = array_pop( $tokenStack );
 726                                                 }
 727
 728                                                 $txt = $linkText ."]]";
 729
 730                                                 if( isset( $lastToken["text"] ) ) {
 731                                                         $prefix = $lastToken["text"];
 732                                                 } else {
 733                                                         $prefix = "";
 734                                                 }
 735                                                 $nextToken = $tokenizer->previewToken();
 736                                                 if ( $nextToken["type"] == "text" )
 737                                                 {
 738                                                         # Preview just looks at it. Now we have to fetch it.
 739                                                         $nextToken = $tokenizer->nextToken();
 740                                                         $txt .= $nextToken["text"];
 741                                                 }
 742                                                 $fakestate = $this->mStripState;
 743                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$fakestate), $prefix );
 744
 745                                                 # did the tag start with 3 [ ?
 746                                                 if($threeopen) {
 747                                                         # show the first as text
 748                                                         $txt = "[".$txt;
 749                                                         $threeopen=false;
 750                                                 }
 751
 752                                         }
 753                                         $tagIsOpen = (count( $tokenStack ) != 0);
 754                                         break;
 755                                 case "----":
 756                                         $txt = "\n<hr />\n";
 757                                         break;
 758                                 case "'''":
 759                                         # This and the three next ones handle quotes
 760                                         $txt = $this->handle3Quotes( $state, $token );
 761                                         break;
 762                                 case "''":
 763                                         $txt = $this->handle2Quotes( $state, $token );
 764                                         break;
 765                                 case "'''''":
 766                                         $txt = $this->handle5Quotes( $state, $token );
 767                                         break;
 768                                 case "":
 769                                         # empty token
 770                                         $txt="";
 771                                         break;
 772                                 case "RFC ":
 773                                         if ( $tagIsOpen ) {
 774                                                 $txt = "RFC ";
 775                                         } else {
 776                                                 $txt = $this->doMagicRFC( $tokenizer );
 777                                         }
 778                                         break;
 779                                 case "ISBN ":
 780                                         if ( $tagIsOpen ) {
 781                                                 $txt = "ISBN ";
 782                                         } else {
 783                                                 $txt = $this->doMagicISBN( $tokenizer );
 784                                         }
 785                                         break;
 786                                 default:
 787                                         # Call language specific Hook.
 788                                         $txt = $wgLang->processToken( $token, $tokenStack );
 789                                         if ( NULL == $txt ) {
 790                                                 # An unkown token. Highlight.
 791                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 792                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 793                                         }
 794                                         break;
 795                         }
 796                         # If we're parsing the interior of a link, don't append the interior to $s,
 797                         # but push it to the stack so it can be processed when a ]] token is found.
 798                         if ( $tagIsOpen  && $txt != "" ) {
 799                                 $token["type"] = "text";
 800                                 $token["text"] = $txt;
 801                                 array_push( $tokenStack, $token );
 802                         } else {
 803                                 $s .= $txt;
 804                         }
 805                 } #end while
 806                 if ( count( $tokenStack ) != 0 )
 807                 {
 808                         # still objects on stack. opened [[ tag without closing ]] tag.
 809                         $txt = "";
 810                         while ( $lastToken = array_pop( $tokenStack ) )
 811                         {
 812                                 if ( $lastToken["type"] == "text" )
 813                                 {
 814                                         $txt = $lastToken["text"] . $txt;
 815                                 } else {
 816                                         $txt = $lastToken["type"] . $txt;
 817                                 }
 818                         }
 819                         $s .= $txt;
 820                 }
 821                 return $s;
 822         }
 823
 824         /* private */ function handleInternalLink( $line, $prefix )
 825         {
 826                 global $wgLang, $wgLinkCache;
 827                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 828                 static $fname = "Parser::handleInternalLink" ;
 829                 wfProfileIn( $fname );
 830
 831                 wfProfileIn( "$fname-setup" );
 832                 static $tc = FALSE;
 833                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 834                 $sk =& $this->mOptions->getSkin();
 835
 836                 # Match a link having the form [[namespace:link|alternate]]trail
 837                 static $e1 = FALSE;
 838                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 839                 # Match the end of a line for a word that's not followed by whitespace,
 840                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 841                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 842                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 843                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 844
 845
 846                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 847                 static $image = FALSE;
 848                 static $special = FALSE;
 849                 static $media = FALSE;
 850                 static $category = FALSE;
 851                 if ( !$image ) { $image = Namespace::getImage(); }
 852                 if ( !$special ) { $special = Namespace::getSpecial(); }
 853                 if ( !$media ) { $media = Namespace::getMedia(); }
 854                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 855
 856                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 857
 858                 wfProfileOut( "$fname-setup" );
 859                 $s = "";
 860
 861                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 862                         $text = $m[2];
 863                         $trail = $m[3];
 864                 } else { # Invalid form; output directly
 865                         $s .= $prefix . "[[" . $line ;
 866                         return $s;
 867                 }
 868
 869                 /* Valid link forms:
 870                 Foobar -- normal
 871                 :Foobar -- override special treatment of prefix (images, language links)
 872                 /Foobar -- convert to CurrentPage/Foobar
 873                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 874                 */
 875                 $c = substr($m[1],0,1);
 876                 $noforce = ($c != ":");
 877                 if( $c == "/" ) { # subpage
 878                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 879                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 880                                 $noslash=$m[1];
 881                         } else {
 882                                 $noslash=substr($m[1],1);
 883                         }
 884                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 885                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 886                                 if( "" == $text ) {
 887                                         $text= $m[1];
 888                                 } # this might be changed for ugliness reasons
 889                         } else {
 890                                 $link = $noslash; # no subpage allowed, use standard link
 891                         }
 892                 } elseif( $noforce ) { # no subpage
 893                         $link = $m[1];
 894                 } else {
 895                         $link = substr( $m[1], 1 );
 896                 }
 897                 if( "" == $text )
 898                         $text = $link;
 899
 900                 $nt = Title::newFromText( $link );
 901                 if( !$nt ) {
 902                         $s .= $prefix . "[[" . $line;
 903                         return $s;
 904                 }
 905                 $ns = $nt->getNamespace();
 906                 $iw = $nt->getInterWiki();
 907                 if( $noforce ) {
 908                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 909                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 910                                 return (trim($s) == '')? '': $s;
 911                         }
 912                         if( $ns == $image ) {
 913                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 914                                 $wgLinkCache->addImageLinkObj( $nt );
 915                                 return $s;
 916                         }
 917                 }
 918                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 919                     ( strpos( $link, "#" ) == FALSE ) ) {
 920                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 921                         return $s;
 922                 }
 923
 924                 # Category feature
 925                 $catns = strtoupper ( $nt->getDBkey () ) ;
 926                 $catns = explode ( ":" , $catns ) ;
 927                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 928                 else $catns = "" ;
 929                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 930                         $t = explode ( ":" , $nt->getText() ) ;
 931                         array_shift ( $t ) ;
 932                         $t = implode ( ":" , $t ) ;
 933                         $t = $wgLang->ucFirst ( $t ) ;
 934                         $nnt = Title::newFromText ( $category.":".$t ) ;
 935                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 936                         $this->mOutput->mCategoryLinks[] = $t ;
 937                         $s .= $prefix . $trail ;
 938                         return $s ;
 939                 }
 940
 941                 if( $ns == $media ) {
 942                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 943                         $wgLinkCache->addImageLinkObj( $nt );
 944                         return $s;
 945                 } elseif( $ns == $special ) {
 946                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 947                         return $s;
 948                 }
 949                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 950
 951                 wfProfileOut( $fname );
 952                 return $s;
 953         }
 954
 955         # Some functions here used by doBlockLevels()
 956         #
 957         /* private */ function closeParagraph()
 958         {
 959                 $result = "";
 960                 if ( '' != $this->mLastSection ) {
 961                         $result = "</" . $this->mLastSection  . ">\n";
 962                 }
 963                 $this->mLastSection = "";
 964                 return $result;
 965         }
 966         # getCommon() returns the length of the longest common substring
 967         # of both arguments, starting at the beginning of both.
 968         #
 969         /* private */ function getCommon( $st1, $st2 )
 970         {
 971                 $fl = strlen( $st1 );
 972                 $shorter = strlen( $st2 );
 973                 if ( $fl < $shorter ) { $shorter = $fl; }
 974
 975                 for ( $i = 0; $i < $shorter; ++$i ) {
 976                         if ( $st1{$i} != $st2{$i} ) { break; }
 977                 }
 978                 return $i;
 979         }
 980         # These next three functions open, continue, and close the list
 981         # element appropriate to the prefix character passed into them.
 982         #
 983         /* private */ function openList( $char )
 984     {
 985                 $result = $this->closeParagraph();
 986
 987                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 988                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 989                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 990                 else if ( ";" == $char ) {
 991                         $result .= "<dl><dt>";
 992                         $this->mDTopen = true;
 993                 }
 994                 else { $result = "<!-- ERR 1 -->"; }
 995
 996                 return $result;
 997         }
 998
 999         /* private */ function nextItem( $char )
1000         {
1001                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
1002                 else if ( ":" == $char || ";" == $char ) {
1003                         $close = "</dd>";
1004                         if ( $this->mDTopen ) { $close = "</dt>"; }
1005                         if ( ";" == $char ) {
1006                                 $this->mDTopen = true;
1007                                 return $close . "<dt>";
1008                         } else {
1009                                 $this->mDTopen = false;
1010                                 return $close . "<dd>";
1011                         }
1012                 }
1013                 return "<!-- ERR 2 -->";
1014         }
1015
1016         /* private */function closeList( $char )
1017         {
1018                 if ( "*" == $char ) { $text = "</li></ul>"; }
1019                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1020                 else if ( ":" == $char ) {
1021                         if ( $this->mDTopen ) {
1022                                 $this->mDTopen = false;
1023                                 $text = "</dt></dl>";
1024                         } else {
1025                                 $text = "</dd></dl>";
1026                         }
1027                 }
1028                 else {  return "<!-- ERR 3 -->"; }
1029                 return $text."\n";
1030         }
1031
1032         /* private */ function doBlockLevels( $text, $linestart )
1033         {
1034                 $fname = "Parser::doBlockLevels";
1035                 wfProfileIn( $fname );
1036                 # Parsing through the text line by line.  The main thing
1037                 # happening here is handling of block-level elements p, pre,
1038                 # and making lists from lines starting with * # : etc.
1039                 #
1040                 $a = explode( "\n", $text );
1041                 $lastPref = $text = $lastLine = '';
1042                 $this->mDTopen = $inBlockElem = false;
1043
1044                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1045                 foreach ( $a as $t ) {
1046                         if ( "" != $text ) { $text .= "\n"; }
1047
1048                         $oLine = $t;
1049                         $opl = strlen( $lastPref );
1050                         $npl = strspn( $t, "*#:;" );
1051                         $pref = substr( $t, 0, $npl );
1052                         $pref2 = str_replace( ";", ":", $pref );
1053                         $t = substr( $t, $npl );
1054
1055                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1056                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1057
1058                                 if ( ";" == substr( $pref, -1 ) ) {
1059                                         $cpos = strpos( $t, ":" );
1060                                         if ( ! ( false === $cpos ) ) {
1061                                                 $term = substr( $t, 0, $cpos );
1062                                                 $text .= $term . $this->nextItem( ":" );
1063                                                 $t = substr( $t, $cpos + 1 );
1064                                         }
1065                                 }
1066                         } else if (0 != $npl || 0 != $opl) {
1067                                 $cpl = $this->getCommon( $pref, $lastPref );
1068
1069                                 while ( $cpl < $opl ) {
1070                                         $text .= $this->closeList( $lastPref{$opl-1} );
1071                                         --$opl;
1072                                 }
1073                                 if ( $npl <= $cpl && $cpl > 0 ) {
1074                                         $text .= $this->nextItem( $pref{$cpl-1} );
1075                                 }
1076                                 while ( $npl > $cpl ) {
1077                                         $char = substr( $pref, $cpl, 1 );
1078                                         $text .= $this->openList( $char );
1079
1080                                         if ( ";" == $char ) {
1081                                                 $cpos = strpos( $t, ":" );
1082                                                 if ( ! ( false === $cpos ) ) {
1083                                                         $term = substr( $t, 0, $cpos );
1084                                                         $text .= $term . $this->nextItem( ":" );
1085                                                         $t = substr( $t, $cpos + 1 );
1086                                                 }
1087                                         }
1088                                         ++$cpl;
1089                                 }
1090                                 $lastPref = $pref2;
1091                         }
1092                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
1093                                 $uniq_prefix = UNIQ_PREFIX;
1094                                 // XXX: use a stack for nestable elements like span, table and div
1095                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p)/i", $t );
1096                                 $closematch = preg_match(
1097                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1098                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre)/i", $t );
1099                                 if ( $openmatch or $closematch ) {
1100                                         $text .= $this->closeParagraph();
1101                                         if ( $closematch  ) {
1102                                                 $inBlockElem = false;
1103                                         } else {
1104                                                 $inBlockElem = true;
1105                                         }
1106                                 } else if ( !$inBlockElem ) {
1107                                         if ( " " == $t{0} ) {
1108                                                 $newSection = "pre";
1109                                                 if ($this->mLastSection != 'pre') {
1110                                                         $text .= $this->closeParagraph();
1111                                                         $text .= "<" . $newSection . ">";
1112                                                         $this->mLastSection = $newSection;
1113                                                 }
1114                                         } else {
1115                                                 $newSection = "p";
1116                                                 if ( ''==trim($t) && ( '' != trim($lastLine) )) {
1117                                                         $text .= $this->closeParagraph();
1118                                                         $text .= "<" . $newSection . ">";
1119                                                         $this->mLastSection = $newSection;
1120                                                 }
1121                                         }
1122
1123                                 }
1124                         }
1125                         $lastLine = $t;
1126                         $text .= $t;
1127                 }
1128                 while ( $npl ) {
1129                         $text .= $this->closeList( $pref2{$npl-1} );
1130                         --$npl;
1131                 }
1132                 if ( "" != $this->mLastSection ) {
1133                         $text .= "</" . $this->mLastSection . ">";
1134                         $this->mLastSection = "";
1135                 }
1136                 wfProfileOut( $fname );
1137                 return $text;
1138         }
1139
1140         function getVariableValue( $index ) {
1141                 global $wgLang, $wgSitename, $wgServer;
1142
1143                 switch ( $index ) {
1144                         case MAG_CURRENTMONTH:
1145                                 return date( "m" );
1146                         case MAG_CURRENTMONTHNAME:
1147                                 return $wgLang->getMonthName( date("n") );
1148                         case MAG_CURRENTMONTHNAMEGEN:
1149                                 return $wgLang->getMonthNameGen( date("n") );
1150                         case MAG_CURRENTDAY:
1151                                 return date("j");
1152                         case MAG_CURRENTDAYNAME:
1153                                 return $wgLang->getWeekdayName( date("w")+1 );
1154                         case MAG_CURRENTYEAR:
1155                                 return date( "Y" );
1156                         case MAG_CURRENTTIME:
1157                                 return $wgLang->time( wfTimestampNow(), false );
1158                         case MAG_NUMBEROFARTICLES:
1159                                 return wfNumberOfArticles();
1160                         case MAG_SITENAME:
1161                                 return $wgSitename;
1162                         case MAG_SERVER:
1163                                 return $wgServer;
1164                         default:
1165                                 return NULL;
1166                 }
1167         }
1168
1169         function initialiseVariables()
1170         {
1171                 global $wgVariableIDs;
1172                 $this->mVariables = array();
1173                 foreach ( $wgVariableIDs as $id ) {
1174                         $mw =& MagicWord::get( $id );
1175                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1176                 }
1177         }
1178
1179         /* private */ function replaceVariables( $text )
1180         {
1181                 global $wgLang, $wgCurParser;
1182                 global $wgScript, $wgArticlePath;
1183
1184                 $fname = "Parser::replaceVariables";
1185                 wfProfileIn( $fname );
1186
1187                 $bail = false;
1188                 if ( !$this->mVariables ) {
1189                         $this->initialiseVariables();
1190                 }
1191                 $titleChars = Title::legalChars();
1192                 $regex = "/{{([$titleChars\\|]*?)}}/s";
1193
1194                 # "Recursive" variable expansion: run it through a couple of passes
1195                 for ( $i=0; $i<MAX_INCLUDE_REPEAT && !$bail; $i++ ) {
1196                         $oldText = $text;
1197
1198                         # It's impossible to rebind a global in PHP
1199                         # Instead, we run the substitution on a copy, then merge the changed fields back in
1200                         $wgCurParser = $this->fork();
1201
1202                         $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1203                         if ( $oldText == $text ) {
1204                                 $bail = true;
1205                         }
1206                         $this->merge( $wgCurParser );
1207                 }
1208
1209                 return $text;
1210         }
1211
1212         # Returns a copy of this object except with various variables cleared
1213         # This copy can be re-merged with the parent after operations on the copy
1214         function fork()
1215         {
1216                 $copy = $this;
1217                 $copy->mOutput = new ParserOutput;
1218                 return $copy;
1219         }
1220
# Merges a copy split off with fork() back into this parser.
#
#   $copy - the parser object previously returned by fork()
#
# Three pieces of state are folded back:
#   * the ParserOutput of the copy (delegated to ParserOutput::merge)
#   * the per-page inclusion counters used for throttling
#   * the strip-state dictionaries
function merge( &$copy )
{
    # Output objects
    $this->mOutput->merge( $copy->mOutput );

    # Include throttling arrays.
    # fork() duplicates mIncludeCount, so the counters of the copy already
    # contain everything the parent had counted before the fork. Adding the
    # two together would count the earlier inclusions twice and trip the
    # MAX_INCLUDE_REPEAT limit too early, hence the larger value is taken.
    foreach ( $copy->mIncludeCount as $dbk => $count ) {
        if ( !array_key_exists( $dbk, $this->mIncludeCount )
          || $this->mIncludeCount[$dbk] < $count ) {
            $this->mIncludeCount[$dbk] = $count;
        }
    }

    # Strip states.
    # Array union keeps the entries this parser already has and adds the ones
    # that only exist in the copy. A dictionary that is missing on this side
    # is taken over as a whole (array union with a non-array would fail).
    foreach ( $copy->mStripState as $dictName => $contentDict ) {
        if ( isset( $this->mStripState[$dictName] ) ) {
            $this->mStripState[$dictName] += $contentDict;
        } else {
            $this->mStripState[$dictName] = $contentDict;
        }
    }
}
1241
# Works out the replacement text for one brace construct.
#
#   $matches - regex match array; $matches[0] is the complete matched text and
#              $matches[1] the part that is inspected here (presumably the text
#              between the braces -- the regex itself lives in the caller)
#
# Returns the replacement text, or $matches[0] unchanged when nothing applies.
#
# The checks run in a fixed order and the first one that succeeds wins:
# SUBST handling, MSGNW/MSG prefixes, INT, NS, LOCALURL/LOCALURLE, the internal
# variable table and finally loading a page from the database. Each magic word
# strips its own prefix from $text, so the order must not be changed.
function braceSubstitution( $matches )
{
    global $wgLinkCache, $wgLang;
    $fname = "Parser::braceSubstitution";
    $nowiki = false;

    $text = $matches[1];

    # SUBST: in a pre-save pass (OT_WIKI) only constructs carrying the SUBST
    # prefix are processed; in every other pass a construct with that prefix is
    # left alone. In both mismatching cases the original text goes back as is.
    $mwSubst =& MagicWord::get( MAG_SUBST );
    $hasSubst = $mwSubst->matchStartAndRemove( $text ) ? true : false;
    $isSavePass = ( $this->mOutputType == OT_WIKI );
    if ( $hasSubst != $isSavePass ) {
        return $matches[0];
    }

    # MSGNW requests escaped output; an obsolete MSG prefix is just dropped
    $mwMsgnw =& MagicWord::get( MAG_MSGNW );
    if ( $mwMsgnw->matchStartAndRemove( $text ) ) {
        $nowiki = true;
    } else {
        $mwMsg =& MagicWord::get( MAG_MSG );
        $mwMsg->matchStartAndRemove( $text );
    }

    # INT: internal message
    $mwInt =& MagicWord::get( MAG_INT );
    if ( $mwInt->matchStartAndRemove( $text ) ) {
        return wfMsg( $text );
    }

    # NS: namespace expansion, either by number or by canonical name
    $mwNs = MagicWord::get( MAG_NS );
    if ( $mwNs->matchStartAndRemove( $text ) ) {
        $nsNumber = intval( $text );
        if ( $nsNumber ) {
            return $wgLang->getNsText( $nsNumber );
        }
        $index = Namespace::getCanonicalIndex( strtolower( $text ) );
        if ( !is_null( $index ) ) {
            return $wgLang->getNsText( $index );
        }
    }

    # LOCALURL and LOCALURLE: pick the Title method that builds the URL
    $mwLocal = MagicWord::get( MAG_LOCALURL );
    $mwLocalE = MagicWord::get( MAG_LOCALURLE );
    $func = '';
    if ( $mwLocal->matchStartAndRemove( $text ) ) {
        $func = 'getLocalURL';
    } elseif ( $mwLocalE->matchStartAndRemove( $text ) ) {
        $func = 'escapeLocalURL';
    }
    if ( $func !== '' ) {
        # First argument is the page, the optional second one is handed to
        # the URL method
        $args = explode( "|", $text );
        $argCount = count( $args );
        if ( $argCount > 0 ) {
            $title = Title::newFromText( $args[0] );
            if ( !is_null( $title ) ) {
                if ( $argCount > 1 ) {
                    return $title->$func( $args[1] );
                }
                return $title->$func();
            }
        }
    }

    # Internal variables
    if ( array_key_exists( $text, $this->mVariables ) ) {
        $this->mOutput->mContainsOldMagic = true;
        return $this->mVariables[$text];
    }

    # Everything else is treated as the name of a page to load
    $title = Title::newFromText( $text, NS_TEMPLATE );
    if ( !is_object( $title ) || $title->isExternal() ) {
        return $matches[0];
    }

    # Count the inclusion and refuse to load once the limit is exceeded
    $dbk = $title->getPrefixedDBkey();
    if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
        $this->mIncludeCount[$dbk] = 0;
    }
    $this->mIncludeCount[$dbk]++;

    if ( $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
        $article = new Article( $title );
        $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
        if ( $articleContent !== false ) {
            $text = $articleContent;

            # Escaping and link table handling are only done for HTML output,
            # other passes get the raw page text
            if ( $this->mOutputType == OT_HTML ) {
                $text = $nowiki
                    ? wfEscapeWikiText( $text )
                    : $this->removeHTMLtags( $text );

                # Links inside the included text must not reach the link table
                $wgLinkCache->suspend();

                # Full parser run on the included text
                $text = $this->strip( $text, $this->mStripState );
                $text = $this->doWikiPass2( $text, true );

                # Park the result in the strip state; it is put back after the
                # rest of the processing
                $text = $this->insertStripItem( $text, $this->mStripState );

                # Back to normal link recording; the inclusion itself counts
                # as a link
                $wgLinkCache->resume();
                $wgLinkCache->addLinkObj( $title );
            }
            return $text;
        }
    }

    # Valid title that could not be displayed: HTML output links to it
    if ( $this->mOutputType == OT_HTML ) {
        return "[[" . $title->getPrefixedText() . "]]";
    }

    return $matches[0];
}
1398
# Cleans up HTML, removes dangerous tags and attributes.
#
#   $text - markup that may contain arbitrary HTML
#
# Returns the text with
#   * HTML comments removed,
#   * tags outside the whitelist, wrongly nested tags and unmatched closing
#     tags turned into literal text ("<" becomes "&lt;", ">" becomes "&gt;"),
#   * the attributes of accepted opening tags passed through
#     fixTagAttributes(),
#   * closing tags appended for accepted tags that were left open.
/* private */ function removeHTMLtags( $text )
{
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );
    $htmlpairs = array( # Tags that must be closed
        "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
        "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
        "strike", "strong", "tt", "var", "div", "center",
        "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
        "ruby", "rt" , "rb" , "rp", "p"
    );
    $htmlsingle = array(
        "br", "hr", "li", "dt", "dd"
    );
    $htmlnest = array( # Tags that can be nested--??
        "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
        "dl", "font", "big", "small", "sub", "sup"
    );
    $tabletags = array( # Can only appear inside table
        "td", "th", "tr"
    );

    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # NOTE(review): the result is not used in this function; the call is kept
    # because getHTMLattrs() is not visible from here.
    $htmlattrs = $this->getHTMLattrs () ;

    # Remove HTML comments
    $text = preg_replace( "/<!--.*-->/sU", "", $text );

    # Everything before the first "<" is plain text; each remaining piece
    # starts right after a "<"
    $bits = explode( "<", $text );
    $text = array_shift( $bits );
    $tagstack = array(); $tablestack = array();

    foreach ( $bits as $x ) {
        # A piece that does not even look like a tag is escaped right away.
        # Testing the match result makes it unnecessary to mute
        # error_reporting() around an assignment from an empty match array.
        if ( !preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
          $x, $regs ) ) {
            $text .= "&lt;" . str_replace( ">", "&gt;", $x);
            continue;
        }
        list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;

        $badtag = 0 ;
        if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
            # Check our stack
            if ( $slash ) {
                # Closing a tag...
                if ( ! in_array( $t, $htmlsingle ) &&
                  ( $ot = array_pop( $tagstack ) ) != $t ) {
                    # Not the tag that is currently open: put the popped
                    # entry back. Nothing was popped when the stack was
                    # empty, so no NULL placeholder is pushed in that case.
                    if ( !is_null( $ot ) ) {
                        array_push( $tagstack, $ot );
                    }
                    $badtag = 1;
                } else {
                    if ( $t == "table" ) {
                        # Leaving a table: continue with the stack that was
                        # active when the table was opened
                        $tagstack = array_pop( $tablestack );
                    }
                    $newparams = "";
                }
            } else {
                # Keep track for later
                if ( in_array( $t, $tabletags ) &&
                  ! in_array( "table", $tagstack ) ) {
                    $badtag = 1;
                } else if ( in_array( $t, $tagstack ) &&
                  ! in_array ( $t , $htmlnest ) ) {
                    $badtag = 1 ;
                } else if ( ! in_array( $t, $htmlsingle ) ) {
                    if ( $t == "table" ) {
                        # A table gets a tag stack of its own
                        array_push( $tablestack, $tagstack );
                        $tagstack = array();
                    }
                    array_push( $tagstack, $t );
                }
                # Strip non-approved attributes from the tag
                $newparams = $this->fixTagAttributes($params);

            }
            if ( ! $badtag ) {
                $rest = str_replace( ">", "&gt;", $rest );
                $text .= "<$slash$t $newparams$brace$rest";
                continue;
            }
        }
        # Unknown or rejected tag: show it as text
        $text .= "&lt;" . str_replace( ">", "&gt;", $x);
    }
    # Close off any remaining tags
    while ( $t = array_pop( $tagstack ) ) {
        $text .= "</$t>\n";
        if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
    }
    wfProfileOut( $fname );
    return $text;
}
1491
# Rebuilds every headline (<h1> to <h6>) contained in $text.
#
# For each headline found this
#   1) works out a section number ("1.2.3") when heading numbering or the
#      table of contents is wanted,
#   2) adds an edit link and/or the right-click edit script when the title
#      may be edited and the options ask for it,
#   3) adds a table-of-contents line,
#   4) puts a named anchor in front of the headline.
#
# All headlines are collected first; afterwards the text is split at the
# headlines and put together again with the rebuilt headlines, the table of
# contents going right after the first block of text.
#
#   $text - HTML text to process
#
# Returns the text with the rebuilt headlines.
/* private */ function formatHeadings( $text )
{
    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    $doShowToc = $this->mOptions->getShowToc();

    # Edit helpers are only offered when the title may be edited
    $showEditLink = 0;
    $rightClickHack = 0;
    if ( $this->mTitle->userCanEdit() ) {
        $showEditLink = $this->mOptions->getEditSection();
        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
    }

    # The page itself can switch the edit section links off
    $mwNoEditSection =& MagicWord::get( MAG_NOEDITSECTION );
    if ( $mwNoEditSection->matchAndRemove( $text ) ) {
        $showEditLink = 0;
    }

    # __NOTOC__ in the text suppresses the table of contents
    $mwNoToc =& MagicWord::get( MAG_NOTOC );
    if ( $mwNoToc->matchAndRemove( $text ) ) {
        $doShowToc = 0;
    }

    # No table of contents on the Main Page either; it is an entry page that
    # should not be more than 1-2 screens large anyway
    if ( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
        $doShowToc = 0;
    }

    # Collect all headlines now, the number of them is needed right away
    $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

    # Fewer than 4 headlines: no table of contents
    if ( $numMatches < 4 ) {
        $doShowToc = 0;
    }

    # __FORCETOC__ overrules all of the above
    $mwForceToc =& MagicWord::get( MAG_FORCETOC );
    if ( $mwForceToc->matchAndRemove( $text ) ) {
        $doShowToc = 1;
    }

    # The skin produces all HTML fragments
    $sk =& $this->mOptions->getSkin();

    # Both flags are final from here on
    $needNumbering = ( $doNumberHeadings || $doShowToc );
    # A lone heading does not get a number in front of it (looks silly)
    $numberInHeadline = ( $doNumberHeadings && count( $matches[3] ) > 1 );

    $headlineCount = 0;
    $toclevel = 0;          # current indentation depth inside the TOC
    $toc = "";
    $full = "";
    $head = array();        # rebuilt headlines, indexed by position
    $sublevelCount = array();   # headlines counted so far, per level
    $anchorUse = array();       # how often each anchor text has occurred
    $level = 0;
    $prevlevel = 0;

    foreach ( $matches[3] as $headline ) {
        if ( $level ) {
            $prevlevel = $level;
        }
        $level = $matches[1][$headlineCount];

        if ( $needNumbering ) {
            if ( $prevlevel && $level > $prevlevel ) {
                # entering a deeper level: its counter starts from scratch
                $sublevelCount[$level] = 0;
                $toc .= $sk->tocIndent( $level - $prevlevel );
                $toclevel += $level - $prevlevel;
            }
            if ( $level < $prevlevel ) {
                # stepping back: forget the counter right below the new level
                $sublevelCount[$level+1]=0;
                $toc .= $sk->tocUnindent( $prevlevel - $level );
                $toclevel -= $prevlevel - $level;
            }
        }

        # one more headline on this level
        if ( !isset( $sublevelCount[$level] ) ) {
            $sublevelCount[$level] = 0;
        }
        $sublevelCount[$level]++;

        # section number: the non-zero counters of all levels up to this one,
        # joined with dots
        $numbering = "";
        if ( $needNumbering ) {
            $parts = array();
            for ( $i = 1; $i <= $level; $i++ ) {
                if ( !empty( $sublevelCount[$i] ) ) {
                    $parts[] = $sublevelCount[$i];
                }
            }
            $numbering = implode( ".", $parts );
        }

        # Text for the TOC and for the anchor: stripped sections are expanded
        # first (keeps weird stuff like <math> placeholders out), then all
        # HTML is removed
        $plain = Parser::unstrip( $headline, $this->mStripState );
        $plain = preg_replace( "/<.*?" . ">/","",$plain );
        $tocline = trim( $plain );
        $anchor = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));

        # Repeated anchor texts get "_2", "_3", ... appended
        if ( !isset( $anchorUse[$anchor] ) ) {
            $anchorUse[$anchor] = 0;
        }
        $anchorUse[$anchor]++;
        if ( $anchorUse[$anchor] > 1 ) {
            $anchor .= "_" . $anchorUse[$anchor];
        }

        # Number in front of the TOC line and, if wanted, of the headline
        if ( $needNumbering ) {
            $tocline = $numbering . " " . $tocline;
            if ( $numberInHeadline ) {
                # $headline and $tocline differ if the line contains a link
                $headline=$numbering . " " . $headline;
            }
        }

        if ( $doShowToc ) {
            $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
        }

        # Put the pieces of the new headline together
        $rebuilt = "";
        if ( $showEditLink ) {
            $rebuilt .= $sk->editSectionLink($headlineCount+1);
        }
        if ( $rightClickHack ) {
            $headline = $sk->editSectionScript($headlineCount+1,$headline);
        }
        $rebuilt .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
        $head[$headlineCount] = $rebuilt;

        $headlineCount++;
    }

    if ( $doShowToc ) {
        # close whatever indentation is still open, then wrap the lines
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    # Split the text at the headlines and glue it back together with the
    # rebuilt ones
    $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );

    $i = 0;
    foreach ( $blocks as $block ) {
        $isFirst = ( $i == 0 );

        if ( $showEditLink && $headlineCount > 0 && $isFirst && $block != "\n" ) {
            # [edit] link for the text above the first headline
            $full .= $sk->editSectionLink(0);
        }
        $full .= $block;

        if ( $doShowToc && $isFirst ) {
            # the TOC follows the first block; top anchor now in skin
            $full .= $toc;
        }

        if ( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }

    return $full;
}
1683
# Builds the text that replaces an "ISBN" magic token.
#
#   $tokenizer - tokenizer positioned right after the ISBN token; tokens with
#                an empty type are consumed, and so is the text token that
#                follows them (if any)
#
# Returns
#   * "ISBN " when the next real token is not a text token,
#   * "ISBN" plus the untouched text when the text does not start (after
#     optional blanks) with a digit, a dash or an upper case letter,
#   * otherwise a link to Special:Booksources for that number followed by the
#     rest of the text; the leading blanks are dropped in this case.
/* private */ function doMagicISBN( &$tokenizer )
{
    # Skip tokens that have an empty type
    $token = $tokenizer->previewToken();
    while ( $token["type"] == "" )
    {
        $tokenizer->nextToken();
        $token = $tokenizer->previewToken();
    }
    if ( $token["type"] != "text" ) {
        return "ISBN ";
    }

    $token = $tokenizer->nextToken();
    $x = (string)$token["text"];
    $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    # strspn() measures the leading run of blanks / valid characters and is
    # well defined for empty strings, so text consisting of nothing but blanks
    # or nothing but valid characters no longer reads offset 0 of "".
    $blankLen = strspn( $x, " " );
    $blank = str_repeat( " ", $blankLen );
    $x = (string)substr( $x, $blankLen );

    $isbnLen = strspn( $x, $valid );
    $isbn = (string)substr( $x, 0, $isbnLen );
    $x = (string)substr( $x, $isbnLen );

    # The query string gets the number without dashes
    $num = str_replace( "-", "", $isbn );
    $num = str_replace( " ", "", $num );

    if ( "" == $num ) {
        # Nothing usable found: hand the text back as it was
        return "ISBN $blank$x";
    }

    $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
    $text = "<a href=\"" .
        $titleObj->escapeLocalUrl( "isbn={$num}" ) .
        "\" class=\"internal\">ISBN $isbn</a>";
    $text .= $x;
    return $text;
}
# Builds the text that replaces an "RFC" magic token.
#
#   $tokenizer - tokenizer positioned right after the RFC token; tokens with
#                an empty type are consumed, and so is the text token that
#                follows them (if any)
#
# Returns
#   * "RFC " when the next real token is not a text token,
#   * "RFC" plus the untouched text when the text does not start (after
#     optional blanks) with a digit,
#   * otherwise an external link built from the "rfcurl" message, followed by
#     the rest of the text; the leading blanks are dropped in this case.
/* private */ function doMagicRFC( &$tokenizer )
{
    # Skip tokens that have an empty type
    $token = $tokenizer->previewToken();
    while ( $token["type"] == "" )
    {
        $tokenizer->nextToken();
        $token = $tokenizer->previewToken();
    }
    if ( $token["type"] != "text" ) {
        return "RFC ";
    }

    $token = $tokenizer->nextToken();
    $x = (string)$token["text"];
    $valid = "0123456789";

    # strspn() measures the leading run of blanks / digits and is well defined
    # for empty strings, so text consisting of nothing but blanks or nothing
    # but digits no longer reads offset 0 of "".
    $blankLen = strspn( $x, " " );
    $blank = str_repeat( " ", $blankLen );
    $x = (string)substr( $x, $blankLen );

    $rfcLen = strspn( $x, $valid );
    $rfc = (string)substr( $x, 0, $rfcLen );
    $x = (string)substr( $x, $rfcLen );

    if ( "" == $rfc ) {
        # No number found: hand the text back as it was. This is a plain
        # assignment; nothing has been put into the result before.
        return "RFC $blank$x";
    }

    # "$1" in the message is the placeholder for the RFC number
    $url = wfmsg( "rfcurl" );
    $url = str_replace( "$1", $rfc, $url);
    $sk =& $this->mOptions->getSkin();
    $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
    return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1772
# Transforms wiki markup before it is saved. The result is wiki markup again,
# not HTML.
#
#   $text       - the wiki text to transform
#   $title      - title of the page, stored in mTitle
#   $user       - the user saving the page, handed on to pstPass2()
#   $options    - stored in mOptions
#   $clearState - when true (default) clearState() is called first
#
# Steps: line ends and <br> variants are normalised, then the text is run
# through strip(), pstPass2() and unstrip(), using a strip state that is local
# to this call.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
    $this->mOptions = $options;
    $this->mTitle =& $title;
    $this->mOutputType = OT_WIKI;

    if ( $clearState ) {
        $this->clearState();
    }

    $stripState = false;

    # Plain replacements
    $pairs = array(
        "\r\n" => "\n",
        );
    $text = str_replace(array_keys($pairs), array_values($pairs), $text);

    # Replacements that need regexes.
    # The first pattern stays inside one tag ([^>]+): with ".+" a line such as
    # "a<br>b<br clear=all>" matched from the first <br and everything up to
    # the second one was lost.
    $pairs = array(
        "/<br[^>]+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
        "/<br *?>/i" => "<br/>",
    );
    $text = preg_replace(array_keys($pairs), array_values($pairs), $text);

    $text = $this->strip( $text, $stripState, false );
    $text = $this->pstPass2( $text, $user );
    $text = $this->unstrip( $text, $stripState );
    return $text;
}
1799
# Second stage of the pre-save transformation.
#
#   $text - wiki text (as left by strip() in preSaveTransform())
#   $user - the saving user; name and "nickname" option are used for
#           signatures
#
# Returns the transformed wiki text. In this order:
#   * variables are replaced (replaceVariables()),
#   * "~~~~~", "~~~~" and "~~~" are expanded to timestamp, signature with
#     timestamp and plain signature,
#   * context links such as [[|name]] or [[name (context)|]] are completed,
#   * trailing whitespace is trimmed and the MAG_END magic word is removed.
/* private */ function pstPass2( $text, &$user )
{
    global $wgLang, $wgLocaltimezone;

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( "nickname" );
    if ( "" == $k ) { $k = $n; }
    if(isset($wgLocaltimezone)) {
        $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
    }
    /* Note: this is an ugly timezone hack for the European wikis */
    $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
      " (" . date( "T" ) . ")";
    # NOTE(review): when TZ was not set before, getenv() gave false and this
    # leaves an empty TZ behind instead of removing it -- confirm whether
    # that matters for the rest of the request.
    if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

    # The markers are fixed strings and the replacements contain user supplied
    # text, so str_replace() is used: in a preg_replace() replacement a "$1"
    # or "\0" inside a nickname would be taken as a back-reference.
    # Longest marker first, otherwise "~~~" would eat parts of the others.
    $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
    $text = str_replace( "~~~~~", $d, $text );
    $text = str_replace( "~~~~", "$userLink $d", $text );
    $text = str_replace( "~~~", $userLink, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
    $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
    $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                            # [[ns:page (cont)|]]

    # The context is the parenthesised part of the current page title, if any
    $context = "";
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }
    $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
    $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
    $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

    if ( "" == $context ) {
        $text = preg_replace( $p2, "[[\\1]]", $text );
    } else {
        # $context only holds characters from $tc, none of which is special
        # in a replacement string
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
1870
# Prime the member variables that parse() would normally set up, so that
# outside code can call individual class members with confidence.
#
#   $title      - stored by reference in mTitle
#   $options    - stored in mOptions
#   $outputType - stored in mOutputType
#   $clearState - when true (the default), clearState() is called once the
#                 members above have been assigned
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        # Caller asked to keep the existing state: nothing more to do
        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1882
# Run variable replacement over a message text.
#
# The parser is pointed at the global $wgTitle, given $options, switched to
# output type OT_MSG and has its state cleared before replaceVariables()
# is applied to $text. The resulting text is returned.
#
# A static flag marks a call as being in progress; if this function is
# entered again while the flag is set, $text is returned unchanged so that
# the calls cannot recurse without end.
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $busy = false;

        if ( !$busy ) {
                $busy = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $busy = false;
        }

        # Either the transformed text, or the untouched input on re-entry
        return $text;
}
1902 }
1903
# Value holder for what a parse produces: a text plus the language links,
# category links and "contains old magic" flag collected alongside it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor (PHP 4 style, named after the class).
        # Every argument is optional; the link lists default to empty arrays
        # and the old-magic flag to false.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Plain accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators: each hands the member and the new value to wfSetVar()
        # and returns whatever wfSetVar() returns.
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the link lists and old-magic flag of another ParserOutput into
        # this one. mText is left untouched.
        #
        #   $other - the ParserOutput whose links and flag are appended
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Append the OTHER object's categories. This used to append
                # $this->mLanguageLinks, which dropped $other's categories and
                # put language links into the category list.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1933
# Bundle of settings for a parse. Some are copied from site-wide globals
# and the rest are read from a user object (see initialiseFromUser()).
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors: each returns the matching member unchanged
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators: each hands the member and the new value to wfSetVar()
        # (wfSetRef() for the skin) and returns whatever that helper returns.
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Factory: build a ParserOptions and fill it in from $user.
        # Returns the new ParserOptions object.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # No '&' at the call site: initialiseFromUser() already declares
                # its parameter by reference, and call-time pass-by-reference is
                # a fatal error from PHP 5.4 onwards.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Populate every member of this object.
        #
        #   $userInput - user object to read preferences from. When it
        #                evaluates to false, a fresh User is created and
        #                setLoaded( true ) is called on it instead.
        #
        # Site-wide switches are copied from the $wg* globals; the skin comes
        # from the user's getSkin() and the remaining members from getOption().
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2006
# Regex callback used in Parser::replaceVariables.
# Passes the match array straight to braceSubstitution() on the parser
# object held in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2013
2014 ?>