includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36
  37 define( "MAX_INCLUDE_REPEAT", 5 ); # inclusion limit per page described in the note above; its use is outside this excerpt
  38
  39 # Allowed values for $mOutputType. parse() selects OT_HTML, and strip() renders stripped content only in that mode.
  40 define( "OT_HTML", 1 );
  41 define( "OT_WIKI", 2 );
  42 define( "OT_MSG", 3 );
  43
  44 # Prefix of the unique markers that strip() and insertStripItem() put in place of stripped text
  45 define( "UNIQ_PREFIX", "NaodW29");
  46
  47 class Parser
  48 {
  49         # Cleared with clearState():
  50         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  51         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  52
  53         # Temporary:
  54         var $mOptions, $mTitle, $mOutputType;
  55
  # Constructor (method named after the class): a freshly created parser
  # starts out with cleared state.
  function Parser() {
      $this->clearState();
  }
  60
  # Reset every per-parse member so that the same Parser object can be reused.
  # Called by the constructor and, unless the caller opts out, by parse().
  function clearState()
  {
      $this->mOutput = new ParserOutput;
      $this->mAutonumber = 0;          # counter behind the "[1]", "[2]" ... external link labels
      $this->mLastSection = "";
      $this->mDTopen = false;
      # $mInPre is declared among the members "cleared with clearState()",
      # so it is reset here together with the others.
      $this->mInPre = false;
      $this->mVariables = false;
      $this->mIncludeCount = array();
      $this->mStripState = array();
      $this->mArgStack = array();
  }
  72
  73         # First pass--just handle <nowiki> sections, pass the rest off
  74         # to internalParse() which does all the real work.
  75         #
  76         # Returns a ParserOutput
  77         #
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
      $fname = "Parser::parse";
      wfProfileIn( $fname );

      # Start from scratch unless the caller explicitly wants to keep state
      if ( $clearState ) {
          $this->clearState();
      }

      $this->mOptions = $options;
      $this->mTitle =& $title;
      $this->mOutputType = OT_HTML;

      # Protected sections are swapped for markers, the remaining text is
      # parsed, and the protected content is put back afterwards.
      $text = $this->strip( $text, $this->mStripState );
      $text = $this->internalParse( $text, $linestart );
      $text = $this->unstrip( $text, $this->mStripState );

      # Clean up special characters, only run once, next-to-last before doBlockLevels
      $fixtags = array(
          "/<hr *>/i" => '<hr/>',
          "/<br *>/i" => '<br/>',
          "/<center *>/i"=>'<div class="center">',
          "/<\\/center *>/i" => '</div>',
          # Clean up spare ampersands; note that we probably ought to be
          # more careful about named entities.
          # The hex class is A-F/a-f only: a range written "A-f" would also
          # accept G-Z and several punctuation characters as hex digits.
          '/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
      );
      $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

      # only once and last
      $text = $this->doBlockLevels( $text, $linestart );

      $this->mOutput->setText( $text );
      wfProfileOut( $fname );
      return $this->mOutput;
  }
 114
 115         /* static */ function getRandomString()
 116         {
 117                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 118         }
 119
  120         # Replaces all occurrences of <$tag>content</$tag> in the text
  121         # with a random marker and returns the new text. The output parameter
  122         # $content will be an associative array filled with data of the form
  123         # $unique_marker => content.
 124
 125         # If $content is already set, the additional entries will be appended
 126
 127         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 128                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 129                 if ( !$content ) {
 130                         $content = array( );
 131                 }
 132                 $n = 1;
 133                 $stripped = "";
 134
 135                 while ( "" != $text ) {
 136                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 137                         $stripped .= $p[0];
 138                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 139                                 $text = "";
 140                         } else {
 141                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 142                                 $marker = $rnd . sprintf("%08X", $n++);
 143                                 $content[$marker] = $q[0];
 144                                 $stripped .= $marker;
 145                                 $text = $q[1];
 146                         }
 147                 }
 148                 return $stripped;
 149         }
 150
  151         # Strips <nowiki>, <hiero> (if enabled), <math> (if TeX is enabled) and <pre>
 152         # Returns the text, and fills an array with data needed in unstrip()
 153         # If the $state is already a valid strip state, it adds to the state
 154         #
 155         function strip( $text, &$state )
 156         {
 157                 $render = ($this->mOutputType == OT_HTML);
 158                 $nowiki_content = array();
 159                 $hiero_content = array();
 160                 $math_content = array();
 161                 $pre_content = array();
 162                 $item_content = array();
 163
 164                 # Replace any instances of the placeholders
 165                 $uniq_prefix = UNIQ_PREFIX;
 166                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 167
 168                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 169                 foreach( $nowiki_content as $marker => $content ){
 170                         if( $render ){
 171                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 172                         } else {
 173                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 174                         }
 175                 }
 176
 177                 if( $GLOBALS['wgUseWikiHiero'] ){
 178                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 179                         foreach( $hiero_content as $marker => $content ){
 180                                 if( $render ){
 181                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 182                                 } else {
 183                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 184                                 }
 185                         }
 186                 }
 187
 188                 if( $this->mOptions->getUseTeX() ){
 189                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 190                         foreach( $math_content as $marker => $content ){
 191                                 if( $render ){
 192                                         $math_content[$marker] = renderMath( $content );
 193                                 } else {
 194                                         $math_content[$marker] = "<math>$content</math>";
 195                                 }
 196                         }
 197                 }
 198
 199                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 200                 foreach( $pre_content as $marker => $content ){
 201                         if( $render ){
 202                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 203                         } else {
 204                                 $pre_content[$marker] = "<pre>$content</pre>";
 205                         }
 206                 }
 207
 208                 # Merge state with the pre-existing state, if there is one
 209                 if ( $state ) {
 210                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 211                         $state['hiero'] = $state['hiero'] + $hiero_content;
 212                         $state['math'] = $state['math'] + $math_content;
 213                         $state['pre'] = $state['pre'] + $pre_content;
 214                 } else {
 215                         $state = array(
 216                           'nowiki' => $nowiki_content,
 217                           'hiero' => $hiero_content,
 218                           'math' => $math_content,
 219                           'pre' => $pre_content,
 220                           'item' => $item_content
 221                         );
 222                 }
 223                 return $text;
 224         }
 225
 226         function unstrip( $text, &$state )
 227         {
 228                 # Must expand in reverse order, otherwise nested tags will be corrupted
 229                 $contentDict = end( $state );
 230                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 231                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 232                                 $text = str_replace( key( $contentDict ), $content, $text );
 233                         }
 234                 }
 235
 236                 return $text;
 237         }
 238
 239         # Add an item to the strip state
 240         # Returns the unique tag which must be inserted into the stripped text
 241         # The tag will be replaced with the original text in unstrip()
 242
 243         function insertStripItem( $text, &$state )
 244         {
 245                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 246                 if ( !$state ) {
 247                         $state = array(
 248                           'nowiki' => array(),
 249                           'hiero' => array(),
 250                           'math' => array(),
 251                           'pre' => array(),
 252                           'item' => array()
 253                         );
 254                 }
 255                 $state['item'][$rnd] = $text;
 256                 return $rnd;
 257         }
 258
 259         # This method generates the list of subcategories and pages for a category
 260         function categoryMagic ()
 261         {
 262                 global $wgLang , $wgUser ;
 263                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 264
 265                 $cns = Namespace::getCategory() ;
 266                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 267
 268                 $r = "<br style=\"clear:both;\"/>\n";
 269
 270
 271                 $sk =& $wgUser->getSkin() ;
 272
 273                 $articles = array() ;
 274                 $children = array() ;
 275                 $data = array () ;
 276                 $id = $this->mTitle->getArticleID() ;
 277
 278                 # For existing categories
 279                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 280                 $res = wfQuery ( $sql, DB_READ ) ;
 281                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 282
 283                 # For non-existing categories
 284                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 285                 $res = wfQuery ( $sql, DB_READ ) ;
 286                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 287
 288                 # For all pages that link to this category
 289                 foreach ( $data AS $x )
 290                 {
 291                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 292                         if ( $t != "" ) $t .= ":" ;
 293                         $t .= $x->cur_title ;
 294
 295                         if ( $x->cur_namespace == $cns ) {
 296                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 297                         } else {
 298                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 299                         }
 300                 }
 301                 wfFreeResult ( $res ) ;
 302
 303                 # Showing subcategories
 304                 if ( count ( $children ) > 0 )
 305                 {
 306                         asort ( $children ) ;
 307                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 308                         $r .= implode ( ", " , $children ) ;
 309                 }
 310
 311                 # Showing pages in this category
 312                 if ( count ( $articles ) > 0 )
 313                 {
 314                         $ti = $this->mTitle->getText() ;
 315                         asort ( $articles ) ;
 316                         $h =  wfMsg( "category_header", $ti );
 317                         $r .= "<h2>{$h}</h2>\n" ;
 318                         $r .= implode ( ", " , $articles ) ;
 319                 }
 320
 321
 322                 return $r ;
 323         }
 324
 325         function getHTMLattrs ()
 326         {
 327                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 328                                 "title", "align", "lang", "dir", "width", "height",
 329                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 330                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 331                                 /* FONT */ "type", "start", "value", "compact",
 332                                 /* For various lists, mostly deprecated but safe */
 333                                 "summary", "width", "border", "frame", "rules",
 334                                 "cellspacing", "cellpadding", "valign", "char",
 335                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 336                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 337                                 "id", "class", "name", "style" /* For CSS */
 338                                 );
 339                 return $htmlattrs ;
 340         }
 341
 342         function fixTagAttributes ( $t )
 343         {
 344                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 345                 $htmlattrs = $this->getHTMLattrs() ;
 346
 347                 # Strip non-approved attributes from the tag
 348                 $t = preg_replace(
 349                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 350                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 351                         $t);
 352                 # Strip javascript "expression" from stylesheets. Brute force approach:
 353                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 354
 355                 if( preg_match(
 356                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 357                         wfMungeToUtf8( $t ) ) )
 358                 {
 359                         $t="";
 360                 }
 361
 362                 return trim ( $t ) ;
 363         }
 364
 365         function doTableStuff ( $t )
 366         {
 367                 $t = explode ( "\n" , $t ) ;
 368                 $td = array () ; # Is currently a td tag open?
 369                         $ltd = array () ; # Was it TD or TH?
 370                         $tr = array () ; # Is currently a tr tag open?
 371                         $ltr = array () ; # tr attributes
 372                         foreach ( $t AS $k => $x )
 373                         {
 374                                 $x = trim ( $x ) ;
 375                                 $fc = substr ( $x , 0 , 1 ) ;
 376                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 377                                 {
 378                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 379                                         array_push ( $td , false ) ;
 380                                         array_push ( $ltd , "" ) ;
 381                                         array_push ( $tr , false ) ;
 382                                         array_push ( $ltr , "" ) ;
 383                                 }
 384                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 385                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 386                                 {
 387                                         $z = "</table>\n" ;
 388                                         $l = array_pop ( $ltd ) ;
 389                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 390                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 391                                         array_pop ( $ltr ) ;
 392                                         $t[$k] = $z ;
 393                                 }
 394                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 395                                                 {
 396                                                 $z = trim ( substr ( $x , 2 ) ) ;
 397                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 398                                                 }*/
 399                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 400                                 {
 401                                         $x = substr ( $x , 1 ) ;
 402                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 403                                         $z = "" ;
 404                                         $l = array_pop ( $ltd ) ;
 405                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 406                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 407                                         array_pop ( $ltr ) ;
 408                                         $t[$k] = $z ;
 409                                         array_push ( $tr , false ) ;
 410                                         array_push ( $td , false ) ;
 411                                         array_push ( $ltd , "" ) ;
 412                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 413                                 }
 414                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 415                                 {
 416                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 417                                         {
 418                                                 $fc = "+" ;
 419                                                 $x = substr ( $x , 1 ) ;
 420                                         }
 421                                         $after = substr ( $x , 1 ) ;
 422                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 423                                         $after = explode ( "||" , $after ) ;
 424                                         $t[$k] = "" ;
 425                                         foreach ( $after AS $theline )
 426                                         {
 427                                                 $z = "" ;
 428                                                 if ( $fc != "+" )
 429                                                 {
 430                                                         $tra = array_pop ( $ltr ) ;
 431                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 432                                                         array_push ( $tr , true ) ;
 433                                                         array_push ( $ltr , "" ) ;
 434                                                 }
 435
 436                                                 $l = array_pop ( $ltd ) ;
 437                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 438                                                 if ( $fc == "|" ) $l = "td" ;
 439                                                 else if ( $fc == "!" ) $l = "th" ;
 440                                                 else if ( $fc == "+" ) $l = "caption" ;
 441                                                 else $l = "" ;
 442                                                 array_push ( $ltd , $l ) ;
 443                                                 $y = explode ( "|" , $theline , 2 ) ;
 444                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 445                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 446                                                 $t[$k] .= $y ;
 447                                                 array_push ( $td , true ) ;
 448                                         }
 449                                 }
 450                         }
 451
 452                 # Closing open td, tr && table
 453                 while ( count ( $td ) > 0 )
 454                 {
 455                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 456                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 457                         $t[] = "</table>" ;
 458                 }
 459
 460                 $t = implode ( "\n" , $t ) ;
 461                 #               $t = $this->removeHTMLtags( $t );
 462                 return $t ;
 463         }
 464
 465         function internalParse( $text, $linestart, $args = array() )
 466         {
 467                 $fname = "Parser::internalParse";
 468                 wfProfileIn( $fname );
 469
 470                 $text = $this->removeHTMLtags( $text );
 471                 $text = $this->replaceVariables( $text, $args );
 472
 473                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 474
 475                 $text = $this->doHeadings( $text );
 476                 if($this->mOptions->getUseDynamicDates()) {
 477                         global $wgDateFormatter;
 478                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 479                 }
 480                 $text = $this->replaceExternalLinks( $text );
 481                 $text = $this->doTokenizedParser ( $text );
 482                 $text = $this->doTableStuff ( $text ) ;
 483                 $text = $this->formatHeadings( $text );
 484                 $sk =& $this->mOptions->getSkin();
 485                 $text = $sk->transformContent( $text );
 486
 487                 if ( !isset ( $this->categoryMagicDone ) ) {
 488                    $text .= $this->categoryMagic () ;
 489                    $this->categoryMagicDone = true ;
 490                    }
 491
 492                 wfProfileOut( $fname );
 493                 return $text;
 494         }
 495
 496
 497         /* private */ function doHeadings( $text )
 498         {
 499                 for ( $i = 6; $i >= 1; --$i ) {
 500                         $h = substr( "======", 0, $i );
 501                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 502                           "<h{$i}>\\1</h{$i}>\\2", $text );
 503                 }
 504                 return $text;
 505         }
 506
 507         # Note: we have to do external links before the internal ones,
 508         # and otherwise take great care in the order of things here, so
 509         # that we don't end up interpreting some URLs twice.
 510
 511         /* private */ function replaceExternalLinks( $text )
 512         {
 513                 $fname = "Parser::replaceExternalLinks";
 514                 wfProfileIn( $fname );
 515                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 516                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 517                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 518                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 519                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 520                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 521                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 522                 wfProfileOut( $fname );
 523                 return $text;
 524         }
 525
 526         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 527         {
 528                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 529                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 530
 531                 # this is  the list of separators that should be ignored if they
 532                 # are the last character of an URL but that should be included
 533                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 534                 # in this case, the last comma should not become part of the URL,
 535                 # but in "www.foo.com/123,2342,32.htm" it should.
 536                 $sep = ",;\.:";
 537                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 538                 $images = "gif|png|jpg|jpeg";
 539
 540                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 541                 # they are interpreted as part of the string (used to tell PHP
 542                 # that the content of the string should be inserted there).
 543                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 544                   "((?i){$images})([^{$uc}]|$)/";
 545
 546                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 547                 $sk =& $this->mOptions->getSkin();
 548
 549                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 550                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 551                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 552                 }
 553                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 554                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 555                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 556                   "</a>\\5", $s );
 557                 $s = str_replace( $unique, $protocol, $s );
 558
 559                 $a = explode( "[{$protocol}:", " " . $s );
 560                 $s = array_shift( $a );
 561                 $s = substr( $s, 1 );
 562
 563                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 564                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 565
 566                 foreach ( $a as $line ) {
 567                         if ( preg_match( $e1, $line, $m ) ) {
 568                                 $link = "{$protocol}:{$m[1]}";
 569                                 $trail = $m[2];
 570                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 571                                 else { $text = wfEscapeHTML( $link ); }
 572                         } else if ( preg_match( $e2, $line, $m ) ) {
 573                                 $link = "{$protocol}:{$m[1]}";
 574                                 $text = $m[2];
 575                                 $trail = $m[3];
 576                         } else {
 577                                 $s .= "[{$protocol}:" . $line;
 578                                 continue;
 579                         }
 580                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 581                                 $paren = "";
 582                         } else {
 583                                 # Expand the URL for printable version
 584                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 585                         }
 586                         $la = $sk->getExternalLinkAttributes( $link, $text );
 587                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 588
 589                 }
 590                 return $s;
 591         }
 592
 593         /* private */ function handle3Quotes( &$state, $token )
 594         {
 595                 if ( $state["strong"] !== false ) {
 596                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 597                         {
 598                                 # ''' lala ''lala '''
 599                                 $s = "</em></strong><em>";
 600                         } else {
 601                                 $s = "</strong>";
 602                         }
 603                         $state["strong"] = FALSE;
 604                 } else {
 605                         $s = "<strong>";
 606                         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
 607                 }
 608                 return $s;
 609         }
 610
 611         /* private */ function handle2Quotes( &$state, $token )
 612         {
 613                 if ( $state["em"] !== false ) {
 614                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 615                         {
 616                                 # ''lala'''lala'' ....'''
 617                                 $s = "</strong></em><strong>";
 618                         } else {
 619                                 $s = "</em>";
 620                         }
 621                         $state["em"] = FALSE;
 622                 } else {
 623                         $s = "<em>";
 624                         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 625
 626                 }
 627                 return $s;
 628         }
 629
 630         /* private */ function handle5Quotes( &$state, $token )
 631         {
 632                 $s = "";
 633                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 634                         if ( $state["em"] < $state["strong"] ) {
 635                                 $s .= "</strong></em>";
 636                         } else {
 637                                 $s .= "</em></strong>";
 638                         }
 639                         $state["strong"] = $state["em"] = FALSE;
 640                 } elseif ( $state["em"] !== false ) {
 641                         $s .= "</em><strong>";
 642                         $state["em"] = FALSE;
 643                         $state["strong"] = $token["pos"];
 644                 } elseif ( $state["strong"] !== false ) {
 645                         $s .= "</strong><em>";
 646                         $state["strong"] = FALSE;
 647                         $state["em"] = $token["pos"];
 648                 } else { # not $em and not $strong
 649                         $s .= "<strong><em>";
 650                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 651                 }
 652                 return $s;
 653         }
 654
 655         /* private */ function doTokenizedParser( $str )
 656         {
 657                 global $wgLang; # for language specific parser hook
 658
 659                 $tokenizer=Tokenizer::newFromString( $str );
 660                 $tokenStack = array();
 661
 662                 $s="";
 663                 $state["em"]      = FALSE;
 664                 $state["strong"]  = FALSE;
 665                 $tagIsOpen = FALSE;
 666                 $threeopen = false;
 667
 668                 # The tokenizer splits the text into tokens and returns them one by one.
 669                 # Every call to the tokenizer returns a new token.
 670                 while ( $token = $tokenizer->nextToken() )
 671                 {
 672                         switch ( $token["type"] )
 673                         {
 674                                 case "text":
 675                                         # simple text with no further markup
 676                                         $txt = $token["text"];
 677                                         break;
 678                                 case "[[[":
 679                                         # remember the tag opened with 3 [
 680                                         $threeopen = true;
 681                                 case "[[":
 682                                         # link opening tag.
 683                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 684                                         $tagIsOpen = TRUE;
 685                                         array_push( $tokenStack, $token );
 686                                         $txt="";
 687                                         break;
 688
 689                                 case "]]]":
 690                                 case "]]":
 691                                         # link close tag.
 692                                         # get text from stack, glue it together, and call the code to handle a
 693                                         # link
 694
 695                                         if ( count( $tokenStack ) == 0 )
 696                                         {
 697                                                 # stack empty. Found a ]] without an opening [[
 698                                                 $txt = "]]";
 699                                         } else {
 700                                                 $linkText = "";
 701                                                 $lastToken = array_pop( $tokenStack );
 702                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 703                                                 {
 704                                                         if( !empty( $lastToken["text"] ) ) {
 705                                                                 $linkText = $lastToken["text"] . $linkText;
 706                                                         }
 707                                                         $lastToken = array_pop( $tokenStack );
 708                                                 }
 709
 710                                                 $txt = $linkText ."]]";
 711
 712                                                 if( isset( $lastToken["text"] ) ) {
 713                                                         $prefix = $lastToken["text"];
 714                                                 } else {
 715                                                         $prefix = "";
 716                                                 }
 717                                                 $nextToken = $tokenizer->previewToken();
 718                                                 if ( $nextToken["type"] == "text" )
 719                                                 {
 720                                                         # Preview just looks at it. Now we have to fetch it.
 721                                                         $nextToken = $tokenizer->nextToken();
 722                                                         $txt .= $nextToken["text"];
 723                                                 }
 724                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 725
 726                                                 # did the tag start with 3 [ ?
 727                                                 if($threeopen) {
 728                                                         # show the first as text
 729                                                         $txt = "[".$txt;
 730                                                         $threeopen=false;
 731                                                 }
 732
 733                                         }
 734                                         $tagIsOpen = (count( $tokenStack ) != 0);
 735                                         break;
 736                                 case "----":
 737                                         $txt = "\n<hr />\n";
 738                                         break;
 739                                 case "'''":
 740                                         # This and the three next ones handle quotes
 741                                         $txt = $this->handle3Quotes( $state, $token );
 742                                         break;
 743                                 case "''":
 744                                         $txt = $this->handle2Quotes( $state, $token );
 745                                         break;
 746                                 case "'''''":
 747                                         $txt = $this->handle5Quotes( $state, $token );
 748                                         break;
 749                                 case "":
 750                                         # empty token
 751                                         $txt="";
 752                                         break;
 753                                 case "RFC ":
 754                                         if ( $tagIsOpen ) {
 755                                                 $txt = "RFC ";
 756                                         } else {
 757                                                 $txt = $this->doMagicRFC( $tokenizer );
 758                                         }
 759                                         break;
 760                                 case "ISBN ":
 761                                         if ( $tagIsOpen ) {
 762                                                 $txt = "ISBN ";
 763                                         } else {
 764                                                 $txt = $this->doMagicISBN( $tokenizer );
 765                                         }
 766                                         break;
 767                                 default:
 768                                         # Call language specific Hook.
 769                                         $txt = $wgLang->processToken( $token, $tokenStack );
 770                                         if ( NULL == $txt ) {
 771                                                 # An unkown token. Highlight.
 772                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 773                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 774                                         }
 775                                         break;
 776                         }
 777                         # If we're parsing the interior of a link, don't append the interior to $s,
 778                         # but push it to the stack so it can be processed when a ]] token is found.
 779                         if ( $tagIsOpen  && $txt != "" ) {
 780                                 $token["type"] = "text";
 781                                 $token["text"] = $txt;
 782                                 array_push( $tokenStack, $token );
 783                         } else {
 784                                 $s .= $txt;
 785                         }
 786                 } #end while
 787                 if ( count( $tokenStack ) != 0 )
 788                 {
 789                         # still objects on stack. opened [[ tag without closing ]] tag.
 790                         $txt = "";
 791                         while ( $lastToken = array_pop( $tokenStack ) )
 792                         {
 793                                 if ( $lastToken["type"] == "text" )
 794                                 {
 795                                         $txt = $lastToken["text"] . $txt;
 796                                 } else {
 797                                         $txt = $lastToken["type"] . $txt;
 798                                 }
 799                         }
 800                         $s .= $txt;
 801                 }
 802                 return $s;
 803         }
 804
 805         /* private */ function handleInternalLink( $line, $prefix )
 806         {
 807                 global $wgLang, $wgLinkCache;
 808                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 809                 static $fname = "Parser::handleInternalLink" ;
 810                 wfProfileIn( $fname );
 811
 812                 wfProfileIn( "$fname-setup" );
 813                 static $tc = FALSE;
 814                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 815                 $sk =& $this->mOptions->getSkin();
 816
 817                 # Match a link having the form [[namespace:link|alternate]]trail
 818                 static $e1 = FALSE;
 819                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 820                 # Match the end of a line for a word that's not followed by whitespace,
 821                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 822                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 823                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 824                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 825
 826
 827                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 828                 static $image = FALSE;
 829                 static $special = FALSE;
 830                 static $media = FALSE;
 831                 static $category = FALSE;
 832                 if ( !$image ) { $image = Namespace::getImage(); }
 833                 if ( !$special ) { $special = Namespace::getSpecial(); }
 834                 if ( !$media ) { $media = Namespace::getMedia(); }
 835                 if ( !$category ) { $category = Namespace::getCategory(); ; }
 836
 837                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 838
 839                 wfProfileOut( "$fname-setup" );
 840                 $s = "";
 841
 842                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 843                         $text = $m[2];
 844                         $trail = $m[3];
 845                 } else { # Invalid form; output directly
 846                         $s .= $prefix . "[[" . $line ;
 847                         return $s;
 848                 }
 849
 850                 /* Valid link forms:
 851                 Foobar -- normal
 852                 :Foobar -- override special treatment of prefix (images, language links)
 853                 /Foobar -- convert to CurrentPage/Foobar
 854                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 855                 */
 856                 $c = substr($m[1],0,1);
 857                 $noforce = ($c != ":");
 858                 if( $c == "/" ) { # subpage
 859                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 860                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 861                                 $noslash=$m[1];
 862                         } else {
 863                                 $noslash=substr($m[1],1);
 864                         }
 865                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 866                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 867                                 if( "" == $text ) {
 868                                         $text= $m[1];
 869                                 } # this might be changed for ugliness reasons
 870                         } else {
 871                                 $link = $noslash; # no subpage allowed, use standard link
 872                         }
 873                 } elseif( $noforce ) { # no subpage
 874                         $link = $m[1];
 875                 } else {
 876                         $link = substr( $m[1], 1 );
 877                 }
 878                 if( "" == $text )
 879                         $text = $link;
 880
 881                 $nt = Title::newFromText( $link );
 882                 if( !$nt ) {
 883                         $s .= $prefix . "[[" . $line;
 884                         return $s;
 885                 }
 886                 $ns = $nt->getNamespace();
 887                 $iw = $nt->getInterWiki();
 888                 if( $noforce ) {
 889                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 890                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 891                                 return (trim($s) == '')? '': $s;
 892                         }
 893                         if( $ns == $image ) {
 894                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 895                                 $wgLinkCache->addImageLinkObj( $nt );
 896                                 return $s;
 897                         }
 898                         if ( $ns == $category ) {
 899                                 $t = $nt->getText() ;
 900                                 $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 901                                 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 902                                 $this->mOutput->mCategoryLinks[] = $t ;
 903                                 $s .= $prefix . $trail ;
 904                                 return $s ;
 905                         }
 906                 }
 907                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 908                     ( strpos( $link, "#" ) == FALSE ) ) {
 909                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 910                         return $s;
 911                 }
 912
 913                 if( $ns == $media ) {
 914                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 915                         $wgLinkCache->addImageLinkObj( $nt );
 916                         return $s;
 917                 } elseif( $ns == $special ) {
 918                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 919                         return $s;
 920                 }
 921                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 922
 923                 wfProfileOut( $fname );
 924                 return $s;
 925         }
 926
 927         # Some functions here used by doBlockLevels()
 928         #
 929         /* private */ function closeParagraph()
 930         {
 931                 $result = "";
 932                 if ( '' != $this->mLastSection ) {
 933                         $result = "</" . $this->mLastSection  . ">\n";
 934                 }
 935                 $this->mInPre = false;
 936                 $this->mLastSection = "";
 937                 return $result;
 938         }
        # getCommon() returns the length of the longest common prefix of
        # both arguments, i.e. the number of leading characters they share.
        #
 942         /* private */ function getCommon( $st1, $st2 )
 943         {
 944                 $fl = strlen( $st1 );
 945                 $shorter = strlen( $st2 );
 946                 if ( $fl < $shorter ) { $shorter = $fl; }
 947
 948                 for ( $i = 0; $i < $shorter; ++$i ) {
 949                         if ( $st1{$i} != $st2{$i} ) { break; }
 950                 }
 951                 return $i;
 952         }
 953         # These next three functions open, continue, and close the list
 954         # element appropriate to the prefix character passed into them.
 955         #
 956         /* private */ function openList( $char )
 957     {
 958                 $result = $this->closeParagraph();
 959
 960                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 961                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 962                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 963                 else if ( ";" == $char ) {
 964                         $result .= "<dl><dt>";
 965                         $this->mDTopen = true;
 966                 }
 967                 else { $result = "<!-- ERR 1 -->"; }
 968
 969                 return $result;
 970         }
 971
 972         /* private */ function nextItem( $char )
 973         {
 974                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 975                 else if ( ":" == $char || ";" == $char ) {
 976                         $close = "</dd>";
 977                         if ( $this->mDTopen ) { $close = "</dt>"; }
 978                         if ( ";" == $char ) {
 979                                 $this->mDTopen = true;
 980                                 return $close . "<dt>";
 981                         } else {
 982                                 $this->mDTopen = false;
 983                                 return $close . "<dd>";
 984                         }
 985                 }
 986                 return "<!-- ERR 2 -->";
 987         }
 988
 989         /* private */function closeList( $char )
 990         {
 991                 if ( "*" == $char ) { $text = "</li></ul>"; }
 992                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 993                 else if ( ":" == $char ) {
 994                         if ( $this->mDTopen ) {
 995                                 $this->mDTopen = false;
 996                                 $text = "</dt></dl>";
 997                         } else {
 998                                 $text = "</dd></dl>";
 999                         }
1000                 }
1001                 else {  return "<!-- ERR 3 -->"; }
1002                 return $text."\n";
1003         }
1004
1005         /* private */ function doBlockLevels( $text, $linestart )
1006         {
1007                 $fname = "Parser::doBlockLevels";
1008                 wfProfileIn( $fname );
1009                 # Parsing through the text line by line.  The main thing
1010                 # happening here is handling of block-level elements p, pre,
1011                 # and making lists from lines starting with * # : etc.
1012                 #
1013                 $a = explode( "\n", $text );
1014
1015                 $lastPref = $text = $lastLine = '';
1016                 $this->mDTopen = $inBlockElem = false;
1017                 $npl = 0;
1018                 $pstack = false;
1019
1020                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1021                 foreach ( $a as $t ) {
1022                         $oLine = $t;
1023                         $opl = strlen( $lastPref );
1024                         $preCloseMatch = preg_match("/<\\/pre/i", $t );
1025                         $preOpenMatch = preg_match("/<pre/i", $t );
1026                         if (!$this->mInPre) {
1027                                 $this->mInPre = !empty($preOpenMatch);
1028                         }
1029                         if ( !$this->mInPre ) {
1030                                 $npl = strspn( $t, "*#:;" );
1031                                 $pref = substr( $t, 0, $npl );
1032                                 $pref2 = str_replace( ";", ":", $pref );
1033                                 $t = substr( $t, $npl );
1034                         } else {
1035                                 $npl = 0;
1036                                 $pref = $pref2 = '';
1037                         }
1038
1039                         // list generation
1040                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1041                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1042                                 if ( $pstack ) { $pstack = false; }
1043
1044                                 if ( ";" == substr( $pref, -1 ) ) {
1045                                         $cpos = strpos( $t, ":" );
1046                                         if ( false !== $cpos ) {
1047                                                 $term = substr( $t, 0, $cpos );
1048                                                 $text .= $term . $this->nextItem( ":" );
1049                                                 $t = substr( $t, $cpos + 1 );
1050                                         }
1051                                 }
1052                         } else if (0 != $npl || 0 != $opl) {
1053                                 $cpl = $this->getCommon( $pref, $lastPref );
1054                                 if ( $pstack ) { $pstack = false; }
1055
1056                                 while ( $cpl < $opl ) {
1057                                         $text .= $this->closeList( $lastPref{$opl-1} );
1058                                         --$opl;
1059                                 }
1060                                 if ( $npl <= $cpl && $cpl > 0 ) {
1061                                         $text .= $this->nextItem( $pref{$cpl-1} );
1062                                 }
1063                                 while ( $npl > $cpl ) {
1064                                         $char = substr( $pref, $cpl, 1 );
1065                                         $text .= $this->openList( $char );
1066
1067                                         if ( ";" == $char ) {
1068                                                 $cpos = strpos( $t, ":" );
1069                                                 if ( ! ( false === $cpos ) ) {
1070                                                         $term = substr( $t, 0, $cpos );
1071                                                         $text .= $term . $this->nextItem( ":" );
1072                                                         $t = substr( $t, $cpos + 1 );
1073                                                 }
1074                                         }
1075                                         ++$cpl;
1076                                 }
1077                                 $lastPref = $pref2;
1078                         }
1079                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1080                                 $uniq_prefix = UNIQ_PREFIX;
1081                                 // XXX: use a stack for nestable elements like span, table and div
1082                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1083                                 $closematch = preg_match(
1084                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1085                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1086                                 if ( $openmatch or $closematch ) {
1087                                         if ( $pstack ) { $pstack = false; }
1088                                         $text .= $this->closeParagraph();
1089                                         if($preOpenMatch and !$preCloseMatch) {
1090                                                 $this->mInPre = true;
1091                                         }
1092                                         if ( $closematch  ) {
1093                                                 $inBlockElem = false;
1094                                         } else {
1095                                                 $inBlockElem = true;
1096                                         }
1097                                 } else if ( !$inBlockElem ) {
1098                                         if ( " " == $t{0} ) {
1099                                                 // pre
1100                                                 if ($this->mLastSection != 'pre') {
1101                                                         $pstack = false;
1102                                                         $text .= $this->closeParagraph().'<pre>';
1103                                                         $this->mLastSection = 'pre';
1104                                                 }
1105                                         } else {
1106                                                 // paragraph
1107                                                 if ( '' == trim($t) ) {
1108                                                         if ( $pstack ) {
1109                                                                 $text .= $pstack.'<br/>';
1110                                                                 $pstack = false;
1111                                                                 $this->mLastSection = 'p';
1112                                                         } else {
1113                                                                 if ($this->mLastSection != 'p' ) {
1114                                                                         $text .= $this->closeParagraph();
1115                                                                         $this->mLastSection = '';
1116                                                                         $pstack = "<p>";
1117                                                                 } else {
1118                                                                         $pstack = '</p><p>';
1119                                                                 }
1120                                                         }
1121                                                 } else {
1122                                                         if ( $pstack ) {
1123                                                                 $text .= $pstack;
1124                                                                 $pstack = false;
1125                                                                 $this->mLastSection = 'p';
1126                                                         } else if ($this->mLastSection != 'p') {
1127                                                                 $text .= $this->closeParagraph().'<p>';
1128                                                                 $this->mLastSection = 'p';
1129                                                         }
1130                                                 }
1131                                         }
1132                                 }
1133                         }
1134                         if ($pstack === false) {
1135                                 $text .= $t."\n";
1136                         }
1137                 }
1138                 while ( $npl ) {
1139                         $text .= $this->closeList( $pref2{$npl-1} );
1140                         --$npl;
1141                 }
1142                 if ( "" != $this->mLastSection ) {
1143                         $text .= "</" . $this->mLastSection . ">";
1144                         $this->mLastSection = "";
1145                 }
1146
1147                 wfProfileOut( $fname );
1148                 return $text;
1149         }
1150
1151         function getVariableValue( $index ) {
1152                 global $wgLang, $wgSitename, $wgServer;
1153
1154                 switch ( $index ) {
1155                         case MAG_CURRENTMONTH:
1156                                 return date( "m" );
1157                         case MAG_CURRENTMONTHNAME:
1158                                 return $wgLang->getMonthName( date("n") );
1159                         case MAG_CURRENTMONTHNAMEGEN:
1160                                 return $wgLang->getMonthNameGen( date("n") );
1161                         case MAG_CURRENTDAY:
1162                                 return date("j");
1163                         case MAG_CURRENTDAYNAME:
1164                                 return $wgLang->getWeekdayName( date("w")+1 );
1165                         case MAG_CURRENTYEAR:
1166                                 return date( "Y" );
1167                         case MAG_CURRENTTIME:
1168                                 return $wgLang->time( wfTimestampNow(), false );
1169                         case MAG_NUMBEROFARTICLES:
1170                                 return wfNumberOfArticles();
1171                         case MAG_SITENAME:
1172                                 return $wgSitename;
1173                         case MAG_SERVER:
1174                                 return $wgServer;
1175                         default:
1176                                 return NULL;
1177                 }
1178         }
1179
1180         function initialiseVariables()
1181         {
1182                 global $wgVariableIDs;
1183                 $this->mVariables = array();
1184                 foreach ( $wgVariableIDs as $id ) {
1185                         $mw =& MagicWord::get( $id );
1186                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1187                 }
1188         }
1189
1190         /* private */ function replaceVariables( $text, $args = array() )
1191         {
1192                 global $wgLang, $wgScript, $wgArticlePath;
1193
1194                 $fname = "Parser::replaceVariables";
1195                 wfProfileIn( $fname );
1196
1197                 $bail = false;
1198                 if ( !$this->mVariables ) {
1199                         $this->initialiseVariables();
1200                 }
1201                 $titleChars = Title::legalChars();
1202                 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1203
1204                 # This function is called recursively. To keep track of arguments we need a stack:
1205                 array_push( $this->mArgStack, $args );
1206
1207                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1208                 $GLOBALS['wgCurParser'] =& $this;
1209                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1210
1211                 array_pop( $this->mArgStack );
1212
1213                 return $text;
1214         }
1215
1216         function braceSubstitution( $matches )
1217         {
1218                 global $wgLinkCache, $wgLang;
1219                 $fname = "Parser::braceSubstitution";
1220                 $found = false;
1221                 $nowiki = false;
1222                 $title = NULL;
1223
1224                 # $newline is an optional newline character before the braces
1225                 # $part1 is the bit before the first |, and must contain only title characters
1226                 # $args is a list of arguments, starting from index 0, not including $part1
1227
1228                 $newline = $matches[1];
1229                 $part1 = $matches[2];
1230                 # If the third subpattern matched anything, it will start with |
1231                 if ( $matches[3] !== "" ) {
1232                         $args = explode( "|", substr( $matches[3], 1 ) );
1233                 } else {
1234                         $args = array();
1235                 }
1236                 $argc = count( $args );
1237
1238                 # SUBST
1239                 $mwSubst =& MagicWord::get( MAG_SUBST );
1240                 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1241                         if ( $this->mOutputType != OT_WIKI ) {
1242                                 # Invalid SUBST not replaced at PST time
1243                                 # Return without further processing
1244                                 $text = $matches[0];
1245                                 $found = true;
1246                         }
1247                 } elseif ( $this->mOutputType == OT_WIKI ) {
1248                         # SUBST not found in PST pass, do nothing
1249                         $text = $matches[0];
1250                         $found = true;
1251                 }
1252
1253                 # MSG, MSGNW and INT
1254                 if ( !$found ) {
1255                         # Check for MSGNW:
1256                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1257                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1258                                 $nowiki = true;
1259                         } else {
1260                                 # Remove obsolete MSG:
1261                                 $mwMsg =& MagicWord::get( MAG_MSG );
1262                                 $mwMsg->matchStartAndRemove( $part1 );
1263                         }
1264
1265                         # Check if it is an internal message
1266                         $mwInt =& MagicWord::get( MAG_INT );
1267                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1268                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1269                                         $text = wfMsgReal( $part1, $args, true );
1270                                         $found = true;
1271                                 }
1272                         }
1273                 }
1274
1275                 # NS
1276                 if ( !$found ) {
1277                         # Check for NS: (namespace expansion)
1278                         $mwNs = MagicWord::get( MAG_NS );
1279                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1280                                 if ( intval( $part1 ) ) {
1281                                         $text = $wgLang->getNsText( intval( $part1 ) );
1282                                         $found = true;
1283                                 } else {
1284                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1285                                         if ( !is_null( $index ) ) {
1286                                                 $text = $wgLang->getNsText( $index );
1287                                                 $found = true;
1288                                         }
1289                                 }
1290                         }
1291                 }
1292
1293                 # LOCALURL and LOCALURLE
1294                 if ( !$found ) {
1295                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1296                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1297
1298                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1299                                 $func = 'getLocalURL';
1300                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1301                                 $func = 'escapeLocalURL';
1302                         } else {
1303                                 $func = '';
1304                         }
1305
1306                         if ( $func !== '' ) {
1307                                 $title = Title::newFromText( $part1 );
1308                                 if ( !is_null( $title ) ) {
1309                                         if ( $argc > 0 ) {
1310                                                 $text = $title->$func( $args[0] );
1311                                         } else {
1312                                                 $text = $title->$func();
1313                                         }
1314                                         $found = true;
1315                                 }
1316                         }
1317                 }
1318
1319                 # Internal variables
1320                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1321                         $text = $this->mVariables[$part1];
1322                         $found = true;
1323                         $this->mOutput->mContainsOldMagic = true;
1324                 }
1325
1326                 # Arguments input from the caller
1327                 $inputArgs = end( $this->mArgStack );
1328                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1329                         $text = $inputArgs[$part1];
1330                         $found = true;
1331                 }
1332
1333                 # Load from database
1334                 if ( !$found ) {
1335                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1336                         if ( !is_null( $title ) && !$title->isExternal() ) {
1337                                 # Check for excessive inclusion
1338                                 $dbk = $title->getPrefixedDBkey();
1339                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1340                                         $article = new Article( $title );
1341                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1342                                         if ( $articleContent !== false ) {
1343                                                 $found = true;
1344                                                 $text = $articleContent;
1345
1346                                         }
1347                                 }
1348
1349                                 # If the title is valid but undisplayable, make a link to it
1350                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1351                                         $text = "[[" . $title->getPrefixedText() . "]]";
1352                                         $found = true;
1353                                 }
1354                         }
1355                 }
1356
1357                 # Recursive parsing, escaping and link table handling
1358                 # Only for HTML output
1359                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1360                         $text = wfEscapeWikiText( $text );
1361                 } elseif ( $this->mOutputType == OT_HTML && $found ) {
1362                         # Clean up argument array
1363                         $assocArgs = array();
1364                         $index = 1;
1365                         foreach( $args as $arg ) {
1366                                 $eqpos = strpos( $arg, "=" );
1367                                 if ( $eqpos === false ) {
1368                                         $assocArgs[$index++] = $arg;
1369                                 } else {
1370                                         $name = trim( substr( $arg, 0, $eqpos ) );
1371                                         $value = trim( substr( $arg, $eqpos+1 ) );
1372                                         if ( $value === false ) {
1373                                                 $value = "";
1374                                         }
1375                                         if ( $name !== false ) {
1376                                                 $assocArgs[$name] = $value;
1377                                         }
1378                                 }
1379                         }
1380
1381                         # Do not enter included links in link table
1382                         if ( !is_null( $title ) ) {
1383                                 $wgLinkCache->suspend();
1384                         }
1385
1386                         # Run full parser on the included text
1387                         $text = $this->strip( $text, $this->mStripState );
1388                         $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
1389
1390                         # Add the result to the strip state for re-inclusion after
1391                         # the rest of the processing
1392                         $text = $this->insertStripItem( $text, $this->mStripState );
1393
1394                         # Resume the link cache and register the inclusion as a link
1395                         if ( !is_null( $title ) ) {
1396                                 $wgLinkCache->resume();
1397                                 $wgLinkCache->addLinkObj( $title );
1398                         }
1399                 }
1400
1401                 if ( !$found ) {
1402                         return $matches[0];
1403                 } else {
1404                         return $newline . $text;
1405                 }
1406         }
1407
1408         # Returns true if the function is allowed to include this entity
1409         function incrementIncludeCount( $dbk )
1410         {
1411                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1412                         $this->mIncludeCount[$dbk] = 0;
1413                 }
1414                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1415                         return true;
1416                 } else {
1417                         return false;
1418                 }
1419         }
1420
1421
1422         # Cleans up HTML, removes dangerous tags and attributes
1423         /* private */ function removeHTMLtags( $text )
1424         {
1425                 $fname = "Parser::removeHTMLtags";
1426                 wfProfileIn( $fname );
1427                 $htmlpairs = array( # Tags that must be closed
1428                         "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1429                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1430                         "strike", "strong", "tt", "var", "div", "center",
1431                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1432                         "ruby", "rt" , "rb" , "rp", "p"
1433                 );
1434                 $htmlsingle = array(
1435                         "br", "hr", "li", "dt", "dd"
1436                 );
1437                 $htmlnest = array( # Tags that can be nested--??
1438                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1439                         "dl", "font", "big", "small", "sub", "sup"
1440                 );
1441                 $tabletags = array( # Can only appear inside table
1442                         "td", "th", "tr"
1443                 );
1444
1445                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1446                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1447
1448                 $htmlattrs = $this->getHTMLattrs () ;
1449
1450                 # Remove HTML comments
1451                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1452
1453                 $bits = explode( "<", $text );
1454                 $text = array_shift( $bits );
1455                 $tagstack = array(); $tablestack = array();
1456
1457                 foreach ( $bits as $x ) {
1458                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1459                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1460                           $x, $regs );
1461                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1462                         error_reporting( $prev );
1463
1464                         $badtag = 0 ;
1465                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1466                                 # Check our stack
1467                                 if ( $slash ) {
1468                                         # Closing a tag...
1469                                         if ( ! in_array( $t, $htmlsingle ) &&
1470                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1471                                                 array_push( $tagstack, $ot );
1472                                                 $badtag = 1;
1473                                         } else {
1474                                                 if ( $t == "table" ) {
1475                                                         $tagstack = array_pop( $tablestack );
1476                                                 }
1477                                                 $newparams = "";
1478                                         }
1479                                 } else {
1480                                         # Keep track for later
1481                                         if ( in_array( $t, $tabletags ) &&
1482                                           ! in_array( "table", $tagstack ) ) {
1483                                                 $badtag = 1;
1484                                         } else if ( in_array( $t, $tagstack ) &&
1485                                           ! in_array ( $t , $htmlnest ) ) {
1486                                                 $badtag = 1 ;
1487                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1488                                                 if ( $t == "table" ) {
1489                                                         array_push( $tablestack, $tagstack );
1490                                                         $tagstack = array();
1491                                                 }
1492                                                 array_push( $tagstack, $t );
1493                                         }
1494                                         # Strip non-approved attributes from the tag
1495                                         $newparams = $this->fixTagAttributes($params);
1496
1497                                 }
1498                                 if ( ! $badtag ) {
1499                                         $rest = str_replace( ">", "&gt;", $rest );
1500                                         $text .= "<$slash$t $newparams$brace$rest";
1501                                         continue;
1502                                 }
1503                         }
1504                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1505                 }
1506                 # Close off any remaining tags
1507                 while ( $t = array_pop( $tagstack ) ) {
1508                         $text .= "</$t>\n";
1509                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1510                 }
1511                 wfProfileOut( $fname );
1512                 return $text;
1513         }
1514
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections when the title is editable and the option is enabled
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text is the page HTML containing <h1>..<h6> elements; the return value is the
 * same HTML with the headlines replaced by their decorated versions and, when
 * enabled, the table of contents appended to the first block of text.
 *
 */

        /* private */ function formatHeadings( $text )
        {
                $doNumberHeadings = $this->mOptions->getNumberHeadings();
                $doShowToc = $this->mOptions->getShowToc();
                if( !$this->mTitle->userCanEdit() ) {
                        $showEditLink = 0;
                        $rightClickHack = 0;
                } else {
                        $showEditLink = $this->mOptions->getEditSection();
                        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
                }

                # Inhibit editsection links if requested in the page
                $esw =& MagicWord::get( MAG_NOEDITSECTION );
                if( $esw->matchAndRemove( $text ) ) {
                        $showEditLink = 0;
                }
                # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
                # do not add TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if( $mw->matchAndRemove( $text ) ) {
                        $doShowToc = 0;
                }

                # never add the TOC to the Main Page. This is an entry page that should not
                # be more than 1-2 screens large anyway
                if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
                        $doShowToc = 0;
                }

                # Get all headlines for numbering them and adding funky stuff like [edit]
                # links - this is for later, but we need the number of headlines right now
                # $matches[1]: heading level, $matches[2]: rest of the opening tag up to and
                # including ">", $matches[3]: heading content
                $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

                # if there are fewer than 4 headlines in the article, do not show TOC
                if( $numMatches < 4 ) {
                        $doShowToc = 0;
                }

                # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
                # override above conditions and always show TOC
                $mw =& MagicWord::get( MAG_FORCETOC );
                if ($mw->matchAndRemove( $text ) ) {
                        $doShowToc = 1;
                }


                # We need this to perform operations on the HTML
                $sk =& $this->mOptions->getSkin();

                # headline counter
                $headlineCount = 0;

                # Ugh .. the TOC should have neat indentation levels which can be
                # passed to the skin functions. These are determined here
                $toclevel = 0;
                $toc = "";
                $full = "";
                $head = array();           # decorated headline per headline index
                $sublevelCount = array();  # running headline count per heading level
                $refers = array();         # times each canonized headline was seen so far
                $level = 0;
                $prevlevel = 0;
                foreach( $matches[3] as $headline ) {
                        $numbering = "";
                        if( $level ) {
                                $prevlevel = $level;
                        }
                        $level = $matches[1][$headlineCount];
                        if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
                                # reset when we enter a new level
                                $sublevelCount[$level] = 0;
                                $toc .= $sk->tocIndent( $level - $prevlevel );
                                $toclevel += $level - $prevlevel;
                        }
                        if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
                                # reset when we step back a level
                                $sublevelCount[$level+1]=0;
                                $toc .= $sk->tocUnindent( $prevlevel - $level );
                                $toclevel -= $prevlevel - $level;
                        }
                        # count number of headlines for each level
                        if( !isset( $sublevelCount[$level] ) ) {
                                $sublevelCount[$level] = 0;
                        }
                        $sublevelCount[$level]++;
                        if( $doNumberHeadings || $doShowToc ) {
                                # Build the dotted number from the counters of all levels up to this one
                                $dot = 0;
                                for( $i = 1; $i <= $level; $i++ ) {
                                        if( !empty( $sublevelCount[$i] ) ) {
                                                if( $dot ) {
                                                        $numbering .= ".";
                                                }
                                                $numbering .= $sublevelCount[$i];
                                                $dot = 1;
                                        }
                                }
                        }

                        # The canonized header is a version of the header text safe to use for links
                        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

                        # strip out HTML
                        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                        $tocline = trim( $canonized_headline );
                        $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));

                        # count how many times this anchor text occurred so we can track dupes
                        if( !isset( $refers[$canonized_headline] ) ) {
                                $refers[$canonized_headline] = 0;
                        }
                        $refers[$canonized_headline]++;
                        $occurrence = $refers[$canonized_headline];

                        # Prepend the number to the heading text

                        if( $doNumberHeadings || $doShowToc ) {
                                $tocline = $numbering . " " . $tocline;

                                # Don't number the heading if it is the only one (looks silly)
                                if( $doNumberHeadings && count( $matches[3] ) > 1) {
                                        # the two are different if the line contains a link
                                        $headline=$numbering . " " . $headline;
                                }
                        }

                        # Create the anchor for linking from the TOC to the section;
                        # repeated headlines get a "_N" suffix
                        $anchor = $canonized_headline;
                        if( $occurrence > 1 ) {
                                $anchor .= "_" . $occurrence;
                        }
                        if( $doShowToc ) {
                                $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                        }

                        $head[$headlineCount] = "";
                        if( $showEditLink ) {
                                $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
                        }

                        # Add the edit section span
                        if( $rightClickHack ) {
                                $headline = $sk->editSectionScript($headlineCount+1,$headline);
                        }

                        # A double quote is the only character left in the anchor that could end
                        # the name="..." attribute early (& < > were replaced by "_" above), so
                        # encode it for the attribute; the anchor value itself stays the same
                        $anchorAttr = str_replace( '"', '&quot;', $anchor );

                        # give headline the correct <h#> tag
                        $head[$headlineCount] .= "<a name=\"$anchorAttr\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

                        $headlineCount++;
                }

                if( $doShowToc ) {
                        # Close the indentation levels that are still open, then wrap the lines
                        $toc .= $sk->tocUnindent( $toclevel );
                        $toc = $sk->tocTable( $toc );
                }

                # split up and insert constructed headlines

                $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
                $i = 0;

                foreach( $blocks as $block ) {
                        # An [edit] link for the top block of text used to be added here.
                        # It is disabled because it broke block formatting, for example
                        # a bullet point in the top line.
                        $full .= $block;
                        if( $doShowToc && !$i) {
                        # Top anchor now in skin
                                $full = $full.$toc;
                        }

                        # Block $i is followed by headline $i
                        if( !empty( $head[$i] ) ) {
                                $full .= $head[$i];
                        }
                        $i++;
                }

                return $full;
        }
1709
1710         /* private */ function doMagicISBN( &$tokenizer )
1711         {
1712                 global $wgLang;
1713
1714                 # Check whether next token is a text token
1715                 # If yes, fetch it and convert the text into a
1716                 # Special::BookSources link
1717                 $token = $tokenizer->previewToken();
1718                 while ( $token["type"] == "" )
1719                 {
1720                         $tokenizer->nextToken();
1721                         $token = $tokenizer->previewToken();
1722                 }
1723                 if ( $token["type"] == "text" )
1724                 {
1725                         $token = $tokenizer->nextToken();
1726                         $x = $token["text"];
1727                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1728
1729                         $isbn = $blank = "" ;
1730                         while ( " " == $x{0} ) {
1731                                 $blank .= " ";
1732                                 $x = substr( $x, 1 );
1733                         }
1734                         while ( strstr( $valid, $x{0} ) != false ) {
1735                                 $isbn .= $x{0};
1736                                 $x = substr( $x, 1 );
1737                         }
1738                         $num = str_replace( "-", "", $isbn );
1739                         $num = str_replace( " ", "", $num );
1740
1741                         if ( "" == $num ) {
1742                                 $text = "ISBN $blank$x";
1743                         } else {
1744                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1745                                 $text = "<a href=\"" .
1746                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1747                                         "\" class=\"internal\">ISBN $isbn</a>";
1748                                 $text .= $x;
1749                         }
1750                 } else {
1751                         $text = "ISBN ";
1752                 }
1753                 return $text;
1754         }
1755         /* private */ function doMagicRFC( &$tokenizer )
1756         {
1757                 global $wgLang;
1758
1759                 # Check whether next token is a text token
1760                 # If yes, fetch it and convert the text into a
1761                 # link to an RFC source
1762                 $token = $tokenizer->previewToken();
1763                 while ( $token["type"] == "" )
1764                 {
1765                         $tokenizer->nextToken();
1766                         $token = $tokenizer->previewToken();
1767                 }
1768                 if ( $token["type"] == "text" )
1769                 {
1770                         $token = $tokenizer->nextToken();
1771                         $x = $token["text"];
1772                         $valid = "0123456789";
1773
1774                         $rfc = $blank = "" ;
1775                         while ( " " == $x{0} ) {
1776                                 $blank .= " ";
1777                                 $x = substr( $x, 1 );
1778                         }
1779                         while ( strstr( $valid, $x{0} ) != false ) {
1780                                 $rfc .= $x{0};
1781                                 $x = substr( $x, 1 );
1782                         }
1783
1784                         if ( "" == $rfc ) {
1785                                 $text .= "RFC $blank$x";
1786                         } else {
1787                                 $url = wfmsg( "rfcurl" );
1788                                 $url = str_replace( "$1", $rfc, $url);
1789                                 $sk =& $this->mOptions->getSkin();
1790                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1791                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1792                         }
1793                 } else {
1794                         $text = "RFC ";
1795                 }
1796                 return $text;
1797         }
1798
1799         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1800         {
1801                 $this->mOptions = $options;
1802                 $this->mTitle =& $title;
1803                 $this->mOutputType = OT_WIKI;
1804
1805                 if ( $clearState ) {
1806                         $this->clearState();
1807                 }
1808
1809                 $stripState = false;
1810                 $pairs = array(
1811                         "\r\n" => "\n",
1812                         );
1813                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
1814                 // now with regexes
1815                 $pairs = array(
1816                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1817                         "/<br *?>/i" => "<br/>",
1818                 );
1819                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1820                 $text = $this->strip( $text, $stripState, false );
1821                 $text = $this->pstPass2( $text, $user );
1822                 $text = $this->unstrip( $text, $stripState );
1823                 return $text;
1824         }
1825
1826         /* private */ function pstPass2( $text, &$user )
1827         {
1828                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1829
1830                 # Variable replacement
1831                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1832                 $text = $this->replaceVariables( $text );
1833
1834                 # Signatures
1835                 #
1836                 $n = $user->getName();
1837                 $k = $user->getOption( "nickname" );
1838                 if ( "" == $k ) { $k = $n; }
1839                 if(isset($wgLocaltimezone)) {
1840                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1841                 }
1842                 /* Note: this is an ugly timezone hack for the European wikis */
1843                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1844                   " (" . date( "T" ) . ")";
1845                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1846
1847                 $text = preg_replace( "/~~~~~/", $d, $text );
1848                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1849                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1850                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1851                   Namespace::getUser() ) . ":$n|$k]]", $text );
1852
1853                 # Context links: [[|name]] and [[name (context)|]]
1854                 #
1855                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1856                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1857                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1858                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1859
1860                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1861                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1862                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1863                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1864                                                                                                                 # [[ns:page (cont)|]]
1865                 $context = "";
1866                 $t = $this->mTitle->getText();
1867                 if ( preg_match( $conpat, $t, $m ) ) {
1868                         $context = $m[2];
1869                 }
1870                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1871                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1872                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1873
1874                 if ( "" == $context ) {
1875                         $text = preg_replace( $p2, "[[\\1]]", $text );
1876                 } else {
1877                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1878                 }
1879
1880                 /*
1881                 $mw =& MagicWord::get( MAG_SUBST );
1882                 $wgCurParser = $this->fork();
1883                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1884                 $this->merge( $wgCurParser );
1885                 */
1886
1887                 # Trim trailing whitespace
1888                 # MAG_END (__END__) tag allows for trailing
1889                 # whitespace to be deliberately included
1890                 $text = rtrim( $text );
1891                 $mw =& MagicWord::get( MAG_END );
1892                 $mw->matchAndRemove( $text );
1893
1894                 return $text;
1895         }
1896
1897         # Set up some variables which are usually set up in parse()
1898         # so that an external function can call some class members with confidence
1899         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1900         {
1901                 $this->mTitle =& $title;
1902                 $this->mOptions = $options;
1903                 $this->mOutputType = $outputType;
1904                 if ( $clearState ) {
1905                         $this->clearState();
1906                 }
1907         }
1908
1909         function transformMsg( $text, $options ) {
1910                 global $wgTitle;
1911                 static $executing = false;
1912
1913                 # Guard against infinite recursion
1914                 if ( $executing ) {
1915                         return $text;
1916                 }
1917                 $executing = true;
1918
1919                 $this->mTitle = $wgTitle;
1920                 $this->mOptions = $options;
1921                 $this->mOutputType = OT_MSG;
1922                 $this->clearState();
1923                 $text = $this->replaceVariables( $text );
1924
1925                 $executing = false;
1926                 return $text;
1927         }
1928 }
1929
# Value holder for the result of a parse: the output text, the language links,
# the category links, and a flag recording whether old-style magic was present.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # All arguments are optional; the defaults give an empty output object.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators. Each returns whatever wfSetVar() returns
        # (presumably the previous value -- confirm against wfSetVar()).
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold another ParserOutput's link lists and old-magic flag into this one.
        # The language and category lists of $other are appended to the matching
        # lists of this object; the flag is OR-ed. mText is not touched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Categories must come from $other's category list, not from the
                # (already merged) language links of this object.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1959
# Bundle of settings consulted during parsing. Values are filled in from site
# globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
        # All member variables are private; use the accessors below
        var $mUseTeX;                    # <math> tags are expanded with texvc
        var $mUseCategoryMagic;          # [[Category:xxxx]] tags get special treatment
        var $mUseDynamicDates;           # Dates are formatted with $wgDateFormatter
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # External images may appear inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Index of the date format
        var $mEditSection;               # "edit section" links are created
        var $mEditSectionOnRightClick;   # JavaScript for right-click section editing is generated
        var $mNumberHeadings;            # Headings are numbered automatically
        var $mShowToc;                   # Table of contents is shown

        #
        # Getters
        #
        function getUseTeX()
        {
                return $this->mUseTeX;
        }

        function getUseCategoryMagic()
        {
                return $this->mUseCategoryMagic;
        }

        function getUseDynamicDates()
        {
                return $this->mUseDynamicDates;
        }

        function getInterwikiMagic()
        {
                return $this->mInterwikiMagic;
        }

        function getAllowExternalImages()
        {
                return $this->mAllowExternalImages;
        }

        function getSkin()
        {
                return $this->mSkin;
        }

        function getDateFormat()
        {
                return $this->mDateFormat;
        }

        function getEditSection()
        {
                return $this->mEditSection;
        }

        function getEditSectionOnRightClick()
        {
                return $this->mEditSectionOnRightClick;
        }

        function getNumberHeadings()
        {
                return $this->mNumberHeadings;
        }

        function getShowToc()
        {
                return $this->mShowToc;
        }

        #
        # Setters. Every one delegates to wfSetVar(), except setSkin() which
        # delegates to wfSetRef(); the helper's result is passed back to the caller.
        #
        function setUseTeX( $x )
        {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function setUseCategoryMagic( $x )
        {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        function setUseDynamicDates( $x )
        {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function setInterwikiMagic( $x )
        {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function setAllowExternalImages( $x )
        {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function setSkin( $x )
        {
                return wfSetRef( $this->mSkin, $x );
        }

        function setDateFormat( $x )
        {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function setEditSection( $x )
        {
                return wfSetVar( $this->mEditSection, $x );
        }

        function setEditSectionOnRightClick( $x )
        {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function setNumberHeadings( $x )
        {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function setShowToc( $x )
        {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Factory: build a ParserOptions object and initialise it from $user.
        /* static */ function newFromUser( &$user )
        {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fill every option. Site-wide switches come from the $wg* globals; the
        # skin and the remaining options come from the given user. When
        # $userInput is empty, a new User object marked as loaded is used instead.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # User preferences
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }

}
2032
# Regex callback used in Parser::replaceVariables.
# Forwards the match array to braceSubstitution() on the parser held in the
# global $wgCurParser and returns that method's result.
function wfBraceSubstitution( $matches )
{
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->braceSubstitution( $matches );
}
2039
2040 ?>