includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36
  # Maximum number of inclusions allowed for any single page
  define( 'MAX_INCLUDE_REPEAT', 5 );

  # Output modes that may be stored in $mOutputType
  define( 'OT_HTML', 1 );
  define( 'OT_WIKI', 2 );
  define( 'OT_MSG', 3 );

  # Common prefix of the placeholder markers generated while escaping;
  # used in at least two functions
  define( 'UNIQ_PREFIX', 'NaodW29' );
  46
  47 class Parser
  48 {
  49         # Cleared with clearState():
  50         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  51         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  52
  53         # Temporary:
  54         var $mOptions, $mTitle, $mOutputType;
  55
  # Constructor for engines that look for __construct(). A method that is
  # merely named after the class is no longer treated as a constructor by
  # current PHP, which would leave the parser state uninitialised.
  # Declared before Parser() on purpose.
  function __construct()
  {
      $this->clearState();
  }

  # PHP 4-style constructor. Kept so that PHP 4, subclasses calling
  # parent::Parser() and code calling $parser->Parser() keep working.
  function Parser()
  {
      $this->__construct();
  }
  60
  # Puts every per-parse member back to its initial value so that the
  # same Parser object can be used for another text.
  function clearState()
  {
      $this->mOutput = new ParserOutput;
      $this->mAutonumber = 0;
      $this->mLastSection = "";
      $this->mDTopen = false;
      # $mInPre is declared among the members "cleared with clearState()"
      # but used to be left alone here, so a value set during one parse
      # could leak into the next one.
      $this->mInPre = false;
      $this->mVariables = false;
      $this->mIncludeCount = array();
      $this->mStripState = array();
      $this->mArgStack = array();
  }
  72
  73         # First pass--just handle <nowiki> sections, pass the rest off
  74         # to internalParse() which does all the real work.
  75         #
  76         # Returns a ParserOutput
  77         #
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
      # $title is kept by reference in $this->mTitle and $options in
      # $this->mOptions; the output type is always set to OT_HTML.
      # $linestart is handed on to internalParse() and doBlockLevels().
      # With $clearState = false the state left by an earlier call is kept.
      $fname = "Parser::parse";
      wfProfileIn( $fname );

      if ( $clearState ) {
          $this->clearState();
      }

      $this->mOptions = $options;
      $this->mTitle =& $title;
      $this->mOutputType = OT_HTML;

      $text = $this->strip( $text, $this->mStripState );
      $text = $this->internalParse( $text, $linestart );
      $text = $this->unstrip( $text, $this->mStripState );

      # Clean up special characters, only run once, right before doBlockLevels
      $fixtags = array(
          "/<hr *>/i" => '<hr/>',
          "/<br *>/i" => '<br/>',
          "/<center *>/i"=>'<div class="center">',
          "/<\\/center *>/i" => '</div>',
          # Escape every ampersand that does not start a numeric character
          # reference (&#xHH; or &#DD;) or something shaped like a named
          # entity. The hexadecimal class is A-F/a-f only: the former
          # "A-fa-f" range also covered G-Z and some punctuation.
          # Note that we probably ought to be more careful about named entities.
          '/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
      );
      $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

      # only once and last
      $text = $this->doBlockLevels( $text, $linestart );

      $this->mOutput->setText( $text );
      wfProfileOut( $fname );
      return $this->mOutput;
  }
 114
 # Returns two mt_rand() draws from 0..0x7fffffff, each written in
 # hexadecimal and joined without a separator.
 /* static */ function getRandomString()
 {
     $first = dechex( mt_rand( 0, 0x7fffffff ) );
     $second = dechex( mt_rand( 0, 0x7fffffff ) );
     return $first . $second;
 }
 119
 120         # Replaces all occurrences of <$tag>content</$tag> in the text
 121         # with a random marker and returns the new text. the output parameter
 122         # $content will be an associative array filled with data on the form
 123         # $unique_marker => content.
 124
 125         # If $content is already set, the additional entries will be appended
 126
 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
     # All markers produced by one call share a random stem and are told
     # apart by a running counter printed as eight hexadecimal digits.
     $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
     if ( !$content ) {
         $content = array( );
     }
     $n = 1;
     $stripped = "";

     # Quote the tag name so that it is always matched literally
     $quotedTag = preg_quote( $tag, "/" );
     $openTag = "/<\\s*{$quotedTag}\\s*>/i";
     $closeTag = "/<\\/\\s*{$quotedTag}\\s*>/i";

     while ( "" != $text ) {
         $p = preg_split( $openTag, $text, 2 );
         $stripped .= $p[0];
         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
             # No further opening tag, or nothing follows it
             $text = "";
         } else {
             $q = preg_split( $closeTag, $p[1], 2 );
             $marker = $rnd . sprintf("%08X", $n++);
             $content[$marker] = $q[0];
             $stripped .= $marker;
             # Without a closing tag the rest of the text has been captured
             # as content, so nothing is left to scan. Reading $q[1] in that
             # case used to hit an undefined offset.
             $text = isset( $q[1] ) ? $q[1] : "";
         }
     }
     return $stripped;
 }
 150
 151         # Strips <nowiki>, <pre> and <math>
 152         # Returns the text, and fills an array with data needed in unstrip()
 153         # If the $state is already a valid strip state, it adds to the state
 154         #
 function strip( $text, &$state )
 {
     # Tags are pulled out in a fixed order: nowiki, hiero (only when
     # $wgUseWikiHiero is set), math (only when the options enable TeX)
     # and finally pre. For HTML output the stored content is rendered or
     # escaped; for any other output type the original tag is wrapped
     # around it again.
     $uniq_prefix = UNIQ_PREFIX;
     $render = ( OT_HTML == $this->mOutputType );

     $nowiki_content = array();
     $text = Parser::extractTags( "nowiki", $text, $nowiki_content, $uniq_prefix );
     foreach ( $nowiki_content as $marker => $content ) {
         $nowiki_content[$marker] = $render
             ? wfEscapeHTMLTagsOnly( $content )
             : "<nowiki>$content</nowiki>";
     }

     $hiero_content = array();
     if ( $GLOBALS['wgUseWikiHiero'] ) {
         $text = Parser::extractTags( "hiero", $text, $hiero_content, $uniq_prefix );
         foreach ( $hiero_content as $marker => $content ) {
             $hiero_content[$marker] = $render
                 ? WikiHiero( $content, WH_MODE_HTML )
                 : "<hiero>$content</hiero>";
         }
     }

     $math_content = array();
     if ( $this->mOptions->getUseTeX() ) {
         $text = Parser::extractTags( "math", $text, $math_content, $uniq_prefix );
         foreach ( $math_content as $marker => $content ) {
             $math_content[$marker] = $render
                 ? renderMath( $content )
                 : "<math>$content</math>";
         }
     }

     $pre_content = array();
     $text = Parser::extractTags( "pre", $text, $pre_content, $uniq_prefix );
     foreach ( $pre_content as $marker => $content ) {
         $pre_content[$marker] = $render
             ? "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>"
             : "<pre>$content</pre>";
     }

     # No state yet: create it, including the (empty) bucket used by
     # insertStripItem()
     if ( !$state ) {
         $state = array(
             'nowiki' => $nowiki_content,
             'hiero' => $hiero_content,
             'math' => $math_content,
             'pre' => $pre_content,
             'item' => array()
         );
         return $text;
     }

     # Otherwise add the new entries to the existing buckets; entries that
     # are already present win
     $state['nowiki'] += $nowiki_content;
     $state['hiero'] += $hiero_content;
     $state['math'] += $math_content;
     $state['pre'] += $pre_content;
     return $text;
 }
 225
 # Puts the stored content back in place of its markers. Both the
 # buckets of $state and the entries inside each bucket are walked from
 # last to first, otherwise nested tags would be corrupted.
 function unstrip( $text, &$state )
 {
     $bucket = end( $state );
     while ( $bucket !== false ) {
         $replacement = end( $bucket );
         while ( $replacement !== false ) {
             # key() yields the marker belonging to the current entry
             $text = str_replace( key( $bucket ), $replacement, $text );
             $replacement = prev( $bucket );
         }
         $bucket = prev( $state );
     }

     return $text;
 }
 238
 239         # Add an item to the strip state
 240         # Returns the unique tag which must be inserted into the stripped text
 241         # The tag will be replaced with the original text in unstrip()
 242
 function insertStripItem( $text, &$state )
 {
     # A state that is still empty first receives its five empty buckets
     if ( !$state ) {
         $state = array();
         foreach ( array( 'nowiki', 'hiero', 'math', 'pre', 'item' ) as $bucket ) {
             $state[$bucket] = array();
         }
     }

     # The marker doubles as the key under which the text is stored
     $marker = UNIQ_PREFIX . '-item' . Parser::getRandomString();
     $state['item'][$marker] = $text;
     return $marker;
 }
 258
 259         # This method generates the list of subcategories and pages for a category
 function categoryMagic ()
 {
     # Returns an HTML fragment with a comma-separated list of
     # subcategories followed by a comma-separated list of pages, or ""
     # when category magic is switched off or the current title is not in
     # the category namespace.
     # NOTE(review): this reads $wgUser although the file header asks to
     # keep it out of the parser; other methods take the skin from
     # $this->mOptions - confirm whether that should be used here too.
     global $wgLang , $wgUser ;
     if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

     $cns = Namespace::getCategory() ;
     if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

     $r = "<br style=\"clear:both;\"/>\n";

     $sk =& $wgUser->getSkin() ;

     $articles = array() ;
     $children = array() ;
     $data = array () ;
     # The id is spliced into the SQL below, so force it to be an integer
     $id = intval ( $this->mTitle->getArticleID() ) ;

     $queries = array (
         # For existing categories
         "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id",
         # For non-existing categories
         "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id"
     ) ;
     foreach ( $queries AS $sql )
     {
         $res = wfQuery ( $sql, DB_READ ) ;
         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
         # Free each result set; previously only the last one was released
         wfFreeResult ( $res ) ;
     }

     # For all pages that link to this category
     foreach ( $data AS $x )
     {
         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
         if ( $t != "" ) $t .= ":" ;
         $t .= $x->cur_title ;

         if ( $x->cur_namespace == $cns ) {
             array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
         } else {
             array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
         }
     }

     # Showing subcategories
     if ( count ( $children ) > 0 )
     {
         asort ( $children ) ;
         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
         $r .= implode ( ", " , $children ) ;
     }

     # Showing pages in this category
     if ( count ( $articles ) > 0 )
     {
         $ti = $this->mTitle->getText() ;
         asort ( $articles ) ;
         $h =  wfMsg( "category_header", $ti );
         $r .= "<h2>{$h}</h2>\n" ;
         $r .= implode ( ", " , $articles ) ;
     }

     return $r ;
 }
 324
 # Returns the list of attribute names that are allowed on HTML tags
 # (no scripting etc.). "width" appears twice, as it always has.
 function getHTMLattrs ()
 {
     $general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" ) ;
     $br = array( "clear" ) ;
     $hr = array( "noshade" ) ;
     $quotes = array( "cite" ) ; # BLOCKQUOTE, Q
     $font = array( "size", "face", "color" ) ;
     # For various lists, mostly deprecated but safe
     $lists = array( "type", "start", "value", "compact" ) ;
     $tables = array(
         "summary", "width", "border", "frame", "rules",
         "cellspacing", "cellpadding", "valign", "char",
         "charoff", "colgroup", "col", "span", "abbr", "axis",
         "headers", "scope", "rowspan", "colspan"
     ) ;
     $css = array( "id", "class", "name", "style" ) ;

     return array_merge( $general, $br, $hr, $quotes, $font, $lists, $tables, $css ) ;
 }
 341
 # Reduces the attribute part of a tag to the attributes listed by
 # getHTMLattrs() and returns the trimmed result. Returns "" for blank
 # input, and also when something suspicious is found in a style
 # attribute.
 function fixTagAttributes ( $t )
 {
     if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
     $htmlattrs = $this->getHTMLattrs() ;

     # Strip non-approved attributes from the tag.
     # This used to be a preg_replace() with the /e modifier, which
     # evaluated the attribute value inside a double-quoted PHP string and
     # thereby allowed code such as {${...}} in a value to be executed.
     # The matches are now walked by hand and never evaluated.
     $filtered = "" ;
     $pos = 0 ;
     preg_match_all(
         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
         $t, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE ) ;
     foreach ( $matches AS $m )
     {
         # Text between two matches is carried over unchanged
         $start = $m[0][1] ;
         $filtered .= substr ( $t , $pos , $start - $pos ) ;
         $pos = $start + strlen ( $m[0][0] ) ;

         $name = $m[1][0] ;
         if ( in_array ( strtolower ( $name ) , $htmlattrs ) )
         {
             $filtered .= $name ;
             # Group 3 is missing when the attribute has no value
             if ( isset ( $m[3] ) && $m[3][0] != "" ) $filtered .= "=" . $m[3][0] ;
         }
     }
     $filtered .= substr ( $t , $pos ) ;
     $t = $filtered ;

     # Strip javascript "expression" from stylesheets. Brute force approach:
     # If anything offensive is found, all attributes of the HTML tag are dropped
     if( preg_match(
         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
         wfMungeToUtf8( $t ) ) )
     {
         $t="";
     }

     return trim ( $t ) ;
 }
 364
 # Converts the line-based table markup into HTML:
 #   {| attrs   opens a table         |}        closes it
 #   |- attrs   starts a new row      |+ text   caption
 #   | cell     data cell(s), several per line separated by ||
 #   ! cell     header cell(s), separated by !! or ||
 # Inside a cell, text before a single | is taken as its attributes.
 # Lines outside of any table are left untouched. Takes and returns the
 # whole text as one string.
 function doTableStuff ( $t )
 {
     $t = explode ( "\n" , $t ) ;

     # Parallel stacks with one entry per table that is currently open
     $td = array () ;  # Is currently a cell open?
     $ltd = array () ; # Element name of that cell: td, th or caption
     $tr = array () ;  # Is currently a tr tag open?
     $ltr = array () ; # Attributes for the next tr tag

     foreach ( $t AS $k => $x )
     {
         $x = trim ( $x ) ;
         $fc = substr ( $x , 0 , 1 ) ;
         $start = substr ( $x , 0 , 2 ) ;

         if ( "{|" == $start )
         {
             # Everything after the two-character "{|" is the attribute
             # list. Skipping three characters lost the first letter of
             # "{|border=1"; fixTagAttributes() trims a leading blank anyway.
             $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $tr , false ) ;
             array_push ( $ltr , "" ) ;
         }
         else if ( count ( $td ) == 0 )
         {
             # Not inside a table: don't do any of the following
         }
         else if ( "|}" == $start )
         {
             $z = "</table>\n" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
         }
         else if ( "|-" == $start ) # Allows for |---------------
         {
             # Drop the bar and all dashes; the rest are the row attributes
             $x = ltrim ( substr ( $x , 1 ) , "-" ) ;
             $z = "" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
             array_push ( $tr , false ) ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
         }
         else if ( "|" == $fc || "!" == $fc )
         {
             if ( "|+" == $start ) # Caption
             {
                 $fc = "+" ;
                 $x = substr ( $x , 1 ) ;
             }
             $after = substr ( $x , 1 ) ;
             if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
             $after = explode ( "||" , $after ) ;
             $t[$k] = "" ;
             foreach ( $after AS $theline )
             {
                 $z = "" ;
                 if ( $fc != "+" )
                 {
                     # Cells need a row; open one if there is none yet
                     $tra = array_pop ( $ltr ) ;
                     if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                     array_push ( $tr , true ) ;
                     array_push ( $ltr , "" ) ;
                 }

                 # Close the previous cell of this table, if any
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;

                 if ( $fc == "|" ) $l = "td" ;
                 else if ( $fc == "!" ) $l = "th" ;
                 else if ( $fc == "+" ) $l = "caption" ;
                 else $l = "" ;
                 array_push ( $ltd , $l ) ;

                 $y = explode ( "|" , $theline , 2 ) ;
                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                 $t[$k] .= $y ;
                 array_push ( $td , true ) ;
             }
         }
     }

     # Closing open cell, tr && table. The cell is closed with the element
     # it was opened with (this used to be always </td>), and all four
     # stacks are popped together.
     while ( count ( $td ) > 0 )
     {
         $l = array_pop ( $ltd ) ;
         if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
         array_pop ( $ltr ) ;
         $t[] = "</table>" ;
     }

     $t = implode ( "\n" , $t ) ;
     return $t ;
 }
 464
 # Runs the conversion passes over $text in a fixed order and returns
 # the result. $args is handed to replaceVariables(); $linestart is
 # accepted but not used here.
 function internalParse( $text, $linestart, $args = array() )
 {
     global $wgDateFormatter;

     $fname = "Parser::internalParse";
     wfProfileIn( $fname );

     $text = $this->removeHTMLtags( $text );
     $text = $this->replaceVariables( $text, $args );
     $text = $this->doHeadings( $text );

     if ( $this->mOptions->getUseDynamicDates() ) {
         $dateFormat = $this->mOptions->getDateFormat();
         $text = $wgDateFormatter->reformat( $dateFormat, $text );
     }

     # External links first, then the tokenizer pass, tables and headings
     $text = $this->replaceExternalLinks( $text );
     $text = $this->doTokenizedParser( $text );
     $text = $this->doTableStuff( $text );
     $text = $this->formatHeadings( $text );

     $sk =& $this->mOptions->getSkin();
     $text = $sk->transformContent( $text );

     # The category listing is appended only while the flag is unset
     if ( !isset( $this->categoryMagicDone ) ) {
         $text .= $this->categoryMagic();
         $this->categoryMagicDone = true;
     }

     wfProfileOut( $fname );
     return $text;
 }
 495
 496
 # Turns lines of the form "== text ==" into <hN>text</hN>, N being the
 # number of equal signs. The longest runs are handled first so that a
 # deep heading is not mistaken for a shallow one.
 /* private */ function doHeadings( $text )
 {
     $level = 6;
     while ( $level >= 1 ) {
         $bar = str_repeat( "=", $level );
         $pattern = '/^' . $bar . '(.+)' . $bar . '(\\s|$)/m';
         $replacement = "<h{$level}>\\1</h{$level}>\\2";
         $text = preg_replace( $pattern, $replacement, $text );
         $level--;
     }
     return $text;
 }
 506
 507         # Note: we have to do external links before the internal ones,
 508         # and otherwise take great care in the order of things here, so
 509         # that we don't end up interpreting some URLs twice.
 510
 /* private */ function replaceExternalLinks( $text )
 {
     $fname = "Parser::replaceExternalLinks";
     wfProfileIn( $fname );

     # Protocol => value of the $autonumber argument; handled in this order
     $protocols = array(
         "http" => true,
         "https" => true,
         "ftp" => false,
         "irc" => false,
         "gopher" => false,
         "news" => false,
         "mailto" => false
     );
     foreach ( $protocols as $protocol => $autonumber ) {
         $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
     }

     wfProfileOut( $fname );
     return $text;
 }
 525
 # Handles the links of one protocol in $s and returns the new text.
 # Bare URLs are linked first (or turned into images when $autonumber is
 # set and the options allow external images), then the bracketed forms
 # "[url]" and "[url text]" are converted. With $autonumber a bracketed
 # URL without text is shown as a running number.
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
     # Stands in for the protocol while the bare URLs are processed, so
     # that a URL just inserted is not matched a second time
     $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
     $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

     # Separators that belong to a URL when they occur inside of it but
     # not when they are its last character: in "go to www.foo.com, where"
     # the comma is not part of the URL, in "www.foo.com/123,2342,32.htm"
     # the commas are.
     $sep = ",;\.:";
     $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
     $images = "gif|png|jpg|jpeg";

     # PLEASE NOTE: the curly braces { } are not part of the regex, they
     # only tell PHP to insert the content of the variable.
     $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
       "((?i){$images})([^{$uc}]|$)/";

     $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
     $sk =& $this->mOptions->getSkin();

     # Use img tags only for HTTP urls
     if ( $autonumber and $this->mOptions->getAllowExternalImages() ) {
         $imageTag = $sk->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
         $s = preg_replace( $e1, "\\1" . $imageTag . "\\6", $s );
     }

     $placeholderUrl = "{$unique}:\\3";
     $escapedUrl = wfEscapeHTML( $placeholderUrl );
     $attributes = $sk->getExternalLinkAttributes( $placeholderUrl, $escapedUrl );
     $anchor = "<a href=\"{$placeholderUrl}\"" . $attributes . ">" . $escapedUrl . "</a>";
     $s = preg_replace( $e2, "\\1" . $anchor . "\\5", $s );
     $s = str_replace( $unique, $protocol, $s );

     # Bracketed links: cut the text at every "[protocol:". The blank put
     # in front makes sure the first piece is never a link; it is removed
     # again right away.
     $pieces = explode( "[{$protocol}:", " " . $s );
     $s = array_shift( $pieces );
     $s = substr( $s, 1 );

     $bare = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
     $labelled = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

     foreach ( $pieces as $line ) {
         if ( preg_match( $bare, $line, $m ) ) {
             # [url]
             $link = "{$protocol}:{$m[1]}";
             $trail = $m[2];
             if ( $autonumber ) {
                 $text = "[" . ++$this->mAutonumber . "]";
             } else {
                 $text = wfEscapeHTML( $link );
             }
         } else if ( preg_match( $labelled, $line, $m ) ) {
             # [url text]
             $link = "{$protocol}:{$m[1]}";
             $text = $m[2];
             $trail = $m[3];
         } else {
             # Not a link after all: put the piece back as it was
             $s .= "[{$protocol}:" . $line;
             continue;
         }

         # Expand the URL for the printable version unless the link text
         # already shows it
         $textShowsUrl = ( $link == $text ) ||
             preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link );
         if ( $textShowsUrl ) {
             $paren = "";
         } else {
             $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
         }

         $la = $sk->getExternalLinkAttributes( $link, $text );
         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
     }
     return $s;
 }
 592
 # Returns the markup for a ''' token and updates $state["strong"],
 # which is false while no <strong> is open and otherwise holds the
 # position of the opening token (true if the token has no position).
 /* private */ function handle3Quotes( &$state, $token )
 {
     if ( $state["strong"] === false ) {
         $state["strong"] = isset( $token["pos"] ) ? $token["pos"] : true;
         return "<strong>";
     }

     # ''' lala ''lala ''' : the <em> was opened inside the <strong>, so
     # it has to be closed first and reopened afterwards
     $emIsInner = ( $state["em"] !== false && $state["em"] > $state["strong"] );
     $state["strong"] = FALSE;
     return $emIsInner ? "</em></strong><em>" : "</strong>";
 }
 610
 # Returns the markup for a '' token and updates $state["em"], which is
 # false while no <em> is open and otherwise holds the position of the
 # opening token (true if the token has no position).
 /* private */ function handle2Quotes( &$state, $token )
 {
     if ( $state["em"] === false ) {
         $state["em"] = isset( $token["pos"] ) ? $token["pos"] : true;
         return "<em>";
     }

     # ''lala'''lala'' ....''' : the <strong> was opened inside the <em>,
     # so it has to be closed first and reopened afterwards
     $strongIsInner = ( $state["strong"] !== false && $state["strong"] > $state["em"] );
     $state["em"] = FALSE;
     return $strongIsInner ? "</strong></em><strong>" : "</em>";
 }
 629
 630         /* private */ function handle5Quotes( &$state, $token )
 631         {
 632                 $s = "";
 633                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 634                         if ( $state["em"] < $state["strong"] ) {
 635                                 $s .= "</strong></em>";
 636                         } else {
 637                                 $s .= "</em></strong>";
 638                         }
 639                         $state["strong"] = $state["em"] = FALSE;
 640                 } elseif ( $state["em"] !== false ) {
 641                         $s .= "</em><strong>";
 642                         $state["em"] = FALSE;
 643                         $state["strong"] = $token["pos"];
 644                 } elseif ( $state["strong"] !== false ) {
 645                         $s .= "</strong><em>";
 646                         $state["strong"] = FALSE;
 647                         $state["em"] = $token["pos"];
 648                 } else { # not $em and not $strong
 649                         $s .= "<strong><em>";
 650                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 651                 }
 652                 return $s;
 653         }
 654
 655         /* private */ function doTokenizedParser( $str )
 656         {
 657                 global $wgLang; # for language specific parser hook
 658
 659                 $tokenizer=Tokenizer::newFromString( $str );
 660                 $tokenStack = array();
 661
 662                 $s="";
 663                 $state["em"]      = FALSE;
 664                 $state["strong"]  = FALSE;
 665                 $tagIsOpen = FALSE;
 666                 $threeopen = false;
 667
 668                 # The tokenizer splits the text into tokens and returns them one by one.
 669                 # Every call to the tokenizer returns a new token.
 670                 while ( $token = $tokenizer->nextToken() )
 671                 {
 672                         switch ( $token["type"] )
 673                         {
 674                                 case "text":
 675                                         # simple text with no further markup
 676                                         $txt = $token["text"];
 677                                         break;
 678                                 case "[[[":
 679                                         # remember the tag opened with 3 [
 680                                         $threeopen = true;
 681                                 case "[[":
 682                                         # link opening tag.
 683                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 684                                         $tagIsOpen = TRUE;
 685                                         array_push( $tokenStack, $token );
 686                                         $txt="";
 687                                         break;
 688
 689                                 case "]]]":
 690                                 case "]]":
 691                                         # link close tag.
 692                                         # get text from stack, glue it together, and call the code to handle a
 693                                         # link
 694
 695                                         if ( count( $tokenStack ) == 0 )
 696                                         {
 697                                                 # stack empty. Found a ]] without an opening [[
 698                                                 $txt = "]]";
 699                                         } else {
 700                                                 $linkText = "";
 701                                                 $lastToken = array_pop( $tokenStack );
 702                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 703                                                 {
 704                                                         if( !empty( $lastToken["text"] ) ) {
 705                                                                 $linkText = $lastToken["text"] . $linkText;
 706                                                         }
 707                                                         $lastToken = array_pop( $tokenStack );
 708                                                 }
 709
 710                                                 $txt = $linkText ."]]";
 711
 712                                                 if( isset( $lastToken["text"] ) ) {
 713                                                         $prefix = $lastToken["text"];
 714                                                 } else {
 715                                                         $prefix = "";
 716                                                 }
 717                                                 $nextToken = $tokenizer->previewToken();
 718                                                 if ( $nextToken["type"] == "text" )
 719                                                 {
 720                                                         # Preview just looks at it. Now we have to fetch it.
 721                                                         $nextToken = $tokenizer->nextToken();
 722                                                         $txt .= $nextToken["text"];
 723                                                 }
 724                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 725
 726                                                 # did the tag start with 3 [ ?
 727                                                 if($threeopen) {
 728                                                         # show the first as text
 729                                                         $txt = "[".$txt;
 730                                                         $threeopen=false;
 731                                                 }
 732
 733                                         }
 734                                         $tagIsOpen = (count( $tokenStack ) != 0);
 735                                         break;
 736                                 case "----":
 737                                         $txt = "\n<hr />\n";
 738                                         break;
 739                                 case "'''":
 740                                         # This and the three next ones handle quotes
 741                                         $txt = $this->handle3Quotes( $state, $token );
 742                                         break;
 743                                 case "''":
 744                                         $txt = $this->handle2Quotes( $state, $token );
 745                                         break;
 746                                 case "'''''":
 747                                         $txt = $this->handle5Quotes( $state, $token );
 748                                         break;
 749                                 case "":
 750                                         # empty token
 751                                         $txt="";
 752                                         break;
 753                                 case "RFC ":
 754                                         if ( $tagIsOpen ) {
 755                                                 $txt = "RFC ";
 756                                         } else {
 757                                                 $txt = $this->doMagicRFC( $tokenizer );
 758                                         }
 759                                         break;
 760                                 case "ISBN ":
 761                                         if ( $tagIsOpen ) {
 762                                                 $txt = "ISBN ";
 763                                         } else {
 764                                                 $txt = $this->doMagicISBN( $tokenizer );
 765                                         }
 766                                         break;
 767                                 default:
 768                                         # Call language specific Hook.
 769                                         $txt = $wgLang->processToken( $token, $tokenStack );
 770                                         if ( NULL == $txt ) {
 771                                                 # An unkown token. Highlight.
 772                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 773                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 774                                         }
 775                                         break;
 776                         }
 777                         # If we're parsing the interior of a link, don't append the interior to $s,
 778                         # but push it to the stack so it can be processed when a ]] token is found.
 779                         if ( $tagIsOpen  && $txt != "" ) {
 780                                 $token["type"] = "text";
 781                                 $token["text"] = $txt;
 782                                 array_push( $tokenStack, $token );
 783                         } else {
 784                                 $s .= $txt;
 785                         }
 786                 } #end while
 787                 if ( count( $tokenStack ) != 0 )
 788                 {
 789                         # still objects on stack. opened [[ tag without closing ]] tag.
 790                         $txt = "";
 791                         while ( $lastToken = array_pop( $tokenStack ) )
 792                         {
 793                                 if ( $lastToken["type"] == "text" )
 794                                 {
 795                                         $txt = $lastToken["text"] . $txt;
 796                                 } else {
 797                                         $txt = $lastToken["type"] . $txt;
 798                                 }
 799                         }
 800                         $s .= $txt;
 801                 }
 802                 return $s;
 803         }
 804
 805         /* private */ function handleInternalLink( $line, $prefix )
 806         {
 807                 global $wgLang, $wgLinkCache;
 808                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 809                 static $fname = "Parser::handleInternalLink" ;
 810                 wfProfileIn( $fname );
 811
 812                 wfProfileIn( "$fname-setup" );
 813                 static $tc = FALSE;
 814                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 815                 $sk =& $this->mOptions->getSkin();
 816
 817                 # Match a link having the form [[namespace:link|alternate]]trail
 818                 static $e1 = FALSE;
 819                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 820                 # Match the end of a line for a word that's not followed by whitespace,
 821                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 822                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 823                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 824                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 825
 826
 827                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 828                 static $image = FALSE;
 829                 static $special = FALSE;
 830                 static $media = FALSE;
 831                 static $category = FALSE;
 832                 if ( !$image ) { $image = Namespace::getImage(); }
 833                 if ( !$special ) { $special = Namespace::getSpecial(); }
 834                 if ( !$media ) { $media = Namespace::getMedia(); }
 835                 if ( !$category ) { $category = Namespace::getCategory(); ; }
 836
 837                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 838
 839                 wfProfileOut( "$fname-setup" );
 840                 $s = "";
 841
 842                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 843                         $text = $m[2];
 844                         $trail = $m[3];
 845                 } else { # Invalid form; output directly
 846                         $s .= $prefix . "[[" . $line ;
 847                         return $s;
 848                 }
 849
 850                 /* Valid link forms:
 851                 Foobar -- normal
 852                 :Foobar -- override special treatment of prefix (images, language links)
 853                 /Foobar -- convert to CurrentPage/Foobar
 854                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 855                 */
 856                 $c = substr($m[1],0,1);
 857                 $noforce = ($c != ":");
 858                 if( $c == "/" ) { # subpage
 859                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 860                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 861                                 $noslash=$m[1];
 862                         } else {
 863                                 $noslash=substr($m[1],1);
 864                         }
 865                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 866                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 867                                 if( "" == $text ) {
 868                                         $text= $m[1];
 869                                 } # this might be changed for ugliness reasons
 870                         } else {
 871                                 $link = $noslash; # no subpage allowed, use standard link
 872                         }
 873                 } elseif( $noforce ) { # no subpage
 874                         $link = $m[1];
 875                 } else {
 876                         $link = substr( $m[1], 1 );
 877                 }
 878                 if( "" == $text )
 879                         $text = $link;
 880
 881                 $nt = Title::newFromText( $link );
 882                 if( !$nt ) {
 883                         $s .= $prefix . "[[" . $line;
 884                         return $s;
 885                 }
 886                 $ns = $nt->getNamespace();
 887                 $iw = $nt->getInterWiki();
 888                 if( $noforce ) {
 889                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 890                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 891                                 return (trim($s) == '')? '': $s;
 892                         }
 893                         if( $ns == $image ) {
 894                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 895                                 $wgLinkCache->addImageLinkObj( $nt );
 896                                 return $s;
 897                         }
 898                         if ( $ns == $category ) {
 899                                 $t = $nt->getText() ;
 900                                 $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 901                                 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 902                                 $this->mOutput->mCategoryLinks[] = $t ;
 903                                 $s .= $prefix . $trail ;
 904                                 return $s ;
 905                         }
 906                 }
 907                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 908                     ( strpos( $link, "#" ) == FALSE ) ) {
 909                         # Self-links are handled specially; generally de-link and change to bold.
 910                         $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 911                         return $s;
 912                 }
 913
 914                 if( $ns == $media ) {
 915                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 916                         $wgLinkCache->addImageLinkObj( $nt );
 917                         return $s;
 918                 } elseif( $ns == $special ) {
 919                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 920                         return $s;
 921                 }
 922                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 923
 924                 wfProfileOut( $fname );
 925                 return $s;
 926         }
 927
 928         # Some functions here used by doBlockLevels()
 929         #
 930         /* private */ function closeParagraph()
 931         {
 932                 $result = "";
 933                 if ( '' != $this->mLastSection ) {
 934                         $result = "</" . $this->mLastSection  . ">\n";
 935                 }
 936                 $this->mInPre = false;
 937                 $this->mLastSection = "";
 938                 return $result;
 939         }
        # getCommon() returns the length of the longest common prefix of
        # both arguments, i.e. the number of leading bytes they share.
        #
 943         /* private */ function getCommon( $st1, $st2 )
 944         {
 945                 $fl = strlen( $st1 );
 946                 $shorter = strlen( $st2 );
 947                 if ( $fl < $shorter ) { $shorter = $fl; }
 948
 949                 for ( $i = 0; $i < $shorter; ++$i ) {
 950                         if ( $st1{$i} != $st2{$i} ) { break; }
 951                 }
 952                 return $i;
 953         }
 954         # These next three functions open, continue, and close the list
 955         # element appropriate to the prefix character passed into them.
 956         #
 957         /* private */ function openList( $char )
 958     {
 959                 $result = $this->closeParagraph();
 960
 961                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 962                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 963                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 964                 else if ( ";" == $char ) {
 965                         $result .= "<dl><dt>";
 966                         $this->mDTopen = true;
 967                 }
 968                 else { $result = "<!-- ERR 1 -->"; }
 969
 970                 return $result;
 971         }
 972
 973         /* private */ function nextItem( $char )
 974         {
 975                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 976                 else if ( ":" == $char || ";" == $char ) {
 977                         $close = "</dd>";
 978                         if ( $this->mDTopen ) { $close = "</dt>"; }
 979                         if ( ";" == $char ) {
 980                                 $this->mDTopen = true;
 981                                 return $close . "<dt>";
 982                         } else {
 983                                 $this->mDTopen = false;
 984                                 return $close . "<dd>";
 985                         }
 986                 }
 987                 return "<!-- ERR 2 -->";
 988         }
 989
 990         /* private */function closeList( $char )
 991         {
 992                 if ( "*" == $char ) { $text = "</li></ul>"; }
 993                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 994                 else if ( ":" == $char ) {
 995                         if ( $this->mDTopen ) {
 996                                 $this->mDTopen = false;
 997                                 $text = "</dt></dl>";
 998                         } else {
 999                                 $text = "</dd></dl>";
1000                         }
1001                 }
1002                 else {  return "<!-- ERR 3 -->"; }
1003                 return $text."\n";
1004         }
1005
1006         /* private */ function doBlockLevels( $text, $linestart )
1007         {
1008                 $fname = "Parser::doBlockLevels";
1009                 wfProfileIn( $fname );
1010                 # Parsing through the text line by line.  The main thing
1011                 # happening here is handling of block-level elements p, pre,
1012                 # and making lists from lines starting with * # : etc.
1013                 #
1014                 $a = explode( "\n", $text );
1015
1016                 $lastPref = $text = $lastLine = '';
1017                 $this->mDTopen = $inBlockElem = false;
1018                 $npl = 0;
1019                 $pstack = false;
1020
1021                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1022                 foreach ( $a as $t ) {
1023                         $oLine = $t;
1024                         $opl = strlen( $lastPref );
1025                         $preCloseMatch = preg_match("/<\\/pre/i", $t );
1026                         $preOpenMatch = preg_match("/<pre/i", $t );
1027                         if (!$this->mInPre) {
1028                                 $this->mInPre = !empty($preOpenMatch);
1029                         }
1030                         if ( !$this->mInPre ) {
1031                                 $npl = strspn( $t, "*#:;" );
1032                                 $pref = substr( $t, 0, $npl );
1033                                 $pref2 = str_replace( ";", ":", $pref );
1034                                 $t = substr( $t, $npl );
1035                         } else {
1036                                 $npl = 0;
1037                                 $pref = $pref2 = '';
1038                         }
1039
1040                         // list generation
1041                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1042                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1043                                 if ( $pstack ) { $pstack = false; }
1044
1045                                 if ( ";" == substr( $pref, -1 ) ) {
1046                                         $cpos = strpos( $t, ":" );
1047                                         if ( false !== $cpos ) {
1048                                                 $term = substr( $t, 0, $cpos );
1049                                                 $text .= $term . $this->nextItem( ":" );
1050                                                 $t = substr( $t, $cpos + 1 );
1051                                         }
1052                                 }
1053                         } else if (0 != $npl || 0 != $opl) {
1054                                 $cpl = $this->getCommon( $pref, $lastPref );
1055                                 if ( $pstack ) { $pstack = false; }
1056
1057                                 while ( $cpl < $opl ) {
1058                                         $text .= $this->closeList( $lastPref{$opl-1} );
1059                                         --$opl;
1060                                 }
1061                                 if ( $npl <= $cpl && $cpl > 0 ) {
1062                                         $text .= $this->nextItem( $pref{$cpl-1} );
1063                                 }
1064                                 while ( $npl > $cpl ) {
1065                                         $char = substr( $pref, $cpl, 1 );
1066                                         $text .= $this->openList( $char );
1067
1068                                         if ( ";" == $char ) {
1069                                                 $cpos = strpos( $t, ":" );
1070                                                 if ( ! ( false === $cpos ) ) {
1071                                                         $term = substr( $t, 0, $cpos );
1072                                                         $text .= $term . $this->nextItem( ":" );
1073                                                         $t = substr( $t, $cpos + 1 );
1074                                                 }
1075                                         }
1076                                         ++$cpl;
1077                                 }
1078                                 $lastPref = $pref2;
1079                         }
1080                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1081                                 $uniq_prefix = UNIQ_PREFIX;
1082                                 // XXX: use a stack for nestable elements like span, table and div
1083                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1084                                 $closematch = preg_match(
1085                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1086                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1087                                 if ( $openmatch or $closematch ) {
1088                                         if ( $pstack ) { $pstack = false; }
1089                                         $text .= $this->closeParagraph();
1090                                         if($preOpenMatch and !$preCloseMatch) {
1091                                                 $this->mInPre = true;
1092                                         }
1093                                         if ( $closematch  ) {
1094                                                 $inBlockElem = false;
1095                                         } else {
1096                                                 $inBlockElem = true;
1097                                         }
1098                                 } else if ( !$inBlockElem ) {
1099                                         if ( " " == $t{0} ) {
1100                                                 // pre
1101                                                 if ($this->mLastSection != 'pre') {
1102                                                         $pstack = false;
1103                                                         $text .= $this->closeParagraph().'<pre>';
1104                                                         $this->mLastSection = 'pre';
1105                                                 }
1106                                         } else {
1107                                                 // paragraph
1108                                                 if ( '' == trim($t) ) {
1109                                                         if ( $pstack ) {
1110                                                                 $text .= $pstack.'<br/>';
1111                                                                 $pstack = false;
1112                                                                 $this->mLastSection = 'p';
1113                                                         } else {
1114                                                                 if ($this->mLastSection != 'p' ) {
1115                                                                         $text .= $this->closeParagraph();
1116                                                                         $this->mLastSection = '';
1117                                                                         $pstack = "<p>";
1118                                                                 } else {
1119                                                                         $pstack = '</p><p>';
1120                                                                 }
1121                                                         }
1122                                                 } else {
1123                                                         if ( $pstack ) {
1124                                                                 $text .= $pstack;
1125                                                                 $pstack = false;
1126                                                                 $this->mLastSection = 'p';
1127                                                         } else if ($this->mLastSection != 'p') {
1128                                                                 $text .= $this->closeParagraph().'<p>';
1129                                                                 $this->mLastSection = 'p';
1130                                                         }
1131                                                 }
1132                                         }
1133                                 }
1134                         }
1135                         if ($pstack === false) {
1136                                 $text .= $t."\n";
1137                         }
1138                 }
1139                 while ( $npl ) {
1140                         $text .= $this->closeList( $pref2{$npl-1} );
1141                         --$npl;
1142                 }
1143                 if ( "" != $this->mLastSection ) {
1144                         $text .= "</" . $this->mLastSection . ">";
1145                         $this->mLastSection = "";
1146                 }
1147
1148                 wfProfileOut( $fname );
1149                 return $text;
1150         }
1151
1152         function getVariableValue( $index ) {
1153                 global $wgLang, $wgSitename, $wgServer;
1154
1155                 switch ( $index ) {
1156                         case MAG_CURRENTMONTH:
1157                                 return date( "m" );
1158                         case MAG_CURRENTMONTHNAME:
1159                                 return $wgLang->getMonthName( date("n") );
1160                         case MAG_CURRENTMONTHNAMEGEN:
1161                                 return $wgLang->getMonthNameGen( date("n") );
1162                         case MAG_CURRENTDAY:
1163                                 return date("j");
1164                         case MAG_CURRENTDAYNAME:
1165                                 return $wgLang->getWeekdayName( date("w")+1 );
1166                         case MAG_CURRENTYEAR:
1167                                 return date( "Y" );
1168                         case MAG_CURRENTTIME:
1169                                 return $wgLang->time( wfTimestampNow(), false );
1170                         case MAG_NUMBEROFARTICLES:
1171                                 return wfNumberOfArticles();
1172                         case MAG_SITENAME:
1173                                 return $wgSitename;
1174                         case MAG_SERVER:
1175                                 return $wgServer;
1176                         default:
1177                                 return NULL;
1178                 }
1179         }
1180
1181         function initialiseVariables()
1182         {
1183                 global $wgVariableIDs;
1184                 $this->mVariables = array();
1185                 foreach ( $wgVariableIDs as $id ) {
1186                         $mw =& MagicWord::get( $id );
1187                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1188                 }
1189         }
1190
1191         /* private */ function replaceVariables( $text, $args = array() )
1192         {
1193                 global $wgLang, $wgScript, $wgArticlePath;
1194
1195                 $fname = "Parser::replaceVariables";
1196                 wfProfileIn( $fname );
1197
1198                 $bail = false;
1199                 if ( !$this->mVariables ) {
1200                         $this->initialiseVariables();
1201                 }
1202                 $titleChars = Title::legalChars();
1203                 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1204
1205                 # This function is called recursively. To keep track of arguments we need a stack:
1206                 array_push( $this->mArgStack, $args );
1207
1208                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1209                 $GLOBALS['wgCurParser'] =& $this;
1210                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1211
1212                 array_pop( $this->mArgStack );
1213
1214                 return $text;
1215         }
1216
1217         function braceSubstitution( $matches )
1218         {
                     # Computes the replacement text for a single {{...}} match.
                     # $matches is a regex match array: [0] the whole match, [1] an
                     # optional leading newline, [2] the part before the first "|",
                     # [3] the "|"-prefixed argument list or "".
                     # NOTE(review): presumably reached through the wfBraceSubstitution()
                     # callback used by replaceVariables() - confirm.
                     #
                     # Lookup order, stopping at the first hit: SUBST gating, INT
                     # messages, NS, LOCALURL/LOCALURLE, internal variables, arguments
                     # taken from the top of $this->mArgStack, and finally the page named
                     # by $part1 (resolved with NS_TEMPLATE as second argument).
                     #
                     # Returns the captured newline followed by the replacement, or
                     # $matches[0] unchanged when nothing was found.
1219                 global $wgLinkCache, $wgLang;
1220                 $fname = "Parser::braceSubstitution";
1221                 $found = false;
1222                 $nowiki = false;
1223                 $title = NULL;
1224
1225                 # $newline is an optional newline character before the braces
1226                 # $part1 is the bit before the first |, and must contain only title characters
1227                 # $args is a list of arguments, starting from index 0, not including $part1
1228
1229                 $newline = $matches[1];
1230                 $part1 = $matches[2];
1231                 # If the third subpattern matched anything, it will start with |
1232                 if ( $matches[3] !== "" ) {
1233                         $args = explode( "|", substr( $matches[3], 1 ) );
1234                 } else {
1235                         $args = array();
1236                 }
1237                 $argc = count( $args );
1238
1239                 # SUBST
                     # NOTE(review): matchStartAndRemove() is handed $part1 and, going by
                     # its name and by how $part1 is used afterwards, strips the matched
                     # prefix from it - confirm against MagicWord.
1240                 $mwSubst =& MagicWord::get( MAG_SUBST );
1241                 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1242                         if ( $this->mOutputType != OT_WIKI ) {
1243                                 # Invalid SUBST not replaced at PST time
1244                                 # Return without further processing
1245                                 $text = $matches[0];
1246                                 $found = true;
1247                         }
1248                 } elseif ( $this->mOutputType == OT_WIKI ) {
1249                         # SUBST not found in PST pass, do nothing
1250                         $text = $matches[0];
1251                         $found = true;
1252                 }
1253
1254                 # MSG, MSGNW and INT
1255                 if ( !$found ) {
1256                         # Check for MSGNW:
1257                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1258                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
                                     # The result is escaped with wfEscapeWikiText() further
                                     # down instead of being parsed
1259                                 $nowiki = true;
1260                         } else {
1261                                 # Remove obsolete MSG:
1262                                 $mwMsg =& MagicWord::get( MAG_MSG );
1263                                 $mwMsg->matchStartAndRemove( $part1 );
1264                         }
1265
1266                         # Check if it is an internal message
1267                         $mwInt =& MagicWord::get( MAG_INT );
1268                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
                                     # Internal messages count against the same inclusion
                                     # limit, under the key "int:<name>"
1269                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1270                                         $text = wfMsgReal( $part1, $args, true );
1271                                         $found = true;
1272                                 }
1273                         }
1274                 }
1275
1276                 # NS
1277                 if ( !$found ) {
1278                         # Check for NS: (namespace expansion)
1279                         $mwNs = MagicWord::get( MAG_NS );
1280                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
                                     # intval() is 0 for non-numeric text (and for "0"), so
                                     # those values take the canonical-name lookup below
1281                                 if ( intval( $part1 ) ) {
1282                                         $text = $wgLang->getNsText( intval( $part1 ) );
1283                                         $found = true;
1284                                 } else {
1285                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1286                                         if ( !is_null( $index ) ) {
1287                                                 $text = $wgLang->getNsText( $index );
1288                                                 $found = true;
1289                                         }
1290                                 }
1291                         }
1292                 }
1293
1294                 # LOCALURL and LOCALURLE
1295                 if ( !$found ) {
1296                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1297                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1298
                             # $func receives the name of the Title method to call, or ''
                             # when neither magic word matched
1299                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1300                                 $func = 'getLocalURL';
1301                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1302                                 $func = 'escapeLocalURL';
1303                         } else {
1304                                 $func = '';
1305                         }
1306
1307                         if ( $func !== '' ) {
1308                                 $title = Title::newFromText( $part1 );
1309                                 if ( !is_null( $title ) ) {
                                             # Only the first argument, if any, is passed on
1310                                         if ( $argc > 0 ) {
1311                                                 $text = $title->$func( $args[0] );
1312                                         } else {
1313                                                 $text = $title->$func();
1314                                         }
1315                                         $found = true;
1316                                 }
1317                         }
1318                 }
1319
1320                 # Internal variables
1321                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1322                         $text = $this->mVariables[$part1];
1323                         $found = true;
1324                         $this->mOutput->mContainsOldMagic = true;
1325                 }
1326
1327                 # Arguments input from the caller
                     # end() yields the array most recently pushed on the stack by
                     # replaceVariables()
1328                 $inputArgs = end( $this->mArgStack );
1329                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1330                         $text = $inputArgs[$part1];
1331                         $found = true;
1332                 }
1333
1334                 # Load from database
1335                 if ( !$found ) {
1336                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1337                         if ( !is_null( $title ) && !$title->isExternal() ) {
1338                                 # Check for excessive inclusion
1339                                 $dbk = $title->getPrefixedDBkey();
1340                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1341                                         $article = new Article( $title );
1342                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1343                                         if ( $articleContent !== false ) {
1344                                                 $found = true;
1345                                                 $text = $articleContent;
1346
1347                                         }
1348                                 }
1349
1350                                 # If the title is valid but undisplayable, make a link to it
                                     # (this is also reached when the inclusion limit was hit)
1351                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1352                                         $text = "[[" . $title->getPrefixedText() . "]]";
1353                                         $found = true;
1354                                 }
1355                         }
1356                 }
1357
1358                 # Recursive parsing, escaping and link table handling
1359                 # Only for HTML output
1360                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1361                         $text = wfEscapeWikiText( $text );
1362                 } elseif ( $this->mOutputType == OT_HTML && $found ) {
1363                         # Clean up argument array
                             # Arguments without "=" get positional keys starting at 1;
                             # "name=value" arguments are stored under the trimmed name
1364                         $assocArgs = array();
1365                         $index = 1;
1366                         foreach( $args as $arg ) {
1367                                 $eqpos = strpos( $arg, "=" );
1368                                 if ( $eqpos === false ) {
1369                                         $assocArgs[$index++] = $arg;
1370                                 } else {
1371                                         $name = trim( substr( $arg, 0, $eqpos ) );
1372                                         $value = trim( substr( $arg, $eqpos+1 ) );
                                             # NOTE(review): trim() returns a string, so this
                                             # === false test and the $name one below look
                                             # unreachable - confirm before removing
1373                                         if ( $value === false ) {
1374                                                 $value = "";
1375                                         }
1376                                         if ( $name !== false ) {
1377                                                 $assocArgs[$name] = $value;
1378                                         }
1379                                 }
1380                         }
1381
1382                         # Do not enter included links in link table
                             # ($title is non-null only on the LOCALURL and database paths)
1383                         if ( !is_null( $title ) ) {
1384                                 $wgLinkCache->suspend();
1385                         }
1386
1387                         # Run full parser on the included text
1388                         $text = $this->strip( $text, $this->mStripState );
1389                         $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
1390
1391                         # Add the result to the strip state for re-inclusion after
1392                         # the rest of the processing
1393                         $text = $this->insertStripItem( $text, $this->mStripState );
1394
1395                         # Resume the link cache and register the inclusion as a link
1396                         if ( !is_null( $title ) ) {
1397                                 $wgLinkCache->resume();
1398                                 $wgLinkCache->addLinkObj( $title );
1399                         }
1400                 }
1401
1402                 if ( !$found ) {
1403                         return $matches[0];
1404                 } else {
1405                         return $newline . $text;
1406                 }
1407         }
1408
1409         # Returns true if the function is allowed to include this entity
1410         function incrementIncludeCount( $dbk )
1411         {
1412                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1413                         $this->mIncludeCount[$dbk] = 0;
1414                 }
1415                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1416                         return true;
1417                 } else {
1418                         return false;
1419                 }
1420         }
1421
1422
1423         # Cleans up HTML, removes dangerous tags and attributes
             #
             # HTML comments are dropped first. The text is then split on "<"; every
             # fragment that parses as a tag from the lists below is re-emitted with
             # its attributes run through fixTagAttributes(), any other fragment is
             # output with "<" and ">" replaced by &lt; and &gt;. Tags still open at
             # the end are closed.
             #
             # $text: markup to sanitise
             # Returns the sanitised markup.
1424         /* private */ function removeHTMLtags( $text )
1425         {
1426                 $fname = "Parser::removeHTMLtags";
1427                 wfProfileIn( $fname );
1428                 $htmlpairs = array( # Tags that must be closed
1429                         "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1430                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1431                         "strike", "strong", "tt", "var", "div", "center",
1432                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1433                         "ruby", "rt" , "rb" , "rp", "p"
1434                 );
1435                 $htmlsingle = array(
1436                         "br", "hr", "li", "dt", "dd"
1437                 );
1438                 $htmlnest = array( # Tags that can be nested--??
1439                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1440                         "dl", "font", "big", "small", "sub", "sup"
1441                 );
1442                 $tabletags = array( # Can only appear inside table
1443                         "td", "th", "tr"
1444                 );
1445
                     # Table rows/cells are handled like single tags: they are never
                     # pushed on $tagstack and need no matching close tag
1446                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1447                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1448
                     # NOTE(review): $htmlattrs is not referenced again in this function
1449                 $htmlattrs = $this->getHTMLattrs () ;
1450
1451                 # Remove HTML comments
1452                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1453
                     # Everything before the first "<" is kept as is; each remaining
                     # element of $bits is the text that followed one "<"
1454                 $bits = explode( "<", $text );
1455                 $text = array_shift( $bits );
1456                 $tagstack = array(); $tablestack = array();
1457
1458                 foreach ( $bits as $x ) {
                             # Notices and warnings are switched off around list(),
                             # presumably because $regs has no elements to unpack when
                             # the fragment does not match
1459                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1460                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1461                           $x, $regs );
                             # $qbar takes the whole match and is not used afterwards
1462                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1463                         error_reporting( $prev );
1464
1465                         $badtag = 0 ;
1466                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1467                                 # Check our stack
1468                                 if ( $slash ) {
1469                                         # Closing a tag...
                                             # For paired tags the top of the stack must be the
                                             # same tag; otherwise it is pushed back and the close
                                             # tag is rejected
1470                                         if ( ! in_array( $t, $htmlsingle ) &&
1471                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1472                                                 array_push( $tagstack, $ot );
1473                                                 $badtag = 1;
1474                                         } else {
                                                     # Leaving a table: restore the stack that was
                                                     # saved when the table was opened
1475                                                 if ( $t == "table" ) {
1476                                                         $tagstack = array_pop( $tablestack );
1477                                                 }
1478                                                 $newparams = "";
1479                                         }
1480                                 } else {
1481                                         # Keep track for later
1482                                         if ( in_array( $t, $tabletags ) &&
1483                                           ! in_array( "table", $tagstack ) ) {
1484                                                 $badtag = 1;
1485                                         } else if ( in_array( $t, $tagstack ) &&
1486                                           ! in_array ( $t , $htmlnest ) ) {
1487                                                 $badtag = 1 ;
1488                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
                                                     # Entering a table: park the current stack and
                                                     # start a fresh one holding just "table"
1489                                                 if ( $t == "table" ) {
1490                                                         array_push( $tablestack, $tagstack );
1491                                                         $tagstack = array();
1492                                                 }
1493                                                 array_push( $tagstack, $t );
1494                                         }
1495                                         # Strip non-approved attributes from the tag
1496                                         $newparams = $this->fixTagAttributes($params);
1497
1498                                 }
1499                                 if ( ! $badtag ) {
                                             # Accepted tag: rebuild it and escape any ">" in the
                                             # text that follows
1500                                         $rest = str_replace( ">", "&gt;", $rest );
1501                                         $text .= "<$slash$t $newparams$brace$rest";
1502                                         continue;
1503                                 }
1504                         }
                             # Unknown or rejected tag: output the fragment as literal text
1505                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1506                 }
1507                 # Close off any remaining tags
1508                 while ( $t = array_pop( $tagstack ) ) {
1509                         $text .= "</$t>\n";
1510                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1511                 }
1512                 wfProfileOut( $fname );
1513                 return $text;
1514         }
1515
1516 /*
1517  *
1518  * This function accomplishes several tasks:
1519  * 1) Auto-number headings if that option is enabled
1520  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1521  * 3) Add a Table of contents on the top for users who have enabled the option
1522  * 4) Auto-anchor headings
1523  *
1524  * It loops through all headlines, collects the necessary data, then splits up the
1525  * string and re-inserts the newly formatted headlines.
      *
      * $text: HTML in which headings already appear as <h1>..<h6> elements
      *
      * Returns the text with every heading rebuilt (anchor, optional number and
      * edit link) and, when the TOC is shown, the TOC appended to the block of
      * text that precedes the first heading.
1526  *
1527  */
1528
1529         /* private */ function formatHeadings( $text )
1530         {
1531                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1532                 $doShowToc = $this->mOptions->getShowToc();
                     # Edit links and the right-click hook are only considered when
                     # userCanEdit() is true for the title
1533                 if( !$this->mTitle->userCanEdit() ) {
1534                         $showEditLink = 0;
1535                         $rightClickHack = 0;
1536                 } else {
1537                         $showEditLink = $this->mOptions->getEditSection();
1538                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1539                 }
1540
1541                 # Inhibit editsection links if requested in the page
1542                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1543                 if( $esw->matchAndRemove( $text ) ) {
1544                         $showEditLink = 0;
1545                 }
1546                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1547                 # do not add TOC
1548                 $mw =& MagicWord::get( MAG_NOTOC );
1549                 if( $mw->matchAndRemove( $text ) ) {
1550                         $doShowToc = 0;
1551                 }
1552
1553                 # never add the TOC to the Main Page. This is an entry page that should not
1554                 # be more than 1-2 screens large anyway
1555                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1556                         $doShowToc = 0;
1557                 }
1558
1559                 # Get all headlines for numbering them and adding funky stuff like [edit]
1560                 # links - this is for later, but we need the number of headlines right now
                     # $matches[1] = level digit, [2] = rest of the opening tag up to and
                     # including ">", [3] = heading content.
                     # NOTE(review): the pattern string is presumably split so that a
                     # question mark is never directly followed by ">" in the source.
1561                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1562
1563                 # if there are fewer than 4 headlines in the article, do not show TOC
1564                 if( $numMatches < 4 ) {
1565                         $doShowToc = 0;
1566                 }
1567
1568                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1569                 # override above conditions and always show TOC
1570                 $mw =& MagicWord::get( MAG_FORCETOC );
1571                 if ($mw->matchAndRemove( $text ) ) {
1572                         $doShowToc = 1;
1573                 }
1574
1575
1576                 # We need this to perform operations on the HTML
1577                 $sk =& $this->mOptions->getSkin();
1578
1579                 # headline counter
1580                 $headlineCount = 0;
1581
1582                 # Ugh .. the TOC should have neat indentation levels which can be
1583                 # passed to the skin functions. These are determined here
1584                 $toclevel = 0;
1585                 $toc = "";
1586                 $full = "";
1587                 $head = array();
1588                 $sublevelCount = array();
1589                 $level = 0;
1590                 $prevlevel = 0;
1591                 foreach( $matches[3] as $headline ) {
1592                         $numbering = "";
                             # Remember the previous heading's level; both stay 0 until the
                             # first heading has been seen
1593                         if( $level ) {
1594                                 $prevlevel = $level;
1595                         }
1596                         $level = $matches[1][$headlineCount];
1597                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1598                                 # reset when we enter a new level
1599                                 $sublevelCount[$level] = 0;
1600                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1601                                 $toclevel += $level - $prevlevel;
1602                         }
1603                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1604                                 # reset when we step back a level
1605                                 $sublevelCount[$level+1]=0;
1606                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1607                                 $toclevel -= $prevlevel - $level;
1608                         }
1609                         # count number of headlines for each level
                             # (@ silences the notice for a level that has no counter yet)
1610                         @$sublevelCount[$level]++;
                             # Build the dotted number from the non-empty counters of
                             # levels 1 up to the current level
1611                         if( $doNumberHeadings || $doShowToc ) {
1612                                 $dot = 0;
1613                                 for( $i = 1; $i <= $level; $i++ ) {
1614                                         if( !empty( $sublevelCount[$i] ) ) {
1615                                                 if( $dot ) {
1616                                                         $numbering .= ".";
1617                                                 }
1618                                                 $numbering .= $sublevelCount[$i];
1619                                                 $dot = 1;
1620                                         }
1621                                 }
1622                         }
1623
1624                         # The canonized header is a version of the header text safe to use for links
1625                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1626                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1627
1628                         # strip out HTML
1629                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1630                         $tocline = trim( $canonized_headline );
                             # Runs of characters from the listed set become a single "_"
1631                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
1632                         $refer[$headlineCount] = $canonized_headline;
1633
1634                         # count how many in assoc. array so we can track dupes in anchors
                             # (@ as above: $refers has no entry on the first occurrence)
1635                         @$refers[$canonized_headline]++;
1636                         $refcount[$headlineCount]=$refers[$canonized_headline];
1637
1638                         # Prepend the number to the heading text
1639
1640                         if( $doNumberHeadings || $doShowToc ) {
1641                                 $tocline = $numbering . " " . $tocline;
1642
1643                                 # Don't number the heading if it is the only one (looks silly)
1644                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1645                                         # the two are different if the line contains a link
1646                                         $headline=$numbering . " " . $headline;
1647                                 }
1648                         }
1649
1650                         # Create the anchor for linking from the TOC to the section
                             # Repeated headings get "_<occurrence number>" appended
1651                         $anchor = $canonized_headline;
1652                         if($refcount[$headlineCount] > 1 ) {
1653                                 $anchor .= "_" . $refcount[$headlineCount];
1654                         }
1655                         if( $doShowToc ) {
1656                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1657                         }
1658                         if( $showEditLink ) {
1659                                 if ( empty( $head[$headlineCount] ) ) {
1660                                         $head[$headlineCount] = "";
1661                                 }
                                     # Section numbers handed to the skin are 1-based
1662                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1663                         }
1664
1665                         # Add the edit section span
1666                         if( $rightClickHack ) {
1667                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1668                         }
1669
1670                         # give headline the correct <h#> tag
                             # (@ covers $head[$headlineCount] being unset when no edit
                             # link was added above)
1671                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1672
1673                         $headlineCount++;
1674                 }
1675
1676                 if( $doShowToc ) {
                             # NOTE(review): $toclines is not read again in this function
1677                         $toclines = $headlineCount;
                             # Close whatever indentation is still open, then wrap the TOC
1678                         $toc .= $sk->tocUnindent( $toclevel );
1679                         $toc = $sk->tocTable( $toc );
1680                 }
1681
1682                 # split up and insert constructed headlines
1683
                     # Same heading pattern as above, here used to cut the text into
                     # the blocks that lie between the headings
1684                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1685                 $i = 0;
1686
1687                 foreach( $blocks as $block ) {
1688                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1689                             # This is the [edit] link that appears for the top block of text when
1690                                 # section editing is enabled
1691
1692                                 # Disabled because it broke block formatting
1693                                 # For example, a bullet point in the top line
1694                                 # $full .= $sk->editSectionLink(0);
1695                         }
1696                         $full .= $block;
                             # The TOC goes right after the first block, i.e. after the
                             # text that precedes the first heading
1697                         if( $doShowToc && !$i) {
1698                         # Top anchor now in skin
1699                                 $full = $full.$toc;
1700                         }
1701
                             # $head[$i] is the rebuilt heading that followed block $i
1702                         if( !empty( $head[$i] ) ) {
1703                                 $full .= $head[$i];
1704                         }
1705                         $i++;
1706                 }
1707
1708                 return $full;
1709         }
1710
1711         /* private */ function doMagicISBN( &$tokenizer )
1712         {
1713                 global $wgLang;
1714
1715                 # Check whether next token is a text token
1716                 # If yes, fetch it and convert the text into a
1717                 # Special::BookSources link
1718                 $token = $tokenizer->previewToken();
1719                 while ( $token["type"] == "" )
1720                 {
1721                         $tokenizer->nextToken();
1722                         $token = $tokenizer->previewToken();
1723                 }
1724                 if ( $token["type"] == "text" )
1725                 {
1726                         $token = $tokenizer->nextToken();
1727                         $x = $token["text"];
1728                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1729
1730                         $isbn = $blank = "" ;
1731                         while ( " " == $x{0} ) {
1732                                 $blank .= " ";
1733                                 $x = substr( $x, 1 );
1734                         }
1735                         while ( strstr( $valid, $x{0} ) != false ) {
1736                                 $isbn .= $x{0};
1737                                 $x = substr( $x, 1 );
1738                         }
1739                         $num = str_replace( "-", "", $isbn );
1740                         $num = str_replace( " ", "", $num );
1741
1742                         if ( "" == $num ) {
1743                                 $text = "ISBN $blank$x";
1744                         } else {
1745                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1746                                 $text = "<a href=\"" .
1747                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1748                                         "\" class=\"internal\">ISBN $isbn</a>";
1749                                 $text .= $x;
1750                         }
1751                 } else {
1752                         $text = "ISBN ";
1753                 }
1754                 return $text;
1755         }
1756         /* private */ function doMagicRFC( &$tokenizer )
1757         {
1758                 global $wgLang;
1759
1760                 # Check whether next token is a text token
1761                 # If yes, fetch it and convert the text into a
1762                 # link to an RFC source
1763                 $token = $tokenizer->previewToken();
1764                 while ( $token["type"] == "" )
1765                 {
1766                         $tokenizer->nextToken();
1767                         $token = $tokenizer->previewToken();
1768                 }
1769                 if ( $token["type"] == "text" )
1770                 {
1771                         $token = $tokenizer->nextToken();
1772                         $x = $token["text"];
1773                         $valid = "0123456789";
1774
1775                         $rfc = $blank = "" ;
1776                         while ( " " == $x{0} ) {
1777                                 $blank .= " ";
1778                                 $x = substr( $x, 1 );
1779                         }
1780                         while ( strstr( $valid, $x{0} ) != false ) {
1781                                 $rfc .= $x{0};
1782                                 $x = substr( $x, 1 );
1783                         }
1784
1785                         if ( "" == $rfc ) {
1786                                 $text .= "RFC $blank$x";
1787                         } else {
1788                                 $url = wfmsg( "rfcurl" );
1789                                 $url = str_replace( "$1", $rfc, $url);
1790                                 $sk =& $this->mOptions->getSkin();
1791                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1792                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1793                         }
1794                 } else {
1795                         $text = "RFC ";
1796                 }
1797                 return $text;
1798         }
1799
# Transform wiki markup as it is being saved: normalise line endings and
# <br> variants, then run the pstPass2() substitutions on the text.
#
# $text       - wikitext to transform
# $title      - title of the page being saved; kept by reference in mTitle
# $user       - user performing the save; handed on to pstPass2()
# $options    - stored in mOptions
# $clearState - when true, clearState() is called before transforming
#
# Returns the transformed wikitext.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOptions = $options;
        $this->mTitle =& $title;
        $this->mOutputType = OT_WIKI;

        if ( $clearState ) {
                $this->clearState();
        }

        # Normalise Windows line endings
        $text = str_replace( "\r\n", "\n", $text );

        # Normalise <br> variants.
        # [^>]+ confines the match to a single tag. The previous ".+" was greedy,
        # so "a<br>b<br clear=all>" matched from the first "<br" onwards and the
        # text between the two tags was lost on save.
        $brFixes = array(
                '/<br[^>]+(clear|break)=["\']?(all|both)["\']?\\/?>/i' => '<br style="clear:both;"/>',
                '/<br *?>/i' => '<br/>',
        );
        $text = preg_replace( array_keys( $brFixes ), array_values( $brFixes ), $text );

        # strip() fills $stripState, which unstrip() needs afterwards to undo it
        $stripState = false;
        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $text;
}
1826
# Second pass of the pre-save transform: variable substitution, signature
# expansion (~~~, ~~~~, ~~~~~), context-link shortcuts ([[|name]],
# [[name (context)|]], [[ns:name|]]) and trailing-whitespace trimming.
#
# $text - wikitext to transform
# $user - user whose name and "nickname" option are used for signatures
#
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone;

        # Variable replacement
        # preSaveTransform() sets mOutputType to OT_WIKI before calling this; per the
        # original note, that makes this only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }

        # Note: this is an ugly timezone hack for the European wikis.
        # TZ is switched only while the timestamp is being formatted.
        $useLocalTz = isset( $wgLocaltimezone );
        if ( $useLocalTz ) {
                $oldtz = getenv( "TZ" );
                putenv( "TZ=$wgLocaltimezone" );
        }
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if ( $useLocalTz ) {
                putenv( "TZ=$oldtz" );
        }

        # The tilde markers are fixed strings, so use plain string replacement.
        # With preg_replace() a name or nickname containing "$1", "\\0" and the like
        # would be interpreted as a backreference and mangled.
        # Longest marker first, so ~~~~~ is not consumed by the shorter ones.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # If the current page title is of the form "name (context)", remember the context
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }

        # Most specific pattern first: $p4 before $p1 and $p3
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        # [[|page]] inherits the context of the current title, if it has one.
        # $context only contains characters from $tc, so it cannot introduce
        # backreference syntax into the replacement string.
        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1897
1898         # Set up some variables which are usually set up in parse()
1899         # so that an external function can call some class members with confidence
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        # Record the parse context on the object; the title is kept by reference
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        # Callers may opt out of resetting the per-parse state
        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1909
# Run variable replacement over a message text in OT_MSG mode, using the
# global $wgTitle as the title. A call made while another call is still in
# progress returns $text unchanged.
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $busy = false;

        # Guard against infinite recursion: only the outermost call does any work
        if ( !$busy ) {
                $busy = true;

                $this->mOutputType = OT_MSG;
                $this->mOptions = $options;
                $this->mTitle = $wgTitle;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $busy = false;
        }

        return $text;
}
1929 }
1930
# Holds the result of a parse: the output text plus the language links,
# category links and "contains old magic" flag that go with it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor; every argument is optional.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each hands the member and the new value to wfSetVar()
        # and returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the link lists and the old-magic flag of another ParserOutput
        # into this one. mText is left untouched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Append $other's category links. The previous code appended this
                # object's own language links here, which dropped $other's categories
                # and put language links into the category list.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1960
# Bundle of settings that a parse run reads instead of touching the site
# globals or the user object directly.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # --- Getters: each returns the corresponding member unchanged ---

        function getUseTeX()
        {
                return $this->mUseTeX;
        }

        function getUseCategoryMagic()
        {
                return $this->mUseCategoryMagic;
        }

        function getUseDynamicDates()
        {
                return $this->mUseDynamicDates;
        }

        function getInterwikiMagic()
        {
                return $this->mInterwikiMagic;
        }

        function getAllowExternalImages()
        {
                return $this->mAllowExternalImages;
        }

        function getSkin()
        {
                return $this->mSkin;
        }

        function getDateFormat()
        {
                return $this->mDateFormat;
        }

        function getEditSection()
        {
                return $this->mEditSection;
        }

        function getEditSectionOnRightClick()
        {
                return $this->mEditSectionOnRightClick;
        }

        function getNumberHeadings()
        {
                return $this->mNumberHeadings;
        }

        function getShowToc()
        {
                return $this->mShowToc;
        }

        # --- Setters: each delegates to wfSetVar() (wfSetRef() for the skin)
        # --- and returns that helper's result

        function setUseTeX( $x )
        {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function setUseCategoryMagic( $x )
        {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        function setUseDynamicDates( $x )
        {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function setInterwikiMagic( $x )
        {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function setAllowExternalImages( $x )
        {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function setSkin( $x )
        {
                return wfSetRef( $this->mSkin, $x );
        }

        function setDateFormat( $x )
        {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function setEditSection( $x )
        {
                return wfSetVar( $this->mEditSection, $x );
        }

        function setEditSectionOnRightClick( $x )
        {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function setNumberHeadings( $x )
        {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function setShowToc( $x )
        {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Build a ParserOptions object initialised from the given user
        /* static */ function newFromUser( &$user )
        {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fill in every option: site-wide switches come from the $wg* globals,
        # personal preferences from the user object. A falsy $userInput is
        # replaced by a fresh User marked as loaded.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $source =& $userInput;
                } else {
                        $source = new User;
                        $source->setLoaded( true );
                }

                # Per-user preferences
                $this->mSkin =& $source->getSkin();
                $this->mDateFormat = $source->getOption( "date" );
                $this->mEditSection = $source->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $source->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $source->getOption( "numberheadings" );
                $this->mShowToc = $source->getOption( "showtoc" );

                # Site-wide configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
        }

}
2033
2034 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches )
{
        # Plain functions are needed as regex callbacks, so forward the match
        # array to whichever parser is currently registered in $wgCurParser
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2040
2041 ?>