includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36
  37 define( "MAX_INCLUDE_REPEAT", 5 );
  38
  39 # Allowed values for $mOutputType
  40 define( "OT_HTML", 1 );
  41 define( "OT_WIKI", 2 );
  42 define( "OT_MSG", 3 );
  43
  44 # prefix for escaping, used in two functions at least
  45 define( "UNIQ_PREFIX", "NaodW29");
  46
  47 class Parser
  48 {
  49         # Cleared with clearState():
  50         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  51         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  52
  53         # Temporary:
  54         var $mOptions, $mTitle, $mOutputType;
  55
  56         function Parser()
  57         {
  58                 $this->clearState();
  59         }
  60
  61         function clearState()
  62         {
  63                 $this->mOutput = new ParserOutput;
  64                 $this->mAutonumber = 0;
  65                 $this->mLastSection = "";
  66                 $this->mDTopen = false;
  67                 $this->mVariables = false;
  68                 $this->mIncludeCount = array();
  69                 $this->mStripState = array();
  70                 $this->mArgStack = array();
  71         }
  72
  73         # First pass--just handle <nowiki> sections, pass the rest off
  74         # to internalParse() which does all the real work.
  75         #
  76         # Returns a ParserOutput
  77         #
  78         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  79         {
  80                 $fname = "Parser::parse";
  81                 wfProfileIn( $fname );
  82
  83                 if ( $clearState ) {
  84                         $this->clearState();
  85                 }
  86
  87                 $this->mOptions = $options;
  88                 $this->mTitle =& $title;
  89                 $this->mOutputType = OT_HTML;
  90
  91                 $stripState = NULL;
  92                 $text = $this->strip( $text, $this->mStripState );
  93                 $text = $this->internalParse( $text, $linestart );
  94                 $text = $this->unstrip( $text, $this->mStripState );
  95                 # Clean up special characters, only run once, next-to-last before doBlockLevels
  96                 $fixtags = array(
  97                         "/<hr *>/i" => '<hr/>',
  98                         "/<br *>/i" => '<br/>',
  99                         "/<center *>/i"=>'<div class="center">',
 100                         "/<\\/center *>/i" => '</div>',
 101                         # Clean up spare ampersands; note that we probably ought to be
 102                         # more careful about named entities.
 103                         '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 104                 );
 105                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 106
 107                 # only once and last
 108                 $text = $this->doBlockLevels( $text, $linestart );
 109
 110                 $this->mOutput->setText( $text );
 111                 wfProfileOut( $fname );
 112                 return $this->mOutput;
 113         }
 114
 115         /* static */ function getRandomString()
 116         {
 117                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 118         }
 119
 120         # Replaces all occurrences of <$tag>content</$tag> in the text
 121         # with a random marker and returns the new text. The output parameter
 122         # $content will be an associative array filled with data of the form
 123         # $unique_marker => content.
 124
 125         # If $content is already set, the additional entries will be appended.
 126
 127         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 128                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 129                 if ( !$content ) {
 130                         $content = array( );
 131                 }
 132                 $n = 1;
 133                 $stripped = "";
 134
 135                 while ( "" != $text ) {
 136                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 137                         $stripped .= $p[0];
 138                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 139                                 $text = "";
 140                         } else {
 141                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 142                                 $marker = $rnd . sprintf("%08X", $n++);
 143                                 $content[$marker] = $q[0];
 144                                 $stripped .= $marker;
 145                                 $text = $q[1];
 146                         }
 147                 }
 148                 return $stripped;
 149         }
 150
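 151         # Strips <nowiki>, <hiero>, <math> and <pre> sections, in that order
 152         # (<hiero> and <math> only when the corresponding feature is enabled).
 153         # Returns the text, and fills $state with the data needed in unstrip().
 154         # If $state is already a valid strip state, the new entries are added to it.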
 155         function strip( $text, &$state )
 156         {
 157                 $render = ($this->mOutputType == OT_HTML);
 158                 $nowiki_content = array();
 159                 $hiero_content = array();
 160                 $math_content = array();
 161                 $pre_content = array();
 162                 $item_content = array();
 163
 164                 # Replace any instances of the placeholders
 165                 $uniq_prefix = UNIQ_PREFIX;
 166                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 167
 168                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 169                 foreach( $nowiki_content as $marker => $content ){
 170                         if( $render ){
 171                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 172                         } else {
 173                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 174                         }
 175                 }
 176
 177                 if( $GLOBALS['wgUseWikiHiero'] ){
 178                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 179                         foreach( $hiero_content as $marker => $content ){
 180                                 if( $render ){
 181                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 182                                 } else {
 183                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 184                                 }
 185                         }
 186                 }
 187
 188                 if( $this->mOptions->getUseTeX() ){
 189                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 190                         foreach( $math_content as $marker => $content ){
 191                                 if( $render ){
 192                                         $math_content[$marker] = renderMath( $content );
 193                                 } else {
 194                                         $math_content[$marker] = "<math>$content</math>";
 195                                 }
 196                         }
 197                 }
 198
 199                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 200                 foreach( $pre_content as $marker => $content ){
 201                         if( $render ){
 202                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 203                         } else {
 204                                 $pre_content[$marker] = "<pre>$content</pre>";
 205                         }
 206                 }
 207
 208                 # Merge state with the pre-existing state, if there is one
 209                 if ( $state ) {
 210                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 211                         $state['hiero'] = $state['hiero'] + $hiero_content;
 212                         $state['math'] = $state['math'] + $math_content;
 213                         $state['pre'] = $state['pre'] + $pre_content;
 214                 } else {
 215                         $state = array(
 216                           'nowiki' => $nowiki_content,
 217                           'hiero' => $hiero_content,
 218                           'math' => $math_content,
 219                           'pre' => $pre_content,
 220                           'item' => $item_content
 221                         );
 222                 }
 223                 return $text;
 224         }
 225
 226         function unstrip( $text, &$state )
 227         {
 228                 # Must expand in reverse order, otherwise nested tags will be corrupted
 229                 $contentDict = end( $state );
 230                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 231                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 232                                 $text = str_replace( key( $contentDict ), $content, $text );
 233                         }
 234                 }
 235
 236                 return $text;
 237         }
 238
 239         # Add an item to the strip state
 240         # Returns the unique tag which must be inserted into the stripped text
 241         # The tag will be replaced with the original text in unstrip()
 242
 243         function insertStripItem( $text, &$state )
 244         {
 245                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 246                 if ( !$state ) {
 247                         $state = array(
 248                           'nowiki' => array(),
 249                           'hiero' => array(),
 250                           'math' => array(),
 251                           'pre' => array(),
 252                           'item' => array()
 253                         );
 254                 }
 255                 $state['item'][$rnd] = $text;
 256                 return $rnd;
 257         }
 258
 259         function categoryMagic ()
 260         {
 261                 global $wgLang , $wgUser ;
 262                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 263                 $id = $this->mTitle->getArticleID() ;
 264                 $cns = Namespace::getCategory() ;
 265                 if ( $this->mTitle->getNamespace() != $cns ) return "" ;
 266                 $ti = $this->mTitle->getText() ;
 267                 $r = "<br style=\"clear:both;\"/>\n";
 268
 269                 $articles = array() ;
 270                 $parents = array () ;
 271                 $children = array() ;
 272
 273                 $sk =& $wgUser->getSkin() ;
 274
 275                 $data = array () ;
 276                 $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 277                 $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 278
 279                 $res = wfQuery ( $sql1, DB_READ ) ;
 280                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 281
 282                 $res = wfQuery ( $sql2, DB_READ ) ;
 283                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 284
 285                 foreach ( $data AS $x )
 286                 {
 287                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 288                         if ( $t != "" ) $t .= ":" ;
 289                         $t .= $x->cur_title ;
 290
 291                         if ( $x->cur_namespace == $cns ) {
 292                                 array_push ( $children , $sk->makeLink ( $t ) ) ;
 293                         } else {
 294                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 295                         }
 296                 }
 297                 wfFreeResult ( $res ) ;
 298
 299                 # Children
 300                 if ( count ( $children ) > 0 )
 301                 {
 302                         asort ( $children ) ;
 303                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 304                         $r .= implode ( ", " , $children ) ;
 305                 }
 306
 307                 # Articles
 308                 if ( count ( $articles ) > 0 )
 309                 {
 310                         asort ( $articles ) ;
 311                         $h =  wfMsg( "category_header", $ti );
 312                         $r .= "<h2>{$h}</h2>\n" ;
 313                         $r .= implode ( ", " , $articles ) ;
 314                 }
 315
 316
 317                 return $r ;
 318         }
 319
 320         function getHTMLattrs ()
 321         {
 322                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 323                                 "title", "align", "lang", "dir", "width", "height",
 324                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 325                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 326                                 /* FONT */ "type", "start", "value", "compact",
 327                                 /* For various lists, mostly deprecated but safe */
 328                                 "summary", "width", "border", "frame", "rules",
 329                                 "cellspacing", "cellpadding", "valign", "char",
 330                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 331                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 332                                 "id", "class", "name", "style" /* For CSS */
 333                                 );
 334                 return $htmlattrs ;
 335         }
 336
 337         function fixTagAttributes ( $t )
 338         {
 339                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 340                 $htmlattrs = $this->getHTMLattrs() ;
 341
 342                 # Strip non-approved attributes from the tag
 343                 $t = preg_replace(
 344                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 345                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 346                         $t);
 347                 # Strip javascript "expression" from stylesheets. Brute force approach:
 348                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 349
 350                 if( preg_match(
 351                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 352                         wfMungeToUtf8( $t ) ) )
 353                 {
 354                         $t="";
 355                 }
 356
 357                 return trim ( $t ) ;
 358         }
 359
 360         function doTableStuff ( $t )
 361         {
 362                 $t = explode ( "\n" , $t ) ;
 363                 $td = array () ; # Is currently a td tag open?
 364                         $ltd = array () ; # Was it TD or TH?
 365                         $tr = array () ; # Is currently a tr tag open?
 366                         $ltr = array () ; # tr attributes
 367                         foreach ( $t AS $k => $x )
 368                         {
 369                                 $x = trim ( $x ) ;
 370                                 $fc = substr ( $x , 0 , 1 ) ;
 371                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 372                                 {
 373                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 374                                         array_push ( $td , false ) ;
 375                                         array_push ( $ltd , "" ) ;
 376                                         array_push ( $tr , false ) ;
 377                                         array_push ( $ltr , "" ) ;
 378                                 }
 379                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 380                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 381                                 {
 382                                         $z = "</table>\n" ;
 383                                         $l = array_pop ( $ltd ) ;
 384                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 385                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 386                                         array_pop ( $ltr ) ;
 387                                         $t[$k] = $z ;
 388                                 }
 389                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 390                                                 {
 391                                                 $z = trim ( substr ( $x , 2 ) ) ;
 392                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 393                                                 }*/
 394                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 395                                 {
 396                                         $x = substr ( $x , 1 ) ;
 397                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 398                                         $z = "" ;
 399                                         $l = array_pop ( $ltd ) ;
 400                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 401                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 402                                         array_pop ( $ltr ) ;
 403                                         $t[$k] = $z ;
 404                                         array_push ( $tr , false ) ;
 405                                         array_push ( $td , false ) ;
 406                                         array_push ( $ltd , "" ) ;
 407                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 408                                 }
 409                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 410                                 {
 411                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 412                                         {
 413                                                 $fc = "+" ;
 414                                                 $x = substr ( $x , 1 ) ;
 415                                         }
 416                                         $after = substr ( $x , 1 ) ;
 417                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 418                                         $after = explode ( "||" , $after ) ;
 419                                         $t[$k] = "" ;
 420                                         foreach ( $after AS $theline )
 421                                         {
 422                                                 $z = "" ;
 423                                                 if ( $fc != "+" )
 424                                                 {
 425                                                         $tra = array_pop ( $ltr ) ;
 426                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 427                                                         array_push ( $tr , true ) ;
 428                                                         array_push ( $ltr , "" ) ;
 429                                                 }
 430
 431                                                 $l = array_pop ( $ltd ) ;
 432                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 433                                                 if ( $fc == "|" ) $l = "td" ;
 434                                                 else if ( $fc == "!" ) $l = "th" ;
 435                                                 else if ( $fc == "+" ) $l = "caption" ;
 436                                                 else $l = "" ;
 437                                                 array_push ( $ltd , $l ) ;
 438                                                 $y = explode ( "|" , $theline , 2 ) ;
 439                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 440                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 441                                                 $t[$k] .= $y ;
 442                                                 array_push ( $td , true ) ;
 443                                         }
 444                                 }
 445                         }
 446
 447                 # Closing open td, tr && table
 448                 while ( count ( $td ) > 0 )
 449                 {
 450                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 451                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 452                         $t[] = "</table>" ;
 453                 }
 454
 455                 $t = implode ( "\n" , $t ) ;
 456                 #               $t = $this->removeHTMLtags( $t );
 457                 return $t ;
 458         }
 459
 460         function internalParse( $text, $linestart, $args = array() )
 461         {
 462                 $fname = "Parser::internalParse";
 463                 wfProfileIn( $fname );
 464
 465                 $text = $this->removeHTMLtags( $text );
 466                 $text = $this->replaceVariables( $text, $args );
 467
 468                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 469
 470                 $text = $this->doHeadings( $text );
 471                 if($this->mOptions->getUseDynamicDates()) {
 472                         global $wgDateFormatter;
 473                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 474                 }
 475                 $text = $this->replaceExternalLinks( $text );
 476                 $text = $this->doTokenizedParser ( $text );
 477                 $text = $this->doTableStuff ( $text ) ;
 478                 $text = $this->formatHeadings( $text );
 479                 $sk =& $this->mOptions->getSkin();
 480                 $text = $sk->transformContent( $text );
 481
 482                 if ( !isset ( $this->categoryMagicDone ) ) {
 483                    $text .= $this->categoryMagic () ;
 484                    $this->categoryMagicDone = true ;
 485                    }
 486
 487                 wfProfileOut( $fname );
 488                 return $text;
 489         }
 490
 491
 492         /* private */ function doHeadings( $text )
 493         {
 494                 for ( $i = 6; $i >= 1; --$i ) {
 495                         $h = substr( "======", 0, $i );
 496                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 497                           "<h{$i}>\\1</h{$i}>\\2", $text );
 498                 }
 499                 return $text;
 500         }
 501
 502         # Note: we have to do external links before the internal ones,
 503         # and otherwise take great care in the order of things here, so
 504         # that we don't end up interpreting some URLs twice.
 505
 506         /* private */ function replaceExternalLinks( $text )
 507         {
 508                 $fname = "Parser::replaceExternalLinks";
 509                 wfProfileIn( $fname );
 510                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 511                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 512                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 513                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 514                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 515                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 516                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 517                 wfProfileOut( $fname );
 518                 return $text;
 519         }
 520
 521         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 522         {
 523                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 524                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 525
 526                 # this is  the list of separators that should be ignored if they
 527                 # are the last character of an URL but that should be included
 528                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 529                 # in this case, the last comma should not become part of the URL,
 530                 # but in "www.foo.com/123,2342,32.htm" it should.
 531                 $sep = ",;\.:";
 532                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 533                 $images = "gif|png|jpg|jpeg";
 534
 535                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 536                 # they are interpreted as part of the string (used to tell PHP
 537                 # that the content of the string should be inserted there).
 538                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 539                   "((?i){$images})([^{$uc}]|$)/";
 540
 541                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 542                 $sk =& $this->mOptions->getSkin();
 543
 544                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 545                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 546                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 547                 }
 548                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 549                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 550                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 551                   "</a>\\5", $s );
 552                 $s = str_replace( $unique, $protocol, $s );
 553
 554                 $a = explode( "[{$protocol}:", " " . $s );
 555                 $s = array_shift( $a );
 556                 $s = substr( $s, 1 );
 557
 558                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 559                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 560
 561                 foreach ( $a as $line ) {
 562                         if ( preg_match( $e1, $line, $m ) ) {
 563                                 $link = "{$protocol}:{$m[1]}";
 564                                 $trail = $m[2];
 565                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 566                                 else { $text = wfEscapeHTML( $link ); }
 567                         } else if ( preg_match( $e2, $line, $m ) ) {
 568                                 $link = "{$protocol}:{$m[1]}";
 569                                 $text = $m[2];
 570                                 $trail = $m[3];
 571                         } else {
 572                                 $s .= "[{$protocol}:" . $line;
 573                                 continue;
 574                         }
 575                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 576                                 $paren = "";
 577                         } else {
 578                                 # Expand the URL for printable version
 579                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 580                         }
 581                         $la = $sk->getExternalLinkAttributes( $link, $text );
 582                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 583
 584                 }
 585                 return $s;
 586         }
 587
 588         /* private */ function handle3Quotes( &$state, $token )
 589         {
 590                 if ( $state["strong"] !== false ) {
 591                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 592                         {
 593                                 # ''' lala ''lala '''
 594                                 $s = "</em></strong><em>";
 595                         } else {
 596                                 $s = "</strong>";
 597                         }
 598                         $state["strong"] = FALSE;
 599                 } else {
 600                         $s = "<strong>";
 601                         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
 602                 }
 603                 return $s;
 604         }
 605
 606         /* private */ function handle2Quotes( &$state, $token )
 607         {
 608                 if ( $state["em"] !== false ) {
 609                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 610                         {
 611                                 # ''lala'''lala'' ....'''
 612                                 $s = "</strong></em><strong>";
 613                         } else {
 614                                 $s = "</em>";
 615                         }
 616                         $state["em"] = FALSE;
 617                 } else {
 618                         $s = "<em>";
 619                         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 620
 621                 }
 622                 return $s;
 623         }
 624
 625         /* private */ function handle5Quotes( &$state, $token )
 626         {
 627                 $s = "";
 628                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 629                         if ( $state["em"] < $state["strong"] ) {
 630                                 $s .= "</strong></em>";
 631                         } else {
 632                                 $s .= "</em></strong>";
 633                         }
 634                         $state["strong"] = $state["em"] = FALSE;
 635                 } elseif ( $state["em"] !== false ) {
 636                         $s .= "</em><strong>";
 637                         $state["em"] = FALSE;
 638                         $state["strong"] = $token["pos"];
 639                 } elseif ( $state["strong"] !== false ) {
 640                         $s .= "</strong><em>";
 641                         $state["strong"] = FALSE;
 642                         $state["em"] = $token["pos"];
 643                 } else { # not $em and not $strong
 644                         $s .= "<strong><em>";
 645                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 646                 }
 647                 return $s;
 648         }
 649
 650         /* private */ function doTokenizedParser( $str )
 651         {
 652                 global $wgLang; # for language specific parser hook
 653
 654                 $tokenizer=Tokenizer::newFromString( $str );
 655                 $tokenStack = array();
 656
 657                 $s="";
 658                 $state["em"]      = FALSE;
 659                 $state["strong"]  = FALSE;
 660                 $tagIsOpen = FALSE;
 661                 $threeopen = false;
 662
 663                 # The tokenizer splits the text into tokens and returns them one by one.
 664                 # Every call to the tokenizer returns a new token.
 665                 while ( $token = $tokenizer->nextToken() )
 666                 {
 667                         switch ( $token["type"] )
 668                         {
 669                                 case "text":
 670                                         # simple text with no further markup
 671                                         $txt = $token["text"];
 672                                         break;
 673                                 case "[[[":
 674                                         # remember the tag opened with 3 [
 675                                         $threeopen = true;
 676                                 case "[[":
 677                                         # link opening tag.
 678                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 679                                         $tagIsOpen = TRUE;
 680                                         array_push( $tokenStack, $token );
 681                                         $txt="";
 682                                         break;
 683
 684                                 case "]]]":
 685                                 case "]]":
 686                                         # link close tag.
 687                                         # get text from stack, glue it together, and call the code to handle a
 688                                         # link
 689
 690                                         if ( count( $tokenStack ) == 0 )
 691                                         {
 692                                                 # stack empty. Found a ]] without an opening [[
 693                                                 $txt = "]]";
 694                                         } else {
 695                                                 $linkText = "";
 696                                                 $lastToken = array_pop( $tokenStack );
 697                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 698                                                 {
 699                                                         if( !empty( $lastToken["text"] ) ) {
 700                                                                 $linkText = $lastToken["text"] . $linkText;
 701                                                         }
 702                                                         $lastToken = array_pop( $tokenStack );
 703                                                 }
 704
 705                                                 $txt = $linkText ."]]";
 706
 707                                                 if( isset( $lastToken["text"] ) ) {
 708                                                         $prefix = $lastToken["text"];
 709                                                 } else {
 710                                                         $prefix = "";
 711                                                 }
 712                                                 $nextToken = $tokenizer->previewToken();
 713                                                 if ( $nextToken["type"] == "text" )
 714                                                 {
 715                                                         # Preview just looks at it. Now we have to fetch it.
 716                                                         $nextToken = $tokenizer->nextToken();
 717                                                         $txt .= $nextToken["text"];
 718                                                 }
 719                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 720
 721                                                 # did the tag start with 3 [ ?
 722                                                 if($threeopen) {
 723                                                         # show the first as text
 724                                                         $txt = "[".$txt;
 725                                                         $threeopen=false;
 726                                                 }
 727
 728                                         }
 729                                         $tagIsOpen = (count( $tokenStack ) != 0);
 730                                         break;
 731                                 case "----":
 732                                         $txt = "\n<hr />\n";
 733                                         break;
 734                                 case "'''":
 735                                         # This and the three next ones handle quotes
 736                                         $txt = $this->handle3Quotes( $state, $token );
 737                                         break;
 738                                 case "''":
 739                                         $txt = $this->handle2Quotes( $state, $token );
 740                                         break;
 741                                 case "'''''":
 742                                         $txt = $this->handle5Quotes( $state, $token );
 743                                         break;
 744                                 case "":
 745                                         # empty token
 746                                         $txt="";
 747                                         break;
 748                                 case "RFC ":
 749                                         if ( $tagIsOpen ) {
 750                                                 $txt = "RFC ";
 751                                         } else {
 752                                                 $txt = $this->doMagicRFC( $tokenizer );
 753                                         }
 754                                         break;
 755                                 case "ISBN ":
 756                                         if ( $tagIsOpen ) {
 757                                                 $txt = "ISBN ";
 758                                         } else {
 759                                                 $txt = $this->doMagicISBN( $tokenizer );
 760                                         }
 761                                         break;
 762                                 default:
 763                                         # Call language specific Hook.
 764                                         $txt = $wgLang->processToken( $token, $tokenStack );
 765                                         if ( NULL == $txt ) {
 766                                                 # An unkown token. Highlight.
 767                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 768                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 769                                         }
 770                                         break;
 771                         }
 772                         # If we're parsing the interior of a link, don't append the interior to $s,
 773                         # but push it to the stack so it can be processed when a ]] token is found.
 774                         if ( $tagIsOpen  && $txt != "" ) {
 775                                 $token["type"] = "text";
 776                                 $token["text"] = $txt;
 777                                 array_push( $tokenStack, $token );
 778                         } else {
 779                                 $s .= $txt;
 780                         }
 781                 } #end while
 782                 if ( count( $tokenStack ) != 0 )
 783                 {
 784                         # still objects on stack. opened [[ tag without closing ]] tag.
 785                         $txt = "";
 786                         while ( $lastToken = array_pop( $tokenStack ) )
 787                         {
 788                                 if ( $lastToken["type"] == "text" )
 789                                 {
 790                                         $txt = $lastToken["text"] . $txt;
 791                                 } else {
 792                                         $txt = $lastToken["type"] . $txt;
 793                                 }
 794                         }
 795                         $s .= $txt;
 796                 }
 797                 return $s;
 798         }
 799
 800         /* private */ function handleInternalLink( $line, $prefix )
 801         {
 802                 global $wgLang, $wgLinkCache;
 803                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 804                 static $fname = "Parser::handleInternalLink" ;
 805                 wfProfileIn( $fname );
 806
 807                 wfProfileIn( "$fname-setup" );
 808                 static $tc = FALSE;
 809                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 810                 $sk =& $this->mOptions->getSkin();
 811
 812                 # Match a link having the form [[namespace:link|alternate]]trail
 813                 static $e1 = FALSE;
 814                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 815                 # Match the end of a line for a word that's not followed by whitespace,
 816                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 817                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 818                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 819                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 820
 821
 822                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 823                 static $image = FALSE;
 824                 static $special = FALSE;
 825                 static $media = FALSE;
 826                 static $category = FALSE;
 827                 if ( !$image ) { $image = Namespace::getImage(); }
 828                 if ( !$special ) { $special = Namespace::getSpecial(); }
 829                 if ( !$media ) { $media = Namespace::getMedia(); }
 830                 if ( !$category ) { $category = Namespace::getCategory(); ; }
 831
 832                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 833
 834                 wfProfileOut( "$fname-setup" );
 835                 $s = "";
 836
 837                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 838                         $text = $m[2];
 839                         $trail = $m[3];
 840                 } else { # Invalid form; output directly
 841                         $s .= $prefix . "[[" . $line ;
 842                         return $s;
 843                 }
 844
 845                 /* Valid link forms:
 846                 Foobar -- normal
 847                 :Foobar -- override special treatment of prefix (images, language links)
 848                 /Foobar -- convert to CurrentPage/Foobar
 849                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 850                 */
 851                 $c = substr($m[1],0,1);
 852                 $noforce = ($c != ":");
 853                 if( $c == "/" ) { # subpage
 854                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 855                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 856                                 $noslash=$m[1];
 857                         } else {
 858                                 $noslash=substr($m[1],1);
 859                         }
 860                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 861                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 862                                 if( "" == $text ) {
 863                                         $text= $m[1];
 864                                 } # this might be changed for ugliness reasons
 865                         } else {
 866                                 $link = $noslash; # no subpage allowed, use standard link
 867                         }
 868                 } elseif( $noforce ) { # no subpage
 869                         $link = $m[1];
 870                 } else {
 871                         $link = substr( $m[1], 1 );
 872                 }
 873                 if( "" == $text )
 874                         $text = $link;
 875
 876                 $nt = Title::newFromText( $link );
 877                 if( !$nt ) {
 878                         $s .= $prefix . "[[" . $line;
 879                         return $s;
 880                 }
 881                 $ns = $nt->getNamespace();
 882                 $iw = $nt->getInterWiki();
 883                 if( $noforce ) {
 884                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 885                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 886                                 return (trim($s) == '')? '': $s;
 887                         }
 888                         if( $ns == $image ) {
 889                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 890                                 $wgLinkCache->addImageLinkObj( $nt );
 891                                 return $s;
 892                         }
 893                         if ( $ns == $category ) {
 894                                 $t = $nt->getText() ;
 895                                 $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 896                                 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 897                                 $this->mOutput->mCategoryLinks[] = $t ;
 898                                 $s .= $prefix . $trail ;
 899                                 return $s ;
 900                         }
 901                 }
 902                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 903                     ( strpos( $link, "#" ) == FALSE ) ) {
 904                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 905                         return $s;
 906                 }
 907
 908                 if( $ns == $media ) {
 909                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 910                         $wgLinkCache->addImageLinkObj( $nt );
 911                         return $s;
 912                 } elseif( $ns == $special ) {
 913                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 914                         return $s;
 915                 }
 916                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 917
 918                 wfProfileOut( $fname );
 919                 return $s;
 920         }
 921
 922         # Some functions here used by doBlockLevels()
 923         #
 924         /* private */ function closeParagraph()
 925         {
 926                 $result = "";
 927                 if ( '' != $this->mLastSection ) {
 928                         $result = "</" . $this->mLastSection  . ">\n";
 929                 }
 930                 $this->mInPre = false;
 931                 $this->mLastSection = "";
 932                 return $result;
 933         }
 934         # getCommon() returns the length of the longest common substring
 935         # of both arguments, starting at the beginning of both.
 936         #
 937         /* private */ function getCommon( $st1, $st2 )
 938         {
 939                 $fl = strlen( $st1 );
 940                 $shorter = strlen( $st2 );
 941                 if ( $fl < $shorter ) { $shorter = $fl; }
 942
 943                 for ( $i = 0; $i < $shorter; ++$i ) {
 944                         if ( $st1{$i} != $st2{$i} ) { break; }
 945                 }
 946                 return $i;
 947         }
 948         # These next three functions open, continue, and close the list
 949         # element appropriate to the prefix character passed into them.
 950         #
 951         /* private */ function openList( $char )
 952     {
 953                 $result = $this->closeParagraph();
 954
 955                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 956                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 957                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 958                 else if ( ";" == $char ) {
 959                         $result .= "<dl><dt>";
 960                         $this->mDTopen = true;
 961                 }
 962                 else { $result = "<!-- ERR 1 -->"; }
 963
 964                 return $result;
 965         }
 966
 967         /* private */ function nextItem( $char )
 968         {
 969                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 970                 else if ( ":" == $char || ";" == $char ) {
 971                         $close = "</dd>";
 972                         if ( $this->mDTopen ) { $close = "</dt>"; }
 973                         if ( ";" == $char ) {
 974                                 $this->mDTopen = true;
 975                                 return $close . "<dt>";
 976                         } else {
 977                                 $this->mDTopen = false;
 978                                 return $close . "<dd>";
 979                         }
 980                 }
 981                 return "<!-- ERR 2 -->";
 982         }
 983
 984         /* private */function closeList( $char )
 985         {
 986                 if ( "*" == $char ) { $text = "</li></ul>"; }
 987                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 988                 else if ( ":" == $char ) {
 989                         if ( $this->mDTopen ) {
 990                                 $this->mDTopen = false;
 991                                 $text = "</dt></dl>";
 992                         } else {
 993                                 $text = "</dd></dl>";
 994                         }
 995                 }
 996                 else {  return "<!-- ERR 3 -->"; }
 997                 return $text."\n";
 998         }
 999
1000         /* private */ function doBlockLevels( $text, $linestart )
1001         {
1002                 $fname = "Parser::doBlockLevels";
1003                 wfProfileIn( $fname );
1004                 # Parsing through the text line by line.  The main thing
1005                 # happening here is handling of block-level elements p, pre,
1006                 # and making lists from lines starting with * # : etc.
1007                 #
1008                 $a = explode( "\n", $text );
1009
1010                 $lastPref = $text = $lastLine = '';
1011                 $this->mDTopen = $inBlockElem = false;
1012                 $npl = 0;
1013                 $pstack = false;
1014
1015                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1016                 foreach ( $a as $t ) {
1017                         $oLine = $t;
1018                         $opl = strlen( $lastPref );
1019                         $preCloseMatch = preg_match("/<\\/pre/i", $t );
1020                         $preOpenMatch = preg_match("/<pre/i", $t );
1021                         if (!$this->mInPre) {
1022                                 $this->mInPre = !empty($preOpenMatch);
1023                         }
1024                         if ( !$this->mInPre ) {
1025                                 $npl = strspn( $t, "*#:;" );
1026                                 $pref = substr( $t, 0, $npl );
1027                                 $pref2 = str_replace( ";", ":", $pref );
1028                                 $t = substr( $t, $npl );
1029                         } else {
1030                                 $npl = 0;
1031                                 $pref = $pref2 = '';
1032                         }
1033
1034                         // list generation
1035                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1036                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1037                                 if ( $pstack ) { $pstack = false; }
1038
1039                                 if ( ";" == substr( $pref, -1 ) ) {
1040                                         $cpos = strpos( $t, ":" );
1041                                         if ( false !== $cpos ) {
1042                                                 $term = substr( $t, 0, $cpos );
1043                                                 $text .= $term . $this->nextItem( ":" );
1044                                                 $t = substr( $t, $cpos + 1 );
1045                                         }
1046                                 }
1047                         } else if (0 != $npl || 0 != $opl) {
1048                                 $cpl = $this->getCommon( $pref, $lastPref );
1049                                 if ( $pstack ) { $pstack = false; }
1050
1051                                 while ( $cpl < $opl ) {
1052                                         $text .= $this->closeList( $lastPref{$opl-1} );
1053                                         --$opl;
1054                                 }
1055                                 if ( $npl <= $cpl && $cpl > 0 ) {
1056                                         $text .= $this->nextItem( $pref{$cpl-1} );
1057                                 }
1058                                 while ( $npl > $cpl ) {
1059                                         $char = substr( $pref, $cpl, 1 );
1060                                         $text .= $this->openList( $char );
1061
1062                                         if ( ";" == $char ) {
1063                                                 $cpos = strpos( $t, ":" );
1064                                                 if ( ! ( false === $cpos ) ) {
1065                                                         $term = substr( $t, 0, $cpos );
1066                                                         $text .= $term . $this->nextItem( ":" );
1067                                                         $t = substr( $t, $cpos + 1 );
1068                                                 }
1069                                         }
1070                                         ++$cpl;
1071                                 }
1072                                 $lastPref = $pref2;
1073                         }
1074                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1075                                 $uniq_prefix = UNIQ_PREFIX;
1076                                 // XXX: use a stack for nestable elements like span, table and div
1077                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1078                                 $closematch = preg_match(
1079                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1080                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1081                                 if ( $openmatch or $closematch ) {
1082                                         if ( $pstack ) { $pstack = false; }
1083                                         $text .= $this->closeParagraph();
1084                                         if($preOpenMatch and !$preCloseMatch) {
1085                                                 $this->mInPre = true;
1086                                         }
1087                                         if ( $closematch  ) {
1088                                                 $inBlockElem = false;
1089                                         } else {
1090                                                 $inBlockElem = true;
1091                                         }
1092                                 } else if ( !$inBlockElem ) {
1093                                         if ( " " == $t{0} ) {
1094                                                 // pre
1095                                                 if ($this->mLastSection != 'pre') {
1096                                                         $pstack = false;
1097                                                         $text .= $this->closeParagraph().'<pre>';
1098                                                         $this->mLastSection = 'pre';
1099                                                 }
1100                                         } else {
1101                                                 // paragraph
1102                                                 if ( '' == trim($t) ) {
1103                                                         if ( $pstack ) {
1104                                                                 $text .= $pstack.'<br/>';
1105                                                                 $pstack = false;
1106                                                                 $this->mLastSection = 'p';
1107                                                         } else {
1108                                                                 if ($this->mLastSection != 'p' ) {
1109                                                                         $text .= $this->closeParagraph();
1110                                                                         $this->mLastSection = '';
1111                                                                         $pstack = "<p>";
1112                                                                 } else {
1113                                                                         $pstack = '</p><p>';
1114                                                                 }
1115                                                         }
1116                                                 } else {
1117                                                         if ( $pstack ) {
1118                                                                 $text .= $pstack;
1119                                                                 $pstack = false;
1120                                                                 $this->mLastSection = 'p';
1121                                                         } else if ($this->mLastSection != 'p') {
1122                                                                 $text .= $this->closeParagraph().'<p>';
1123                                                                 $this->mLastSection = 'p';
1124                                                         }
1125                                                 }
1126                                         }
1127                                 }
1128                         }
1129                         if ($pstack === false) {
1130                                 $text .= $t."\n";
1131                         }
1132                 }
1133                 while ( $npl ) {
1134                         $text .= $this->closeList( $pref2{$npl-1} );
1135                         --$npl;
1136                 }
1137                 if ( "" != $this->mLastSection ) {
1138                         $text .= "</" . $this->mLastSection . ">";
1139                         $this->mLastSection = "";
1140                 }
1141
1142                 wfProfileOut( $fname );
1143                 return $text;
1144         }
1145
1146         function getVariableValue( $index ) {
1147                 global $wgLang, $wgSitename, $wgServer;
1148
1149                 switch ( $index ) {
1150                         case MAG_CURRENTMONTH:
1151                                 return date( "m" );
1152                         case MAG_CURRENTMONTHNAME:
1153                                 return $wgLang->getMonthName( date("n") );
1154                         case MAG_CURRENTMONTHNAMEGEN:
1155                                 return $wgLang->getMonthNameGen( date("n") );
1156                         case MAG_CURRENTDAY:
1157                                 return date("j");
1158                         case MAG_CURRENTDAYNAME:
1159                                 return $wgLang->getWeekdayName( date("w")+1 );
1160                         case MAG_CURRENTYEAR:
1161                                 return date( "Y" );
1162                         case MAG_CURRENTTIME:
1163                                 return $wgLang->time( wfTimestampNow(), false );
1164                         case MAG_NUMBEROFARTICLES:
1165                                 return wfNumberOfArticles();
1166                         case MAG_SITENAME:
1167                                 return $wgSitename;
1168                         case MAG_SERVER:
1169                                 return $wgServer;
1170                         default:
1171                                 return NULL;
1172                 }
1173         }
1174
1175         function initialiseVariables()
1176         {
1177                 global $wgVariableIDs;
1178                 $this->mVariables = array();
1179                 foreach ( $wgVariableIDs as $id ) {
1180                         $mw =& MagicWord::get( $id );
1181                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1182                 }
1183         }
1184         # Expand every {{...}} construct in $text by handing each regex match to the wfBraceSubstitution callback.
1185         # $args: arguments supplied by the caller; they sit on top of $this->mArgStack while the callback runs,
1186         # which is where braceSubstitution() looks them up. Returns the substituted text.
1187         /* private */ function replaceVariables( $text, $args = array() )
1188         {
1189                 $fname = "Parser::replaceVariables";
1190                 wfProfileIn( $fname );
1191
1192                 if ( !$this->mVariables ) {
1193                         $this->initialiseVariables();
1194                 }
1195                 $titleChars = Title::legalChars();
1196                 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1197
1198                 # This function is called recursively. To keep track of arguments we need a stack:
1199                 array_push( $this->mArgStack, $args );
1200
1201                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1202                 $GLOBALS['wgCurParser'] =& $this;
1203                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1204
1205                 array_pop( $this->mArgStack );
1206
1207                 wfProfileOut( $fname ); # balances the wfProfileIn() above
1208                 return $text;
1209         }
1210         # Resolve a single {{...}} match (capture array $matches) and return its replacement, or $matches[0] unchanged if nothing applies.
1211         function braceSubstitution( $matches )
1212         {
1213                 global $wgLinkCache, $wgLang;
1214                 $fname = "Parser::braceSubstitution"; # NOTE(review): not read again in this function
1215                 $found = false; # set once a replacement has been stored in $text
1216                 $nowiki = false; # set by MSGNW: the result is escaped rather than parsed
1217                 $title = NULL; # Title object, assigned in the LOCALURL and database branches
1218
1219                 # $newline is an optional newline character before the braces
1220                 # $part1 is the bit before the first |, and must contain only title characters
1221                 # $args is a list of arguments, starting from index 0, not including $part1
1222
1223                 $newline = $matches[1];
1224                 $part1 = $matches[2];
1225                 # If the third subpattern matched anything, it will start with |
1226                 if ( $matches[3] !== "" ) {
1227                         $args = explode( "|", substr( $matches[3], 1 ) );
1228                 } else {
1229                         $args = array();
1230                 }
1231                 $argc = count( $args );
1232
1233                 # SUBST: only expanded when the output type is OT_WIKI; otherwise the match is returned as is
1234                 $mwSubst =& MagicWord::get( MAG_SUBST );
1235                 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1236                         if ( $this->mOutputType != OT_WIKI ) {
1237                                 # Invalid SUBST not replaced at PST time
1238                                 # Return without further processing
1239                                 $text = $matches[0];
1240                                 $found = true;
1241                         }
1242                 } elseif ( $this->mOutputType == OT_WIKI ) {
1243                         # SUBST not found in PST pass, do nothing
1244                         $text = $matches[0];
1245                         $found = true;
1246                 }
1247
1248                 # MSG, MSGNW and INT
1249                 if ( !$found ) {
1250                         # Check for MSGNW:
1251                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1252                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1253                                 $nowiki = true;
1254                         } else {
1255                                 # Remove obsolete MSG:
1256                                 $mwMsg =& MagicWord::get( MAG_MSG );
1257                                 $mwMsg->matchStartAndRemove( $part1 );
1258                         }
1259
1260                         # Check if it is an internal message
1261                         $mwInt =& MagicWord::get( MAG_INT );
1262                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1263                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1264                                         $text = wfMsgReal( $part1, $args, true );
1265                                         $found = true;
1266                                 }
1267                         }
1268                 }
1269
1270                 # NS
1271                 if ( !$found ) {
1272                         # Check for NS: (namespace expansion)
1273                         $mwNs = MagicWord::get( MAG_NS );
1274                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1275                                 if ( intval( $part1 ) ) { # non-zero numeric namespace index
1276                                         $text = $wgLang->getNsText( intval( $part1 ) );
1277                                         $found = true;
1278                                 } else {
1279                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1280                                         if ( !is_null( $index ) ) {
1281                                                 $text = $wgLang->getNsText( $index );
1282                                                 $found = true;
1283                                         }
1284                                 }
1285                         }
1286                 }
1287
1288                 # LOCALURL and LOCALURLE
1289                 if ( !$found ) {
1290                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1291                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1292
1293                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1294                                 $func = 'getLocalURL';
1295                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1296                                 $func = 'escapeLocalURL';
1297                         } else {
1298                                 $func = '';
1299                         }
1300
1301                         if ( $func !== '' ) {
1302                                 $title = Title::newFromText( $part1 );
1303                                 if ( !is_null( $title ) ) {
1304                                         if ( $argc > 0 ) {
1305                                                 $text = $title->$func( $args[0] );
1306                                         } else {
1307                                                 $text = $title->$func();
1308                                         }
1309                                         $found = true;
1310                                 }
1311                         }
1312                 }
1313
1314                 # Internal variables
1315                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1316                         $text = $this->mVariables[$part1];
1317                         $found = true;
1318                         $this->mOutput->mContainsOldMagic = true;
1319                 }
1320
1321                 # Arguments input from the caller
1322                 $inputArgs = end( $this->mArgStack ); # top of the stack pushed by replaceVariables()
1323                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1324                         $text = $inputArgs[$part1];
1325                         $found = true;
1326                 }
1327
1328                 # Load from database
1329                 if ( !$found ) {
1330                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1331                         if ( !is_null( $title ) && !$title->isExternal() ) {
1332                                 # Check for excessive inclusion
1333                                 $dbk = $title->getPrefixedDBkey();
1334                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1335                                         $article = new Article( $title );
1336                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1337                                         if ( $articleContent !== false ) {
1338                                                 $found = true;
1339                                                 $text = $articleContent;
1340
1341                                         }
1342                                 }
1343
1344                                 # If the title is valid but undisplayable, make a link to it
1345                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1346                                         $text = "[[" . $title->getPrefixedText() . "]]";
1347                                         $found = true;
1348                                 }
1349                         }
1350                 }
1351
1352                 # Recursive parsing, escaping and link table handling
1353                 # Only for HTML output
1354                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1355                         $text = wfEscapeWikiText( $text );
1356                 } elseif ( $this->mOutputType == OT_HTML && $found ) {
1357                         # Clean up argument array: positional arguments are numbered from 1, name=value pairs are keyed by name
1358                         $assocArgs = array();
1359                         $index = 1;
1360                         foreach( $args as $arg ) {
1361                                 $eqpos = strpos( $arg, "=" );
1362                                 if ( $eqpos === false ) {
1363                                         $assocArgs[$index++] = $arg;
1364                                 } else {
1365                                         $name = trim( substr( $arg, 0, $eqpos ) );
1366                                         $value = trim( substr( $arg, $eqpos+1 ) );
1367                                         if ( $value === false ) { # NOTE(review): trim() yields a string, so this looks unreachable - confirm
1368                                                 $value = "";
1369                                         }
1370                                         if ( $name !== false ) {
1371                                                 $assocArgs[$name] = $value;
1372                                         }
1373                                 }
1374                         }
1375
1376                         # Do not enter included links in link table
1377                         if ( !is_null( $title ) ) {
1378                                 $wgLinkCache->suspend();
1379                         }
1380
1381                         # Run full parser on the included text
1382                         $text = $this->strip( $text, $this->mStripState );
1383                         $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
1384
1385                         # Add the result to the strip state for re-inclusion after
1386                         # the rest of the processing
1387                         $text = $this->insertStripItem( $text, $this->mStripState );
1388
1389                         # Resume the link cache and register the inclusion as a link
1390                         if ( !is_null( $title ) ) {
1391                                 $wgLinkCache->resume();
1392                                 $wgLinkCache->addLinkObj( $title );
1393                         }
1394                 }
1395
1396                 if ( !$found ) {
1397                         return $matches[0];
1398                 } else {
1399                         return $newline . $text;
1400                 }
1401         }
1402
1403         # Record one more inclusion of the entity keyed by $dbk.
1404         # Returns true while the running total stays within MAX_INCLUDE_REPEAT, false after that.
1405         function incrementIncludeCount( $dbk )
1406         {
1407                 $alreadyCounted = array_key_exists( $dbk, $this->mIncludeCount );
1408                 if ( !$alreadyCounted ) {
1409                         $this->mIncludeCount[$dbk] = 0;
1410                 }
1411                 # Count this inclusion first, then compare against the limit
1412                 $newCount = ++$this->mIncludeCount[$dbk];
1413                 return ( $newCount <= MAX_INCLUDE_REPEAT );
1414         }
1415
1416         # Filter the HTML in $text against the tag lists below and return the cleaned text: HTML comments are dropped,
1417         # unlisted or badly nested tags are escaped as literal text, and tags still open at the end are closed.
1418         /* private */ function removeHTMLtags( $text )
1419         {
1420                 $fname = "Parser::removeHTMLtags";
1421                 wfProfileIn( $fname );
1422                 $htmlpairs = array( # Tags that must be closed
1423                         "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1424                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1425                         "strike", "strong", "tt", "var", "div", "center",
1426                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1427                         "ruby", "rt" , "rb" , "rp", "p"
1428                 );
1429                 $htmlsingle = array( # Tags that are not tracked on the tag stack
1430                         "br", "hr", "li", "dt", "dd"
1431                 );
1432                 $htmlnest = array( # Tags that can be nested--??
1433                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1434                         "dl", "font", "big", "small", "sub", "sup"
1435                 );
1436                 $tabletags = array( # Can only appear inside table
1437                         "td", "th", "tr"
1438                 );
1439
1440                 $htmlsingle = array_merge( $tabletags, $htmlsingle ); # table rows/cells are not stack-tracked either
1441                 $htmlelements = array_merge( $htmlsingle, $htmlpairs ); # complete list of permitted tags
1442
1443                 $htmlattrs = $this->getHTMLattrs () ; # NOTE(review): not read again in this function
1444
1445                 # Remove HTML comments
1446                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1447
1448                 $bits = explode( "<", $text ); # every bit after the first begins right behind a < character
1449                 $text = array_shift( $bits ); # the text before the first < is kept unchanged
1450                 $tagstack = array(); $tablestack = array();
1451
1452                 foreach ( $bits as $x ) {
1453                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); # notices/warnings off while the match is unpacked
1454                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1455                           $x, $regs );
1456                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; # $qbar receives the whole match
1457                         error_reporting( $prev );
1458
1459                         $badtag = 0 ;
1460                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1461                                 # Check our stack
1462                                 if ( $slash ) {
1463                                         # Closing a tag...
1464                                         if ( ! in_array( $t, $htmlsingle ) &&
1465                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1466                                                 array_push( $tagstack, $ot ); # mismatch: put the popped tag back
1467                                                 $badtag = 1;
1468                                         } else {
1469                                                 if ( $t == "table" ) {
1470                                                         $tagstack = array_pop( $tablestack ); # restore the stack saved when the table was opened
1471                                                 }
1472                                                 $newparams = "";
1473                                         }
1474                                 } else {
1475                                         # Keep track for later
1476                                         if ( in_array( $t, $tabletags ) &&
1477                                           ! in_array( "table", $tagstack ) ) {
1478                                                 $badtag = 1;
1479                                         } else if ( in_array( $t, $tagstack ) &&
1480                                           ! in_array ( $t , $htmlnest ) ) {
1481                                                 $badtag = 1 ;
1482                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1483                                                 if ( $t == "table" ) {
1484                                                         array_push( $tablestack, $tagstack ); # a table starts a fresh tag stack
1485                                                         $tagstack = array();
1486                                                 }
1487                                                 array_push( $tagstack, $t );
1488                                         }
1489                                         # Strip non-approved attributes from the tag
1490                                         $newparams = $this->fixTagAttributes($params);
1491
1492                                 }
1493                                 if ( ! $badtag ) {
1494                                         $rest = str_replace( ">", "&gt;", $rest );
1495                                         $text .= "<$slash$t $newparams$brace$rest";
1496                                         continue;
1497                                 }
1498                         }
1499                         $text .= "&lt;" . str_replace( ">", "&gt;", $x); # rejected tag: emit it as escaped text
1500                 }
1501                 # Close off any remaining tags
1502                 while ( $t = array_pop( $tagstack ) ) {
1503                         $text .= "</$t>\n";
1504                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1505                 }
1506                 wfProfileOut( $fname );
1507                 return $text;
1508         }
1509
1510 /*
1511  * formatHeadings( $text ): rewrites the <h1>..<h6> headings found in $text.
1512  * This function accomplishes several tasks:
1513  * 1) Auto-number headings if that option is enabled
1514  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1515  * 3) Add a Table of contents on the top for users who have enabled the option
1516  * 4) Auto-anchor headings
1517  *
1518  * It loops through all headlines, collects the necessary data, then splits up the
1519  * string and re-inserts the newly formatted headlines.
1520  * Returns the reassembled text.
1521  */
1522
1523         /* private */ function formatHeadings( $text )
1524         {
1525                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1526                 $doShowToc = $this->mOptions->getShowToc();
1527                 if( !$this->mTitle->userCanEdit() ) {
1528                         $showEditLink = 0;
1529                         $rightClickHack = 0;
1530                 } else {
1531                         $showEditLink = $this->mOptions->getEditSection();
1532                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1533                 }
1534
1535                 # Inhibit editsection links if requested in the page
1536                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1537                 if( $esw->matchAndRemove( $text ) ) {
1538                         $showEditLink = 0;
1539                 }
1540                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1541                 # do not add TOC
1542                 $mw =& MagicWord::get( MAG_NOTOC );
1543                 if( $mw->matchAndRemove( $text ) ) {
1544                         $doShowToc = 0;
1545                 }
1546
1547                 # never add the TOC to the Main Page. This is an entry page that should not
1548                 # be more than 1-2 screens large anyway
1549                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1550                         $doShowToc = 0;
1551                 }
1552
1553                 # Get all headlines for numbering them and adding funky stuff like [edit]
1554                 # links - this is for later, but we need the number of headlines right now
1555                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1556
1557                 # if there are fewer than 4 headlines in the article, do not show TOC
1558                 if( $numMatches < 4 ) {
1559                         $doShowToc = 0;
1560                 }
1561
1562                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1563                 # override above conditions and always show TOC
1564                 $mw =& MagicWord::get( MAG_FORCETOC );
1565                 if ($mw->matchAndRemove( $text ) ) {
1566                         $doShowToc = 1;
1567                 }
1568
1569
1570                 # We need this to perform operations on the HTML
1571                 $sk =& $this->mOptions->getSkin();
1572
1573                 # headline counter
1574                 $headlineCount = 0;
1575
1576                 # Ugh .. the TOC should have neat indentation levels which can be
1577                 # passed to the skin functions. These are determined here
1578                 $toclevel = 0;
1579                 $toc = "";
1580                 $full = "";
1581                 $head = array();
1582                 $sublevelCount = array();
1583                 $level = 0;
1584                 $prevlevel = 0;
1585                 foreach( $matches[3] as $headline ) { # $matches[1] = level digit, [2] = rest of the opening tag, [3] = heading content
1586                         $numbering = "";
1587                         if( $level ) {
1588                                 $prevlevel = $level;
1589                         }
1590                         $level = $matches[1][$headlineCount];
1591                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1592                                 # reset when we enter a new level
1593                                 $sublevelCount[$level] = 0;
1594                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1595                                 $toclevel += $level - $prevlevel;
1596                         }
1597                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1598                                 # reset when we step back a level
1599                                 $sublevelCount[$level+1]=0;
1600                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1601                                 $toclevel -= $prevlevel - $level;
1602                         }
1603                         # count number of headlines for each level
1604                         @$sublevelCount[$level]++; # @ hides the notice for a level that has no counter yet
1605                         if( $doNumberHeadings || $doShowToc ) {
1606                                 $dot = 0;
1607                                 for( $i = 1; $i <= $level; $i++ ) {
1608                                         if( !empty( $sublevelCount[$i] ) ) {
1609                                                 if( $dot ) {
1610                                                         $numbering .= ".";
1611                                                 }
1612                                                 $numbering .= $sublevelCount[$i];
1613                                                 $dot = 1;
1614                                         }
1615                                 }
1616                         }
1617
1618                         # The canonized header is a version of the header text safe to use for links
1619                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1620                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1621
1622                         # strip out HTML
1623                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1624                         $tocline = trim( $canonized_headline );
1625                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
1626                         $refer[$headlineCount] = $canonized_headline; # NOTE(review): $refer is only written, never read, in this function
1627
1628                         # count how many in assoc. array so we can track dupes in anchors
1629                         @$refers[$canonized_headline]++;
1630                         $refcount[$headlineCount]=$refers[$canonized_headline];
1631
1632                         # Prepend the number to the heading text
1633
1634                         if( $doNumberHeadings || $doShowToc ) {
1635                                 $tocline = $numbering . " " . $tocline;
1636
1637                                 # Don't number the heading if it is the only one (looks silly)
1638                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1639                                         # the two are different if the line contains a link
1640                                         $headline=$numbering . " " . $headline;
1641                                 }
1642                         }
1643
1644                         # Create the anchor for linking from the TOC to the section
1645                         $anchor = $canonized_headline;
1646                         if($refcount[$headlineCount] > 1 ) {
1647                                 $anchor .= "_" . $refcount[$headlineCount];
1648                         }
1649                         if( $doShowToc ) {
1650                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1651                         }
1652                         if( $showEditLink ) {
1653                                 if ( empty( $head[$headlineCount] ) ) {
1654                                         $head[$headlineCount] = "";
1655                                 }
1656                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1657                         }
1658
1659                         # Add the edit section span
1660                         if( $rightClickHack ) {
1661                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1662                         }
1663
1664                         # give headline the correct <h#> tag
1665                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1666
1667                         $headlineCount++;
1668                 }
1669
1670                 if( $doShowToc ) {
1671                         $toclines = $headlineCount; # NOTE(review): $toclines is not used afterwards in this function
1672                         $toc .= $sk->tocUnindent( $toclevel );
1673                         $toc = $sk->tocTable( $toc );
1674                 }
1675
1676                 # split up and insert constructed headlines
1677
1678                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text ); # $blocks[$i] is the text in front of heading $i
1679                 $i = 0;
1680
1681                 foreach( $blocks as $block ) {
1682                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1683                             # This is the [edit] link that appears for the top block of text when
1684                                 # section editing is enabled
1685
1686                                 # Disabled because it broke block formatting
1687                                 # For example, a bullet point in the top line
1688                                 # $full .= $sk->editSectionLink(0);
1689                         }
1690                         $full .= $block;
1691                         if( $doShowToc && !$i) {
1692                         # Top anchor now in skin
1693                                 $full = $full.$toc;
1694                         }
1695
1696                         if( !empty( $head[$i] ) ) {
1697                                 $full .= $head[$i];
1698                         }
1699                         $i++;
1700                 }
1701
1702                 return $full;
1703         }
1704         # Handle the magic word "ISBN": turn an ISBN number that follows it into a link to Special:Booksources.
1705         /* private */ function doMagicISBN( &$tokenizer )
1706         {
1707                 # $tokenizer must provide previewToken() and nextToken(); both are assumed to
1708                 # return an array with "type" and "text" keys, which is how they are used here.
1709                 # Tokens with an empty type are skipped; a following text token is consumed.
1710                 # Returns "ISBN " plus the consumed text, with the number turned into a link.
1711
1712                 $token = $tokenizer->previewToken();
1713                 while ( $token["type"] == "" )
1714                 {
1715                         $tokenizer->nextToken();
1716                         $token = $tokenizer->previewToken();
1717                 }
1718                 if ( $token["type"] == "text" )
1719                 {
1720                         $token = $tokenizer->nextToken();
1721                         $x = (string)$token["text"];
1722                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1723
1724                         # Split into leading blanks, the run of ISBN characters and the remainder.
1725                         # strspn() copes with an empty or exhausted string, unlike indexing $x{0}.
1726                         $blankLen = strspn( $x, " " );
1727                         $isbnLen = strspn( $x, $valid, $blankLen );
1728                         $blank = str_repeat( " ", $blankLen );
1729                         $isbn = (string)substr( $x, $blankLen, $isbnLen );
1730                         $x = (string)substr( $x, $blankLen + $isbnLen );
1731
1732                         # The number without its hyphens is what gets passed to Booksources
1733                         $num = str_replace( "-", "", $isbn );
1734
1735                         if ( "" == $num ) {
1736                                 # No number: hand the text back, including hyphens already taken into $isbn
1737                                 $text = "ISBN $blank$isbn$x";
1738                         } else {
1739                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1740                                 $text = "<a href=\"" .
1741                                         $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1742                                         "\" class=\"internal\">ISBN $isbn</a>";
1743                                 $text .= $x;
1744                         }
1745                 } else {
1746                         $text = "ISBN ";
1747                 }
1748                 return $text;
1749         }
1750         /* private */ function doMagicRFC( &$tokenizer )
1751         {
1752                 # Handle the magic word "RFC": if the next text token starts with a number,
1753                 # consume the token and return an external link whose URL is the "rfcurl"
1754                 # message with "$1" replaced by that number; otherwise return the text as is.
1755                 # $tokenizer must provide previewToken() and nextToken(), assumed to return
1756                 # arrays with "type" and "text" keys. Tokens with an empty type are skipped.
1757
1758                 $token = $tokenizer->previewToken();
1759                 while ( $token["type"] == "" )
1760                 {
1761                         $tokenizer->nextToken();
1762                         $token = $tokenizer->previewToken();
1763                 }
1764                 if ( $token["type"] == "text" )
1765                 {
1766                         $token = $tokenizer->nextToken();
1767                         $x = (string)$token["text"];
1768                         $valid = "0123456789";
1769
1770                         # Split into leading blanks, the run of digits and the remainder.
1771                         # strspn() copes with an empty or exhausted string, unlike indexing $x{0}.
1772                         $blankLen = strspn( $x, " " );
1773                         $rfcLen = strspn( $x, $valid, $blankLen );
1774                         $blank = str_repeat( " ", $blankLen );
1775                         $rfc = (string)substr( $x, $blankLen, $rfcLen );
1776                         $x = (string)substr( $x, $blankLen + $rfcLen );
1777
1778                         if ( "" == $rfc ) {
1779                                 # No number follows. Plain assignment: $text does not exist yet at this point
1780                                 $text = "RFC $blank$x";
1781                         } else {
1782                                 $url = wfMsg( "rfcurl" );
1783                                 $url = str_replace( "$1", $rfc, $url);
1784                                 $sk =& $this->mOptions->getSkin();
1785                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1786                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1787                         }
1788                 } else {
1789                         $text = "RFC ";
1790                 }
1791                 return $text;
1792         }
1793         # Transform wiki markup before saving (output type OT_WIKI) and return the altered markup: line ends and <br> variants
1794         # are normalised, then the text goes through strip(), pstPass2( $text, $user ) and unstrip(). clearState() runs first if $clearState is true.
1795         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1796         {
1797                 $this->mOptions = $options;
1798                 $this->mTitle =& $title;
1799                 $this->mOutputType = OT_WIKI;
1800                 if ( $clearState ) {
1801                         $this->clearState();
1802                 }
1803
1804                 # Normalise line endings
1805                 $text = str_replace( "\r\n", "\n", $text );
1806                 # Normalise <br> variants. The attribute scan uses [^<>]+ instead of .+ so that a match
1807                 # stays inside one tag and cannot swallow other tags and the text between them.
1808                 $pairs = array(
1809                         "/<br[^<>]+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1810                         "/<br *?>/i" => "<br/>",
1811                 );
1812                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1813
1814                 $stripState = false;
1815                 $text = $this->strip( $text, $stripState, false );
1816                 $text = $this->pstPass2( $text, $user );
1817                 $text = $this->unstrip( $text, $stripState );
1818                 return $text;
1819         }
1820
# Second pass of the pre-save transform.
#
# In order: expands variables through replaceVariables(), expands the
# ~~~ / ~~~~ / ~~~~~ signature shortcuts, completes links written with an
# empty or missing side of the pipe, and finally trims trailing whitespace
# and removes the MAG_END magic word.
#
#   $text  wiki markup to transform
#   $user  user object; getName() and getOption( "nickname" ) feed the signature
#
# Returns the transformed wiki markup.
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone, $wgCurParser;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }

        /* Note: this is an ugly timezone hack for the European wikis */
        if ( isset( $wgLocaltimezone ) ) {
                $oldtz = getenv( "TZ" );
                putenv( "TZ=$wgLocaltimezone" );
        }
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if ( isset( $wgLocaltimezone ) ) {
                # NOTE(review): when TZ was not set beforehand, getenv() yields
                # false and this leaves TZ set to an empty string instead of
                # unset - confirm whether that matters to later date() calls.
                putenv( "TZ=$oldtz" );
        }

        # The tilde runs are literal strings, so plain string replacement is
        # used. With preg_replace() the replacement text is scanned for
        # backreferences, and a user name or nickname containing "$1" or "\\1"
        # would be silently altered. Longest run first, so that ~~~~~ is not
        # consumed by the shorter forms.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # The context is the parenthesised suffix of the current page title, if any
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }

        # Most specific pattern first: $p4 must run before $p3, which would
        # otherwise also match a namespaced title that carries a context.
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                # $context can only hold characters from $tc, which contains
                # neither "$" nor a backslash, so it is safe inside a replacement.
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # Disabled: older subst: handling, kept for reference
        /*
        $mw =& MagicWord::get( MAG_SUBST );
        $wgCurParser = $this->fork();
        $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
        $this->merge( $wgCurParser );
        */

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1891
# Prime the members that parse() would normally initialise, so that code
# outside the class can call individual member functions safely.
#
#   $title       stored by reference in $this->mTitle
#   $options     stored in $this->mOptions
#   $outputType  stored in $this->mOutputType
#   $clearState  when true (the default), clearState() is called afterwards
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        # Nothing more to do when the caller wants to keep the current state
        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1903
# Run variable replacement over a message text, with the output type set to
# OT_MSG and the global $wgTitle as the title.
#
#   $text     message text
#   $options  stored in $this->mOptions
#
# Returns the text after replaceVariables(). A call made while another call
# is still in progress returns $text unchanged.
function transformMsg( $text, $options )
{
        global $wgTitle;
        static $executing = false;

        # The static flag guards against infinite recursion
        if ( !$executing ) {
                $executing = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $executing = false;
        }

        return $text;
}
1923 }
1924
# Holds a text together with the language links, the category links and the
# "contains old magic" flag that belong to it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor; every argument is optional.
        #
        #   $text              stored in mText
        #   $languageLinks     array, stored in mLanguageLinks
        #   $categoryLinks     array, stored in mCategoryLinks
        #   $containsOldMagic  stored in mContainsOldMagic
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each one returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the link lists and the old-magic flag of $other into this object.
        # mText is left untouched.
        #
        #   $other  another ParserOutput
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links. This used to
                # merge in $this->mLanguageLinks, which dropped $other's
                # categories and polluted the list with language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1954
# Bundle of settings handed to the parser. Every setting has a get/set pair;
# the setters return whatever wfSetVar() / wfSetRef() returns.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # --- Settings that initialiseFromUser() copies from site globals ---

        function getUseTeX()
        {
                return $this->mUseTeX;
        }
        function setUseTeX( $x )
        {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function getUseCategoryMagic()
        {
                return $this->mUseCategoryMagic;
        }
        function setUseCategoryMagic( $x )
        {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        function getUseDynamicDates()
        {
                return $this->mUseDynamicDates;
        }
        function setUseDynamicDates( $x )
        {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function getInterwikiMagic()
        {
                return $this->mInterwikiMagic;
        }
        function setInterwikiMagic( $x )
        {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function getAllowExternalImages()
        {
                return $this->mAllowExternalImages;
        }
        function setAllowExternalImages( $x )
        {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        # --- Settings that initialiseFromUser() reads from the user object ---

        function getSkin()
        {
                return $this->mSkin;
        }
        # The skin is the only setting stored through wfSetRef()
        function setSkin( $x )
        {
                return wfSetRef( $this->mSkin, $x );
        }

        function getDateFormat()
        {
                return $this->mDateFormat;
        }
        function setDateFormat( $x )
        {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function getEditSection()
        {
                return $this->mEditSection;
        }
        function setEditSection( $x )
        {
                return wfSetVar( $this->mEditSection, $x );
        }

        function getEditSectionOnRightClick()
        {
                return $this->mEditSectionOnRightClick;
        }
        function setEditSectionOnRightClick( $x )
        {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function getNumberHeadings()
        {
                return $this->mNumberHeadings;
        }
        function setNumberHeadings( $x )
        {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function getShowToc()
        {
                return $this->mShowToc;
        }
        function setShowToc( $x )
        {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Build a new ParserOptions and fill it in with initialiseFromUser().
        #
        #   $user  passed straight through to initialiseFromUser()
        #
        # Returns the new object.
        /* static */ function newFromUser( &$user )
        {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fill in every setting: five from site-wide globals, the rest from a
        # user object.
        #
        #   $userInput  user object to read from; when it evaluates to false a
        #               fresh User, marked as loaded, is used instead
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $source =& $userInput;
                } else {
                        $source = new User;
                        $source->setLoaded( true );
                }

                # Site-wide configuration
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseTeX = $wgUseTeX;

                # Per-user values, read in the same order as before
                $this->mSkin =& $source->getSkin();
                $this->mDateFormat = $source->getOption( "date" );
                $this->mEditSection = $source->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $source->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $source->getOption( "numberheadings" );
                $this->mShowToc = $source->getOption( "showtoc" );
        }

}
2027
# Regex callback used in Parser::replaceVariables.
# Forwards the match array to braceSubstitution() on the parser held in the
# global $wgCurParser and hands back its result.
function wfBraceSubstitution( $matches )
{
        global $wgCurParser;

        $replacement = $wgCurParser->braceSubstitution( $matches );
        return $replacement;
}
2034
2035 ?>