includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36 define( "MAX_INCLUDE_REPEAT", 5 );
  37
  38 # Recursion depth of variable/inclusion evaluation
  39 define( "MAX_INCLUDE_PASSES", 3 );
  40
  41 # Allowed values for $mOutputType
  42 define( "OT_HTML", 1 );
  43 define( "OT_WIKI", 2 );
  44 define( "OT_MSG", 3 );
  45
  46 # prefix for escaping, used in two functions at least
  47 define( "UNIQ_PREFIX", "NaodW29");
  48
  49 class Parser
  50 {
  51         # Cleared with clearState():
  52         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array();
  53         var $mVariables, $mIncludeCount;
  54
  55         # Temporary:
  56         var $mOptions, $mTitle, $mOutputType;
  57
  58         function Parser()
  59         {
  60                 $this->clearState();
  61         }
  62
  63         function clearState()
  64         {
  65                 $this->mOutput = new ParserOutput;
  66                 $this->mAutonumber = 0;
  67                 $this->mLastSection = "";
  68                 $this->mDTopen = false;
  69                 $this->mVariables = false;
  70                 $this->mIncludeCount = array();
  71                 $this->mStripState = array();
  72         }
  73
  74         # First pass--just handle <nowiki> sections, pass the rest off
  75         # to doWikiPass2() which does all the real work.
  76         #
  77         # Returns a ParserOutput
  78         #
  79         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  80         {
  81                 $fname = "Parser::parse";
  82                 wfProfileIn( $fname );
  83
  84                 if ( $clearState ) {
  85                         $this->clearState();
  86                 }
  87
  88                 $this->mOptions = $options;
  89                 $this->mTitle =& $title;
  90                 $this->mOutputType = OT_HTML;
  91
  92                 $stripState = NULL;
  93                 $text = $this->strip( $text, $this->mStripState );
  94                 $text = $this->doWikiPass2( $text, $linestart );
  95                 $text = $this->unstrip( $text, $this->mStripState );
  96                 # Clean up special characters
  97                 $fixtags = array(
  98                         "/<hr *>/i" => '<hr/>',
  99                         "/<br *>/i" => '<br/>',
 100                         "/<center *>/i"=>'<span style="text-align:center;">',
 101                         "/<\\/center *>/i" => '</span>',
 102                         # Clean up spare ampersands; note that we probably ought to be
 103                         # more careful about named entities.
 104                         '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 105                 );
 106                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 107
 108                 $this->mOutput->setText( $text );
 109                 wfProfileOut( $fname );
 110                 return $this->mOutput;
 111         }
 112
 113         /* static */ function getRandomString()
 114         {
 115                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 116         }
 117
        # Replaces all occurrences of <$tag>content</$tag> in the text
        # with a random marker and returns the new text. The output parameter
        # $content will be an associative array filled with data of the form
        # $unique_marker => content.
 122
 123         # If $content is already set, the additional entries will be appended
 124
 125         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 126                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 127                 if ( !$content ) {
 128                         $content = array( );
 129                 }
 130                 $n = 1;
 131                 $stripped = "";
 132
 133                 while ( "" != $text ) {
 134                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 135                         $stripped .= $p[0];
 136                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 137                                 $text = "";
 138                         } else {
 139                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 140                                 $marker = $rnd . sprintf("%08X", $n++);
 141                                 $content[$marker] = $q[0];
 142                                 $stripped .= $marker;
 143                                 $text = $q[1];
 144                         }
 145                 }
 146                 return $stripped;
 147         }
 148
 149         # Strips <nowiki>, <pre> and <math>
 150         # Returns the text, and fills an array with data needed in unstrip()
 151         # If the $state is already a valid strip state, it adds to the state
 152         #
 153         function strip( $text, &$state )
 154         {
 155                 $render = ($this->mOutputType == OT_HTML);
 156                 if ( $state ) {
 157                         $nowiki_content = $state['nowiki'];
 158                         $hiero_content = $state['hiero'];
 159                         $math_content = $state['math'];
 160                         $pre_content = $state['pre'];
 161                         $item_content = $state['item'];
 162                 } else {
 163                         $nowiki_content = array();
 164                         $hiero_content = array();
 165                         $math_content = array();
 166                         $pre_content = array();
 167                         $item_content = array();
 168                 }
 169
 170                 # Replace any instances of the placeholders
 171                 $uniq_prefix = UNIQ_PREFIX;
 172                 $text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 173
 174                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 175                 foreach( $nowiki_content as $marker => $content ){
 176                         if( $render ){
 177                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 178                         } else {
 179                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 180                         }
 181                 }
 182
 183                 if( $GLOBALS['wgUseWikiHiero'] ){
 184                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 185                         foreach( $hiero_content as $marker => $content ){
 186                                 if( $render ){
 187                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 188                                 } else {
 189                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 190                                 }
 191                         }
 192                 }
 193
 194                 if( $this->mOptions->getUseTeX() ){
 195                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 196                         foreach( $math_content as $marker => $content ){
 197                                 if( $render ){
 198                                         $math_content[$marker] = renderMath( $content );
 199                                 } else {
 200                                         $math_content[$marker] = "<math>$content</math>";
 201                                 }
 202                         }
 203                 }
 204
 205                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 206                 foreach( $pre_content as $marker => $content ){
 207                         if( $render ){
 208                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 209                         } else {
 210                                 $pre_content[$marker] = "<pre>$content</pre>";
 211                         }
 212                 }
 213
 214                 $state = array(
 215                   'nowiki' => $nowiki_content,
 216                   'hiero' => $hiero_content,
 217                   'math' => $math_content,
 218                   'pre' => $pre_content,
 219                   'item' => $item_content
 220                 );
 221                 return $text;
 222         }
 223
 224         function unstrip( $text, &$state )
 225         {
 226                 # Must expand in reverse order, otherwise nested tags will be corrupted
 227                 /*
 228                 $dicts = array( 'item', 'pre', 'math', 'hiero', 'nowiki' );
 229                 foreach ( $dicts as $dictName ) {
 230                         $content_dict = $state[$dictName];
 231                         foreach( $content_dict as $marker => $content ){
 232                                 $text = str_replace( $marker, $content, $text );
 233                         }
 234                 }*/
 235
 236                 $contentDict = end( $state );
 237                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 238                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 239                                 $text = str_replace( key( $contentDict ), $content, $text );
 240                         }
 241                 }
 242
 243                 return $text;
 244         }
 245
 246         # Add an item to the strip state
 247         # Returns the unique tag which must be inserted into the stripped text
 248         # The tag will be replaced with the original text in unstrip()
 249
 250         function insertStripItem( $text, &$state )
 251         {
 252                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 253                 if ( !$state ) {
 254                         $state = array(
 255                           'nowiki' => array(),
 256                           'hiero' => array(),
 257                           'math' => array(),
 258                           'pre' => array(),
 259                           'item' => array()
 260                         );
 261                 }
 262                 $state['item'][$rnd] = $text;
 263                 return $rnd;
 264         }
 265
 266         function categoryMagic ()
 267         {
 268                 global $wgLang , $wgUser ;
 269                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 270                 $id = $this->mTitle->getArticleID() ;
 271                 $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
 272                 $ti = $this->mTitle->getText() ;
 273                 $ti = explode ( ":" , $ti , 2 ) ;
 274                 if ( $cat != $ti[0] ) return "" ;
 275                 $r = '<br style="clear:both;"/>\n';
 276
 277                 $articles = array() ;
 278                 $parents = array () ;
 279                 $children = array() ;
 280
 281
 282 #               $sk =& $this->mGetSkin();
 283                 $sk =& $wgUser->getSkin() ;
 284
 285                 $data = array () ;
 286                 $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 287                 $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 288
 289                 $res = wfQuery ( $sql1, DB_READ ) ;
 290                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 291
 292                 $res = wfQuery ( $sql2, DB_READ ) ;
 293                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 294
 295
 296                 foreach ( $data AS $x )
 297                 {
 298                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 299                         if ( $t != "" ) $t .= ":" ;
 300                         $t .= $x->cur_title ;
 301
 302                         $y = explode ( ":" , $t , 2 ) ;
 303                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 304                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 305                         } else {
 306                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 307                         }
 308                 }
 309                 wfFreeResult ( $res ) ;
 310
 311                 # Children
 312                 if ( count ( $children ) > 0 )
 313                 {
 314                         asort ( $children ) ;
 315                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 316                         $r .= implode ( ", " , $children ) ;
 317                 }
 318
 319                 # Articles
 320                 if ( count ( $articles ) > 0 )
 321                 {
 322                         asort ( $articles ) ;
 323                         $h =  wfMsg( "category_header", $ti[1] );
 324                         $r .= "<h2>{$h}</h2>\n" ;
 325                         $r .= implode ( ", " , $articles ) ;
 326                 }
 327
 328
 329                 return $r ;
 330         }
 331
 332         function getHTMLattrs ()
 333         {
 334                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 335                                 "title", "align", "lang", "dir", "width", "height",
 336                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 337                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 338                                 /* FONT */ "type", "start", "value", "compact",
 339                                 /* For various lists, mostly deprecated but safe */
 340                                 "summary", "width", "border", "frame", "rules",
 341                                 "cellspacing", "cellpadding", "valign", "char",
 342                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 343                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 344                                 "id", "class", "name", "style" /* For CSS */
 345                                 );
 346                 return $htmlattrs ;
 347         }
 348
 349         function fixTagAttributes ( $t )
 350         {
 351                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 352                 $htmlattrs = $this->getHTMLattrs() ;
 353
 354                 # Strip non-approved attributes from the tag
 355                 $t = preg_replace(
 356                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 357                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 358                         $t);
 359                 # Strip javascript "expression" from stylesheets. Brute force approach:
 360                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 361
 362                 if( preg_match(
 363                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 364                         wfMungeToUtf8( $t ) ) )
 365                 {
 366                         $t="";
 367                 }
 368
 369                 return trim ( $t ) ;
 370         }
 371
 372         function doTableStuff ( $t )
 373         {
 374                 $t = explode ( "\n" , $t ) ;
 375                 $td = array () ; # Is currently a td tag open?
 376                         $ltd = array () ; # Was it TD or TH?
 377                         $tr = array () ; # Is currently a tr tag open?
 378                         $ltr = array () ; # tr attributes
 379                         foreach ( $t AS $k => $x )
 380                         {
 381                                 $x = rtrim ( $x ) ;
 382                                 $fc = substr ( $x , 0 , 1 ) ;
 383                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 384                                 {
 385                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 386                                         array_push ( $td , false ) ;
 387                                         array_push ( $ltd , "" ) ;
 388                                         array_push ( $tr , false ) ;
 389                                         array_push ( $ltr , "" ) ;
 390                                 }
 391                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 392                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 393                                 {
 394                                         $z = "</table>\n" ;
 395                                         $l = array_pop ( $ltd ) ;
 396                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 397                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 398                                         array_pop ( $ltr ) ;
 399                                         $t[$k] = $z ;
 400                                 }
 401                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 402                                                 {
 403                                                 $z = trim ( substr ( $x , 2 ) ) ;
 404                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 405                                                 }*/
 406                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 407                                 {
 408                                         $x = substr ( $x , 1 ) ;
 409                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 410                                         $z = "" ;
 411                                         $l = array_pop ( $ltd ) ;
 412                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 413                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 414                                         array_pop ( $ltr ) ;
 415                                         $t[$k] = $z ;
 416                                         array_push ( $tr , false ) ;
 417                                         array_push ( $td , false ) ;
 418                                         array_push ( $ltd , "" ) ;
 419                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 420                                 }
 421                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 422                                 {
 423                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 424                                         {
 425                                                 $fc = "+" ;
 426                                                 $x = substr ( $x , 1 ) ;
 427                                         }
 428                                         $after = substr ( $x , 1 ) ;
 429                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 430                                         $after = explode ( "||" , $after ) ;
 431                                         $t[$k] = "" ;
 432                                         foreach ( $after AS $theline )
 433                                         {
 434                                                 $z = "" ;
 435                                                 if ( $fc != "+" )
 436                                                 {
 437                                                         $tra = array_pop ( $ltr ) ;
 438                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 439                                                         array_push ( $tr , true ) ;
 440                                                         array_push ( $ltr , "" ) ;
 441                                                 }
 442
 443                                                 $l = array_pop ( $ltd ) ;
 444                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 445                                                 if ( $fc == "|" ) $l = "td" ;
 446                                                 else if ( $fc == "!" ) $l = "th" ;
 447                                                 else if ( $fc == "+" ) $l = "caption" ;
 448                                                 else $l = "" ;
 449                                                 array_push ( $ltd , $l ) ;
 450                                                 $y = explode ( "|" , $theline , 2 ) ;
 451                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 452                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 453                                                 $t[$k] .= $y ;
 454                                                 array_push ( $td , true ) ;
 455                                         }
 456                                 }
 457                         }
 458
 459                 # Closing open td, tr && table
 460                 while ( count ( $td ) > 0 )
 461                 {
 462                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 463                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 464                         $t[] = "</table>" ;
 465                 }
 466
 467                 $t = implode ( "\n" , $t ) ;
 468                 #               $t = $this->removeHTMLtags( $t );
 469                 return $t ;
 470         }
 471
 472         # Well, OK, it's actually about 14 passes.  But since all the
 473         # hard lifting is done inside PHP's regex code, it probably
 474         # wouldn't speed things up much to add a real parser.
 475         #
 476         function doWikiPass2( $text, $linestart )
 477         {
 478                 $fname = "Parser::doWikiPass2";
 479                 wfProfileIn( $fname );
 480
 481                 $text = $this->removeHTMLtags( $text );
 482                 $text = $this->replaceVariables( $text );
 483
 484                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 485
 486                 $text = $this->doHeadings( $text );
 487
 488                 if($this->mOptions->getUseDynamicDates()) {
 489                         global $wgDateFormatter;
 490                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 491                 }
 492
 493                 $text = $this->replaceExternalLinks( $text );
 494                 $text = $this->doTokenizedParser ( $text );
 495
 496                 $text = $this->doTableStuff ( $text ) ;
 497
 498                 $text = $this->formatHeadings( $text );
 499
 500                 $sk =& $this->mOptions->getSkin();
 501                 $text = $sk->transformContent( $text );
 502
 503                 $text .= $this->categoryMagic () ;
 504
 505                 # needs to be called last
 506                 $text = $this->doBlockLevels( $text, $linestart );
 507
 508                 wfProfileOut( $fname );
 509                 return $text;
 510         }
 511
 512
 513         /* private */ function doHeadings( $text )
 514         {
 515                 for ( $i = 6; $i >= 1; --$i ) {
 516                         $h = substr( "======", 0, $i );
 517                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 518                           "<h{$i}>\\1</h{$i}>\\2", $text );
 519                 }
 520                 return $text;
 521         }
 522
 523         # Note: we have to do external links before the internal ones,
 524         # and otherwise take great care in the order of things here, so
 525         # that we don't end up interpreting some URLs twice.
 526
 527         /* private */ function replaceExternalLinks( $text )
 528         {
 529                 $fname = "Parser::replaceExternalLinks";
 530                 wfProfileIn( $fname );
 531                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 532                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 533                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 534                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 535                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 536                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 537                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 538                 wfProfileOut( $fname );
 539                 return $text;
 540         }
 541
 542         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 543         {
 544                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 545                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 546
 547                 # this is  the list of separators that should be ignored if they
 548                 # are the last character of an URL but that should be included
 549                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 550                 # in this case, the last comma should not become part of the URL,
 551                 # but in "www.foo.com/123,2342,32.htm" it should.
 552                 $sep = ",;\.:";
 553                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 554                 $images = "gif|png|jpg|jpeg";
 555
 556                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 557                 # they are interpreted as part of the string (used to tell PHP
 558                 # that the content of the string should be inserted there).
 559                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 560                   "((?i){$images})([^{$uc}]|$)/";
 561
 562                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 563                 $sk =& $this->mOptions->getSkin();
 564
 565                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 566                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 567                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 568                 }
 569                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 570                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 571                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 572                   "</a>\\5", $s );
 573                 $s = str_replace( $unique, $protocol, $s );
 574
 575                 $a = explode( "[{$protocol}:", " " . $s );
 576                 $s = array_shift( $a );
 577                 $s = substr( $s, 1 );
 578
 579                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 580                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 581
 582                 foreach ( $a as $line ) {
 583                         if ( preg_match( $e1, $line, $m ) ) {
 584                                 $link = "{$protocol}:{$m[1]}";
 585                                 $trail = $m[2];
 586                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 587                                 else { $text = wfEscapeHTML( $link ); }
 588                         } else if ( preg_match( $e2, $line, $m ) ) {
 589                                 $link = "{$protocol}:{$m[1]}";
 590                                 $text = $m[2];
 591                                 $trail = $m[3];
 592                         } else {
 593                                 $s .= "[{$protocol}:" . $line;
 594                                 continue;
 595                         }
 596                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 597                                 $paren = "";
 598                         } else {
 599                                 # Expand the URL for printable version
 600                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 601                         }
 602                         $la = $sk->getExternalLinkAttributes( $link, $text );
 603                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 604
 605                 }
 606                 return $s;
 607         }
 608
 609         /* private */ function handle3Quotes( &$state, $token )
 610         {
 611                 if ( $state["strong"] !== false ) {
 612                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 613                         {
 614                                 # ''' lala ''lala '''
 615                                 $s = "</em></strong><em>";
 616                         } else {
 617                                 $s = "</strong>";
 618                         }
 619                         $state["strong"] = FALSE;
 620                 } else {
 621                         $s = "<strong>";
 622                         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
 623                 }
 624                 return $s;
 625         }
 626
 627         /* private */ function handle2Quotes( &$state, $token )
 628         {
 629                 if ( $state["em"] !== false ) {
 630                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 631                         {
 632                                 # ''lala'''lala'' ....'''
 633                                 $s = "</strong></em><strong>";
 634                         } else {
 635                                 $s = "</em>";
 636                         }
 637                         $state["em"] = FALSE;
 638                 } else {
 639                         $s = "<em>";
 640                         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 641
 642                 }
 643                 return $s;
 644         }
 645
 646         /* private */ function handle5Quotes( &$state, $token )
 647         {
 648                 $s = "";
 649                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 650                         if ( $state["em"] < $state["strong"] ) {
 651                                 $s .= "</strong></em>";
 652                         } else {
 653                                 $s .= "</em></strong>";
 654                         }
 655                         $state["strong"] = $state["em"] = FALSE;
 656                 } elseif ( $state["em"] !== false ) {
 657                         $s .= "</em><strong>";
 658                         $state["em"] = FALSE;
 659                         $state["strong"] = $token["pos"];
 660                 } elseif ( $state["strong"] !== false ) {
 661                         $s .= "</strong><em>";
 662                         $state["strong"] = FALSE;
 663                         $state["em"] = $token["pos"];
 664                 } else { # not $em and not $strong
 665                         $s .= "<strong><em>";
 666                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 667                 }
 668                 return $s;
 669         }
 670
 671         /* private */ function doTokenizedParser( $str )
 672         {
 673                 global $wgLang; # for language specific parser hook
 674
 675                 $tokenizer=Tokenizer::newFromString( $str );
 676                 $tokenStack = array();
 677
 678                 $s="";
 679                 $state["em"]      = FALSE;
 680                 $state["strong"]  = FALSE;
 681                 $tagIsOpen = FALSE;
 682                 $threeopen = false;
 683
 684                 # The tokenizer splits the text into tokens and returns them one by one.
 685                 # Every call to the tokenizer returns a new token.
 686                 while ( $token = $tokenizer->nextToken() )
 687                 {
 688                         switch ( $token["type"] )
 689                         {
 690                                 case "text":
 691                                         # simple text with no further markup
 692                                         $txt = $token["text"];
 693                                         break;
 694                                 case "[[[":
 695                                         # remember the tag opened with 3 [
 696                                         $threeopen = true;
 697                                 case "[[":
 698                                         # link opening tag.
 699                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 700                                         $tagIsOpen = TRUE;
 701                                         array_push( $tokenStack, $token );
 702                                         $txt="";
 703                                         break;
 704
 705                                 case "]]]":
 706                                 case "]]":
 707                                         # link close tag.
 708                                         # get text from stack, glue it together, and call the code to handle a
 709                                         # link
 710
 711                                         if ( count( $tokenStack ) == 0 )
 712                                         {
 713                                                 # stack empty. Found a ]] without an opening [[
 714                                                 $txt = "]]";
 715                                         } else {
 716                                                 $linkText = "";
 717                                                 $lastToken = array_pop( $tokenStack );
 718                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 719                                                 {
 720                                                         if( !empty( $lastToken["text"] ) ) {
 721                                                                 $linkText = $lastToken["text"] . $linkText;
 722                                                         }
 723                                                         $lastToken = array_pop( $tokenStack );
 724                                                 }
 725
 726                                                 $txt = $linkText ."]]";
 727
 728                                                 if( isset( $lastToken["text"] ) ) {
 729                                                         $prefix = $lastToken["text"];
 730                                                 } else {
 731                                                         $prefix = "";
 732                                                 }
 733                                                 $nextToken = $tokenizer->previewToken();
 734                                                 if ( $nextToken["type"] == "text" )
 735                                                 {
 736                                                         # Preview just looks at it. Now we have to fetch it.
 737                                                         $nextToken = $tokenizer->nextToken();
 738                                                         $txt .= $nextToken["text"];
 739                                                 }
 740                                                 $fakestate = $this->mStripState;
 741                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$fakestate), $prefix );
 742
 743                                                 # did the tag start with 3 [ ?
 744                                                 if($threeopen) {
 745                                                         # show the first as text
 746                                                         $txt = "[".$txt;
 747                                                         $threeopen=false;
 748                                                 }
 749
 750                                         }
 751                                         $tagIsOpen = (count( $tokenStack ) != 0);
 752                                         break;
 753                                 case "----":
 754                                         $txt = "\n<hr />\n";
 755                                         break;
 756                                 case "'''":
 757                                         # This and the three next ones handle quotes
 758                                         $txt = $this->handle3Quotes( $state, $token );
 759                                         break;
 760                                 case "''":
 761                                         $txt = $this->handle2Quotes( $state, $token );
 762                                         break;
 763                                 case "'''''":
 764                                         $txt = $this->handle5Quotes( $state, $token );
 765                                         break;
 766                                 case "":
 767                                         # empty token
 768                                         $txt="";
 769                                         break;
 770                                 case "RFC ":
 771                                         if ( $tagIsOpen ) {
 772                                                 $txt = "RFC ";
 773                                         } else {
 774                                                 $txt = $this->doMagicRFC( $tokenizer );
 775                                         }
 776                                         break;
 777                                 case "ISBN ":
 778                                         if ( $tagIsOpen ) {
 779                                                 $txt = "ISBN ";
 780                                         } else {
 781                                                 $txt = $this->doMagicISBN( $tokenizer );
 782                                         }
 783                                         break;
 784                                 default:
 785                                         # Call language specific Hook.
 786                                         $txt = $wgLang->processToken( $token, $tokenStack );
 787                                         if ( NULL == $txt ) {
 788                                                 # An unkown token. Highlight.
 789                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 790                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 791                                         }
 792                                         break;
 793                         }
 794                         # If we're parsing the interior of a link, don't append the interior to $s,
 795                         # but push it to the stack so it can be processed when a ]] token is found.
 796                         if ( $tagIsOpen  && $txt != "" ) {
 797                                 $token["type"] = "text";
 798                                 $token["text"] = $txt;
 799                                 array_push( $tokenStack, $token );
 800                         } else {
 801                                 $s .= $txt;
 802                         }
 803                 } #end while
 804                 if ( count( $tokenStack ) != 0 )
 805                 {
 806                         # still objects on stack. opened [[ tag without closing ]] tag.
 807                         $txt = "";
 808                         while ( $lastToken = array_pop( $tokenStack ) )
 809                         {
 810                                 if ( $lastToken["type"] == "text" )
 811                                 {
 812                                         $txt = $lastToken["text"] . $txt;
 813                                 } else {
 814                                         $txt = $lastToken["type"] . $txt;
 815                                 }
 816                         }
 817                         $s .= $txt;
 818                 }
 819                 return $s;
 820         }
 821
 822         /* private */ function handleInternalLink( $line, $prefix )
 823         {
 824                 global $wgLang, $wgLinkCache;
 825                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 826                 static $fname = "Parser::handleInternalLink" ;
 827                 wfProfileIn( $fname );
 828
 829                 wfProfileIn( "$fname-setup" );
 830                 static $tc = FALSE;
 831                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 832                 $sk =& $this->mOptions->getSkin();
 833
 834                 # Match a link having the form [[namespace:link|alternate]]trail
 835                 static $e1 = FALSE;
 836                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 837                 # Match the end of a line for a word that's not followed by whitespace,
 838                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 839                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 840                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 841                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 842
 843
 844                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 845                 static $image = FALSE;
 846                 static $special = FALSE;
 847                 static $media = FALSE;
 848                 static $category = FALSE;
 849                 if ( !$image ) { $image = Namespace::getImage(); }
 850                 if ( !$special ) { $special = Namespace::getSpecial(); }
 851                 if ( !$media ) { $media = Namespace::getMedia(); }
 852                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 853
 854                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 855
 856                 wfProfileOut( "$fname-setup" );
 857                 $s = "";
 858
 859                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 860                         $text = $m[2];
 861                         $trail = $m[3];
 862                 } else { # Invalid form; output directly
 863                         $s .= $prefix . "[[" . $line ;
 864                         return $s;
 865                 }
 866
 867                 /* Valid link forms:
 868                 Foobar -- normal
 869                 :Foobar -- override special treatment of prefix (images, language links)
 870                 /Foobar -- convert to CurrentPage/Foobar
 871                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 872                 */
 873                 $c = substr($m[1],0,1);
 874                 $noforce = ($c != ":");
 875                 if( $c == "/" ) { # subpage
 876                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 877                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 878                                 $noslash=$m[1];
 879                         } else {
 880                                 $noslash=substr($m[1],1);
 881                         }
 882                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 883                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 884                                 if( "" == $text ) {
 885                                         $text= $m[1];
 886                                 } # this might be changed for ugliness reasons
 887                         } else {
 888                                 $link = $noslash; # no subpage allowed, use standard link
 889                         }
 890                 } elseif( $noforce ) { # no subpage
 891                         $link = $m[1];
 892                 } else {
 893                         $link = substr( $m[1], 1 );
 894                 }
 895                 if( "" == $text )
 896                         $text = $link;
 897
 898                 $nt = Title::newFromText( $link );
 899                 if( !$nt ) {
 900                         $s .= $prefix . "[[" . $line;
 901                         return $s;
 902                 }
 903                 $ns = $nt->getNamespace();
 904                 $iw = $nt->getInterWiki();
 905                 if( $noforce ) {
 906                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 907                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 908                                 return (trim($s) == '')? '': $s;
 909                         }
 910                         if( $ns == $image ) {
 911                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 912                                 $wgLinkCache->addImageLinkObj( $nt );
 913                                 return $s;
 914                         }
 915                 }
 916                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 917                     ( strpos( $link, "#" ) == FALSE ) ) {
 918                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 919                         return $s;
 920                 }
 921
 922                 # Category feature
 923                 $catns = strtoupper ( $nt->getDBkey () ) ;
 924                 $catns = explode ( ":" , $catns ) ;
 925                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 926                 else $catns = "" ;
 927                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 928                         $t = explode ( ":" , $nt->getText() ) ;
 929                         array_shift ( $t ) ;
 930                         $t = implode ( ":" , $t ) ;
 931                         $t = $wgLang->ucFirst ( $t ) ;
 932                         $nnt = Title::newFromText ( $category.":".$t ) ;
 933                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 934                         $this->mOutput->mCategoryLinks[] = $t ;
 935                         $s .= $prefix . $trail ;
 936                         return $s ;
 937                 }
 938
 939                 if( $ns == $media ) {
 940                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 941                         $wgLinkCache->addImageLinkObj( $nt );
 942                         return $s;
 943                 } elseif( $ns == $special ) {
 944                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 945                         return $s;
 946                 }
 947                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 948
 949                 wfProfileOut( $fname );
 950                 return $s;
 951         }
 952
 953         # Some functions here used by doBlockLevels()
 954         #
 955         /* private */ function closeParagraph()
 956         {
 957                 $result = "";
 958                 if ( '' != $this->mLastSection ) {
 959                         $result = "</" . $this->mLastSection  . ">\n";
 960                 }
 961                 $this->mLastSection = "";
 962                 return $result;
 963         }
 964         # getCommon() returns the length of the longest common substring
 965         # of both arguments, starting at the beginning of both.
 966         #
 967         /* private */ function getCommon( $st1, $st2 )
 968         {
 969                 $fl = strlen( $st1 );
 970                 $shorter = strlen( $st2 );
 971                 if ( $fl < $shorter ) { $shorter = $fl; }
 972
 973                 for ( $i = 0; $i < $shorter; ++$i ) {
 974                         if ( $st1{$i} != $st2{$i} ) { break; }
 975                 }
 976                 return $i;
 977         }
 978         # These next three functions open, continue, and close the list
 979         # element appropriate to the prefix character passed into them.
 980         #
 981         /* private */ function openList( $char )
 982     {
 983                 $result = $this->closeParagraph();
 984
 985                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 986                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 987                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 988                 else if ( ";" == $char ) {
 989                         $result .= "<dl><dt>";
 990                         $this->mDTopen = true;
 991                 }
 992                 else { $result = "<!-- ERR 1 -->"; }
 993
 994                 return $result;
 995         }
 996
 997         /* private */ function nextItem( $char )
 998         {
 999                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
1000                 else if ( ":" == $char || ";" == $char ) {
1001                         $close = "</dd>";
1002                         if ( $this->mDTopen ) { $close = "</dt>"; }
1003                         if ( ";" == $char ) {
1004                                 $this->mDTopen = true;
1005                                 return $close . "<dt>";
1006                         } else {
1007                                 $this->mDTopen = false;
1008                                 return $close . "<dd>";
1009                         }
1010                 }
1011                 return "<!-- ERR 2 -->";
1012         }
1013
1014         /* private */function closeList( $char )
1015         {
1016                 if ( "*" == $char ) { $text = "</li></ul>"; }
1017                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1018                 else if ( ":" == $char ) {
1019                         if ( $this->mDTopen ) {
1020                                 $this->mDTopen = false;
1021                                 $text = "</dt></dl>";
1022                         } else {
1023                                 $text = "</dd></dl>";
1024                         }
1025                 }
1026                 else {  return "<!-- ERR 3 -->"; }
1027                 return $text."\n";
1028         }
1029
1030         /* private */ function doBlockLevels( $text, $linestart )
1031         {
1032                 $fname = "Parser::doBlockLevels";
1033                 wfProfileIn( $fname );
1034                 # Parsing through the text line by line.  The main thing
1035                 # happening here is handling of block-level elements p, pre,
1036                 # and making lists from lines starting with * # : etc.
1037                 #
1038                 $a = explode( "\n", $text );
1039                 $lastPref = $text = '';
1040                 $this->mDTopen = $inBlockElem = $pstack = false;
1041
1042                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1043                 foreach ( $a as $t ) {
1044
1045                         $oLine = $t;
1046                         $opl = strlen( $lastPref );
1047                         $npl = strspn( $t, "*#:;" );
1048                         $pref = substr( $t, 0, $npl );
1049                         $pref2 = str_replace( ";", ":", $pref );
1050                         $t = substr( $t, $npl );
1051                         // list generation
1052                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1053                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1054                                 if ( $pstack ) { $pstack = false; }
1055
1056                                 if ( ";" == substr( $pref, -1 ) ) {
1057                                         $cpos = strpos( $t, ":" );
1058                                         if ( ! ( false === $cpos ) ) {
1059                                                 $term = substr( $t, 0, $cpos );
1060                                                 $text .= $term . $this->nextItem( ":" );
1061                                                 $t = substr( $t, $cpos + 1 );
1062                                         }
1063                                 }
1064                         } else if (0 != $npl || 0 != $opl) {
1065                                 $cpl = $this->getCommon( $pref, $lastPref );
1066                                 if ( $pstack ) { $pstack = false; }
1067
1068                                 while ( $cpl < $opl ) {
1069                                         $text .= $this->closeList( $lastPref{$opl-1} );
1070                                         --$opl;
1071                                 }
1072                                 if ( $npl <= $cpl && $cpl > 0 ) {
1073                                         $text .= $this->nextItem( $pref{$cpl-1} );
1074                                 }
1075                                 while ( $npl > $cpl ) {
1076                                         $char = substr( $pref, $cpl, 1 );
1077                                         $text .= $this->openList( $char );
1078
1079                                         if ( ";" == $char ) {
1080                                                 $cpos = strpos( $t, ":" );
1081                                                 if ( ! ( false === $cpos ) ) {
1082                                                         $term = substr( $t, 0, $cpos );
1083                                                         $text .= $term . $this->nextItem( ":" );
1084                                                         $t = substr( $t, $cpos + 1 );
1085                                                 }
1086                                         }
1087                                         ++$cpl;
1088                                 }
1089                                 $lastPref = $pref2;
1090                         }
1091                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1092                                 $uniq_prefix = UNIQ_PREFIX;
1093                                 // XXX: use a stack for nestable elements like span, table and div
1094                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1095                                 $closematch = preg_match(
1096                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1097                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1098                                 if ( $openmatch or $closematch ) {
1099                                         if ( $pstack ) { $pstack = false; }
1100                                         $text .= $this->closeParagraph();
1101                                         if ( $closematch  ) {
1102                                                 $inBlockElem = false;
1103                                         } else {
1104                                                 $inBlockElem = true;
1105                                         }
1106                                 } else if ( !$inBlockElem ) {
1107                                         if ( " " == $t{0} ) {
1108                                                 // pre
1109                                                 if ($this->mLastSection != 'pre') {
1110                                                         $pstack = false;
1111                                                         $text .= $this->closeParagraph().'<pre>';
1112                                                         $this->mLastSection = 'pre';
1113                                                 }
1114                                         } else {
1115                                                 // paragraph
1116                                                 if ( '' == trim($t) ) {
1117                                                         if ( $pstack ) {
1118                                                                 $text .= $pstack.'<br/>';
1119                                                                 $pstack = false;
1120                                                                 $this->mLastSection = 'p';
1121                                                         } else {
1122                                                                 if ($this->mLastSection != 'p' ) {
1123                                                                         $text .= $this->closeParagraph();
1124                                                                         $this->mLastSection = '';
1125                                                                         $pstack = "<p>";
1126                                                                 } else {
1127                                                                         $pstack = '</p><p>';
1128                                                                 }
1129                                                         }
1130                                                 } else {
1131                                                         if ( $pstack ) {
1132                                                                 $text .= $pstack;
1133                                                                 $pstack = false;
1134                                                                 $this->mLastSection = 'p';
1135                                                         } else if ($this->mLastSection != 'p') {
1136                                                                 $text .= $this->closeParagraph().'<p>';
1137                                                                 $this->mLastSection = 'p';
1138                                                         }
1139                                                 }
1140                                         }
1141                                 }
1142                         }
1143                         if ($pstack === false) {
1144                                 $text .= $t."\n";
1145                         }
1146                 }
1147                 while ( $npl ) {
1148                         $text .= $this->closeList( $pref2{$npl-1} );
1149                         --$npl;
1150                 }
1151                 if ( "" != $this->mLastSection ) {
1152                         $text .= "</" . $this->mLastSection . ">";
1153                         $this->mLastSection = "";
1154                 }
1155                 wfProfileOut( $fname );
1156                 return $text;
1157         }
1158
1159         function getVariableValue( $index ) {
1160                 global $wgLang, $wgSitename, $wgServer;
1161
1162                 switch ( $index ) {
1163                         case MAG_CURRENTMONTH:
1164                                 return date( "m" );
1165                         case MAG_CURRENTMONTHNAME:
1166                                 return $wgLang->getMonthName( date("n") );
1167                         case MAG_CURRENTMONTHNAMEGEN:
1168                                 return $wgLang->getMonthNameGen( date("n") );
1169                         case MAG_CURRENTDAY:
1170                                 return date("j");
1171                         case MAG_CURRENTDAYNAME:
1172                                 return $wgLang->getWeekdayName( date("w")+1 );
1173                         case MAG_CURRENTYEAR:
1174                                 return date( "Y" );
1175                         case MAG_CURRENTTIME:
1176                                 return $wgLang->time( wfTimestampNow(), false );
1177                         case MAG_NUMBEROFARTICLES:
1178                                 return wfNumberOfArticles();
1179                         case MAG_SITENAME:
1180                                 return $wgSitename;
1181                         case MAG_SERVER:
1182                                 return $wgServer;
1183                         default:
1184                                 return NULL;
1185                 }
1186         }
1187
1188         function initialiseVariables()
1189         {
1190                 global $wgVariableIDs;
1191                 $this->mVariables = array();
1192                 foreach ( $wgVariableIDs as $id ) {
1193                         $mw =& MagicWord::get( $id );
1194                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1195                 }
1196         }
1197
1198         /* private */ function replaceVariables( $text )
1199         {
1200                 global $wgLang, $wgCurParser;
1201                 global $wgScript, $wgArticlePath;
1202
1203                 $fname = "Parser::replaceVariables";
1204                 wfProfileIn( $fname );
1205
1206                 $bail = false;
1207                 if ( !$this->mVariables ) {
1208                         $this->initialiseVariables();
1209                 }
1210                 $titleChars = Title::legalChars();
1211                 $regex = "/{{([$titleChars\\|]*?)}}/s";
1212
1213                 # "Recursive" variable expansion: run it through a couple of passes
1214                 for ( $i=0; $i<MAX_INCLUDE_REPEAT && !$bail; $i++ ) {
1215                         $oldText = $text;
1216
1217                         # It's impossible to rebind a global in PHP
1218                         # Instead, we run the substitution on a copy, then merge the changed fields back in
1219                         $wgCurParser = $this->fork();
1220
1221                         $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1222                         if ( $oldText == $text ) {
1223                                 $bail = true;
1224                         }
1225                         $this->merge( $wgCurParser );
1226                 }
1227
1228                 return $text;
1229         }
1230
1231         # Returns a copy of this object except with various variables cleared
1232         # This copy can be re-merged with the parent after operations on the copy
1233         function fork()
1234         {
1235                 $copy = $this;
1236                 $copy->mOutput = new ParserOutput;
1237                 return $copy;
1238         }
1239
1240         # Merges a copy split off with fork()
1241         function merge( &$copy )
1242         {
1243                 # Output objects
1244                 $this->mOutput->merge( $copy->mOutput );
1245
1246                 # Include throttling arrays
1247                 foreach( $copy->mIncludeCount as $dbk => $count ) {
1248                         if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
1249                                 $this->mIncludeCount[$dbk] += $count;
1250                         } else {
1251                                 $this->mIncludeCount[$dbk] = $count;
1252                         }
1253                 }
1254
1255                 # Strip states
1256                 foreach( $copy->mStripState as $dictName => $contentDict ) {
1257                         $this->mStripState[$dictName] += $contentDict;
1258                 }
1259         }
1260
1261         function braceSubstitution( $matches )
1262         {
1263                 global $wgLinkCache, $wgLang;
1264                 $fname = "Parser::braceSubstitution";
1265                 $found = false;
1266                 $nowiki = false;
1267
1268                 $text = $matches[1];
1269
1270                 # SUBST
1271                 $mwSubst =& MagicWord::get( MAG_SUBST );
1272                 if ( $mwSubst->matchStartAndRemove( $text ) ) {
1273                         if ( $this->mOutputType != OT_WIKI ) {
1274                                 # Invalid SUBST not replaced at PST time
1275                                 # Return without further processing
1276                                 $text = $matches[0];
1277                                 $found = true;
1278                         }
1279                 } elseif ( $this->mOutputType == OT_WIKI ) {
1280                         # SUBST not found in PST pass, do nothing
1281                         $text = $matches[0];
1282                         $found = true;
1283                 }
1284
1285                 # MSG, MSGNW and INT
1286                 if ( !$found ) {
1287                         # Check for MSGNW:
1288                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1289                         if ( $mwMsgnw->matchStartAndRemove( $text ) ) {
1290                                 $nowiki = true;
1291                         } else {
1292                                 # Remove obsolete MSG:
1293                                 $mwMsg =& MagicWord::get( MAG_MSG );
1294                                 $mwMsg->matchStartAndRemove( $text );
1295                         }
1296
1297                         # Check if it is an internal message
1298                         $mwInt =& MagicWord::get( MAG_INT );
1299                         if ( $mwInt->matchStartAndRemove( $text ) ) {
1300                                 $text = wfMsg( $text );
1301                                 $found = true;
1302                         }
1303                 }
1304
1305                 # NS
1306                 if ( !$found ) {
1307                         # Check for NS: (namespace expansion)
1308                         $mwNs = MagicWord::get( MAG_NS );
1309                         if ( $mwNs->matchStartAndRemove( $text ) ) {
1310                                 if ( intval( $text ) ) {
1311                                         $text = $wgLang->getNsText( intval( $text ) );
1312                                         $found = true;
1313                                 } else {
1314                                         $index = Namespace::getCanonicalIndex( strtolower( $text ) );
1315                                         if ( !is_null( $index ) ) {
1316                                                 $text = $wgLang->getNsText( $index );
1317                                                 $found = true;
1318                                         }
1319                                 }
1320                         }
1321                 }
1322
1323                 # LOCALURL and LOCALURLE
1324                 if ( !$found ) {
1325                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1326                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1327
1328                         if ( $mwLocal->matchStartAndRemove( $text ) ) {
1329                                 $func = 'getLocalURL';
1330                         } elseif ( $mwLocalE->matchStartAndRemove( $text ) ) {
1331                                 $func = 'escapeLocalURL';
1332                         } else {
1333                                 $func = '';
1334                         }
1335
1336                         if ( $func !== '' ) {
1337                                 $args = explode( "|", $text );
1338                                 $n = count( $args );
1339                                 if ( $n > 0 ) {
1340                                         $title = Title::newFromText( $args[0] );
1341                                         if ( !is_null( $title ) ) {
1342                                                 if ( $n > 1 ) {
1343                                                         $text = $title->$func( $args[1] );
1344                                                 } else {
1345                                                         $text = $title->$func();
1346                                                 }
1347                                                 $found = true;
1348                                         }
1349                                 }
1350                         }
1351                 }
1352
1353                 # Check for a match against internal variables
1354                 if ( !$found && array_key_exists( $text, $this->mVariables ) ) {
1355                         $text = $this->mVariables[$text];
1356                         $found = true;
1357                         $this->mOutput->mContainsOldMagic = true;
1358                 }
1359
1360                 # Load from database
1361                 if ( !$found ) {
1362                         $title = Title::newFromText( $text, NS_TEMPLATE );
1363                         if ( is_object( $title ) && !$title->isExternal() ) {
1364                                 # Check for excessive inclusion
1365                                 $dbk = $title->getPrefixedDBkey();
1366                                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1367                                         $this->mIncludeCount[$dbk] = 0;
1368                                 }
1369                                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1370                                         $article = new Article( $title );
1371                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1372                                         if ( $articleContent !== false ) {
1373                                                 $found = true;
1374                                                 $text = $articleContent;
1375
1376                                                 # Escaping and link table handling
1377                                                 # Not required for preSaveTransform()
1378                                                 if ( $this->mOutputType == OT_HTML ) {
1379                                                         if ( $nowiki ) {
1380                                                                 $text = wfEscapeWikiText( $text );
1381                                                         } else {
1382                                                                 $text = $this->removeHTMLtags( $text );
1383                                                         }
1384                                                         # Do not enter included links in link table
1385                                                         $wgLinkCache->suspend();
1386
1387                                                         # Run full parser on the included text
1388                                                         $text = $this->strip( $text, $this->mStripState );
1389                                                         $text = $this->doWikiPass2( $text, true );
1390
1391                                                         # Add the result to the strip state for re-inclusion after
1392                                                         # the rest of the processing
1393                                                         $text = $this->insertStripItem( $text, $this->mStripState );
1394
1395                                                         # Resume the link cache and register the inclusion as a link
1396                                                         $wgLinkCache->resume();
1397                                                         $wgLinkCache->addLinkObj( $title );
1398
1399                                                 }
1400                                         }
1401                                 }
1402
1403                                 # If the title is valid but undisplayable, make a link to it
1404                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1405                                         $text = "[[" . $title->getPrefixedText() . "]]";
1406                                         $found = true;
1407                                 }
1408                         }
1409                 }
1410
1411                 if ( !$found ) {
1412                         return $matches[0];
1413                 } else {
1414                         return $text;
1415                 }
1416         }
1417
1418         # Cleans up HTML, removes dangerous tags and attributes
1419         /* private */ function removeHTMLtags( $text )
1420         {
1421                 $fname = "Parser::removeHTMLtags";
1422                 wfProfileIn( $fname );
1423                 $htmlpairs = array( # Tags that must be closed
1424                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1425                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1426                         "strike", "strong", "tt", "var", "div", "center",
1427                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1428                         "ruby", "rt" , "rb" , "rp", "p"
1429                 );
1430                 $htmlsingle = array(
1431                         "br", "hr", "li", "dt", "dd"
1432                 );
1433                 $htmlnest = array( # Tags that can be nested--??
1434                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1435                         "dl", "font", "big", "small", "sub", "sup"
1436                 );
1437                 $tabletags = array( # Can only appear inside table
1438                         "td", "th", "tr"
1439                 );
1440
1441                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1442                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1443
1444                 $htmlattrs = $this->getHTMLattrs () ;
1445
1446                 # Remove HTML comments
1447                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1448
1449                 $bits = explode( "<", $text );
1450                 $text = array_shift( $bits );
1451                 $tagstack = array(); $tablestack = array();
1452
1453                 foreach ( $bits as $x ) {
1454                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1455                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1456                           $x, $regs );
1457                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1458                         error_reporting( $prev );
1459
1460                         $badtag = 0 ;
1461                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1462                                 # Check our stack
1463                                 if ( $slash ) {
1464                                         # Closing a tag...
1465                                         if ( ! in_array( $t, $htmlsingle ) &&
1466                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1467                                                 array_push( $tagstack, $ot );
1468                                                 $badtag = 1;
1469                                         } else {
1470                                                 if ( $t == "table" ) {
1471                                                         $tagstack = array_pop( $tablestack );
1472                                                 }
1473                                                 $newparams = "";
1474                                         }
1475                                 } else {
1476                                         # Keep track for later
1477                                         if ( in_array( $t, $tabletags ) &&
1478                                           ! in_array( "table", $tagstack ) ) {
1479                                                 $badtag = 1;
1480                                         } else if ( in_array( $t, $tagstack ) &&
1481                                           ! in_array ( $t , $htmlnest ) ) {
1482                                                 $badtag = 1 ;
1483                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1484                                                 if ( $t == "table" ) {
1485                                                         array_push( $tablestack, $tagstack );
1486                                                         $tagstack = array();
1487                                                 }
1488                                                 array_push( $tagstack, $t );
1489                                         }
1490                                         # Strip non-approved attributes from the tag
1491                                         $newparams = $this->fixTagAttributes($params);
1492
1493                                 }
1494                                 if ( ! $badtag ) {
1495                                         $rest = str_replace( ">", "&gt;", $rest );
1496                                         $text .= "<$slash$t $newparams$brace$rest";
1497                                         continue;
1498                                 }
1499                         }
1500                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1501                 }
1502                 # Close off any remaining tags
1503                 while ( $t = array_pop( $tagstack ) ) {
1504                         $text .= "</$t>\n";
1505                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1506                 }
1507                 wfProfileOut( $fname );
1508                 return $text;
1509         }
1510
/*
 *
 * This function accomplishes several tasks on the <h1>..<h6> headings found in $text:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections if the title can be edited and the option is enabled
 * 3) Add a table of contents after the first block of text if the option is enabled
 *    (suppressed by __NOTOC__, on the main page and for fewer than 4 headings;
 *    forced by __FORCETOC__)
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Returns the reassembled text.
 *
 */

        /* private */ function formatHeadings( $text )
        {
                $doNumberHeadings = $this->mOptions->getNumberHeadings();
                $doShowToc = $this->mOptions->getShowToc();
                if( !$this->mTitle->userCanEdit() ) {
                        $showEditLink = 0;
                        $rightClickHack = 0;
                } else {
                        $showEditLink = $this->mOptions->getEditSection();
                        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
                }

                # Inhibit editsection links if requested in the page
                $esw =& MagicWord::get( MAG_NOEDITSECTION );
                if( $esw->matchAndRemove( $text ) ) {
                        $showEditLink = 0;
                }
                # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
                # do not add TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if( $mw->matchAndRemove( $text ) ) {
                        $doShowToc = 0;
                }

                # never add the TOC to the Main Page. This is an entry page that should not
                # be more than 1-2 screens large anyway
                if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
                        $doShowToc = 0;
                }

                # Get all headlines for numbering them and adding funky stuff like [edit]
                # links - this is for later, but we need the number of headlines right now
                # $matches[1]: level digit, $matches[2]: rest of the opening tag incl. ">",
                # $matches[3]: headline content
                $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

                # if there are fewer than 4 headlines in the article, do not show TOC
                if( $numMatches < 4 ) {
                        $doShowToc = 0;
                }

                # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
                # override above conditions and always show TOC
                $mw =& MagicWord::get( MAG_FORCETOC );
                if ($mw->matchAndRemove( $text ) ) {
                        $doShowToc = 1;
                }


                # We need this to perform operations on the HTML
                $sk =& $this->mOptions->getSkin();

                # headline counter
                $headlineCount = 0;

                # Ugh .. the TOC should have neat indentation levels which can be
                # passed to the skin functions. These are determined here
                $toclevel = 0;
                $toc = "";
                $full = "";
                $head = array();           # replacement HTML per headline index
                $sublevelCount = array();  # headlines seen so far, per heading level
                $refers = array();         # canonized headline => number of occurrences
                $refcount = array();       # headline index => occurrence number of its anchor
                $level = 0;
                $prevlevel = 0;
                foreach( $matches[3] as $headline ) {
                        $numbering = "";
                        if( $level ) {
                                $prevlevel = $level;
                        }
                        $level = $matches[1][$headlineCount];
                        if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
                                # reset when we enter a new level
                                $sublevelCount[$level] = 0;
                                $toc .= $sk->tocIndent( $level - $prevlevel );
                                $toclevel += $level - $prevlevel;
                        }
                        if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
                                # reset when we step back a level
                                $sublevelCount[$level+1]=0;
                                $toc .= $sk->tocUnindent( $prevlevel - $level );
                                $toclevel -= $prevlevel - $level;
                        }
                        # count number of headlines for each level
                        if( !isset( $sublevelCount[$level] ) ) {
                                $sublevelCount[$level] = 0;
                        }
                        $sublevelCount[$level]++;
                        if( $doNumberHeadings || $doShowToc ) {
                                # Dotted number built from the counters of all levels up to this one
                                $dot = 0;
                                for( $i = 1; $i <= $level; $i++ ) {
                                        if( !empty( $sublevelCount[$i] ) ) {
                                                if( $dot ) {
                                                        $numbering .= ".";
                                                }
                                                $numbering .= $sublevelCount[$i];
                                                $dot = 1;
                                        }
                                }
                        }

                        # The canonized header is a version of the header text safe to use for links
                        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                        $canonized_headline = Parser::unstrip( $headline, $this->mStripState );

                        # strip out HTML
                        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                        $tocline = trim( $canonized_headline );
                        $canonized_headline = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));

                        # count how many in assoc. array so we can track dupes in anchors
                        if( !isset( $refers[$canonized_headline] ) ) {
                                $refers[$canonized_headline] = 0;
                        }
                        $refers[$canonized_headline]++;
                        $refcount[$headlineCount]=$refers[$canonized_headline];

                        # Prepend the number to the heading text

                        if( $doNumberHeadings || $doShowToc ) {
                                $tocline = $numbering . " " . $tocline;

                                # Don't number the heading if it is the only one (looks silly)
                                if( $doNumberHeadings && count( $matches[3] ) > 1) {
                                        # the two are different if the line contains a link
                                        $headline=$numbering . " " . $headline;
                                }
                        }

                        # Create the anchor for linking from the TOC to the section
                        $anchor = $canonized_headline;
                        if($refcount[$headlineCount] > 1 ) {
                                $anchor .= "_" . $refcount[$headlineCount];
                        }
                        if( $doShowToc ) {
                                $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                        }

                        $head[$headlineCount] = "";
                        if( $showEditLink ) {
                                $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
                        }

                        # Add the edit section span
                        if( $rightClickHack ) {
                                $headline = $sk->editSectionScript($headlineCount+1,$headline);
                        }

                        # The canonization above replaces & < > ' but not the double quote, so
                        # a headline containing " would end the name="..." attribute early.
                        # Escape it for the attribute; the anchor value itself stays the same.
                        $anchorAttr = str_replace( '"', '&quot;', $anchor );

                        # give headline the correct <h#> tag
                        $head[$headlineCount] .= "<a name=\"$anchorAttr\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

                        $headlineCount++;
                }

                if( $doShowToc ) {
                        # Close the indentation levels still open, then wrap the TOC
                        $toc .= $sk->tocUnindent( $toclevel );
                        $toc = $sk->tocTable( $toc );
                }

                # split up and insert constructed headlines

                $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
                $i = 0;

                foreach( $blocks as $block ) {
                        if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
                                # This is the [edit] link that appears for the top block of text when
                                # section editing is enabled
                                $full .= $sk->editSectionLink(0);
                        }
                        $full .= $block;
                        if( $doShowToc && !$i) {
                                # Top anchor now in skin
                                $full = $full.$toc;
                        }

                        # Block $i is followed by headline $i
                        if( !empty( $head[$i] ) ) {
                                $full .= $head[$i];
                        }
                        $i++;
                }

                return $full;
        }
1702
1703         /* private */ function doMagicISBN( &$tokenizer )
1704         {
1705                 global $wgLang;
1706
1707                 # Check whether next token is a text token
1708                 # If yes, fetch it and convert the text into a
1709                 # Special::BookSources link
1710                 $token = $tokenizer->previewToken();
1711                 while ( $token["type"] == "" )
1712                 {
1713                         $tokenizer->nextToken();
1714                         $token = $tokenizer->previewToken();
1715                 }
1716                 if ( $token["type"] == "text" )
1717                 {
1718                         $token = $tokenizer->nextToken();
1719                         $x = $token["text"];
1720                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1721
1722                         $isbn = $blank = "" ;
1723                         while ( " " == $x{0} ) {
1724                                 $blank .= " ";
1725                                 $x = substr( $x, 1 );
1726                         }
1727                         while ( strstr( $valid, $x{0} ) != false ) {
1728                                 $isbn .= $x{0};
1729                                 $x = substr( $x, 1 );
1730                         }
1731                         $num = str_replace( "-", "", $isbn );
1732                         $num = str_replace( " ", "", $num );
1733
1734                         if ( "" == $num ) {
1735                                 $text = "ISBN $blank$x";
1736                         } else {
1737                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1738                                 $text = "<a href=\"" .
1739                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1740                                         "\" class=\"internal\">ISBN $isbn</a>";
1741                                 $text .= $x;
1742                         }
1743                 } else {
1744                         $text = "ISBN ";
1745                 }
1746                 return $text;
1747         }
1748         /* private */ function doMagicRFC( &$tokenizer )
1749         {
1750                 global $wgLang;
1751
1752                 # Check whether next token is a text token
1753                 # If yes, fetch it and convert the text into a
1754                 # link to an RFC source
1755                 $token = $tokenizer->previewToken();
1756                 while ( $token["type"] == "" )
1757                 {
1758                         $tokenizer->nextToken();
1759                         $token = $tokenizer->previewToken();
1760                 }
1761                 if ( $token["type"] == "text" )
1762                 {
1763                         $token = $tokenizer->nextToken();
1764                         $x = $token["text"];
1765                         $valid = "0123456789";
1766
1767                         $rfc = $blank = "" ;
1768                         while ( " " == $x{0} ) {
1769                                 $blank .= " ";
1770                                 $x = substr( $x, 1 );
1771                         }
1772                         while ( strstr( $valid, $x{0} ) != false ) {
1773                                 $rfc .= $x{0};
1774                                 $x = substr( $x, 1 );
1775                         }
1776
1777                         if ( "" == $rfc ) {
1778                                 $text .= "RFC $blank$x";
1779                         } else {
1780                                 $url = wfmsg( "rfcurl" );
1781                                 $url = str_replace( "$1", $rfc, $url);
1782                                 $sk =& $this->mOptions->getSkin();
1783                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1784                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1785                         }
1786                 } else {
1787                         $text = "RFC ";
1788                 }
1789                 return $text;
1790         }
1791
1792         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1793         {
1794                 $this->mOptions = $options;
1795                 $this->mTitle =& $title;
1796                 $this->mOutputType = OT_WIKI;
1797
1798                 if ( $clearState ) {
1799                         $this->clearState();
1800                 }
1801
1802                 $stripState = false;
1803                 $pairs = array(
1804                         "\r\n" => "\n",
1805                         );
1806                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
1807                 // now with regexes
1808                 $pairs = array(
1809                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1810                         "/<br *?>/i" => "<br/>",
1811                 );
1812                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1813                 $text = $this->strip( $text, $stripState, false );
1814                 $text = $this->pstPass2( $text, $user );
1815                 $text = $this->unstrip( $text, $stripState );
1816                 return $text;
1817         }
1818
1819         /* private */ function pstPass2( $text, &$user )
1820         {
1821                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1822
1823                 # Variable replacement
1824                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1825                 $text = $this->replaceVariables( $text );
1826
1827                 # Signatures
1828                 #
1829                 $n = $user->getName();
1830                 $k = $user->getOption( "nickname" );
1831                 if ( "" == $k ) { $k = $n; }
1832                 if(isset($wgLocaltimezone)) {
1833                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1834                 }
1835                 /* Note: this is an ugly timezone hack for the European wikis */
1836                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1837                   " (" . date( "T" ) . ")";
1838                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1839
1840                 $text = preg_replace( "/~~~~~/", $d, $text );
1841                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1842                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1843                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1844                   Namespace::getUser() ) . ":$n|$k]]", $text );
1845
1846                 # Context links: [[|name]] and [[name (context)|]]
1847                 #
1848                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1849                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1850                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1851                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1852
1853                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1854                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1855                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1856                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1857                                                                                                                 # [[ns:page (cont)|]]
1858                 $context = "";
1859                 $t = $this->mTitle->getText();
1860                 if ( preg_match( $conpat, $t, $m ) ) {
1861                         $context = $m[2];
1862                 }
1863                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1864                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1865                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1866
1867                 if ( "" == $context ) {
1868                         $text = preg_replace( $p2, "[[\\1]]", $text );
1869                 } else {
1870                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1871                 }
1872
1873                 /*
1874                 $mw =& MagicWord::get( MAG_SUBST );
1875                 $wgCurParser = $this->fork();
1876                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1877                 $this->merge( $wgCurParser );
1878                 */
1879
1880                 # Trim trailing whitespace
1881                 # MAG_END (__END__) tag allows for trailing
1882                 # whitespace to be deliberately included
1883                 $text = rtrim( $text );
1884                 $mw =& MagicWord::get( MAG_END );
1885                 $mw->matchAndRemove( $text );
1886
1887                 return $text;
1888         }
1889
1890         # Set up some variables which are usually set up in parse()
1891         # so that an external function can call some class members with confidence
1892         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1893         {
1894                 $this->mTitle =& $title;
1895                 $this->mOptions = $options;
1896                 $this->mOutputType = $outputType;
1897                 if ( $clearState ) {
1898                         $this->clearState();
1899                 }
1900         }
1901
1902         function transformMsg( $text, $options ) {
1903                 global $wgTitle;
1904                 static $executing = false;
1905
1906                 # Guard against infinite recursion
1907                 if ( $executing ) {
1908                         return $text;
1909                 }
1910                 $executing = true;
1911
1912                 $this->mTitle = $wgTitle;
1913                 $this->mOptions = $options;
1914                 $this->mOutputType = OT_MSG;
1915                 $this->clearState();
1916                 $text = $this->replaceVariables( $text );
1917
1918                 $executing = false;
1919                 return $text;
1920         }
1921 }
1922
# Container for the result of a parser run: the output text together with the
# language links, category links and "contains old magic" flag gathered for it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor
        #
        # $text             - output text
        # $languageLinks    - array of language links
        # $categoryLinks    - array of category links
        # $containsOldMagic - boolean flag, stored as-is
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        # (presumably the previous value - confirm against wfSetVar)
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Folds the metadata of another ParserOutput into this one.
        # Language links and category links of $other are appended to the respective
        # lists of this object; the old-magic flag is OR-ed. mText is left untouched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links, not from the
                # language links that were just merged above
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1952
# Set of options read by the Parser. Values are taken partly from global
# settings and partly from a user's preferences; see initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators; each returns whatever wfSetVar()/wfSetRef() returns
        # (presumably the previous value - confirm against those helpers)
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Creates a ParserOptions object initialised from the given user.
        # $user - user object, or a false-ish value to use a default User
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() already declares its parameter by reference, so no
                # call-time "&" is needed here (call-time pass-by-reference is deprecated
                # and a fatal error from PHP 5.4 on)
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fills all option members.
        # Global settings supply the TeX, category magic, dynamic dates, interwiki
        # magic and external image options; the skin and the remaining options are
        # read from the user.
        # $userInput - user object; when false-ish, a new User marked as loaded is
        #              used instead
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2025
2026 # Regex callbacks, used in Parser::replaceVariables
# Plain-function trampoline: hands the match array to the braceSubstitution()
# method of the parser held in the global $wgCurParser and returns its result.
# $matches - match array supplied by the regex callback mechanism
function wfBraceSubstitution( $matches )
{
        global $wgCurParser;

        $replacement = $wgCurParser->braceSubstitution( $matches );
        return $replacement;
}
2031 }
2032
2033 ?>