includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36 define( "MAX_INCLUDE_REPEAT", 5 );
  37
  38 # Recursion depth of variable/inclusion evaluation
  39 define( "MAX_INCLUDE_PASSES", 3 );
  40
  41 # Allowed values for $mOutputType
  42 define( "OT_HTML", 1 );
  43 define( "OT_WIKI", 2 );
  44 define( "OT_MSG", 3 );
  45
  46 # prefix for escaping, used in two functions at least
  47 define( "UNIQ_PREFIX", "NaodW29");
  48
  49 class Parser
  50 {
  51         # Cleared with clearState():
  52         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array();
  53         var $mVariables, $mIncludeCount;
  54
  55         # Temporary:
  56         var $mOptions, $mTitle, $mOutputType;
  57
  58         function Parser()
  59         {
  60                 $this->clearState();
  61         }
  62
  63         function clearState()
  64         {
  65                 $this->mOutput = new ParserOutput;
  66                 $this->mAutonumber = 0;
  67                 $this->mLastSection = "";
  68                 $this->mDTopen = false;
  69                 $this->mVariables = false;
  70                 $this->mIncludeCount = array();
  71                 $this->mStripState = array();
  72         }
  73
  74         # First pass--just handle <nowiki> sections, pass the rest off
  75         # to doWikiPass2() which does all the real work.
  76         #
  77         # Returns a ParserOutput
  78         #
  79         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  80         {
  81                 $fname = "Parser::parse";
  82                 wfProfileIn( $fname );
  83
  84                 if ( $clearState ) {
  85                         $this->clearState();
  86                 }
  87
  88                 $this->mOptions = $options;
  89                 $this->mTitle =& $title;
  90                 $this->mOutputType = OT_HTML;
  91
  92                 $stripState = NULL;
  93                 $text = $this->strip( $text, $this->mStripState );
  94                 $text = $this->doWikiPass2( $text, $linestart );
  95                 # needs to be called last
  96                 $text = $this->doBlockLevels( $text, $linestart );
  97                 $text = $this->unstrip( $text, $this->mStripState );
  98                 # Clean up special characters
  99                 $fixtags = array(
 100                         "/<hr *>/i" => '<hr/>',
 101                         "/<br *>/i" => '<br/>',
 102                         "/<center *>/i"=>'<span style="text-align:center;">',
 103                         "/<\\/center *>/i" => '</span>',
 104                         # Clean up spare ampersands; note that we probably ought to be
 105                         # more careful about named entities.
 106                         '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 107                 );
 108                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 109
 110                 $this->mOutput->setText( $text );
 111                 wfProfileOut( $fname );
 112                 return $this->mOutput;
 113         }
 114
 115         /* static */ function getRandomString()
 116         {
 117                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 118         }
 119
        # Replaces all occurrences of <$tag>content</$tag> in the text
        # with a random marker and returns the new text. The output parameter
        # $content will be an associative array filled with data of the form
        # $unique_marker => content.
        #
        # If $content is already set, the additional entries will be appended.
 126
 127         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 128                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 129                 if ( !$content ) {
 130                         $content = array( );
 131                 }
 132                 $n = 1;
 133                 $stripped = "";
 134
 135                 while ( "" != $text ) {
 136                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 137                         $stripped .= $p[0];
 138                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 139                                 $text = "";
 140                         } else {
 141                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 142                                 $marker = $rnd . sprintf("%08X", $n++);
 143                                 $content[$marker] = $q[0];
 144                                 $stripped .= $marker;
 145                                 $text = $q[1];
 146                         }
 147                 }
 148                 return $stripped;
 149         }
 150
 151         # Strips <nowiki>, <pre> and <math>
 152         # Returns the text, and fills an array with data needed in unstrip()
 153         # If the $state is already a valid strip state, it adds to the state
 154         #
 155         function strip( $text, &$state )
 156         {
 157                 $render = ($this->mOutputType == OT_HTML);
 158                 if ( $state ) {
 159                         $nowiki_content = $state['nowiki'];
 160                         $hiero_content = $state['hiero'];
 161                         $math_content = $state['math'];
 162                         $pre_content = $state['pre'];
 163                         $item_content = $state['item'];
 164                 } else {
 165                         $nowiki_content = array();
 166                         $hiero_content = array();
 167                         $math_content = array();
 168                         $pre_content = array();
 169                         $item_content = array();
 170                 }
 171
 172                 # Replace any instances of the placeholders
 173                 $uniq_prefix = UNIQ_PREFIX;
 174                 $text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 175
 176                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 177                 foreach( $nowiki_content as $marker => $content ){
 178                         if( $render ){
 179                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 180                         } else {
 181                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 182                         }
 183                 }
 184
 185                 if( $GLOBALS['wgUseWikiHiero'] ){
 186                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 187                         foreach( $hiero_content as $marker => $content ){
 188                                 if( $render ){
 189                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 190                                 } else {
 191                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 192                                 }
 193                         }
 194                 }
 195
 196                 if( $this->mOptions->getUseTeX() ){
 197                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 198                         foreach( $math_content as $marker => $content ){
 199                                 if( $render ){
 200                                         $math_content[$marker] = renderMath( $content );
 201                                 } else {
 202                                         $math_content[$marker] = "<math>$content</math>";
 203                                 }
 204                         }
 205                 }
 206
 207                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 208                 foreach( $pre_content as $marker => $content ){
 209                         if( $render ){
 210                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 211                         } else {
 212                                 $pre_content[$marker] = "<pre>$content</pre>";
 213                         }
 214                 }
 215
 216                 $state = array(
 217                   'nowiki' => $nowiki_content,
 218                   'hiero' => $hiero_content,
 219                   'math' => $math_content,
 220                   'pre' => $pre_content,
 221                   'item' => $item_content
 222                 );
 223                 return $text;
 224         }
 225
 226         function unstrip( $text, &$state )
 227         {
 228                 # Must expand in reverse order, otherwise nested tags will be corrupted
 229                 /*
 230                 $dicts = array( 'item', 'pre', 'math', 'hiero', 'nowiki' );
 231                 foreach ( $dicts as $dictName ) {
 232                         $content_dict = $state[$dictName];
 233                         foreach( $content_dict as $marker => $content ){
 234                                 $text = str_replace( $marker, $content, $text );
 235                         }
 236                 }*/
 237
 238                 $contentDict = end( $state );
 239                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 240                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 241                                 $text = str_replace( key( $contentDict ), $content, $text );
 242                         }
 243                 }
 244
 245                 return $text;
 246         }
 247
 248         # Add an item to the strip state
 249         # Returns the unique tag which must be inserted into the stripped text
 250         # The tag will be replaced with the original text in unstrip()
 251
 252         function insertStripItem( $text, &$state )
 253         {
 254                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 255                 if ( !$state ) {
 256                         $state = array(
 257                           'nowiki' => array(),
 258                           'hiero' => array(),
 259                           'math' => array(),
 260                           'pre' => array(),
 261                           'item' => array()
 262                         );
 263                 }
 264                 $state['item'][$rnd] = $text;
 265                 return $rnd;
 266         }
 267
 268         function categoryMagic ()
 269         {
 270                 global $wgLang , $wgUser ;
 271                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 272                 $id = $this->mTitle->getArticleID() ;
 273                 $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
 274                 $ti = $this->mTitle->getText() ;
 275                 $ti = explode ( ":" , $ti , 2 ) ;
 276                 if ( $cat != $ti[0] ) return "" ;
 277                 $r = '<br style="clear:both;"/>\n';
 278
 279                 $articles = array() ;
 280                 $parents = array () ;
 281                 $children = array() ;
 282
 283
 284 #               $sk =& $this->mGetSkin();
 285                 $sk =& $wgUser->getSkin() ;
 286
 287                 $data = array () ;
 288                 $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 289                 $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 290
 291                 $res = wfQuery ( $sql1, DB_READ ) ;
 292                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 293
 294                 $res = wfQuery ( $sql2, DB_READ ) ;
 295                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 296
 297
 298                 foreach ( $data AS $x )
 299                 {
 300                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 301                         if ( $t != "" ) $t .= ":" ;
 302                         $t .= $x->cur_title ;
 303
 304                         $y = explode ( ":" , $t , 2 ) ;
 305                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 306                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 307                         } else {
 308                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 309                         }
 310                 }
 311                 wfFreeResult ( $res ) ;
 312
 313                 # Children
 314                 if ( count ( $children ) > 0 )
 315                 {
 316                         asort ( $children ) ;
 317                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 318                         $r .= implode ( ", " , $children ) ;
 319                 }
 320
 321                 # Articles
 322                 if ( count ( $articles ) > 0 )
 323                 {
 324                         asort ( $articles ) ;
 325                         $h =  wfMsg( "category_header", $ti[1] );
 326                         $r .= "<h2>{$h}</h2>\n" ;
 327                         $r .= implode ( ", " , $articles ) ;
 328                 }
 329
 330
 331                 return $r ;
 332         }
 333
 334         function getHTMLattrs ()
 335         {
 336                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 337                                 "title", "align", "lang", "dir", "width", "height",
 338                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 339                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 340                                 /* FONT */ "type", "start", "value", "compact",
 341                                 /* For various lists, mostly deprecated but safe */
 342                                 "summary", "width", "border", "frame", "rules",
 343                                 "cellspacing", "cellpadding", "valign", "char",
 344                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 345                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 346                                 "id", "class", "name", "style" /* For CSS */
 347                                 );
 348                 return $htmlattrs ;
 349         }
 350
 351         function fixTagAttributes ( $t )
 352         {
 353                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 354                 $htmlattrs = $this->getHTMLattrs() ;
 355
 356                 # Strip non-approved attributes from the tag
 357                 $t = preg_replace(
 358                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 359                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 360                         $t);
 361                 # Strip javascript "expression" from stylesheets. Brute force approach:
 362                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 363
 364                 if( preg_match(
 365                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 366                         wfMungeToUtf8( $t ) ) )
 367                 {
 368                         $t="";
 369                 }
 370
 371                 return trim ( $t ) ;
 372         }
 373
 374         function doTableStuff ( $t )
 375         {
 376                 $t = explode ( "\n" , $t ) ;
 377                 $td = array () ; # Is currently a td tag open?
 378                         $ltd = array () ; # Was it TD or TH?
 379                         $tr = array () ; # Is currently a tr tag open?
 380                         $ltr = array () ; # tr attributes
 381                         foreach ( $t AS $k => $x )
 382                         {
 383                                 $x = rtrim ( $x ) ;
 384                                 $fc = substr ( $x , 0 , 1 ) ;
 385                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 386                                 {
 387                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 388                                         array_push ( $td , false ) ;
 389                                         array_push ( $ltd , "" ) ;
 390                                         array_push ( $tr , false ) ;
 391                                         array_push ( $ltr , "" ) ;
 392                                 }
 393                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 394                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 395                                 {
 396                                         $z = "</table>\n" ;
 397                                         $l = array_pop ( $ltd ) ;
 398                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 399                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 400                                         array_pop ( $ltr ) ;
 401                                         $t[$k] = $z ;
 402                                 }
 403                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 404                                                 {
 405                                                 $z = trim ( substr ( $x , 2 ) ) ;
 406                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 407                                                 }*/
 408                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 409                                 {
 410                                         $x = substr ( $x , 1 ) ;
 411                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 412                                         $z = "" ;
 413                                         $l = array_pop ( $ltd ) ;
 414                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 415                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 416                                         array_pop ( $ltr ) ;
 417                                         $t[$k] = $z ;
 418                                         array_push ( $tr , false ) ;
 419                                         array_push ( $td , false ) ;
 420                                         array_push ( $ltd , "" ) ;
 421                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 422                                 }
 423                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 424                                 {
 425                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 426                                         {
 427                                                 $fc = "+" ;
 428                                                 $x = substr ( $x , 1 ) ;
 429                                         }
 430                                         $after = substr ( $x , 1 ) ;
 431                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 432                                         $after = explode ( "||" , $after ) ;
 433                                         $t[$k] = "" ;
 434                                         foreach ( $after AS $theline )
 435                                         {
 436                                                 $z = "" ;
 437                                                 if ( $fc != "+" )
 438                                                 {
 439                                                         $tra = array_pop ( $ltr ) ;
 440                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 441                                                         array_push ( $tr , true ) ;
 442                                                         array_push ( $ltr , "" ) ;
 443                                                 }
 444
 445                                                 $l = array_pop ( $ltd ) ;
 446                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 447                                                 if ( $fc == "|" ) $l = "td" ;
 448                                                 else if ( $fc == "!" ) $l = "th" ;
 449                                                 else if ( $fc == "+" ) $l = "caption" ;
 450                                                 else $l = "" ;
 451                                                 array_push ( $ltd , $l ) ;
 452                                                 $y = explode ( "|" , $theline , 2 ) ;
 453                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 454                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 455                                                 $t[$k] .= $y ;
 456                                                 array_push ( $td , true ) ;
 457                                         }
 458                                 }
 459                         }
 460
 461                 # Closing open td, tr && table
 462                 while ( count ( $td ) > 0 )
 463                 {
 464                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 465                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 466                         $t[] = "</table>" ;
 467                 }
 468
 469                 $t = implode ( "\n" , $t ) ;
 470                 #               $t = $this->removeHTMLtags( $t );
 471                 return $t ;
 472         }
 473
 474         # Well, OK, it's actually about 14 passes.  But since all the
 475         # hard lifting is done inside PHP's regex code, it probably
 476         # wouldn't speed things up much to add a real parser.
 477         #
 478         function doWikiPass2( $text, $linestart )
 479         {
 480                 $fname = "Parser::doWikiPass2";
 481                 wfProfileIn( $fname );
 482
 483                 $text = $this->removeHTMLtags( $text );
 484                 $text = $this->replaceVariables( $text );
 485
 486                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 487
 488                 $text = $this->doHeadings( $text );
 489
 490                 if($this->mOptions->getUseDynamicDates()) {
 491                         global $wgDateFormatter;
 492                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 493                 }
 494
 495                 $text = $this->replaceExternalLinks( $text );
 496                 $text = $this->doTokenizedParser ( $text );
 497
 498                 $text = $this->doTableStuff ( $text ) ;
 499
 500                 $text = $this->formatHeadings( $text );
 501
 502                 $sk =& $this->mOptions->getSkin();
 503                 $text = $sk->transformContent( $text );
 504
 505                 $text .= $this->categoryMagic () ;
 506
 507                 wfProfileOut( $fname );
 508                 return $text;
 509         }
 510
 511
 512         /* private */ function doHeadings( $text )
 513         {
 514                 for ( $i = 6; $i >= 1; --$i ) {
 515                         $h = substr( "======", 0, $i );
 516                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 517                           "<h{$i}>\\1</h{$i}>\\2", $text );
 518                 }
 519                 return $text;
 520         }
 521
 522         # Note: we have to do external links before the internal ones,
 523         # and otherwise take great care in the order of things here, so
 524         # that we don't end up interpreting some URLs twice.
 525
 526         /* private */ function replaceExternalLinks( $text )
 527         {
 528                 $fname = "Parser::replaceExternalLinks";
 529                 wfProfileIn( $fname );
 530                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 531                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 532                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 533                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 534                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 535                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 536                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 537                 wfProfileOut( $fname );
 538                 return $text;
 539         }
 540
 541         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 542         {
 543                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 544                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 545
 546                 # this is  the list of separators that should be ignored if they
 547                 # are the last character of an URL but that should be included
 548                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 549                 # in this case, the last comma should not become part of the URL,
 550                 # but in "www.foo.com/123,2342,32.htm" it should.
 551                 $sep = ",;\.:";
 552                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 553                 $images = "gif|png|jpg|jpeg";
 554
 555                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 556                 # they are interpreted as part of the string (used to tell PHP
 557                 # that the content of the string should be inserted there).
 558                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 559                   "((?i){$images})([^{$uc}]|$)/";
 560
 561                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 562                 $sk =& $this->mOptions->getSkin();
 563
 564                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 565                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 566                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 567                 }
 568                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 569                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 570                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 571                   "</a>\\5", $s );
 572                 $s = str_replace( $unique, $protocol, $s );
 573
 574                 $a = explode( "[{$protocol}:", " " . $s );
 575                 $s = array_shift( $a );
 576                 $s = substr( $s, 1 );
 577
 578                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 579                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 580
 581                 foreach ( $a as $line ) {
 582                         if ( preg_match( $e1, $line, $m ) ) {
 583                                 $link = "{$protocol}:{$m[1]}";
 584                                 $trail = $m[2];
 585                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 586                                 else { $text = wfEscapeHTML( $link ); }
 587                         } else if ( preg_match( $e2, $line, $m ) ) {
 588                                 $link = "{$protocol}:{$m[1]}";
 589                                 $text = $m[2];
 590                                 $trail = $m[3];
 591                         } else {
 592                                 $s .= "[{$protocol}:" . $line;
 593                                 continue;
 594                         }
 595                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 596                                 $paren = "";
 597                         } else {
 598                                 # Expand the URL for printable version
 599                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 600                         }
 601                         $la = $sk->getExternalLinkAttributes( $link, $text );
 602                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 603
 604                 }
 605                 return $s;
 606         }
 607
 608         /* private */ function handle3Quotes( &$state, $token )
 609         {
 610                 if ( $state["strong"] !== false ) {
 611                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 612                         {
 613                                 # ''' lala ''lala '''
 614                                 $s = "</em></strong><em>";
 615                         } else {
 616                                 $s = "</strong>";
 617                         }
 618                         $state["strong"] = FALSE;
 619                 } else {
 620                         $s = "<strong>";
 621                         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
 622                 }
 623                 return $s;
 624         }
 625
 626         /* private */ function handle2Quotes( &$state, $token )
 627         {
 628                 if ( $state["em"] !== false ) {
 629                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 630                         {
 631                                 # ''lala'''lala'' ....'''
 632                                 $s = "</strong></em><strong>";
 633                         } else {
 634                                 $s = "</em>";
 635                         }
 636                         $state["em"] = FALSE;
 637                 } else {
 638                         $s = "<em>";
 639                         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 640
 641                 }
 642                 return $s;
 643         }
 644
 645         /* private */ function handle5Quotes( &$state, $token )
 646         {
 647                 $s = "";
 648                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 649                         if ( $state["em"] < $state["strong"] ) {
 650                                 $s .= "</strong></em>";
 651                         } else {
 652                                 $s .= "</em></strong>";
 653                         }
 654                         $state["strong"] = $state["em"] = FALSE;
 655                 } elseif ( $state["em"] !== false ) {
 656                         $s .= "</em><strong>";
 657                         $state["em"] = FALSE;
 658                         $state["strong"] = $token["pos"];
 659                 } elseif ( $state["strong"] !== false ) {
 660                         $s .= "</strong><em>";
 661                         $state["strong"] = FALSE;
 662                         $state["em"] = $token["pos"];
 663                 } else { # not $em and not $strong
 664                         $s .= "<strong><em>";
 665                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 666                 }
 667                 return $s;
 668         }
 669
 670         /* private */ function doTokenizedParser( $str )
 671         {
 672                 global $wgLang; # for language specific parser hook
 673
 674                 $tokenizer=Tokenizer::newFromString( $str );
 675                 $tokenStack = array();
 676
 677                 $s="";
 678                 $state["em"]      = FALSE;
 679                 $state["strong"]  = FALSE;
 680                 $tagIsOpen = FALSE;
 681                 $threeopen = false;
 682
 683                 # The tokenizer splits the text into tokens and returns them one by one.
 684                 # Every call to the tokenizer returns a new token.
 685                 while ( $token = $tokenizer->nextToken() )
 686                 {
 687                         switch ( $token["type"] )
 688                         {
 689                                 case "text":
 690                                         # simple text with no further markup
 691                                         $txt = $token["text"];
 692                                         break;
 693                                 case "[[[":
 694                                         # remember the tag opened with 3 [
 695                                         $threeopen = true;
 696                                 case "[[":
 697                                         # link opening tag.
 698                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 699                                         $tagIsOpen = TRUE;
 700                                         array_push( $tokenStack, $token );
 701                                         $txt="";
 702                                         break;
 703
 704                                 case "]]]":
 705                                 case "]]":
 706                                         # link close tag.
 707                                         # get text from stack, glue it together, and call the code to handle a
 708                                         # link
 709
 710                                         if ( count( $tokenStack ) == 0 )
 711                                         {
 712                                                 # stack empty. Found a ]] without an opening [[
 713                                                 $txt = "]]";
 714                                         } else {
 715                                                 $linkText = "";
 716                                                 $lastToken = array_pop( $tokenStack );
 717                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 718                                                 {
 719                                                         if( !empty( $lastToken["text"] ) ) {
 720                                                                 $linkText = $lastToken["text"] . $linkText;
 721                                                         }
 722                                                         $lastToken = array_pop( $tokenStack );
 723                                                 }
 724
 725                                                 $txt = $linkText ."]]";
 726
 727                                                 if( isset( $lastToken["text"] ) ) {
 728                                                         $prefix = $lastToken["text"];
 729                                                 } else {
 730                                                         $prefix = "";
 731                                                 }
 732                                                 $nextToken = $tokenizer->previewToken();
 733                                                 if ( $nextToken["type"] == "text" )
 734                                                 {
 735                                                         # Preview just looks at it. Now we have to fetch it.
 736                                                         $nextToken = $tokenizer->nextToken();
 737                                                         $txt .= $nextToken["text"];
 738                                                 }
 739                                                 $fakestate = $this->mStripState;
 740                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$fakestate), $prefix );
 741
 742                                                 # did the tag start with 3 [ ?
 743                                                 if($threeopen) {
 744                                                         # show the first as text
 745                                                         $txt = "[".$txt;
 746                                                         $threeopen=false;
 747                                                 }
 748
 749                                         }
 750                                         $tagIsOpen = (count( $tokenStack ) != 0);
 751                                         break;
 752                                 case "----":
 753                                         $txt = "\n<hr />\n";
 754                                         break;
 755                                 case "'''":
 756                                         # This and the three next ones handle quotes
 757                                         $txt = $this->handle3Quotes( $state, $token );
 758                                         break;
 759                                 case "''":
 760                                         $txt = $this->handle2Quotes( $state, $token );
 761                                         break;
 762                                 case "'''''":
 763                                         $txt = $this->handle5Quotes( $state, $token );
 764                                         break;
 765                                 case "":
 766                                         # empty token
 767                                         $txt="";
 768                                         break;
 769                                 case "RFC ":
 770                                         if ( $tagIsOpen ) {
 771                                                 $txt = "RFC ";
 772                                         } else {
 773                                                 $txt = $this->doMagicRFC( $tokenizer );
 774                                         }
 775                                         break;
 776                                 case "ISBN ":
 777                                         if ( $tagIsOpen ) {
 778                                                 $txt = "ISBN ";
 779                                         } else {
 780                                                 $txt = $this->doMagicISBN( $tokenizer );
 781                                         }
 782                                         break;
 783                                 default:
 784                                         # Call language specific Hook.
 785                                         $txt = $wgLang->processToken( $token, $tokenStack );
 786                                         if ( NULL == $txt ) {
 787                                                 # An unkown token. Highlight.
 788                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 789                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 790                                         }
 791                                         break;
 792                         }
 793                         # If we're parsing the interior of a link, don't append the interior to $s,
 794                         # but push it to the stack so it can be processed when a ]] token is found.
 795                         if ( $tagIsOpen  && $txt != "" ) {
 796                                 $token["type"] = "text";
 797                                 $token["text"] = $txt;
 798                                 array_push( $tokenStack, $token );
 799                         } else {
 800                                 $s .= $txt;
 801                         }
 802                 } #end while
 803                 if ( count( $tokenStack ) != 0 )
 804                 {
 805                         # still objects on stack. opened [[ tag without closing ]] tag.
 806                         $txt = "";
 807                         while ( $lastToken = array_pop( $tokenStack ) )
 808                         {
 809                                 if ( $lastToken["type"] == "text" )
 810                                 {
 811                                         $txt = $lastToken["text"] . $txt;
 812                                 } else {
 813                                         $txt = $lastToken["type"] . $txt;
 814                                 }
 815                         }
 816                         $s .= $txt;
 817                 }
 818                 return $s;
 819         }
 820
 821         /* private */ function handleInternalLink( $line, $prefix )
 822         {
 823                 global $wgLang, $wgLinkCache;
 824                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 825                 static $fname = "Parser::handleInternalLink" ;
 826                 wfProfileIn( $fname );
 827
 828                 wfProfileIn( "$fname-setup" );
 829                 static $tc = FALSE;
 830                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 831                 $sk =& $this->mOptions->getSkin();
 832
 833                 # Match a link having the form [[namespace:link|alternate]]trail
 834                 static $e1 = FALSE;
 835                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 836                 # Match the end of a line for a word that's not followed by whitespace,
 837                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 838                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 839                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 840                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 841
 842
 843                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 844                 static $image = FALSE;
 845                 static $special = FALSE;
 846                 static $media = FALSE;
 847                 static $category = FALSE;
 848                 if ( !$image ) { $image = Namespace::getImage(); }
 849                 if ( !$special ) { $special = Namespace::getSpecial(); }
 850                 if ( !$media ) { $media = Namespace::getMedia(); }
 851                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 852
 853                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 854
 855                 wfProfileOut( "$fname-setup" );
 856                 $s = "";
 857
 858                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 859                         $text = $m[2];
 860                         $trail = $m[3];
 861                 } else { # Invalid form; output directly
 862                         $s .= $prefix . "[[" . $line ;
 863                         return $s;
 864                 }
 865
 866                 /* Valid link forms:
 867                 Foobar -- normal
 868                 :Foobar -- override special treatment of prefix (images, language links)
 869                 /Foobar -- convert to CurrentPage/Foobar
 870                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 871                 */
 872                 $c = substr($m[1],0,1);
 873                 $noforce = ($c != ":");
 874                 if( $c == "/" ) { # subpage
 875                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 876                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 877                                 $noslash=$m[1];
 878                         } else {
 879                                 $noslash=substr($m[1],1);
 880                         }
 881                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 882                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 883                                 if( "" == $text ) {
 884                                         $text= $m[1];
 885                                 } # this might be changed for ugliness reasons
 886                         } else {
 887                                 $link = $noslash; # no subpage allowed, use standard link
 888                         }
 889                 } elseif( $noforce ) { # no subpage
 890                         $link = $m[1];
 891                 } else {
 892                         $link = substr( $m[1], 1 );
 893                 }
 894                 if( "" == $text )
 895                         $text = $link;
 896
 897                 $nt = Title::newFromText( $link );
 898                 if( !$nt ) {
 899                         $s .= $prefix . "[[" . $line;
 900                         return $s;
 901                 }
 902                 $ns = $nt->getNamespace();
 903                 $iw = $nt->getInterWiki();
 904                 if( $noforce ) {
 905                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 906                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 907                                 return (trim($s) == '')? '': $s;
 908                         }
 909                         if( $ns == $image ) {
 910                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 911                                 $wgLinkCache->addImageLinkObj( $nt );
 912                                 return $s;
 913                         }
 914                 }
 915                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 916                     ( strpos( $link, "#" ) == FALSE ) ) {
 917                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 918                         return $s;
 919                 }
 920
 921                 # Category feature
 922                 $catns = strtoupper ( $nt->getDBkey () ) ;
 923                 $catns = explode ( ":" , $catns ) ;
 924                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 925                 else $catns = "" ;
 926                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 927                         $t = explode ( ":" , $nt->getText() ) ;
 928                         array_shift ( $t ) ;
 929                         $t = implode ( ":" , $t ) ;
 930                         $t = $wgLang->ucFirst ( $t ) ;
 931                         $nnt = Title::newFromText ( $category.":".$t ) ;
 932                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 933                         $this->mOutput->mCategoryLinks[] = $t ;
 934                         $s .= $prefix . $trail ;
 935                         return $s ;
 936                 }
 937
 938                 if( $ns == $media ) {
 939                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 940                         $wgLinkCache->addImageLinkObj( $nt );
 941                         return $s;
 942                 } elseif( $ns == $special ) {
 943                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 944                         return $s;
 945                 }
 946                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 947
 948                 wfProfileOut( $fname );
 949                 return $s;
 950         }
 951
 952         # Some functions here used by doBlockLevels()
 953         #
 954         /* private */ function closeParagraph()
 955         {
 956                 $result = "";
 957                 if ( '' != $this->mLastSection ) {
 958                         $result = "</" . $this->mLastSection  . ">\n";
 959                 }
 960                 $this->mLastSection = "";
 961                 return $result;
 962         }
        # getCommon() returns the length of the longest common prefix of
        # both arguments, i.e. the number of leading characters they share.
        #
 966         /* private */ function getCommon( $st1, $st2 )
 967         {
 968                 $fl = strlen( $st1 );
 969                 $shorter = strlen( $st2 );
 970                 if ( $fl < $shorter ) { $shorter = $fl; }
 971
 972                 for ( $i = 0; $i < $shorter; ++$i ) {
 973                         if ( $st1{$i} != $st2{$i} ) { break; }
 974                 }
 975                 return $i;
 976         }
 977         # These next three functions open, continue, and close the list
 978         # element appropriate to the prefix character passed into them.
 979         #
 980         /* private */ function openList( $char )
 981     {
 982                 $result = $this->closeParagraph();
 983
 984                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 985                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 986                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 987                 else if ( ";" == $char ) {
 988                         $result .= "<dl><dt>";
 989                         $this->mDTopen = true;
 990                 }
 991                 else { $result = "<!-- ERR 1 -->"; }
 992
 993                 return $result;
 994         }
 995
 996         /* private */ function nextItem( $char )
 997         {
 998                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 999                 else if ( ":" == $char || ";" == $char ) {
1000                         $close = "</dd>";
1001                         if ( $this->mDTopen ) { $close = "</dt>"; }
1002                         if ( ";" == $char ) {
1003                                 $this->mDTopen = true;
1004                                 return $close . "<dt>";
1005                         } else {
1006                                 $this->mDTopen = false;
1007                                 return $close . "<dd>";
1008                         }
1009                 }
1010                 return "<!-- ERR 2 -->";
1011         }
1012
1013         /* private */function closeList( $char )
1014         {
1015                 if ( "*" == $char ) { $text = "</li></ul>"; }
1016                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1017                 else if ( ":" == $char ) {
1018                         if ( $this->mDTopen ) {
1019                                 $this->mDTopen = false;
1020                                 $text = "</dt></dl>";
1021                         } else {
1022                                 $text = "</dd></dl>";
1023                         }
1024                 }
1025                 else {  return "<!-- ERR 3 -->"; }
1026                 return $text."\n";
1027         }
1028
1029         /* private */ function doBlockLevels( $text, $linestart )
1030         {
1031                 $fname = "Parser::doBlockLevels";
1032                 wfProfileIn( $fname );
1033                 # Parsing through the text line by line.  The main thing
1034                 # happening here is handling of block-level elements p, pre,
1035                 # and making lists from lines starting with * # : etc.
1036                 #
1037                 $a = explode( "\n", $text );
1038                 $lastPref = $text = '';
1039                 $this->mDTopen = $inBlockElem = $pstack = false;
1040
1041                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1042                 foreach ( $a as $t ) {
1043
1044                         $oLine = $t;
1045                         $opl = strlen( $lastPref );
1046                         $npl = strspn( $t, "*#:;" );
1047                         $pref = substr( $t, 0, $npl );
1048                         $pref2 = str_replace( ";", ":", $pref );
1049                         $t = substr( $t, $npl );
1050                         // list generation
1051                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1052                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1053                                 if ( $pstack ) { $pstack = false; }
1054
1055                                 if ( ";" == substr( $pref, -1 ) ) {
1056                                         $cpos = strpos( $t, ":" );
1057                                         if ( ! ( false === $cpos ) ) {
1058                                                 $term = substr( $t, 0, $cpos );
1059                                                 $text .= $term . $this->nextItem( ":" );
1060                                                 $t = substr( $t, $cpos + 1 );
1061                                         }
1062                                 }
1063                         } else if (0 != $npl || 0 != $opl) {
1064                                 $cpl = $this->getCommon( $pref, $lastPref );
1065                                 if ( $pstack ) { $pstack = false; }
1066
1067                                 while ( $cpl < $opl ) {
1068                                         $text .= $this->closeList( $lastPref{$opl-1} );
1069                                         --$opl;
1070                                 }
1071                                 if ( $npl <= $cpl && $cpl > 0 ) {
1072                                         $text .= $this->nextItem( $pref{$cpl-1} );
1073                                 }
1074                                 while ( $npl > $cpl ) {
1075                                         $char = substr( $pref, $cpl, 1 );
1076                                         $text .= $this->openList( $char );
1077
1078                                         if ( ";" == $char ) {
1079                                                 $cpos = strpos( $t, ":" );
1080                                                 if ( ! ( false === $cpos ) ) {
1081                                                         $term = substr( $t, 0, $cpos );
1082                                                         $text .= $term . $this->nextItem( ":" );
1083                                                         $t = substr( $t, $cpos + 1 );
1084                                                 }
1085                                         }
1086                                         ++$cpl;
1087                                 }
1088                                 $lastPref = $pref2;
1089                         }
1090                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1091                                 $uniq_prefix = UNIQ_PREFIX;
1092                                 // XXX: use a stack for nestable elements like span, table and div
1093                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1094                                 $closematch = preg_match(
1095                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1096                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1097                                 if ( $openmatch or $closematch ) {
1098                                         if ( $pstack ) { $pstack = false; }
1099                                         $text .= $this->closeParagraph();
1100                                         if ( $closematch  ) {
1101                                                 $inBlockElem = false;
1102                                         } else {
1103                                                 $inBlockElem = true;
1104                                         }
1105                                 } else if ( !$inBlockElem ) {
1106                                         if ( " " == $t{0} ) {
1107                                                 // pre
1108                                                 if ($this->mLastSection != 'pre') {
1109                                                         $pstack = false;
1110                                                         $text .= $this->closeParagraph().'<pre>';
1111                                                         $this->mLastSection = 'pre';
1112                                                 }
1113                                         } else {
1114                                                 // paragraph
1115                                                 if ( '' == trim($t) ) {
1116                                                         if ( $pstack ) {
1117                                                                 $text .= $pstack.'<br/>';
1118                                                                 $pstack = false;
1119                                                                 $this->mLastSection = 'p';
1120                                                         } else {
1121                                                                 if ($this->mLastSection != 'p' ) {
1122                                                                         $text .= $this->closeParagraph();
1123                                                                         $this->mLastSection = '';
1124                                                                         $pstack = "<p>";
1125                                                                 } else {
1126                                                                         $pstack = '</p><p>';
1127                                                                 }
1128                                                         }
1129                                                 } else {
1130                                                         if ( $pstack ) {
1131                                                                 $text .= $pstack;
1132                                                                 $pstack = false;
1133                                                                 $this->mLastSection = 'p';
1134                                                         } else if ($this->mLastSection != 'p') {
1135                                                                 $text .= $this->closeParagraph().'<p>';
1136                                                                 $this->mLastSection = 'p';
1137                                                         }
1138                                                 }
1139                                         }
1140                                 }
1141                         }
1142                         if ($pstack === false) {
1143                                 $text .= $t."\n";
1144                         }
1145                 }
1146                 while ( $npl ) {
1147                         $text .= $this->closeList( $pref2{$npl-1} );
1148                         --$npl;
1149                 }
1150                 if ( "" != $this->mLastSection ) {
1151                         $text .= "</" . $this->mLastSection . ">";
1152                         $this->mLastSection = "";
1153                 }
1154                 wfProfileOut( $fname );
1155                 return $text;
1156         }
1157
1158         function getVariableValue( $index ) {
1159                 global $wgLang, $wgSitename, $wgServer;
1160
1161                 switch ( $index ) {
1162                         case MAG_CURRENTMONTH:
1163                                 return date( "m" );
1164                         case MAG_CURRENTMONTHNAME:
1165                                 return $wgLang->getMonthName( date("n") );
1166                         case MAG_CURRENTMONTHNAMEGEN:
1167                                 return $wgLang->getMonthNameGen( date("n") );
1168                         case MAG_CURRENTDAY:
1169                                 return date("j");
1170                         case MAG_CURRENTDAYNAME:
1171                                 return $wgLang->getWeekdayName( date("w")+1 );
1172                         case MAG_CURRENTYEAR:
1173                                 return date( "Y" );
1174                         case MAG_CURRENTTIME:
1175                                 return $wgLang->time( wfTimestampNow(), false );
1176                         case MAG_NUMBEROFARTICLES:
1177                                 return wfNumberOfArticles();
1178                         case MAG_SITENAME:
1179                                 return $wgSitename;
1180                         case MAG_SERVER:
1181                                 return $wgServer;
1182                         default:
1183                                 return NULL;
1184                 }
1185         }
1186
1187         function initialiseVariables()
1188         {
1189                 global $wgVariableIDs;
1190                 $this->mVariables = array();
1191                 foreach ( $wgVariableIDs as $id ) {
1192                         $mw =& MagicWord::get( $id );
1193                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1194                 }
1195         }
1196
# Expands {{...}} constructs in $text and returns the result.
#
# The text is run repeatedly through preg_replace_callback() with the global
# callback "wfBraceSubstitution" (presumably a thin wrapper that forwards to
# $wgCurParser->braceSubstitution() -- confirm where it is defined). Each pass
# works on a fork() of this parser published through $wgCurParser, and the
# fork's results are merge()d back afterwards. Passes stop as soon as one of
# them leaves the text unchanged, or after MAX_INCLUDE_REPEAT passes.
#
# NOTE(review): the pass limit reuses MAX_INCLUDE_REPEAT although the file also
# defines MAX_INCLUDE_PASSES ("Recursion depth of variable/inclusion
# evaluation"). Switching constants would change how deeply nested inclusions
# expand, so it is left as is here -- confirm which bound is intended.
/* private */ function replaceVariables( $text )
{
        global $wgCurParser;

        $fname = "Parser::replaceVariables";
        wfProfileIn( $fname );

        # The variable table is built lazily on first use
        if ( !$this->mVariables ) {
                $this->initialiseVariables();
        }
        $titleChars = Title::legalChars();
        $regex = "/{{([$titleChars\\|]*?)}}/s";

        # "Recursive" variable expansion: run it through a couple of passes
        for ( $i = 0; $i < MAX_INCLUDE_REPEAT; $i++ ) {
                $oldText = $text;

                # It's impossible to rebind a global in PHP
                # Instead, we run the substitution on a copy, then merge the changed fields back in
                $wgCurParser = $this->fork();
                $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
                $this->merge( $wgCurParser );

                # Strict comparison: loose == would treat differing numeric strings as equal
                if ( $oldText === $text ) {
                        break;
                }
        }

        # Close the profiling section opened above (previously left unbalanced)
        wfProfileOut( $fname );
        return $text;
}
1229
# Hands back a working copy of this parser whose mOutput is a fresh, empty
# ParserOutput; every other field is carried over as-is. The copy is meant to be
# folded back into its parent with merge() once work on it is finished.
#
# NOTE(review): this relies on plain assignment copying the object (PHP 4
# semantics). Where assignment only copies an object handle, the statement
# below would replace the parent's own mOutput -- confirm the target runtime.
function fork()
{
        $child = $this;
        $child->mOutput = new ParserOutput;
        return $child;
}
1238
# Merges a copy split off with fork() back into this parser.
#
# Three pieces of state are folded in:
#  - the copy's ParserOutput, via ParserOutput::merge();
#  - the per-page inclusion counters used for throttling, which are summed;
#  - the strip-state dictionaries, combined with the array union operator so
#    entries already present on this parser win over the copy's.
#
# $copy is taken by reference but is only read here.
#
# NOTE(review): if fork() returns a copy that already carries this parser's
# counters, summing them counts earlier inclusions twice -- confirm whether that
# stricter throttling is intended.
function merge( &$copy )
{
        # Output objects
        $this->mOutput->merge( $copy->mOutput );

        # Include throttling arrays
        foreach ( $copy->mIncludeCount as $dbk => $count ) {
                if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
                        $this->mIncludeCount[$dbk] += $count;
                } else {
                        $this->mIncludeCount[$dbk] = $count;
                }
        }

        # Strip states
        foreach ( $copy->mStripState as $dictName => $contentDict ) {
                if ( isset( $this->mStripState[$dictName] ) && is_array( $this->mStripState[$dictName] ) ) {
                        $this->mStripState[$dictName] += $contentDict;
                } else {
                        # The copy created a dictionary this parser does not have yet;
                        # "+=" on a missing entry would be an unsupported-operand error.
                        $this->mStripState[$dictName] = $contentDict;
                }
        }
}
1259
1260         function braceSubstitution( $matches )
1261         {
                     # Computes the replacement text for one {{...}} occurrence.
                     # $matches is a regex match array: [0] is the whole construct and
                     # [1] the text between the braces. Handlers are tried in a fixed
                     # order (SUBST, MSGNW/MSG/INT, NS, LOCALURL/LOCALURLE, the variable
                     # table, then inclusion of another page); the first one that sets
                     # $found decides the result. If none applies, $matches[0] is
                     # returned so the construct stays in the text untouched.
                     # matchStartAndRemove() is handed $text by name and the code below
                     # keeps using $text afterwards; presumably it strips the matched
                     # prefix in place -- TODO confirm in MagicWord.
1262                 global $wgLinkCache, $wgLang;
1263                 $fname = "Parser::braceSubstitution";
1264                 $found = false;
1265                 $nowiki = false;
1266
1267                 $text = $matches[1];
1268
1269                 # SUBST
                     # Net effect: only "subst"-prefixed constructs are processed further
                     # in OT_WIKI mode, and only constructs without the prefix in the
                     # other output modes. Everything else is returned verbatim.
1270                 $mwSubst =& MagicWord::get( MAG_SUBST );
1271                 if ( $mwSubst->matchStartAndRemove( $text ) ) {
1272                         if ( $this->mOutputType != OT_WIKI ) {
1273                                 # Invalid SUBST not replaced at PST time
1274                                 # Return without further processing
1275                                 $text = $matches[0];
1276                                 $found = true;
1277                         }
1278                 } elseif ( $this->mOutputType == OT_WIKI ) {
1279                         # SUBST not found in PST pass, do nothing
1280                         $text = $matches[0];
1281                         $found = true;
1282                 }
1283
1284                 # MSG, MSGNW and INT
1285                 if ( !$found ) {
1286                         # Check for MSGNW:
                             # $nowiki is consulted further down, where it makes included
                             # page text go through wfEscapeWikiText() in OT_HTML mode.
1287                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1288                         if ( $mwMsgnw->matchStartAndRemove( $text ) ) {
1289                                 $nowiki = true;
1290                         } else {
1291                                 # Remove obsolete MSG:
1292                                 $mwMsg =& MagicWord::get( MAG_MSG );
1293                                 $mwMsg->matchStartAndRemove( $text );
1294                         }
1295
1296                         # Check if it is an internal message
1297                         $mwInt =& MagicWord::get( MAG_INT );
1298                         if ( $mwInt->matchStartAndRemove( $text ) ) {
1299                                 $text = wfMsg( $text );
1300                                 $found = true;
1301                         }
1302                 }
1303
1304                 # NS
1305                 if ( !$found ) {
1306                         # Check for NS: (namespace expansion)
                             # intval() is 0 for non-numeric text and also for "0", so both
                             # of those fall through to the lookup by canonical name.
1307                         $mwNs = MagicWord::get( MAG_NS );
1308                         if ( $mwNs->matchStartAndRemove( $text ) ) {
1309                                 if ( intval( $text ) ) {
1310                                         $text = $wgLang->getNsText( intval( $text ) );
1311                                         $found = true;
1312                                 } else {
1313                                         $index = Namespace::getCanonicalIndex( strtolower( $text ) );
1314                                         if ( !is_null( $index ) ) {
1315                                                 $text = $wgLang->getNsText( $index );
1316                                                 $found = true;
1317                                         }
1318                                 }
1319                         }
1320                 }
1321
1322                 # LOCALURL and LOCALURLE
1323                 if ( !$found ) {
1324                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1325                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1326
                             # $func ends up holding the name of the Title method to call,
                             # or '' when neither magic word matched.
1327                         if ( $mwLocal->matchStartAndRemove( $text ) ) {
1328                                 $func = 'getLocalURL';
1329                         } elseif ( $mwLocalE->matchStartAndRemove( $text ) ) {
1330                                 $func = 'escapeLocalURL';
1331                         } else {
1332                                 $func = '';
1333                         }
1334
1335                         if ( $func !== '' ) {
                                     # First "|"-separated part is the page title; an optional
                                     # second part is forwarded to the Title method as its
                                     # argument (presumably a query string -- confirm).
1336                                 $args = explode( "|", $text );
1337                                 $n = count( $args );
1338                                 if ( $n > 0 ) {
1339                                         $title = Title::newFromText( $args[0] );
1340                                         if ( !is_null( $title ) ) {
1341                                                 if ( $n > 1 ) {
1342                                                         $text = $title->$func( $args[1] );
1343                                                 } else {
1344                                                         $text = $title->$func();
1345                                                 }
1346                                                 $found = true;
1347                                         }
1348                                 }
1349                         }
1350                 }
1351
1352                 # Check for a match against internal variables
                     # A hit is also recorded on the output object via mContainsOldMagic.
1353                 if ( !$found && array_key_exists( $text, $this->mVariables ) ) {
1354                         $text = $this->mVariables[$text];
1355                         $found = true;
1356                         $this->mOutput->mContainsOldMagic = true;
1357                 }
1358
1359                 # Load from database
1360                 if ( !$found ) {
1361                         $title = Title::newFromText( $text, NS_TEMPLATE );
1362                         if ( is_object( $title ) && !$title->isExternal() ) {
1363                                 # Check for excessive inclusion
                                     # The per-page counter is incremented on every attempt,
                                     # including those rejected for exceeding MAX_INCLUDE_REPEAT.
1364                                 $dbk = $title->getPrefixedDBkey();
1365                                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1366                                         $this->mIncludeCount[$dbk] = 0;
1367                                 }
1368                                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1369                                         $article = new Article( $title );
1370                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                                             # A strict false means no content was obtained; any
                                             # other value (including "") is used as the page text.
1371                                         if ( $articleContent !== false ) {
1372                                                 $found = true;
1373                                                 $text = $articleContent;
1374
1375                                                 # Escaping and link table handling
1376                                                 # Not required for preSaveTransform()
1377                                                 if ( $this->mOutputType == OT_HTML ) {
1378                                                         if ( $nowiki ) {
1379                                                                 $text = wfEscapeWikiText( $text );
1380                                                         } else {
1381                                                                 $text = $this->removeHTMLtags( $text );
1382                                                         }
1383                                                         # Do not enter included links in link table
1384                                                         $wgLinkCache->suspend();
1385
1386                                                         # Run full parser on the included text
1387                                                         $text = $this->strip( $text, $this->mStripState );
1388                                                         $text = $this->doWikiPass2( $text, true  );
1389
1390                                                         # Add the result to the strip state for re-inclusion after
1391                                                         # the rest of the processing
1392                                                         $text = $this->insertStripItem( $text, $this->mStripState );
1393
1394                                                         # Resume the link cache and register the inclusion as a link
1395                                                         $wgLinkCache->resume();
1396                                                         $wgLinkCache->addLinkObj( $title );
1397
1398                                                 }
1399                                         }
1400                                 }
1401
1402                                 # If the title is valid but undisplayable, make a link to it
                                     # Reached in OT_HTML mode both when the include limit was hit
                                     # and when no content could be loaded.
1403                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1404                                         $text = "[[" . $title->getPrefixedText() . "]]";
1405                                         $found = true;
1406                                 }
1407                         }
1408                 }
1409
1410                 if ( !$found ) {
1411                         return $matches[0];
1412                 } else {
1413                         return $text;
1414                 }
1415         }
1416
1417         # Cleans up HTML, removes dangerous tags and attributes
             #
             # HTML comments are dropped, then the text is split on "<" and every
             # fragment is examined as a potential tag. Tags from the whitelists below
             # that also pass the nesting checks are re-emitted with their attributes
             # run through fixTagAttributes(); for anything else the leading "<" is
             # written as "&lt;" and every ">" as "&gt;". Paired tags still open at
             # the end of the text are closed. Returns the cleaned text.
1418         /* private */ function removeHTMLtags( $text )
1419         {
1420                 $fname = "Parser::removeHTMLtags";
1421                 wfProfileIn( $fname );
1422                 $htmlpairs = array( # Tags that must be closed
1423                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1424                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1425                         "strike", "strong", "tt", "var", "div", "center",
1426                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1427                         "ruby", "rt" , "rb" , "rp", "p"
1428                 );
1429                 $htmlsingle = array(
1430                         "br", "hr", "li", "dt", "dd"
1431                 );
1432                 $htmlnest = array( # Tags that can be nested--??
1433                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1434                         "dl", "font", "big", "small", "sub", "sup"
1435                 );
1436                 $tabletags = array( # Can only appear inside table
1437                         "td", "th", "tr"
1438                 );
1439
                     # Table cell/row tags are treated like the single tags: they are
                     # never pushed on the tag stack and need no matching close tag.
1440                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1441                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1442
                     # NOTE(review): $htmlattrs is not referenced again in this function.
1443                 $htmlattrs = $this->getHTMLattrs () ;
1444
1445                 # Remove HTML comments
1446                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1447
                     # Everything before the first "<" is kept as-is; each later element
                     # of $bits is the text that followed one "<".
                     # $tagstack holds the currently open paired tags; $tablestack saves
                     # the enclosing $tagstack each time a <table> is opened.
1448                 $bits = explode( "<", $text );
1449                 $text = array_shift( $bits );
1450                 $tagstack = array(); $tablestack = array();
1451
1452                 foreach ( $bits as $x ) {
                             # When the fragment is not tag-shaped preg_match() fails and
                             # list() reads missing offsets; notices and warnings are
                             # switched off around these two statements for that reason.
                             # On a match: $slash is "/" for a closing tag, $t the tag
                             # name, $params the attribute text, $brace the closing ">"
                             # (or "/>"), $rest the text up to the next "<".
1453                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1454                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1455                           $x, $regs );
1456                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1457                         error_reporting( $prev );
1458
1459                         $badtag = 0 ;
1460                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1461                                 # Check our stack
1462                                 if ( $slash ) {
1463                                         # Closing a tag...
                                             # A paired closing tag must match the top of the
                                             # stack; on a mismatch the popped entry is pushed
                                             # back and the tag is escaped instead of emitted.
1464                                         if ( ! in_array( $t, $htmlsingle ) &&
1465                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1466                                                 array_push( $tagstack, $ot );
1467                                                 $badtag = 1;
1468                                         } else {
                                                     # Leaving a table restores the stack that was
                                                     # active before the table was opened.
1469                                                 if ( $t == "table" ) {
1470                                                         $tagstack = array_pop( $tablestack );
1471                                                 }
1472                                                 $newparams = "";
1473                                         }
1474                                 } else {
1475                                         # Keep track for later
                                             # Rejected: table-only tags outside a table, and a tag
                                             # that is already open but not listed in $htmlnest.
1476                                         if ( in_array( $t, $tabletags ) &&
1477                                           ! in_array( "table", $tagstack ) ) {
1478                                                 $badtag = 1;
1479                                         } else if ( in_array( $t, $tagstack ) &&
1480                                           ! in_array ( $t , $htmlnest ) ) {
1481                                                 $badtag = 1 ;
1482                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
                                                     # Opening a table starts a fresh stack and
                                                     # parks the current one on $tablestack.
1483                                                 if ( $t == "table" ) {
1484                                                         array_push( $tablestack, $tagstack );
1485                                                         $tagstack = array();
1486                                                 }
1487                                                 array_push( $tagstack, $t );
1488                                         }
1489                                         # Strip non-approved attributes from the tag
1490                                         $newparams = $this->fixTagAttributes($params);
1491
1492                                 }
1493                                 if ( ! $badtag ) {
1494                                         $rest = str_replace( ">", "&gt;", $rest );
1495                                         $text .= "<$slash$t $newparams$brace$rest";
1496                                         continue;
1497                                 }
1498                         }
                             # Not an accepted tag: emit the fragment as literal text
1499                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1500                 }
1501                 # Close off any remaining tags
1502                 while ( $t = array_pop( $tagstack ) ) {
1503                         $text .= "</$t>\n";
1504                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1505                 }
1506                 wfProfileOut( $fname );
1507                 return $text;
1508         }
1509
1510 /*
1511  *
1512  * This function accomplishes several tasks:
1513  * 1) Auto-number headings if that option is enabled
1514  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1515  * 3) Add a Table of contents on the top for users who have enabled the option
1516  * 4) Auto-anchor headings
1517  *
1518  * It loops through all headlines, collects the necessary data, then splits up the
1519  * string and re-inserts the newly formatted headlines.
1520  *
      * Input is text in which headings already appear as <h1>..<h6> elements; the
      * return value is that text with every heading rebuilt and, when enabled, the
      * table of contents inserted after the text that precedes the first heading.
      *
1521  */
1522
1523         /* private */ function formatHeadings( $text )
1524         {
1525                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1526                 $doShowToc = $this->mOptions->getShowToc();
                     # Edit links and the right-click hack are only offered when the
                     # title reports that it can be edited.
1527                 if( !$this->mTitle->userCanEdit() ) {
1528                         $showEditLink = 0;
1529                         $rightClickHack = 0;
1530                 } else {
1531                         $showEditLink = $this->mOptions->getEditSection();
1532                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1533                 }
1534
1535                 # Inhibit editsection links if requested in the page
1536                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1537                 if( $esw->matchAndRemove( $text ) ) {
1538                         $showEditLink = 0;
1539                 }
1540                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1541                 # do not add TOC
1542                 $mw =& MagicWord::get( MAG_NOTOC );
1543                 if( $mw->matchAndRemove( $text ) ) {
1544                         $doShowToc = 0;
1545                 }
1546
1547                 # never add the TOC to the Main Page. This is an entry page that should not
1548                 # be more than 1-2 screens large anyway
1549                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1550                         $doShowToc = 0;
1551                 }
1552
1553                 # Get all headlines for numbering them and adding funky stuff like [edit]
1554                 # links - this is for later, but we need the number of headlines right now
                     # Per heading n: $matches[1][n] is the level digit, $matches[2][n] the
                     # rest of the opening tag up to and including ">", $matches[3][n] the
                     # heading content.
1555                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1556
1557                 # if there are fewer than 4 headlines in the article, do not show TOC
1558                 if( $numMatches < 4 ) {
1559                         $doShowToc = 0;
1560                 }
1561
1562                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1563                 # override above conditions and always show TOC
1564                 $mw =& MagicWord::get( MAG_FORCETOC );
1565                 if ($mw->matchAndRemove( $text ) ) {
1566                         $doShowToc = 1;
1567                 }
1568
1569
1570                 # We need this to perform operations on the HTML
1571                 $sk =& $this->mOptions->getSkin();
1572
1573                 # headline counter
                     # (also the index into $matches[..] and $head for the current heading)
1574                 $headlineCount = 0;
1575
1576                 # Ugh .. the TOC should have neat indentation levels which can be
1577                 # passed to the skin functions. These are determined here
1578                 $toclevel = 0;
1579                 $toc = "";
1580                 $full = "";
1581                 $head = array();
1582                 $sublevelCount = array();
1583                 $level = 0;
1584                 $prevlevel = 0;
1585                 foreach( $matches[3] as $headline ) {
1586                         $numbering = "";
                             # $prevlevel stays 0 until a first heading has been processed
1587                         if( $level ) {
1588                                 $prevlevel = $level;
1589                         }
1590                         $level = $matches[1][$headlineCount];
1591                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1592                                 # reset when we enter a new level
1593                                 $sublevelCount[$level] = 0;
1594                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1595                                 $toclevel += $level - $prevlevel;
1596                         }
1597                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1598                                 # reset when we step back a level
1599                                 $sublevelCount[$level+1]=0;
1600                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1601                                 $toclevel -= $prevlevel - $level;
1602                         }
1603                         # count number of headlines for each level
                             # (@ hides the notice for the first heading seen at a level)
1604                         @$sublevelCount[$level]++;
                             # Build the dotted section number from the non-empty counters
                             # of levels 1 up to the current level
1605                         if( $doNumberHeadings || $doShowToc ) {
1606                                 $dot = 0;
1607                                 for( $i = 1; $i <= $level; $i++ ) {
1608                                         if( !empty( $sublevelCount[$i] ) ) {
1609                                                 if( $dot ) {
1610                                                         $numbering .= ".";
1611                                                 }
1612                                                 $numbering .= $sublevelCount[$i];
1613                                                 $dot = 1;
1614                                         }
1615                                 }
1616                         }
1617
1618                         # The canonized header is a version of the header text safe to use for links
1619                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1620                         $canonized_headline = Parser::unstrip( $headline, $this->mStripState );
1621
1622                         # strip out HTML
                             # $tocline keeps the tag-free, trimmed text for display in the
                             # TOC; the anchor form additionally has entities decoded and
                             # runs of the listed punctuation/space characters turned into "_".
1623                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1624                         $tocline = trim( $canonized_headline );
1625                         $canonized_headline = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
1626                         $refer[$headlineCount] = $canonized_headline;
1627
1628                         # count how many in assoc. array so we can track dupes in anchors
1629                         @$refers[$canonized_headline]++;
1630                         $refcount[$headlineCount]=$refers[$canonized_headline];
1631
1632                         # Prepend the number to the heading text
1633
1634                         if( $doNumberHeadings || $doShowToc ) {
1635                                 $tocline = $numbering . " " . $tocline;
1636
1637                                 # Don't number the heading if it is the only one (looks silly)
1638                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1639                                         # the two are different if the line contains a link
1640                                         $headline=$numbering . " " . $headline;
1641                                 }
1642                         }
1643
1644                         # Create the anchor for linking from the TOC to the section
                             # Second and later headings with the same canonized text get
                             # "_<occurrence number>" appended.
1645                         $anchor = $canonized_headline;
1646                         if($refcount[$headlineCount] > 1 ) {
1647                                 $anchor .= "_" . $refcount[$headlineCount];
1648                         }
1649                         if( $doShowToc ) {
1650                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1651                         }
1652                         if( $showEditLink ) {
1653                                 if ( empty( $head[$headlineCount] ) ) {
1654                                         $head[$headlineCount] = "";
1655                                 }
1656                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1657                         }
1658
1659                         # Add the edit section span
1660                         if( $rightClickHack ) {
1661                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1662                         }
1663
1664                         # give headline the correct <h#> tag
                             # (@ covers the case where $head[..] was not initialised above)
1665                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1666
1667                         $headlineCount++;
1668                 }
1669
1670                 if( $doShowToc ) {
                             # NOTE(review): $toclines is assigned but not read in this function
1671                         $toclines = $headlineCount;
1672                         $toc .= $sk->tocUnindent( $toclevel );
1673                         $toc = $sk->tocTable( $toc );
1674                 }
1675
1676                 # split up and insert constructed headlines
1677
                     # preg_split() yields the text between headings: $blocks[0] precedes
                     # the first heading and $head[$i] is emitted right after $blocks[$i].
1678                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1679                 $i = 0;
1680
1681                 foreach( $blocks as $block ) {
1682                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1683                             # This is the [edit] link that appears for the top block of text when
1684                                 # section editing is enabled
1685                                 $full .= $sk->editSectionLink(0);
1686                         }
1687                         $full .= $block;
1688                         if( $doShowToc && !$i) {
1689                         # Top anchor now in skin
1690                                 $full = $full.$toc;
1691                         }
1692
1693                         if( !empty( $head[$i] ) ) {
1694                                 $full .= $head[$i];
1695                         }
1696                         $i++;
1697                 }
1698
1699                 return $full;
1700         }
1701
# Turns the text that follows an "ISBN" magic word into a link to the
# "Booksources" special page.
#
# $tokenizer must offer previewToken() and nextToken(), each returning an array
# with "type" and (for text tokens) "text" entries. Tokens with an empty type
# are skipped first. Then:
#  - next token is not text: returns "ISBN " and leaves that token unconsumed;
#  - text token with no ISBN characters: returns "ISBN " plus the token text
#    unchanged;
#  - otherwise: returns an <a> element labelled "ISBN <number as written>",
#    followed by the remainder of the token text. The query passed to
#    escapeLocalUrl() is "isbn=" plus the number with dashes removed.
#
# An ISBN is the longest run of characters from $valid (digits, "-", A-Z) that
# follows any leading spaces.
/* private */ function doMagicISBN( &$tokenizer )
{
        # Skip over tokens whose type is empty
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" ) {
                return "ISBN ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        # strspn() measures each prefix without indexing into the string, so text
        # that is empty or consists only of spaces / ISBN characters no longer
        # reads past the end. The (string) casts normalise substr() returning
        # false at end of string on older PHP versions.
        $lead = strspn( $x, " " );
        $blank = (string)substr( $x, 0, $lead );
        $x = (string)substr( $x, $lead );

        $len = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $len );
        $x = (string)substr( $x, $len );

        $num = str_replace( "-", "", $isbn );
        if ( $num === "" ) {
                # Nothing usable (at most a run of dashes): give the text back
                # unchanged, including any dashes that were consumed above.
                return "ISBN $blank$isbn$x";
        }

        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
        $text = "<a href=\"" .
                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>";
        return $text . $x;
}
# Turns the text that follows an "RFC" magic word into an external link.
#
# $tokenizer must offer previewToken() and nextToken(), each returning an array
# with "type" and (for text tokens) "text" entries. Tokens with an empty type
# are skipped first. Then:
#  - next token is not text: returns "RFC " and leaves that token unconsumed;
#  - text token that has no digits after its leading spaces: returns "RFC "
#    plus the token text unchanged;
#  - otherwise: returns an <a> element labelled "RFC <number>" followed by the
#    remainder of the token text. The target is the "rfcurl" message with "$1"
#    replaced by the number; extra attributes come from the skin's
#    getExternalLinkAttributes().
/* private */ function doMagicRFC( &$tokenizer )
{
        # Skip over tokens whose type is empty
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" ) {
                return "RFC ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];

        # strspn() measures each prefix without indexing into the string, so text
        # that is empty or consists only of spaces / digits no longer reads past
        # the end. The (string) casts normalise substr() returning false at end
        # of string on older PHP versions.
        $lead = strspn( $x, " " );
        $blank = (string)substr( $x, 0, $lead );
        $x = (string)substr( $x, $lead );

        $len = strspn( $x, "0123456789" );
        $rfc = (string)substr( $x, 0, $len );
        $x = (string)substr( $x, $len );

        if ( $rfc === "" ) {
                # Plain assignment of the result: the previous code appended to a
                # variable that had never been set.
                return "RFC $blank$x";
        }

        $url = wfMsg( "rfcurl" );
        $url = str_replace( "$1", $rfc, $url );
        $sk =& $this->mOptions->getSkin();
        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
        return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1790
# Entry point for the pre-save pass: returns altered wiki markup, not HTML.
#
# $text       wiki markup as submitted
# $title      Title of the page being saved (stored by reference in mTitle)
# $user       user performing the save; handed on to pstPass2()
# $options    parser options, stored in mOptions
# $clearState when true (the default) clearState() is called before processing
#
# Steps: set the output type to OT_WIKI, normalise CRLF line ends to LF,
# rewrite <br> variants to a canonical form, strip() protected sections into a
# local strip state, run pstPass2(), then unstrip() the protected sections
# back into the text.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOptions = $options;
        $this->mTitle =& $title;
        $this->mOutputType = OT_WIKI;

        if ( $clearState ) {
                $this->clearState();
        }

        # A strip state local to this call, separate from $this->mStripState
        $stripState = false;

        # Plain string replacements
        $pairs = array(
                "\r\n" => "\n",
                );
        $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );

        # now with regexes
        # The first pattern uses [^>]+ so the match stays inside a single tag.
        # The previous ".+" could run from a plain <br> to a later "clear=all>"
        # on the same line and replace everything in between.
        $pairs = array(
                "/<br[^>]+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
                "/<br *?>/i" => "<br/>",
        );
        $text = preg_replace( array_keys( $pairs ), array_values( $pairs ), $text );

        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $text;
}
1817
1818         /* private */ function pstPass2( $text, &$user )
1819         {
1820                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1821
1822                 # Variable replacement
1823                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1824                 $text = $this->replaceVariables( $text );
1825
1826                 # Signatures
1827                 #
1828                 $n = $user->getName();
1829                 $k = $user->getOption( "nickname" );
1830                 if ( "" == $k ) { $k = $n; }
1831                 if(isset($wgLocaltimezone)) {
1832                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1833                 }
1834                 /* Note: this is an ugly timezone hack for the European wikis */
1835                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1836                   " (" . date( "T" ) . ")";
1837                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1838
1839                 $text = preg_replace( "/~~~~~/", $d, $text );
1840                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1841                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1842                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1843                   Namespace::getUser() ) . ":$n|$k]]", $text );
1844
1845                 # Context links: [[|name]] and [[name (context)|]]
1846                 #
1847                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1848                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1849                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1850                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1851
1852                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1853                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1854                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1855                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1856                                                                                                                 # [[ns:page (cont)|]]
1857                 $context = "";
1858                 $t = $this->mTitle->getText();
1859                 if ( preg_match( $conpat, $t, $m ) ) {
1860                         $context = $m[2];
1861                 }
1862                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1863                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1864                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1865
1866                 if ( "" == $context ) {
1867                         $text = preg_replace( $p2, "[[\\1]]", $text );
1868                 } else {
1869                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1870                 }
1871
1872                 /*
1873                 $mw =& MagicWord::get( MAG_SUBST );
1874                 $wgCurParser = $this->fork();
1875                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1876                 $this->merge( $wgCurParser );
1877                 */
1878
1879                 # Trim trailing whitespace
1880                 # MAG_END (__END__) tag allows for trailing
1881                 # whitespace to be deliberately included
1882                 $text = rtrim( $text );
1883                 $mw =& MagicWord::get( MAG_END );
1884                 $mw->matchAndRemove( $text );
1885
1886                 return $text;
1887         }
1888
1889         # Set up some variables which are usually set up in parse()
1890         # so that an external function can call some class members with confidence
1891         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1892         {
1893                 $this->mTitle =& $title;
1894                 $this->mOptions = $options;
1895                 $this->mOutputType = $outputType;
1896                 if ( $clearState ) {
1897                         $this->clearState();
1898                 }
1899         }
1900
1901         function transformMsg( $text, $options ) {
1902                 global $wgTitle;
1903                 static $executing = false;
1904
1905                 # Guard against infinite recursion
1906                 if ( $executing ) {
1907                         return $text;
1908                 }
1909                 $executing = true;
1910
1911                 $this->mTitle = $wgTitle;
1912                 $this->mOptions = $options;
1913                 $this->mOutputType = OT_MSG;
1914                 $this->clearState();
1915                 $text = $this->replaceVariables( $text );
1916
1917                 $executing = false;
1918                 return $text;
1919         }
1920 }
1921
# Container for the result of a parse: the output text, the language links and
# category links, and a flag recording whether old-style magic was seen.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor; every argument is optional.
        #
        # $text:             output text
        # $languageLinks:    array of language links
        # $categoryLinks:    array of category links
        # $containsOldMagic: boolean flag
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each one delegates to wfSetVar() (defined elsewhere) and
        # returns whatever that helper returns.
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the link data of another ParserOutput into this one.
        #
        # Language links and category links of $other are appended to this object's
        # lists, and the old-magic flag becomes the logical OR of both flags.
        # mText is not touched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Take the categories from $other. This line used to merge in
                # $this->mLanguageLinks, which put language links into the category
                # list and dropped the other object's categories.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1951
# Bundle of settings handed to the parser. The values are filled in from a
# handful of global settings and from a user's preferences by
# initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators; each one delegates to wfSetVar() (wfSetRef() for the skin),
        # both defined elsewhere, and returns whatever that helper returns.
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Create a ParserOptions object initialised from the given user.
        #
        # $user: user object, or a false value to use a fresh User
        #        (see initialiseFromUser())
        # Returns the new ParserOptions object.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() already declares its parameter by reference, so
                # no "&" is needed here. Call-time pass-by-reference ( f( &$x ) ) is a
                # fatal error from PHP 5.4 onwards.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill every option from global settings and user preferences.
        #
        # $userInput: user object whose getSkin() and getOption() are consulted.
        #             When it evaluates to false, a new User object with
        #             setLoaded( true ) is used in its place.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user preferences
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2024
2025 # Regex callbacks, used in Parser::replaceVariables
# Plain-function wrapper: hands the match array to braceSubstitution() on the
# parser object stored in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->braceSubstitution( $matches );
}
2031
2032 ?>