includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36
  37 define( "MAX_INCLUDE_REPEAT", 5 );
  38
  39 # Allowed values for $mOutputType
  40 define( "OT_HTML", 1 );
  41 define( "OT_WIKI", 2 );
  42 define( "OT_MSG", 3 );
  43
  44 # prefix for escaping, used in two functions at least
  45 define( "UNIQ_PREFIX", "NaodW29");
  46
  47 class Parser
  48 {
  49         # Cleared with clearState():
  50         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  51         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  52
  53         # Temporary:
  54         var $mOptions, $mTitle, $mOutputType;
  55
  56         function Parser()
  57         {
  58                 $this->clearState();
  59         }
  60
  61         function clearState()
  62         {
  63                 $this->mOutput = new ParserOutput;
  64                 $this->mAutonumber = 0;
  65                 $this->mLastSection = "";
  66                 $this->mDTopen = false;
  67                 $this->mVariables = false;
  68                 $this->mIncludeCount = array();
  69                 $this->mStripState = array();
  70                 $this->mArgStack = array();
  71         }
  72
  73         # First pass--just handle <nowiki> sections, pass the rest off
  74         # to internalParse() which does all the real work.
  75         #
  76         # Returns a ParserOutput
  77         #
  78         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  79         {
  80                 $fname = "Parser::parse";
  81                 wfProfileIn( $fname );
  82
  83                 if ( $clearState ) {
  84                         $this->clearState();
  85                 }
  86
  87                 $this->mOptions = $options;
  88                 $this->mTitle =& $title;
  89                 $this->mOutputType = OT_HTML;
  90
  91                 $stripState = NULL;
  92                 $text = $this->strip( $text, $this->mStripState );
  93                 $text = $this->internalParse( $text, $linestart );
  94                 $text = $this->unstrip( $text, $this->mStripState );
  95                 # Clean up special characters, only run once, next-to-last before doBlockLevels
  96                 $fixtags = array(
  97                         "/<hr *>/i" => '<hr/>',
  98                         "/<br *>/i" => '<br/>',
  99                         "/<center *>/i"=>'<div class="center">',
 100                         "/<\\/center *>/i" => '</div>',
 101                         # Clean up spare ampersands; note that we probably ought to be
 102                         # more careful about named entities.
 103                         '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 104                 );
 105                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 106
 107                 # only once and last
 108                 $text = $this->doBlockLevels( $text, $linestart );
 109
 110                 $this->mOutput->setText( $text );
 111                 wfProfileOut( $fname );
 112                 return $this->mOutput;
 113         }
 114
 115         /* static */ function getRandomString()
 116         {
 117                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 118         }
 119
 120         # Replaces all occurrences of <$tag>content</$tag> in the text
 121         # with a random marker and returns the new text. the output parameter
 122         # $content will be an associative array filled with data on the form
 123         # $unique_marker => content.
 124
 125         # If $content is already set, the additional entries will be appended
 126
 127         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 128                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 129                 if ( !$content ) {
 130                         $content = array( );
 131                 }
 132                 $n = 1;
 133                 $stripped = "";
 134
 135                 while ( "" != $text ) {
 136                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 137                         $stripped .= $p[0];
 138                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 139                                 $text = "";
 140                         } else {
 141                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 142                                 $marker = $rnd . sprintf("%08X", $n++);
 143                                 $content[$marker] = $q[0];
 144                                 $stripped .= $marker;
 145                                 $text = $q[1];
 146                         }
 147                 }
 148                 return $stripped;
 149         }
 150
 151         # Strips <nowiki>, <pre> and <math>
 152         # Returns the text, and fills an array with data needed in unstrip()
 153         # If the $state is already a valid strip state, it adds to the state
 154         #
 155         function strip( $text, &$state )
 156         {
 157                 $render = ($this->mOutputType == OT_HTML);
 158                 $nowiki_content = array();
 159                 $hiero_content = array();
 160                 $math_content = array();
 161                 $pre_content = array();
 162                 $item_content = array();
 163
 164                 # Replace any instances of the placeholders
 165                 $uniq_prefix = UNIQ_PREFIX;
 166                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 167
 168                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 169                 foreach( $nowiki_content as $marker => $content ){
 170                         if( $render ){
 171                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 172                         } else {
 173                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 174                         }
 175                 }
 176
 177                 if( $GLOBALS['wgUseWikiHiero'] ){
 178                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 179                         foreach( $hiero_content as $marker => $content ){
 180                                 if( $render ){
 181                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 182                                 } else {
 183                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 184                                 }
 185                         }
 186                 }
 187
 188                 if( $this->mOptions->getUseTeX() ){
 189                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 190                         foreach( $math_content as $marker => $content ){
 191                                 if( $render ){
 192                                         $math_content[$marker] = renderMath( $content );
 193                                 } else {
 194                                         $math_content[$marker] = "<math>$content</math>";
 195                                 }
 196                         }
 197                 }
 198
 199                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 200                 foreach( $pre_content as $marker => $content ){
 201                         if( $render ){
 202                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 203                         } else {
 204                                 $pre_content[$marker] = "<pre>$content</pre>";
 205                         }
 206                 }
 207
 208                 # Merge state with the pre-existing state, if there is one
 209                 if ( $state ) {
 210                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 211                         $state['hiero'] = $state['hiero'] + $hiero_content;
 212                         $state['math'] = $state['math'] + $math_content;
 213                         $state['pre'] = $state['pre'] + $pre_content;
 214                 } else {
 215                         $state = array(
 216                           'nowiki' => $nowiki_content,
 217                           'hiero' => $hiero_content,
 218                           'math' => $math_content,
 219                           'pre' => $pre_content,
 220                           'item' => $item_content
 221                         );
 222                 }
 223                 return $text;
 224         }
 225
 226         function unstrip( $text, &$state )
 227         {
 228                 # Must expand in reverse order, otherwise nested tags will be corrupted
 229                 $contentDict = end( $state );
 230                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 231                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 232                                 $text = str_replace( key( $contentDict ), $content, $text );
 233                         }
 234                 }
 235
 236                 return $text;
 237         }
 238
 239         # Add an item to the strip state
 240         # Returns the unique tag which must be inserted into the stripped text
 241         # The tag will be replaced with the original text in unstrip()
 242
 243         function insertStripItem( $text, &$state )
 244         {
 245                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 246                 if ( !$state ) {
 247                         $state = array(
 248                           'nowiki' => array(),
 249                           'hiero' => array(),
 250                           'math' => array(),
 251                           'pre' => array(),
 252                           'item' => array()
 253                         );
 254                 }
 255                 $state['item'][$rnd] = $text;
 256                 return $rnd;
 257         }
 258
 259         # This method generates the list of subcategories and pages for a category
 260         function categoryMagic ()
 261         {
 262                 global $wgLang , $wgUser ;
 263                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 264
 265                 $cns = Namespace::getCategory() ;
 266                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 267
 268                 $r = "<br style=\"clear:both;\"/>\n";
 269
 270
 271                 $sk =& $wgUser->getSkin() ;
 272
 273                 $articles = array() ;
 274                 $children = array() ;
 275                 $data = array () ;
 276                 $id = $this->mTitle->getArticleID() ;
 277
 278                 # For existing categories
 279                 if( $id ) {
 280                         $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 281                         $res = wfQuery ( $sql, DB_READ ) ;
 282                         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 283                 } else {
 284                         # For non-existing categories
 285                         $t = wfStrencode( $this->mTitle->getPrefixedDBKey() );
 286                         $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ;
 287                         $res = wfQuery ( $sql, DB_READ ) ;
 288                         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 289                 }
 290
 291                 # For all pages that link to this category
 292                 foreach ( $data AS $x )
 293                 {
 294                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 295                         if ( $t != "" ) $t .= ":" ;
 296                         $t .= $x->cur_title ;
 297
 298                         if ( $x->cur_namespace == $cns ) {
 299                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 300                         } else {
 301                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 302                         }
 303                 }
 304                 wfFreeResult ( $res ) ;
 305
 306                 # Showing subcategories
 307                 if ( count ( $children ) > 0 )
 308                 {
 309                         asort ( $children ) ;
 310                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 311                         $r .= implode ( ", " , $children ) ;
 312                 }
 313
 314                 # Showing pages in this category
 315                 if ( count ( $articles ) > 0 )
 316                 {
 317                         $ti = $this->mTitle->getText() ;
 318                         asort ( $articles ) ;
 319                         $h =  wfMsg( "category_header", $ti );
 320                         $r .= "<h2>{$h}</h2>\n" ;
 321                         $r .= implode ( ", " , $articles ) ;
 322                 }
 323
 324
 325                 return $r ;
 326         }
 327
 328         function getHTMLattrs ()
 329         {
 330                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 331                                 "title", "align", "lang", "dir", "width", "height",
 332                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 333                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 334                                 /* FONT */ "type", "start", "value", "compact",
 335                                 /* For various lists, mostly deprecated but safe */
 336                                 "summary", "width", "border", "frame", "rules",
 337                                 "cellspacing", "cellpadding", "valign", "char",
 338                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 339                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 340                                 "id", "class", "name", "style" /* For CSS */
 341                                 );
 342                 return $htmlattrs ;
 343         }
 344
 345         function fixTagAttributes ( $t )
 346         {
 347                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 348                 $htmlattrs = $this->getHTMLattrs() ;
 349
 350                 # Strip non-approved attributes from the tag
 351                 $t = preg_replace(
 352                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 353                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 354                         $t);
 355                 # Strip javascript "expression" from stylesheets. Brute force approach:
 356                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 357
 358                 if( preg_match(
 359                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 360                         wfMungeToUtf8( $t ) ) )
 361                 {
 362                         $t="";
 363                 }
 364
 365                 return trim ( $t ) ;
 366         }
 367
 368         function doTableStuff ( $t )
 369         {
 370                 $t = explode ( "\n" , $t ) ;
 371                 $td = array () ; # Is currently a td tag open?
 372                         $ltd = array () ; # Was it TD or TH?
 373                         $tr = array () ; # Is currently a tr tag open?
 374                         $ltr = array () ; # tr attributes
 375                         foreach ( $t AS $k => $x )
 376                         {
 377                                 $x = trim ( $x ) ;
 378                                 $fc = substr ( $x , 0 , 1 ) ;
 379                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 380                                 {
 381                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 382                                         array_push ( $td , false ) ;
 383                                         array_push ( $ltd , "" ) ;
 384                                         array_push ( $tr , false ) ;
 385                                         array_push ( $ltr , "" ) ;
 386                                 }
 387                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 388                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 389                                 {
 390                                         $z = "</table>\n" ;
 391                                         $l = array_pop ( $ltd ) ;
 392                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 393                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 394                                         array_pop ( $ltr ) ;
 395                                         $t[$k] = $z ;
 396                                 }
 397                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 398                                                 {
 399                                                 $z = trim ( substr ( $x , 2 ) ) ;
 400                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 401                                                 }*/
 402                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 403                                 {
 404                                         $x = substr ( $x , 1 ) ;
 405                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 406                                         $z = "" ;
 407                                         $l = array_pop ( $ltd ) ;
 408                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 409                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 410                                         array_pop ( $ltr ) ;
 411                                         $t[$k] = $z ;
 412                                         array_push ( $tr , false ) ;
 413                                         array_push ( $td , false ) ;
 414                                         array_push ( $ltd , "" ) ;
 415                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 416                                 }
 417                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 418                                 {
 419                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 420                                         {
 421                                                 $fc = "+" ;
 422                                                 $x = substr ( $x , 1 ) ;
 423                                         }
 424                                         $after = substr ( $x , 1 ) ;
 425                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 426                                         $after = explode ( "||" , $after ) ;
 427                                         $t[$k] = "" ;
 428                                         foreach ( $after AS $theline )
 429                                         {
 430                                                 $z = "" ;
 431                                                 if ( $fc != "+" )
 432                                                 {
 433                                                         $tra = array_pop ( $ltr ) ;
 434                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 435                                                         array_push ( $tr , true ) ;
 436                                                         array_push ( $ltr , "" ) ;
 437                                                 }
 438
 439                                                 $l = array_pop ( $ltd ) ;
 440                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 441                                                 if ( $fc == "|" ) $l = "td" ;
 442                                                 else if ( $fc == "!" ) $l = "th" ;
 443                                                 else if ( $fc == "+" ) $l = "caption" ;
 444                                                 else $l = "" ;
 445                                                 array_push ( $ltd , $l ) ;
 446                                                 $y = explode ( "|" , $theline , 2 ) ;
 447                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 448                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 449                                                 $t[$k] .= $y ;
 450                                                 array_push ( $td , true ) ;
 451                                         }
 452                                 }
 453                         }
 454
 455                 # Closing open td, tr && table
 456                 while ( count ( $td ) > 0 )
 457                 {
 458                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 459                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 460                         $t[] = "</table>" ;
 461                 }
 462
 463                 $t = implode ( "\n" , $t ) ;
 464                 #               $t = $this->removeHTMLtags( $t );
 465                 return $t ;
 466         }
 467
 468         function internalParse( $text, $linestart, $args = array() )
 469         {
 470                 $fname = "Parser::internalParse";
 471                 wfProfileIn( $fname );
 472
 473                 $text = $this->removeHTMLtags( $text );
 474                 $text = $this->replaceVariables( $text, $args );
 475
 476                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 477
 478                 $text = $this->doHeadings( $text );
 479                 if($this->mOptions->getUseDynamicDates()) {
 480                         global $wgDateFormatter;
 481                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 482                 }
 483                 $text = $this->replaceExternalLinks( $text );
 484                 $text = $this->doTokenizedParser ( $text );
 485                 $text = $this->doTableStuff ( $text ) ;
 486                 $text = $this->formatHeadings( $text );
 487                 $sk =& $this->mOptions->getSkin();
 488                 $text = $sk->transformContent( $text );
 489
 490                 if ( !isset ( $this->categoryMagicDone ) ) {
 491                    $text .= $this->categoryMagic () ;
 492                    $this->categoryMagicDone = true ;
 493                    }
 494
 495                 wfProfileOut( $fname );
 496                 return $text;
 497         }
 498
 499
 500         /* private */ function doHeadings( $text )
 501         {
 502                 for ( $i = 6; $i >= 1; --$i ) {
 503                         $h = substr( "======", 0, $i );
 504                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 505                           "<h{$i}>\\1</h{$i}>\\2", $text );
 506                 }
 507                 return $text;
 508         }
 509
 510         # Note: we have to do external links before the internal ones,
 511         # and otherwise take great care in the order of things here, so
 512         # that we don't end up interpreting some URLs twice.
 513
 514         /* private */ function replaceExternalLinks( $text )
 515         {
 516                 $fname = "Parser::replaceExternalLinks";
 517                 wfProfileIn( $fname );
 518                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 519                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 520                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 521                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 522                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 523                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 524                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 525                 wfProfileOut( $fname );
 526                 return $text;
 527         }
 528
 529         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 530         {
 531                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 532                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 533
 534                 # this is  the list of separators that should be ignored if they
 535                 # are the last character of an URL but that should be included
 536                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 537                 # in this case, the last comma should not become part of the URL,
 538                 # but in "www.foo.com/123,2342,32.htm" it should.
 539                 $sep = ",;\.:";
 540                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 541                 $images = "gif|png|jpg|jpeg";
 542
 543                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 544                 # they are interpreted as part of the string (used to tell PHP
 545                 # that the content of the string should be inserted there).
 546                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 547                   "((?i){$images})([^{$uc}]|$)/";
 548
 549                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 550                 $sk =& $this->mOptions->getSkin();
 551
 552                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 553                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 554                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 555                 }
 556                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 557                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 558                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 559                   "</a>\\5", $s );
 560                 $s = str_replace( $unique, $protocol, $s );
 561
 562                 $a = explode( "[{$protocol}:", " " . $s );
 563                 $s = array_shift( $a );
 564                 $s = substr( $s, 1 );
 565
 566                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 567                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 568
 569                 foreach ( $a as $line ) {
 570                         if ( preg_match( $e1, $line, $m ) ) {
 571                                 $link = "{$protocol}:{$m[1]}";
 572                                 $trail = $m[2];
 573                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 574                                 else { $text = wfEscapeHTML( $link ); }
 575                         } else if ( preg_match( $e2, $line, $m ) ) {
 576                                 $link = "{$protocol}:{$m[1]}";
 577                                 $text = $m[2];
 578                                 $trail = $m[3];
 579                         } else {
 580                                 $s .= "[{$protocol}:" . $line;
 581                                 continue;
 582                         }
 583                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 584                                 $paren = "";
 585                         } else {
 586                                 # Expand the URL for printable version
 587                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 588                         }
 589                         $la = $sk->getExternalLinkAttributes( $link, $text );
 590                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 591
 592                 }
 593                 return $s;
 594         }
 595
 596         /* private */ function handle3Quotes( &$state, $token )
 597         {
 598                 if ( $state["strong"] !== false ) {
 599                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 600                         {
 601                                 # ''' lala ''lala '''
 602                                 $s = "</em></strong><em>";
 603                         } else {
 604                                 $s = "</strong>";
 605                         }
 606                         $state["strong"] = FALSE;
 607                 } else {
 608                         $s = "<strong>";
 609                         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
 610                 }
 611                 return $s;
 612         }
 613
 614         /* private */ function handle2Quotes( &$state, $token )
 615         {
 616                 if ( $state["em"] !== false ) {
 617                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 618                         {
 619                                 # ''lala'''lala'' ....'''
 620                                 $s = "</strong></em><strong>";
 621                         } else {
 622                                 $s = "</em>";
 623                         }
 624                         $state["em"] = FALSE;
 625                 } else {
 626                         $s = "<em>";
 627                         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 628
 629                 }
 630                 return $s;
 631         }
 632
 633         /* private */ function handle5Quotes( &$state, $token )
 634         {
 635                 $s = "";
 636                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 637                         if ( $state["em"] < $state["strong"] ) {
 638                                 $s .= "</strong></em>";
 639                         } else {
 640                                 $s .= "</em></strong>";
 641                         }
 642                         $state["strong"] = $state["em"] = FALSE;
 643                 } elseif ( $state["em"] !== false ) {
 644                         $s .= "</em><strong>";
 645                         $state["em"] = FALSE;
 646                         $state["strong"] = $token["pos"];
 647                 } elseif ( $state["strong"] !== false ) {
 648                         $s .= "</strong><em>";
 649                         $state["strong"] = FALSE;
 650                         $state["em"] = $token["pos"];
 651                 } else { # not $em and not $strong
 652                         $s .= "<strong><em>";
 653                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 654                 }
 655                 return $s;
 656         }
 657
 658         /* private */ function doTokenizedParser( $str )
 659         {
 660                 global $wgLang; # for language specific parser hook
 661
 662                 $tokenizer=Tokenizer::newFromString( $str );
 663                 $tokenStack = array();
 664
 665                 $s="";
 666                 $state["em"]      = FALSE;
 667                 $state["strong"]  = FALSE;
 668                 $tagIsOpen = FALSE;
 669                 $threeopen = false;
 670
 671                 # The tokenizer splits the text into tokens and returns them one by one.
 672                 # Every call to the tokenizer returns a new token.
 673                 while ( $token = $tokenizer->nextToken() )
 674                 {
 675                         switch ( $token["type"] )
 676                         {
 677                                 case "text":
 678                                         # simple text with no further markup
 679                                         $txt = $token["text"];
 680                                         break;
 681                                 case "[[[":
 682                                         # remember the tag opened with 3 [
 683                                         $threeopen = true;
 684                                 case "[[":
 685                                         # link opening tag.
 686                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 687                                         $tagIsOpen = TRUE;
 688                                         array_push( $tokenStack, $token );
 689                                         $txt="";
 690                                         break;
 691
 692                                 case "]]]":
 693                                 case "]]":
 694                                         # link close tag.
 695                                         # get text from stack, glue it together, and call the code to handle a
 696                                         # link
 697
 698                                         if ( count( $tokenStack ) == 0 )
 699                                         {
 700                                                 # stack empty. Found a ]] without an opening [[
 701                                                 $txt = "]]";
 702                                         } else {
 703                                                 $linkText = "";
 704                                                 $lastToken = array_pop( $tokenStack );
 705                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 706                                                 {
 707                                                         if( !empty( $lastToken["text"] ) ) {
 708                                                                 $linkText = $lastToken["text"] . $linkText;
 709                                                         }
 710                                                         $lastToken = array_pop( $tokenStack );
 711                                                 }
 712
 713                                                 $txt = $linkText ."]]";
 714
 715                                                 if( isset( $lastToken["text"] ) ) {
 716                                                         $prefix = $lastToken["text"];
 717                                                 } else {
 718                                                         $prefix = "";
 719                                                 }
 720                                                 $nextToken = $tokenizer->previewToken();
 721                                                 if ( $nextToken["type"] == "text" )
 722                                                 {
 723                                                         # Preview just looks at it. Now we have to fetch it.
 724                                                         $nextToken = $tokenizer->nextToken();
 725                                                         $txt .= $nextToken["text"];
 726                                                 }
 727                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 728
 729                                                 # did the tag start with 3 [ ?
 730                                                 if($threeopen) {
 731                                                         # show the first as text
 732                                                         $txt = "[".$txt;
 733                                                         $threeopen=false;
 734                                                 }
 735
 736                                         }
 737                                         $tagIsOpen = (count( $tokenStack ) != 0);
 738                                         break;
 739                                 case "----":
 740                                         $txt = "\n<hr />\n";
 741                                         break;
 742                                 case "'''":
 743                                         # This and the three next ones handle quotes
 744                                         $txt = $this->handle3Quotes( $state, $token );
 745                                         break;
 746                                 case "''":
 747                                         $txt = $this->handle2Quotes( $state, $token );
 748                                         break;
 749                                 case "'''''":
 750                                         $txt = $this->handle5Quotes( $state, $token );
 751                                         break;
 752                                 case "":
 753                                         # empty token
 754                                         $txt="";
 755                                         break;
 756                                 case "RFC ":
 757                                         if ( $tagIsOpen ) {
 758                                                 $txt = "RFC ";
 759                                         } else {
 760                                                 $txt = $this->doMagicRFC( $tokenizer );
 761                                         }
 762                                         break;
 763                                 case "ISBN ":
 764                                         if ( $tagIsOpen ) {
 765                                                 $txt = "ISBN ";
 766                                         } else {
 767                                                 $txt = $this->doMagicISBN( $tokenizer );
 768                                         }
 769                                         break;
 770                                 default:
 771                                         # Call language specific Hook.
 772                                         $txt = $wgLang->processToken( $token, $tokenStack );
 773                                         if ( NULL == $txt ) {
 774                                                 # An unkown token. Highlight.
 775                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 776                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 777                                         }
 778                                         break;
 779                         }
 780                         # If we're parsing the interior of a link, don't append the interior to $s,
 781                         # but push it to the stack so it can be processed when a ]] token is found.
 782                         if ( $tagIsOpen  && $txt != "" ) {
 783                                 $token["type"] = "text";
 784                                 $token["text"] = $txt;
 785                                 array_push( $tokenStack, $token );
 786                         } else {
 787                                 $s .= $txt;
 788                         }
 789                 } #end while
 790                 if ( count( $tokenStack ) != 0 )
 791                 {
 792                         # still objects on stack. opened [[ tag without closing ]] tag.
 793                         $txt = "";
 794                         while ( $lastToken = array_pop( $tokenStack ) )
 795                         {
 796                                 if ( $lastToken["type"] == "text" )
 797                                 {
 798                                         $txt = $lastToken["text"] . $txt;
 799                                 } else {
 800                                         $txt = $lastToken["type"] . $txt;
 801                                 }
 802                         }
 803                         $s .= $txt;
 804                 }
 805                 return $s;
 806         }
 807
 808         /* private */ function handleInternalLink( $line, $prefix )
 809         {
 810                 global $wgLang, $wgLinkCache;
 811                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 812                 static $fname = "Parser::handleInternalLink" ;
 813                 wfProfileIn( $fname );
 814
 815                 wfProfileIn( "$fname-setup" );
 816                 static $tc = FALSE;
 817                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 818                 $sk =& $this->mOptions->getSkin();
 819
 820                 # Match a link having the form [[namespace:link|alternate]]trail
 821                 static $e1 = FALSE;
 822                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 823                 # Match the end of a line for a word that's not followed by whitespace,
 824                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 825                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 826                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 827                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 828
 829
 830                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 831                 static $image = FALSE;
 832                 static $special = FALSE;
 833                 static $media = FALSE;
 834                 static $category = FALSE;
 835                 if ( !$image ) { $image = Namespace::getImage(); }
 836                 if ( !$special ) { $special = Namespace::getSpecial(); }
 837                 if ( !$media ) { $media = Namespace::getMedia(); }
 838                 if ( !$category ) { $category = Namespace::getCategory(); ; }
 839
 840                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 841
 842                 wfProfileOut( "$fname-setup" );
 843                 $s = "";
 844
 845                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 846                         $text = $m[2];
 847                         $trail = $m[3];
 848                 } else { # Invalid form; output directly
 849                         $s .= $prefix . "[[" . $line ;
 850                         return $s;
 851                 }
 852
 853                 /* Valid link forms:
 854                 Foobar -- normal
 855                 :Foobar -- override special treatment of prefix (images, language links)
 856                 /Foobar -- convert to CurrentPage/Foobar
 857                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 858                 */
 859                 $c = substr($m[1],0,1);
 860                 $noforce = ($c != ":");
 861                 if( $c == "/" ) { # subpage
 862                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 863                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 864                                 $noslash=$m[1];
 865                         } else {
 866                                 $noslash=substr($m[1],1);
 867                         }
 868                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 869                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 870                                 if( "" == $text ) {
 871                                         $text= $m[1];
 872                                 } # this might be changed for ugliness reasons
 873                         } else {
 874                                 $link = $noslash; # no subpage allowed, use standard link
 875                         }
 876                 } elseif( $noforce ) { # no subpage
 877                         $link = $m[1];
 878                 } else {
 879                         $link = substr( $m[1], 1 );
 880                 }
 881                 if( "" == $text )
 882                         $text = $link;
 883
 884                 $nt = Title::newFromText( $link );
 885                 if( !$nt ) {
 886                         $s .= $prefix . "[[" . $line;
 887                         return $s;
 888                 }
 889                 $ns = $nt->getNamespace();
 890                 $iw = $nt->getInterWiki();
 891                 if( $noforce ) {
 892                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 893                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 894                                 return (trim($s) == '')? '': $s;
 895                         }
 896                         if( $ns == $image ) {
 897                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 898                                 $wgLinkCache->addImageLinkObj( $nt );
 899                                 return $s;
 900                         }
 901                         if ( $ns == $category ) {
 902                                 $t = $nt->getText() ;
 903                                 $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 904                                 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 905                                 $this->mOutput->mCategoryLinks[] = $t ;
 906                                 $s .= $prefix . $trail ;
 907                                 return $s ;
 908                         }
 909                 }
 910                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 911                     ( strpos( $link, "#" ) == FALSE ) ) {
 912                         # Self-links are handled specially; generally de-link and change to bold.
 913                         $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 914                         return $s;
 915                 }
 916
 917                 if( $ns == $media ) {
 918                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 919                         $wgLinkCache->addImageLinkObj( $nt );
 920                         return $s;
 921                 } elseif( $ns == $special ) {
 922                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 923                         return $s;
 924                 }
 925                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 926
 927                 wfProfileOut( $fname );
 928                 return $s;
 929         }
 930
 931         # Some functions here used by doBlockLevels()
 932         #
 933         /* private */ function closeParagraph()
 934         {
 935                 $result = "";
 936                 if ( '' != $this->mLastSection ) {
 937                         $result = "</" . $this->mLastSection  . ">\n";
 938                 }
 939                 $this->mInPre = false;
 940                 $this->mLastSection = "";
 941                 return $result;
 942         }
 943         # getCommon() returns the length of the longest common substring
 944         # of both arguments, starting at the beginning of both.
 945         #
 946         /* private */ function getCommon( $st1, $st2 )
 947         {
 948                 $fl = strlen( $st1 );
 949                 $shorter = strlen( $st2 );
 950                 if ( $fl < $shorter ) { $shorter = $fl; }
 951
 952                 for ( $i = 0; $i < $shorter; ++$i ) {
 953                         if ( $st1{$i} != $st2{$i} ) { break; }
 954                 }
 955                 return $i;
 956         }
 957         # These next three functions open, continue, and close the list
 958         # element appropriate to the prefix character passed into them.
 959         #
 960         /* private */ function openList( $char )
 961     {
 962                 $result = $this->closeParagraph();
 963
 964                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 965                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 966                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 967                 else if ( ";" == $char ) {
 968                         $result .= "<dl><dt>";
 969                         $this->mDTopen = true;
 970                 }
 971                 else { $result = "<!-- ERR 1 -->"; }
 972
 973                 return $result;
 974         }
 975
 976         /* private */ function nextItem( $char )
 977         {
 978                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 979                 else if ( ":" == $char || ";" == $char ) {
 980                         $close = "</dd>";
 981                         if ( $this->mDTopen ) { $close = "</dt>"; }
 982                         if ( ";" == $char ) {
 983                                 $this->mDTopen = true;
 984                                 return $close . "<dt>";
 985                         } else {
 986                                 $this->mDTopen = false;
 987                                 return $close . "<dd>";
 988                         }
 989                 }
 990                 return "<!-- ERR 2 -->";
 991         }
 992
 993         /* private */function closeList( $char )
 994         {
 995                 if ( "*" == $char ) { $text = "</li></ul>"; }
 996                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 997                 else if ( ":" == $char ) {
 998                         if ( $this->mDTopen ) {
 999                                 $this->mDTopen = false;
1000                                 $text = "</dt></dl>";
1001                         } else {
1002                                 $text = "</dd></dl>";
1003                         }
1004                 }
1005                 else {  return "<!-- ERR 3 -->"; }
1006                 return $text."\n";
1007         }
1008
1009         /* private */ function doBlockLevels( $text, $linestart )
1010         {
1011                 $fname = "Parser::doBlockLevels";
1012                 wfProfileIn( $fname );
1013                 # Parsing through the text line by line.  The main thing
1014                 # happening here is handling of block-level elements p, pre,
1015                 # and making lists from lines starting with * # : etc.
1016                 #
1017                 $a = explode( "\n", $text );
1018
1019                 $lastPref = $text = $lastLine = '';
1020                 $this->mDTopen = $inBlockElem = false;
1021                 $npl = 0;
1022                 $pstack = false;
1023
1024                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1025                 foreach ( $a as $t ) {
1026                         $oLine = $t;
1027                         $opl = strlen( $lastPref );
1028                         $preCloseMatch = preg_match("/<\\/pre/i", $t );
1029                         $preOpenMatch = preg_match("/<pre/i", $t );
1030                         if (!$this->mInPre) {
1031                                 $this->mInPre = !empty($preOpenMatch);
1032                         }
1033                         if ( !$this->mInPre ) {
1034                                 $npl = strspn( $t, "*#:;" );
1035                                 $pref = substr( $t, 0, $npl );
1036                                 $pref2 = str_replace( ";", ":", $pref );
1037                                 $t = substr( $t, $npl );
1038                         } else {
1039                                 $npl = 0;
1040                                 $pref = $pref2 = '';
1041                         }
1042
1043                         // list generation
1044                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1045                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1046                                 if ( $pstack ) { $pstack = false; }
1047
1048                                 if ( ";" == substr( $pref, -1 ) ) {
1049                                         $cpos = strpos( $t, ":" );
1050                                         if ( false !== $cpos ) {
1051                                                 $term = substr( $t, 0, $cpos );
1052                                                 $text .= $term . $this->nextItem( ":" );
1053                                                 $t = substr( $t, $cpos + 1 );
1054                                         }
1055                                 }
1056                         } else if (0 != $npl || 0 != $opl) {
1057                                 $cpl = $this->getCommon( $pref, $lastPref );
1058                                 if ( $pstack ) { $pstack = false; }
1059
1060                                 while ( $cpl < $opl ) {
1061                                         $text .= $this->closeList( $lastPref{$opl-1} );
1062                                         --$opl;
1063                                 }
1064                                 if ( $npl <= $cpl && $cpl > 0 ) {
1065                                         $text .= $this->nextItem( $pref{$cpl-1} );
1066                                 }
1067                                 while ( $npl > $cpl ) {
1068                                         $char = substr( $pref, $cpl, 1 );
1069                                         $text .= $this->openList( $char );
1070
1071                                         if ( ";" == $char ) {
1072                                                 $cpos = strpos( $t, ":" );
1073                                                 if ( ! ( false === $cpos ) ) {
1074                                                         $term = substr( $t, 0, $cpos );
1075                                                         $text .= $term . $this->nextItem( ":" );
1076                                                         $t = substr( $t, $cpos + 1 );
1077                                                 }
1078                                         }
1079                                         ++$cpl;
1080                                 }
1081                                 $lastPref = $pref2;
1082                         }
1083                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1084                                 $uniq_prefix = UNIQ_PREFIX;
1085                                 // XXX: use a stack for nestable elements like span, table and div
1086                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1087                                 $closematch = preg_match(
1088                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1089                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1090                                 if ( $openmatch or $closematch ) {
1091                                         if ( $pstack ) { $pstack = false; }
1092                                         $text .= $this->closeParagraph();
1093                                         if($preOpenMatch and !$preCloseMatch) {
1094                                                 $this->mInPre = true;
1095                                         }
1096                                         if ( $closematch  ) {
1097                                                 $inBlockElem = false;
1098                                         } else {
1099                                                 $inBlockElem = true;
1100                                         }
1101                                 } else if ( !$inBlockElem ) {
1102                                         if ( " " == $t{0} ) {
1103                                                 // pre
1104                                                 if ($this->mLastSection != 'pre') {
1105                                                         $pstack = false;
1106                                                         $text .= $this->closeParagraph().'<pre>';
1107                                                         $this->mLastSection = 'pre';
1108                                                 }
1109                                         } else {
1110                                                 // paragraph
1111                                                 if ( '' == trim($t) ) {
1112                                                         if ( $pstack ) {
1113                                                                 $text .= $pstack.'<br/>';
1114                                                                 $pstack = false;
1115                                                                 $this->mLastSection = 'p';
1116                                                         } else {
1117                                                                 if ($this->mLastSection != 'p' ) {
1118                                                                         $text .= $this->closeParagraph();
1119                                                                         $this->mLastSection = '';
1120                                                                         $pstack = "<p>";
1121                                                                 } else {
1122                                                                         $pstack = '</p><p>';
1123                                                                 }
1124                                                         }
1125                                                 } else {
1126                                                         if ( $pstack ) {
1127                                                                 $text .= $pstack;
1128                                                                 $pstack = false;
1129                                                                 $this->mLastSection = 'p';
1130                                                         } else if ($this->mLastSection != 'p') {
1131                                                                 $text .= $this->closeParagraph().'<p>';
1132                                                                 $this->mLastSection = 'p';
1133                                                         }
1134                                                 }
1135                                         }
1136                                 }
1137                         }
1138                         if ($pstack === false) {
1139                                 $text .= $t."\n";
1140                         }
1141                 }
1142                 while ( $npl ) {
1143                         $text .= $this->closeList( $pref2{$npl-1} );
1144                         --$npl;
1145                 }
1146                 if ( "" != $this->mLastSection ) {
1147                         $text .= "</" . $this->mLastSection . ">";
1148                         $this->mLastSection = "";
1149                 }
1150
1151                 wfProfileOut( $fname );
1152                 return $text;
1153         }
1154
1155         function getVariableValue( $index ) {
1156                 global $wgLang, $wgSitename, $wgServer;
1157
1158                 switch ( $index ) {
1159                         case MAG_CURRENTMONTH:
1160                                 return date( "m" );
1161                         case MAG_CURRENTMONTHNAME:
1162                                 return $wgLang->getMonthName( date("n") );
1163                         case MAG_CURRENTMONTHNAMEGEN:
1164                                 return $wgLang->getMonthNameGen( date("n") );
1165                         case MAG_CURRENTDAY:
1166                                 return date("j");
1167                         case MAG_CURRENTDAYNAME:
1168                                 return $wgLang->getWeekdayName( date("w")+1 );
1169                         case MAG_CURRENTYEAR:
1170                                 return date( "Y" );
1171                         case MAG_CURRENTTIME:
1172                                 return $wgLang->time( wfTimestampNow(), false );
1173                         case MAG_NUMBEROFARTICLES:
1174                                 return wfNumberOfArticles();
1175                         case MAG_SITENAME:
1176                                 return $wgSitename;
1177                         case MAG_SERVER:
1178                                 return $wgServer;
1179                         default:
1180                                 return NULL;
1181                 }
1182         }
1183
1184         function initialiseVariables()
1185         {
1186                 global $wgVariableIDs;
1187                 $this->mVariables = array();
1188                 foreach ( $wgVariableIDs as $id ) {
1189                         $mw =& MagicWord::get( $id );
1190                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1191                 }
1192         }
1193
1194         /* private */ function replaceVariables( $text, $args = array() )
1195         {
1196                 global $wgLang, $wgScript, $wgArticlePath;
1197
1198                 $fname = "Parser::replaceVariables";
1199                 wfProfileIn( $fname );
1200
1201                 $bail = false;
1202                 if ( !$this->mVariables ) {
1203                         $this->initialiseVariables();
1204                 }
1205                 $titleChars = Title::legalChars();
1206                 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1207
1208                 # This function is called recursively. To keep track of arguments we need a stack:
1209                 array_push( $this->mArgStack, $args );
1210
1211                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1212                 $GLOBALS['wgCurParser'] =& $this;
1213                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1214
1215                 array_pop( $this->mArgStack );
1216
1217                 return $text;
1218         }
1219
1220         function braceSubstitution( $matches )
1221         {
1222                 global $wgLinkCache, $wgLang;
1223                 $fname = "Parser::braceSubstitution";
1224                 $found = false;
1225                 $nowiki = false;
1226                 $title = NULL;
1227
1228                 # $newline is an optional newline character before the braces
1229                 # $part1 is the bit before the first |, and must contain only title characters
1230                 # $args is a list of arguments, starting from index 0, not including $part1
1231
1232                 $newline = $matches[1];
1233                 $part1 = $matches[2];
1234                 # If the third subpattern matched anything, it will start with |
1235                 if ( $matches[3] !== "" ) {
1236                         $args = explode( "|", substr( $matches[3], 1 ) );
1237                 } else {
1238                         $args = array();
1239                 }
1240                 $argc = count( $args );
1241
1242                 # SUBST
1243                 $mwSubst =& MagicWord::get( MAG_SUBST );
1244                 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1245                         if ( $this->mOutputType != OT_WIKI ) {
1246                                 # Invalid SUBST not replaced at PST time
1247                                 # Return without further processing
1248                                 $text = $matches[0];
1249                                 $found = true;
1250                         }
1251                 } elseif ( $this->mOutputType == OT_WIKI ) {
1252                         # SUBST not found in PST pass, do nothing
1253                         $text = $matches[0];
1254                         $found = true;
1255                 }
1256
1257                 # MSG, MSGNW and INT
1258                 if ( !$found ) {
1259                         # Check for MSGNW:
1260                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1261                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1262                                 $nowiki = true;
1263                         } else {
1264                                 # Remove obsolete MSG:
1265                                 $mwMsg =& MagicWord::get( MAG_MSG );
1266                                 $mwMsg->matchStartAndRemove( $part1 );
1267                         }
1268
1269                         # Check if it is an internal message
1270                         $mwInt =& MagicWord::get( MAG_INT );
1271                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1272                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1273                                         $text = wfMsgReal( $part1, $args, true );
1274                                         $found = true;
1275                                 }
1276                         }
1277                 }
1278
1279                 # NS
1280                 if ( !$found ) {
1281                         # Check for NS: (namespace expansion)
1282                         $mwNs = MagicWord::get( MAG_NS );
1283                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1284                                 if ( intval( $part1 ) ) {
1285                                         $text = $wgLang->getNsText( intval( $part1 ) );
1286                                         $found = true;
1287                                 } else {
1288                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1289                                         if ( !is_null( $index ) ) {
1290                                                 $text = $wgLang->getNsText( $index );
1291                                                 $found = true;
1292                                         }
1293                                 }
1294                         }
1295                 }
1296
1297                 # LOCALURL and LOCALURLE
1298                 if ( !$found ) {
1299                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1300                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1301
1302                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1303                                 $func = 'getLocalURL';
1304                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1305                                 $func = 'escapeLocalURL';
1306                         } else {
1307                                 $func = '';
1308                         }
1309
1310                         if ( $func !== '' ) {
1311                                 $title = Title::newFromText( $part1 );
1312                                 if ( !is_null( $title ) ) {
1313                                         if ( $argc > 0 ) {
1314                                                 $text = $title->$func( $args[0] );
1315                                         } else {
1316                                                 $text = $title->$func();
1317                                         }
1318                                         $found = true;
1319                                 }
1320                         }
1321                 }
1322
1323                 # Internal variables
1324                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1325                         $text = $this->mVariables[$part1];
1326                         $found = true;
1327                         $this->mOutput->mContainsOldMagic = true;
1328                 }
1329
1330                 # Arguments input from the caller
1331                 $inputArgs = end( $this->mArgStack );
1332                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1333                         $text = $inputArgs[$part1];
1334                         $found = true;
1335                 }
1336
1337                 # Load from database
1338                 if ( !$found ) {
1339                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1340                         if ( !is_null( $title ) && !$title->isExternal() ) {
1341                                 # Check for excessive inclusion
1342                                 $dbk = $title->getPrefixedDBkey();
1343                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1344                                         $article = new Article( $title );
1345                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1346                                         if ( $articleContent !== false ) {
1347                                                 $found = true;
1348                                                 $text = $articleContent;
1349
1350                                         }
1351                                 }
1352
1353                                 # If the title is valid but undisplayable, make a link to it
1354                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1355                                         $text = "[[" . $title->getPrefixedText() . "]]";
1356                                         $found = true;
1357                                 }
1358                         }
1359                 }
1360
1361                 # Recursive parsing, escaping and link table handling
1362                 # Only for HTML output
1363                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1364                         $text = wfEscapeWikiText( $text );
1365                 } elseif ( $this->mOutputType == OT_HTML && $found ) {
1366                         # Clean up argument array
1367                         $assocArgs = array();
1368                         $index = 1;
1369                         foreach( $args as $arg ) {
1370                                 $eqpos = strpos( $arg, "=" );
1371                                 if ( $eqpos === false ) {
1372                                         $assocArgs[$index++] = $arg;
1373                                 } else {
1374                                         $name = trim( substr( $arg, 0, $eqpos ) );
1375                                         $value = trim( substr( $arg, $eqpos+1 ) );
1376                                         if ( $value === false ) {
1377                                                 $value = "";
1378                                         }
1379                                         if ( $name !== false ) {
1380                                                 $assocArgs[$name] = $value;
1381                                         }
1382                                 }
1383                         }
1384
1385                         # Do not enter included links in link table
1386                         if ( !is_null( $title ) ) {
1387                                 $wgLinkCache->suspend();
1388                         }
1389
1390                         # Run full parser on the included text
1391                         $text = $this->strip( $text, $this->mStripState );
1392                         $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
1393
1394                         # Add the result to the strip state for re-inclusion after
1395                         # the rest of the processing
1396                         $text = $this->insertStripItem( $text, $this->mStripState );
1397
1398                         # Resume the link cache and register the inclusion as a link
1399                         if ( !is_null( $title ) ) {
1400                                 $wgLinkCache->resume();
1401                                 $wgLinkCache->addLinkObj( $title );
1402                         }
1403                 }
1404
1405                 if ( !$found ) {
1406                         return $matches[0];
1407                 } else {
1408                         return $newline . $text;
1409                 }
1410         }
1411
1412         # Returns true if the function is allowed to include this entity
1413         function incrementIncludeCount( $dbk )
1414         {
1415                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1416                         $this->mIncludeCount[$dbk] = 0;
1417                 }
1418                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1419                         return true;
1420                 } else {
1421                         return false;
1422                 }
1423         }
1424
1425
1426         # Cleans up HTML, removes dangerous tags and attributes
1427         /* private */ function removeHTMLtags( $text )
1428         {
1429                 $fname = "Parser::removeHTMLtags";
1430                 wfProfileIn( $fname );
1431                 $htmlpairs = array( # Tags that must be closed
1432                         "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1433                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1434                         "strike", "strong", "tt", "var", "div", "center",
1435                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1436                         "ruby", "rt" , "rb" , "rp", "p"
1437                 );
1438                 $htmlsingle = array(
1439                         "br", "hr", "li", "dt", "dd"
1440                 );
1441                 $htmlnest = array( # Tags that can be nested--??
1442                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1443                         "dl", "font", "big", "small", "sub", "sup"
1444                 );
1445                 $tabletags = array( # Can only appear inside table
1446                         "td", "th", "tr"
1447                 );
1448
1449                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1450                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1451
1452                 $htmlattrs = $this->getHTMLattrs () ;
1453
1454                 # Remove HTML comments
1455                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1456
1457                 $bits = explode( "<", $text );
1458                 $text = array_shift( $bits );
1459                 $tagstack = array(); $tablestack = array();
1460
1461                 foreach ( $bits as $x ) {
1462                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1463                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1464                           $x, $regs );
1465                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1466                         error_reporting( $prev );
1467
1468                         $badtag = 0 ;
1469                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1470                                 # Check our stack
1471                                 if ( $slash ) {
1472                                         # Closing a tag...
1473                                         if ( ! in_array( $t, $htmlsingle ) &&
1474                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1475                                                 array_push( $tagstack, $ot );
1476                                                 $badtag = 1;
1477                                         } else {
1478                                                 if ( $t == "table" ) {
1479                                                         $tagstack = array_pop( $tablestack );
1480                                                 }
1481                                                 $newparams = "";
1482                                         }
1483                                 } else {
1484                                         # Keep track for later
1485                                         if ( in_array( $t, $tabletags ) &&
1486                                           ! in_array( "table", $tagstack ) ) {
1487                                                 $badtag = 1;
1488                                         } else if ( in_array( $t, $tagstack ) &&
1489                                           ! in_array ( $t , $htmlnest ) ) {
1490                                                 $badtag = 1 ;
1491                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1492                                                 if ( $t == "table" ) {
1493                                                         array_push( $tablestack, $tagstack );
1494                                                         $tagstack = array();
1495                                                 }
1496                                                 array_push( $tagstack, $t );
1497                                         }
1498                                         # Strip non-approved attributes from the tag
1499                                         $newparams = $this->fixTagAttributes($params);
1500
1501                                 }
1502                                 if ( ! $badtag ) {
1503                                         $rest = str_replace( ">", "&gt;", $rest );
1504                                         $text .= "<$slash$t $newparams$brace$rest";
1505                                         continue;
1506                                 }
1507                         }
1508                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1509                 }
1510                 # Close off any remaining tags
1511                 while ( $t = array_pop( $tagstack ) ) {
1512                         $text .= "</$t>\n";
1513                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1514                 }
1515                 wfProfileOut( $fname );
1516                 return $text;
1517         }
1518
/*
 *
 * formatHeadings() accomplishes several tasks on the <h1>..<h6> headings
 * found in $text:
 * 1) Auto-number headings if the getNumberHeadings() option is set
 * 2) Add an edit link / right-click script to sections when the title is
 *    editable and the corresponding options are set
 * 3) Add a table of contents after the first block of text if the getShowToc()
 *    option is set (see the __NOTOC__ / __FORCETOC__ handling below)
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Returns the reassembled text.
 *
 */

        /* private */ function formatHeadings( $text )
        {
                $doNumberHeadings = $this->mOptions->getNumberHeadings();
                $doShowToc = $this->mOptions->getShowToc();
                if( !$this->mTitle->userCanEdit() ) {
                        $showEditLink = 0;
                        $rightClickHack = 0;
                } else {
                        $showEditLink = $this->mOptions->getEditSection();
                        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
                }

                # Inhibit editsection links if requested in the page
                $esw =& MagicWord::get( MAG_NOEDITSECTION );
                if( $esw->matchAndRemove( $text ) ) {
                        $showEditLink = 0;
                }
                # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
                # do not add TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if( $mw->matchAndRemove( $text ) ) {
                        $doShowToc = 0;
                }

                # never add the TOC to the Main Page. This is an entry page that should not
                # be more than 1-2 screens large anyway
                if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
                        $doShowToc = 0;
                }

                # Get all headlines for numbering them and adding funky stuff like [edit]
                # links - this is for later, but we need the number of headlines right now
                $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

                # if there are fewer than 4 headlines in the article, do not show TOC
                if( $numMatches < 4 ) {
                        $doShowToc = 0;
                }

                # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
                # override above conditions and always show TOC
                $mw =& MagicWord::get( MAG_FORCETOC );
                if ($mw->matchAndRemove( $text ) ) {
                        $doShowToc = 1;
                }

                # We need this to perform operations on the HTML
                $sk =& $this->mOptions->getSkin();

                # Neither flag changes from here on
                $numberOrToc = ( $doNumberHeadings || $doShowToc );

                # headline counter
                $headlineCount = 0;

                # Ugh .. the TOC should have neat indentation levels which can be
                # passed to the skin functions. These are determined here
                $toclevel = 0;
                $toc = "";
                $full = "";
                $head = array();
                $sublevelCount = array();
                # number of times each canonized headline has been seen so far
                $refers = array();
                $level = 0;
                $prevlevel = 0;
                foreach( $matches[3] as $headline ) {
                        $numbering = "";
                        if( $level ) {
                                $prevlevel = $level;
                        }
                        $level = $matches[1][$headlineCount];
                        if( $numberOrToc && $prevlevel && $level > $prevlevel ) {
                                # reset when we enter a new level
                                $sublevelCount[$level] = 0;
                                $toc .= $sk->tocIndent( $level - $prevlevel );
                                $toclevel += $level - $prevlevel;
                        }
                        if( $numberOrToc && $level < $prevlevel ) {
                                # reset when we step back a level
                                $sublevelCount[$level+1]=0;
                                $toc .= $sk->tocUnindent( $prevlevel - $level );
                                $toclevel -= $prevlevel - $level;
                        }
                        # count number of headlines for each level
                        if( !isset( $sublevelCount[$level] ) ) {
                                $sublevelCount[$level] = 0;
                        }
                        $sublevelCount[$level]++;
                        if( $numberOrToc ) {
                                # Join the non-empty counters of levels 1..$level with dots
                                $dot = 0;
                                for( $i = 1; $i <= $level; $i++ ) {
                                        if( !empty( $sublevelCount[$i] ) ) {
                                                if( $dot ) {
                                                        $numbering .= ".";
                                                }
                                                $numbering .= $sublevelCount[$i];
                                                $dot = 1;
                                        }
                                }
                        }

                        # The canonized header is a version of the header text safe to use for links
                        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

                        # strip out HTML
                        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                        $tocline = trim( $canonized_headline );
                        $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));

                        # count how many in assoc. array so we can track dupes in anchors
                        if( !isset( $refers[$canonized_headline] ) ) {
                                $refers[$canonized_headline] = 0;
                        }
                        $refers[$canonized_headline]++;
                        $dupeCount = $refers[$canonized_headline];

                        # Prepend the number to the heading text
                        if( $numberOrToc ) {
                                $tocline = $numbering . " " . $tocline;

                                # Don't number the heading if it is the only one (looks silly)
                                if( $doNumberHeadings && count( $matches[3] ) > 1) {
                                        # the two are different if the line contains a link
                                        $headline=$numbering . " " . $headline;
                                }
                        }

                        # Create the anchor for linking from the TOC to the section;
                        # repeated headlines get "_2", "_3", ... appended
                        $anchor = $canonized_headline;
                        if( $dupeCount > 1 ) {
                                $anchor .= "_" . $dupeCount;
                        }
                        if( $doShowToc ) {
                                $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                        }

                        # Assemble the replacement markup for this headline
                        $headHtml = "";
                        if( $showEditLink ) {
                                $headHtml .= $sk->editSectionLink($headlineCount+1);
                        }

                        # Add the edit section span
                        if( $rightClickHack ) {
                                $headline = $sk->editSectionScript($headlineCount+1,$headline);
                        }

                        # give headline the correct <h#> tag
                        $headHtml .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
                        $head[$headlineCount] = $headHtml;

                        $headlineCount++;
                }

                if( $doShowToc ) {
                        $toc .= $sk->tocUnindent( $toclevel );
                        $toc = $sk->tocTable( $toc );
                }

                # split up and insert constructed headlines

                $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
                $i = 0;

                foreach( $blocks as $block ) {
                        # An [edit] link for the top block used to be emitted here; it
                        # was disabled because it broke block formatting (for example,
                        # a bullet point in the top line)
                        $full .= $block;
                        if( $doShowToc && !$i) {
                                # Top anchor now in skin
                                $full = $full.$toc;
                        }

                        if( !empty( $head[$i] ) ) {
                                $full .= $head[$i];
                        }
                        $i++;
                }

                return $full;
        }
1713
1714         /* private */ function doMagicISBN( &$tokenizer )
1715         {
1716                 global $wgLang;
1717
1718                 # Check whether next token is a text token
1719                 # If yes, fetch it and convert the text into a
1720                 # Special::BookSources link
1721                 $token = $tokenizer->previewToken();
1722                 while ( $token["type"] == "" )
1723                 {
1724                         $tokenizer->nextToken();
1725                         $token = $tokenizer->previewToken();
1726                 }
1727                 if ( $token["type"] == "text" )
1728                 {
1729                         $token = $tokenizer->nextToken();
1730                         $x = $token["text"];
1731                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1732
1733                         $isbn = $blank = "" ;
1734                         while ( " " == $x{0} ) {
1735                                 $blank .= " ";
1736                                 $x = substr( $x, 1 );
1737                         }
1738                         while ( strstr( $valid, $x{0} ) != false ) {
1739                                 $isbn .= $x{0};
1740                                 $x = substr( $x, 1 );
1741                         }
1742                         $num = str_replace( "-", "", $isbn );
1743                         $num = str_replace( " ", "", $num );
1744
1745                         if ( "" == $num ) {
1746                                 $text = "ISBN $blank$x";
1747                         } else {
1748                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1749                                 $text = "<a href=\"" .
1750                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1751                                         "\" class=\"internal\">ISBN $isbn</a>";
1752                                 $text .= $x;
1753                         }
1754                 } else {
1755                         $text = "ISBN ";
1756                 }
1757                 return $text;
1758         }
1759         /* private */ function doMagicRFC( &$tokenizer )
1760         {
1761                 global $wgLang;
1762
1763                 # Check whether next token is a text token
1764                 # If yes, fetch it and convert the text into a
1765                 # link to an RFC source
1766                 $token = $tokenizer->previewToken();
1767                 while ( $token["type"] == "" )
1768                 {
1769                         $tokenizer->nextToken();
1770                         $token = $tokenizer->previewToken();
1771                 }
1772                 if ( $token["type"] == "text" )
1773                 {
1774                         $token = $tokenizer->nextToken();
1775                         $x = $token["text"];
1776                         $valid = "0123456789";
1777
1778                         $rfc = $blank = "" ;
1779                         while ( " " == $x{0} ) {
1780                                 $blank .= " ";
1781                                 $x = substr( $x, 1 );
1782                         }
1783                         while ( strstr( $valid, $x{0} ) != false ) {
1784                                 $rfc .= $x{0};
1785                                 $x = substr( $x, 1 );
1786                         }
1787
1788                         if ( "" == $rfc ) {
1789                                 $text .= "RFC $blank$x";
1790                         } else {
1791                                 $url = wfmsg( "rfcurl" );
1792                                 $url = str_replace( "$1", $rfc, $url);
1793                                 $sk =& $this->mOptions->getSkin();
1794                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1795                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1796                         }
1797                 } else {
1798                         $text = "RFC ";
1799                 }
1800                 return $text;
1801         }
1802
# Transform wiki markup just before it is saved.
#
# Configures the parser for OT_WIKI output, normalises line endings and
# <br> tags, then runs pstPass2() on the text with strip()/unstrip()
# called around it.
#
# $text       - wiki markup being saved
# $title      - title object of the page; stored by reference in mTitle
# $user       - user object, handed on to pstPass2()
# $options    - options object stored in mOptions
# $clearState - when true (the default) clearState() is called first
#
# Returns the transformed wiki markup.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOptions = $options;
        $this->mTitle =& $title;
        $this->mOutputType = OT_WIKI;

        if ( $clearState ) {
                $this->clearState();
        }

        # Normalise Windows line endings
        $text = str_replace( "\r\n", "\n", $text );

        # Normalise <br> tags.
        # [^>]+ keeps the first pattern inside a single tag. A greedy ".+"
        # here would start at the first <br on a line and run up to the last
        # clear=... on that line, so "<br> some text <br clear=all>" would be
        # replaced as a whole and the text in between would be lost.
        $pairs = array(
                "/<br[^>]+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
                "/<br *?>/i" => "<br/>",
        );
        $text = preg_replace( array_keys( $pairs ), array_values( $pairs ), $text );

        # $stripState is filled in by strip() and consumed by unstrip()
        $stripState = false;
        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $text;
}
1829
# Second pass of the pre-save transform.
#
# In order: runs replaceVariables(), expands the ~~~ / ~~~~ / ~~~~~
# signature shortcuts, completes "pipe trick" links such as [[|name]] and
# [[name (context)|]], trims trailing whitespace and removes the MAG_END
# magic word.
#
# $text - wiki markup
# $user - user object; getName() and getOption( "nickname" ) supply the
#         signature text
#
# Returns the transformed wiki markup.
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone, $wgCurParser;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }

        # Note: this is an ugly timezone hack for the European wikis.
        # TZ is switched to the configured zone only while the timestamp
        # is being formatted, then put back.
        if ( isset( $wgLocaltimezone ) ) {
                $oldtz = getenv( "TZ" );
                putenv( "TZ=$wgLocaltimezone" );
        }
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if ( isset( $wgLocaltimezone ) ) {
                putenv( "TZ=$oldtz" );
        }

        # The tilde runs are literal strings, so plain str_replace() is used.
        # With preg_replace() the replacement text is scanned for
        # backreferences, and a name or nickname containing "$1" or "\0"
        # would be mangled. Longest run first, so that ~~~~~ is not eaten
        # by the shorter forms.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # If the current title itself looks like "Name (context)", remember
        # the context so it can be appended to [[|page]] links.
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }

        # $p4 has to run before $p1 and $p3: $np also matches ":", so the
        # more general patterns would otherwise claim [[ns:page (cont)|]].
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        # $context only holds characters from $tc, which has neither "$" nor
        # a backslash, so it is safe inside the replacement string.
        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        /*
        $mw =& MagicWord::get( MAG_SUBST );
        $wgCurParser = $this->fork();
        $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
        $this->merge( $wgCurParser );
        */

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1900
# Prime the parser for callers that want to use individual class members
# without going through parse(): records the title (by reference), the
# options and the output type, and resets state via clearState() unless
# $clearState is false.
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1912
# Run replaceVariables() over a message text with the output type set to
# OT_MSG, using the global $wgTitle as the title.
#
# A static flag guards against re-entry: if this function is called again
# while a call is still in progress, the text is returned unchanged.
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $executing = false;

        if ( !$executing ) {
                $executing = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $executing = false;
        }

        return $text;
}
1932 }
1933
# Container for the result of a parse: the output text plus the language
# links, category links and the "contains old magic" flag that go with it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # $text             - output text
        # $languageLinks    - array of language links
        # $categoryLinks    - array of category links
        # $containsOldMagic - boolean flag
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each one returns whatever wfSetVar() returns
        # (presumably the previous value -- wfSetVar() is defined elsewhere)
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold another ParserOutput into this one: its language links and
        # category links are appended to the matching lists here, and the
        # old-magic flags are OR-ed together. mText is left untouched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links must come from $other's category list, not
                # from this object's language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1963
# Bundle of settings handed to the parser. Each setting has a getXxx()
# accessor and a setXxx() mutator; the mutators return whatever
# wfSetVar() / wfSetRef() return.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # --- site-wide settings ---

        function getUseTeX() { return $this->mUseTeX; }
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

        # --- per-user settings ---

        function getSkin() { return $this->mSkin; }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }

        function getDateFormat() { return $this->mDateFormat; }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

        function getEditSection() { return $this->mEditSection; }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

        function getNumberHeadings() { return $this->mNumberHeadings; }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

        function getShowToc() { return $this->mShowToc; }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Build a fresh ParserOptions and fill it in from the given user
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Populate every option. The first five come from the $wg* globals;
        # the skin and the remaining options are read from the user object.
        # When $userInput is empty, a new User marked as loaded is used
        # in its place.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # User preferences
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }

}
2036
# Plain-function regex callback used in Parser::replaceVariables.
# Passes the match array to braceSubstitution() on the parser object held
# in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2043
2044 ?>