includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36
  37 define( "MAX_INCLUDE_REPEAT", 5 );
  38
  39 # Allowed values for $mOutputType
  40 define( "OT_HTML", 1 );
  41 define( "OT_WIKI", 2 );
  42 define( "OT_MSG", 3 );
  43
  44 # prefix for escaping, used in two functions at least
  45 define( "UNIQ_PREFIX", "NaodW29");
  46
  47 class Parser
  48 {
  49         # Cleared with clearState():
  50         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  51         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  52
  53         # Temporary:
  54         var $mOptions, $mTitle, $mOutputType;
  55
  56         function Parser()
  57         {
  58                 $this->clearState();
  59         }
  60
  61         function clearState()
  62         {
  63                 $this->mOutput = new ParserOutput;
  64                 $this->mAutonumber = 0;
  65                 $this->mLastSection = "";
  66                 $this->mDTopen = false;
  67                 $this->mVariables = false;
  68                 $this->mIncludeCount = array();
  69                 $this->mStripState = array();
  70                 $this->mArgStack = array();
  71         }
  72
  73         # First pass--just handle <nowiki> sections, pass the rest off
  74         # to internalParse() which does all the real work.
  75         #
  76         # Returns a ParserOutput
  77         #
  78         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  79         {
  80                 $fname = "Parser::parse";
  81                 wfProfileIn( $fname );
  82
  83                 if ( $clearState ) {
  84                         $this->clearState();
  85                 }
  86
  87                 $this->mOptions = $options;
  88                 $this->mTitle =& $title;
  89                 $this->mOutputType = OT_HTML;
  90
  91                 $stripState = NULL;
  92                 $text = $this->strip( $text, $this->mStripState );
  93                 $text = $this->internalParse( $text, $linestart );
  94                 $text = $this->unstrip( $text, $this->mStripState );
  95                 # Clean up special characters, only run once, next-to-last before doBlockLevels
  96                 $fixtags = array(
  97                         "/<hr *>/i" => '<hr/>',
  98                         "/<br *>/i" => '<br/>',
  99                         "/<center *>/i"=>'<div class="center">',
 100                         "/<\\/center *>/i" => '</div>',
 101                         # Clean up spare ampersands; note that we probably ought to be
 102                         # more careful about named entities.
 103                         '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 104                 );
 105                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 106
 107                 # only once and last
 108                 $text = $this->doBlockLevels( $text, $linestart );
 109
 110                 $this->mOutput->setText( $text );
 111                 wfProfileOut( $fname );
 112                 return $this->mOutput;
 113         }
 114
 115         /* static */ function getRandomString()
 116         {
 117                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 118         }
 119
        # Replaces all occurrences of <$tag>content</$tag> in the text
        # with a random marker and returns the new text. The output parameter
        # $content will be an associative array filled with data of the form
        # $unique_marker => content.

        # If $content is already set, the additional entries will be appended.
 126
 127         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 128                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 129                 if ( !$content ) {
 130                         $content = array( );
 131                 }
 132                 $n = 1;
 133                 $stripped = "";
 134
 135                 while ( "" != $text ) {
 136                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 137                         $stripped .= $p[0];
 138                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 139                                 $text = "";
 140                         } else {
 141                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 142                                 $marker = $rnd . sprintf("%08X", $n++);
 143                                 $content[$marker] = $q[0];
 144                                 $stripped .= $marker;
 145                                 $text = $q[1];
 146                         }
 147                 }
 148                 return $stripped;
 149         }
 150
        # Strips <nowiki> and <pre> sections, plus <hiero> when $wgUseWikiHiero
        # is set and <math> when the parser options enable TeX.
        # Returns the text, and fills an array with data needed in unstrip().
        # If $state is already a valid strip state, the new data is added to it.
        #
 155         function strip( $text, &$state )
 156         {
 157                 $render = ($this->mOutputType == OT_HTML);
 158                 $nowiki_content = array();
 159                 $hiero_content = array();
 160                 $math_content = array();
 161                 $pre_content = array();
 162                 $item_content = array();
 163
 164                 # Replace any instances of the placeholders
 165                 $uniq_prefix = UNIQ_PREFIX;
 166                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 167
 168                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 169                 foreach( $nowiki_content as $marker => $content ){
 170                         if( $render ){
 171                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 172                         } else {
 173                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 174                         }
 175                 }
 176
 177                 if( $GLOBALS['wgUseWikiHiero'] ){
 178                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 179                         foreach( $hiero_content as $marker => $content ){
 180                                 if( $render ){
 181                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 182                                 } else {
 183                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 184                                 }
 185                         }
 186                 }
 187
 188                 if( $this->mOptions->getUseTeX() ){
 189                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 190                         foreach( $math_content as $marker => $content ){
 191                                 if( $render ){
 192                                         $math_content[$marker] = renderMath( $content );
 193                                 } else {
 194                                         $math_content[$marker] = "<math>$content</math>";
 195                                 }
 196                         }
 197                 }
 198
 199                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 200                 foreach( $pre_content as $marker => $content ){
 201                         if( $render ){
 202                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 203                         } else {
 204                                 $pre_content[$marker] = "<pre>$content</pre>";
 205                         }
 206                 }
 207
 208                 # Merge state with the pre-existing state, if there is one
 209                 if ( $state ) {
 210                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 211                         $state['hiero'] = $state['hiero'] + $hiero_content;
 212                         $state['math'] = $state['math'] + $math_content;
 213                         $state['pre'] = $state['pre'] + $pre_content;
 214                 } else {
 215                         $state = array(
 216                           'nowiki' => $nowiki_content,
 217                           'hiero' => $hiero_content,
 218                           'math' => $math_content,
 219                           'pre' => $pre_content,
 220                           'item' => $item_content
 221                         );
 222                 }
 223                 return $text;
 224         }
 225
 226         function unstrip( $text, &$state )
 227         {
 228                 # Must expand in reverse order, otherwise nested tags will be corrupted
 229                 $contentDict = end( $state );
 230                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 231                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 232                                 $text = str_replace( key( $contentDict ), $content, $text );
 233                         }
 234                 }
 235
 236                 return $text;
 237         }
 238
 239         # Add an item to the strip state
 240         # Returns the unique tag which must be inserted into the stripped text
 241         # The tag will be replaced with the original text in unstrip()
 242
 243         function insertStripItem( $text, &$state )
 244         {
 245                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 246                 if ( !$state ) {
 247                         $state = array(
 248                           'nowiki' => array(),
 249                           'hiero' => array(),
 250                           'math' => array(),
 251                           'pre' => array(),
 252                           'item' => array()
 253                         );
 254                 }
 255                 $state['item'][$rnd] = $text;
 256                 return $rnd;
 257         }
 258
 259         function categoryMagic ()
 260         {
 261                 global $wgLang , $wgUser ;
 262                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 263                 $id = $this->mTitle->getArticleID() ;
 264                 $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
 265                 $ti = $this->mTitle->getText() ;
 266                 $ti = explode ( ":" , $ti , 2 ) ;
 267                 if ( $cat != $ti[0] ) return "" ;
 268                 $r = '<br style="clear:both;"/>\n';
 269
 270                 $articles = array() ;
 271                 $parents = array () ;
 272                 $children = array() ;
 273
 274
 275 #               $sk =& $this->mGetSkin();
 276                 $sk =& $wgUser->getSkin() ;
 277
 278                 $data = array () ;
 279                 $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 280                 $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 281
 282                 $res = wfQuery ( $sql1, DB_READ ) ;
 283                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 284
 285                 $res = wfQuery ( $sql2, DB_READ ) ;
 286                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 287
 288
 289                 foreach ( $data AS $x )
 290                 {
 291                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 292                         if ( $t != "" ) $t .= ":" ;
 293                         $t .= $x->cur_title ;
 294
 295                         $y = explode ( ":" , $t , 2 ) ;
 296                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 297                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 298                         } else {
 299                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 300                         }
 301                 }
 302                 wfFreeResult ( $res ) ;
 303
 304                 # Children
 305                 if ( count ( $children ) > 0 )
 306                 {
 307                         asort ( $children ) ;
 308                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 309                         $r .= implode ( ", " , $children ) ;
 310                 }
 311
 312                 # Articles
 313                 if ( count ( $articles ) > 0 )
 314                 {
 315                         asort ( $articles ) ;
 316                         $h =  wfMsg( "category_header", $ti[1] );
 317                         $r .= "<h2>{$h}</h2>\n" ;
 318                         $r .= implode ( ", " , $articles ) ;
 319                 }
 320
 321
 322                 return $r ;
 323         }
 324
 325         function getHTMLattrs ()
 326         {
 327                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 328                                 "title", "align", "lang", "dir", "width", "height",
 329                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 330                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 331                                 /* FONT */ "type", "start", "value", "compact",
 332                                 /* For various lists, mostly deprecated but safe */
 333                                 "summary", "width", "border", "frame", "rules",
 334                                 "cellspacing", "cellpadding", "valign", "char",
 335                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 336                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 337                                 "id", "class", "name", "style" /* For CSS */
 338                                 );
 339                 return $htmlattrs ;
 340         }
 341
 342         function fixTagAttributes ( $t )
 343         {
 344                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 345                 $htmlattrs = $this->getHTMLattrs() ;
 346
 347                 # Strip non-approved attributes from the tag
 348                 $t = preg_replace(
 349                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 350                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 351                         $t);
 352                 # Strip javascript "expression" from stylesheets. Brute force approach:
 353                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 354
 355                 if( preg_match(
 356                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 357                         wfMungeToUtf8( $t ) ) )
 358                 {
 359                         $t="";
 360                 }
 361
 362                 return trim ( $t ) ;
 363         }
 364
 365         function doTableStuff ( $t )
 366         {
 367                 $t = explode ( "\n" , $t ) ;
 368                 $td = array () ; # Is currently a td tag open?
 369                         $ltd = array () ; # Was it TD or TH?
 370                         $tr = array () ; # Is currently a tr tag open?
 371                         $ltr = array () ; # tr attributes
 372                         foreach ( $t AS $k => $x )
 373                         {
 374                                 $x = rtrim ( $x ) ;
 375                                 $fc = substr ( $x , 0 , 1 ) ;
 376                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 377                                 {
 378                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 379                                         array_push ( $td , false ) ;
 380                                         array_push ( $ltd , "" ) ;
 381                                         array_push ( $tr , false ) ;
 382                                         array_push ( $ltr , "" ) ;
 383                                 }
 384                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 385                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 386                                 {
 387                                         $z = "</table>\n" ;
 388                                         $l = array_pop ( $ltd ) ;
 389                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 390                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 391                                         array_pop ( $ltr ) ;
 392                                         $t[$k] = $z ;
 393                                 }
 394                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 395                                                 {
 396                                                 $z = trim ( substr ( $x , 2 ) ) ;
 397                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 398                                                 }*/
 399                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 400                                 {
 401                                         $x = substr ( $x , 1 ) ;
 402                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 403                                         $z = "" ;
 404                                         $l = array_pop ( $ltd ) ;
 405                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 406                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 407                                         array_pop ( $ltr ) ;
 408                                         $t[$k] = $z ;
 409                                         array_push ( $tr , false ) ;
 410                                         array_push ( $td , false ) ;
 411                                         array_push ( $ltd , "" ) ;
 412                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 413                                 }
 414                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 415                                 {
 416                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 417                                         {
 418                                                 $fc = "+" ;
 419                                                 $x = substr ( $x , 1 ) ;
 420                                         }
 421                                         $after = substr ( $x , 1 ) ;
 422                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 423                                         $after = explode ( "||" , $after ) ;
 424                                         $t[$k] = "" ;
 425                                         foreach ( $after AS $theline )
 426                                         {
 427                                                 $z = "" ;
 428                                                 if ( $fc != "+" )
 429                                                 {
 430                                                         $tra = array_pop ( $ltr ) ;
 431                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 432                                                         array_push ( $tr , true ) ;
 433                                                         array_push ( $ltr , "" ) ;
 434                                                 }
 435
 436                                                 $l = array_pop ( $ltd ) ;
 437                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 438                                                 if ( $fc == "|" ) $l = "td" ;
 439                                                 else if ( $fc == "!" ) $l = "th" ;
 440                                                 else if ( $fc == "+" ) $l = "caption" ;
 441                                                 else $l = "" ;
 442                                                 array_push ( $ltd , $l ) ;
 443                                                 $y = explode ( "|" , $theline , 2 ) ;
 444                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 445                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 446                                                 $t[$k] .= $y ;
 447                                                 array_push ( $td , true ) ;
 448                                         }
 449                                 }
 450                         }
 451
 452                 # Closing open td, tr && table
 453                 while ( count ( $td ) > 0 )
 454                 {
 455                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 456                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 457                         $t[] = "</table>" ;
 458                 }
 459
 460                 $t = implode ( "\n" , $t ) ;
 461                 #               $t = $this->removeHTMLtags( $t );
 462                 return $t ;
 463         }
 464
 465         function internalParse( $text, $linestart, $args = array() )
 466         {
 467                 $fname = "Parser::internalParse";
 468                 wfProfileIn( $fname );
 469
 470                 $text = $this->removeHTMLtags( $text );
 471                 $text = $this->replaceVariables( $text, $args );
 472
 473                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 474
 475                 $text = $this->doHeadings( $text );
 476                 if($this->mOptions->getUseDynamicDates()) {
 477                         global $wgDateFormatter;
 478                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 479                 }
 480                 $text = $this->replaceExternalLinks( $text );
 481                 $text = $this->doTokenizedParser ( $text );
 482                 $text = $this->doTableStuff ( $text ) ;
 483                 $text = $this->formatHeadings( $text );
 484                 $sk =& $this->mOptions->getSkin();
 485                 $text = $sk->transformContent( $text );
 486
 487                 $text .= $this->categoryMagic () ;
 488
 489                 wfProfileOut( $fname );
 490                 return $text;
 491         }
 492
 493
 494         /* private */ function doHeadings( $text )
 495         {
 496                 for ( $i = 6; $i >= 1; --$i ) {
 497                         $h = substr( "======", 0, $i );
 498                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 499                           "<h{$i}>\\1</h{$i}>\\2", $text );
 500                 }
 501                 return $text;
 502         }
 503
 504         # Note: we have to do external links before the internal ones,
 505         # and otherwise take great care in the order of things here, so
 506         # that we don't end up interpreting some URLs twice.
 507
 508         /* private */ function replaceExternalLinks( $text )
 509         {
 510                 $fname = "Parser::replaceExternalLinks";
 511                 wfProfileIn( $fname );
 512                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 513                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 514                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 515                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 516                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 517                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 518                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 519                 wfProfileOut( $fname );
 520                 return $text;
 521         }
 522
 523         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 524         {
 525                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 526                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 527
 528                 # this is  the list of separators that should be ignored if they
 529                 # are the last character of an URL but that should be included
 530                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 531                 # in this case, the last comma should not become part of the URL,
 532                 # but in "www.foo.com/123,2342,32.htm" it should.
 533                 $sep = ",;\.:";
 534                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 535                 $images = "gif|png|jpg|jpeg";
 536
 537                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 538                 # they are interpreted as part of the string (used to tell PHP
 539                 # that the content of the string should be inserted there).
 540                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 541                   "((?i){$images})([^{$uc}]|$)/";
 542
 543                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 544                 $sk =& $this->mOptions->getSkin();
 545
 546                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 547                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 548                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 549                 }
 550                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 551                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 552                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 553                   "</a>\\5", $s );
 554                 $s = str_replace( $unique, $protocol, $s );
 555
 556                 $a = explode( "[{$protocol}:", " " . $s );
 557                 $s = array_shift( $a );
 558                 $s = substr( $s, 1 );
 559
 560                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 561                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 562
 563                 foreach ( $a as $line ) {
 564                         if ( preg_match( $e1, $line, $m ) ) {
 565                                 $link = "{$protocol}:{$m[1]}";
 566                                 $trail = $m[2];
 567                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 568                                 else { $text = wfEscapeHTML( $link ); }
 569                         } else if ( preg_match( $e2, $line, $m ) ) {
 570                                 $link = "{$protocol}:{$m[1]}";
 571                                 $text = $m[2];
 572                                 $trail = $m[3];
 573                         } else {
 574                                 $s .= "[{$protocol}:" . $line;
 575                                 continue;
 576                         }
 577                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 578                                 $paren = "";
 579                         } else {
 580                                 # Expand the URL for printable version
 581                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 582                         }
 583                         $la = $sk->getExternalLinkAttributes( $link, $text );
 584                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 585
 586                 }
 587                 return $s;
 588         }
 589
 590         /* private */ function handle3Quotes( &$state, $token )
 591         {
 592                 if ( $state["strong"] !== false ) {
 593                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 594                         {
 595                                 # ''' lala ''lala '''
 596                                 $s = "</em></strong><em>";
 597                         } else {
 598                                 $s = "</strong>";
 599                         }
 600                         $state["strong"] = FALSE;
 601                 } else {
 602                         $s = "<strong>";
 603                         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
 604                 }
 605                 return $s;
 606         }
 607
 608         /* private */ function handle2Quotes( &$state, $token )
 609         {
 610                 if ( $state["em"] !== false ) {
 611                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 612                         {
 613                                 # ''lala'''lala'' ....'''
 614                                 $s = "</strong></em><strong>";
 615                         } else {
 616                                 $s = "</em>";
 617                         }
 618                         $state["em"] = FALSE;
 619                 } else {
 620                         $s = "<em>";
 621                         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 622
 623                 }
 624                 return $s;
 625         }
 626
 627         /* private */ function handle5Quotes( &$state, $token )
 628         {
 629                 $s = "";
 630                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 631                         if ( $state["em"] < $state["strong"] ) {
 632                                 $s .= "</strong></em>";
 633                         } else {
 634                                 $s .= "</em></strong>";
 635                         }
 636                         $state["strong"] = $state["em"] = FALSE;
 637                 } elseif ( $state["em"] !== false ) {
 638                         $s .= "</em><strong>";
 639                         $state["em"] = FALSE;
 640                         $state["strong"] = $token["pos"];
 641                 } elseif ( $state["strong"] !== false ) {
 642                         $s .= "</strong><em>";
 643                         $state["strong"] = FALSE;
 644                         $state["em"] = $token["pos"];
 645                 } else { # not $em and not $strong
 646                         $s .= "<strong><em>";
 647                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 648                 }
 649                 return $s;
 650         }
 651
 652         /* private */ function doTokenizedParser( $str )
 653         {
 654                 global $wgLang; # for language specific parser hook
 655
 656                 $tokenizer=Tokenizer::newFromString( $str );
 657                 $tokenStack = array();
 658
 659                 $s="";
 660                 $state["em"]      = FALSE;
 661                 $state["strong"]  = FALSE;
 662                 $tagIsOpen = FALSE;
 663                 $threeopen = false;
 664
 665                 # The tokenizer splits the text into tokens and returns them one by one.
 666                 # Every call to the tokenizer returns a new token.
 667                 while ( $token = $tokenizer->nextToken() )
 668                 {
 669                         switch ( $token["type"] )
 670                         {
 671                                 case "text":
 672                                         # simple text with no further markup
 673                                         $txt = $token["text"];
 674                                         break;
 675                                 case "[[[":
 676                                         # remember the tag opened with 3 [
 677                                         $threeopen = true;
 678                                 case "[[":
 679                                         # link opening tag.
 680                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 681                                         $tagIsOpen = TRUE;
 682                                         array_push( $tokenStack, $token );
 683                                         $txt="";
 684                                         break;
 685
 686                                 case "]]]":
 687                                 case "]]":
 688                                         # link close tag.
 689                                         # get text from stack, glue it together, and call the code to handle a
 690                                         # link
 691
 692                                         if ( count( $tokenStack ) == 0 )
 693                                         {
 694                                                 # stack empty. Found a ]] without an opening [[
 695                                                 $txt = "]]";
 696                                         } else {
 697                                                 $linkText = "";
 698                                                 $lastToken = array_pop( $tokenStack );
 699                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 700                                                 {
 701                                                         if( !empty( $lastToken["text"] ) ) {
 702                                                                 $linkText = $lastToken["text"] . $linkText;
 703                                                         }
 704                                                         $lastToken = array_pop( $tokenStack );
 705                                                 }
 706
 707                                                 $txt = $linkText ."]]";
 708
 709                                                 if( isset( $lastToken["text"] ) ) {
 710                                                         $prefix = $lastToken["text"];
 711                                                 } else {
 712                                                         $prefix = "";
 713                                                 }
 714                                                 $nextToken = $tokenizer->previewToken();
 715                                                 if ( $nextToken["type"] == "text" )
 716                                                 {
 717                                                         # Preview just looks at it. Now we have to fetch it.
 718                                                         $nextToken = $tokenizer->nextToken();
 719                                                         $txt .= $nextToken["text"];
 720                                                 }
 721                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 722
 723                                                 # did the tag start with 3 [ ?
 724                                                 if($threeopen) {
 725                                                         # show the first as text
 726                                                         $txt = "[".$txt;
 727                                                         $threeopen=false;
 728                                                 }
 729
 730                                         }
 731                                         $tagIsOpen = (count( $tokenStack ) != 0);
 732                                         break;
 733                                 case "----":
 734                                         $txt = "\n<hr />\n";
 735                                         break;
 736                                 case "'''":
 737                                         # This and the three next ones handle quotes
 738                                         $txt = $this->handle3Quotes( $state, $token );
 739                                         break;
 740                                 case "''":
 741                                         $txt = $this->handle2Quotes( $state, $token );
 742                                         break;
 743                                 case "'''''":
 744                                         $txt = $this->handle5Quotes( $state, $token );
 745                                         break;
 746                                 case "":
 747                                         # empty token
 748                                         $txt="";
 749                                         break;
 750                                 case "RFC ":
 751                                         if ( $tagIsOpen ) {
 752                                                 $txt = "RFC ";
 753                                         } else {
 754                                                 $txt = $this->doMagicRFC( $tokenizer );
 755                                         }
 756                                         break;
 757                                 case "ISBN ":
 758                                         if ( $tagIsOpen ) {
 759                                                 $txt = "ISBN ";
 760                                         } else {
 761                                                 $txt = $this->doMagicISBN( $tokenizer );
 762                                         }
 763                                         break;
 764                                 default:
 765                                         # Call language specific Hook.
 766                                         $txt = $wgLang->processToken( $token, $tokenStack );
 767                                         if ( NULL == $txt ) {
 768                                                 # An unkown token. Highlight.
 769                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 770                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 771                                         }
 772                                         break;
 773                         }
 774                         # If we're parsing the interior of a link, don't append the interior to $s,
 775                         # but push it to the stack so it can be processed when a ]] token is found.
 776                         if ( $tagIsOpen  && $txt != "" ) {
 777                                 $token["type"] = "text";
 778                                 $token["text"] = $txt;
 779                                 array_push( $tokenStack, $token );
 780                         } else {
 781                                 $s .= $txt;
 782                         }
 783                 } #end while
 784                 if ( count( $tokenStack ) != 0 )
 785                 {
 786                         # still objects on stack. opened [[ tag without closing ]] tag.
 787                         $txt = "";
 788                         while ( $lastToken = array_pop( $tokenStack ) )
 789                         {
 790                                 if ( $lastToken["type"] == "text" )
 791                                 {
 792                                         $txt = $lastToken["text"] . $txt;
 793                                 } else {
 794                                         $txt = $lastToken["type"] . $txt;
 795                                 }
 796                         }
 797                         $s .= $txt;
 798                 }
 799                 return $s;
 800         }
 801
 802         /* private */ function handleInternalLink( $line, $prefix )
 803         {
 804                 global $wgLang, $wgLinkCache;
 805                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 806                 static $fname = "Parser::handleInternalLink" ;
 807                 wfProfileIn( $fname );
 808
 809                 wfProfileIn( "$fname-setup" );
 810                 static $tc = FALSE;
 811                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 812                 $sk =& $this->mOptions->getSkin();
 813
 814                 # Match a link having the form [[namespace:link|alternate]]trail
 815                 static $e1 = FALSE;
 816                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 817                 # Match the end of a line for a word that's not followed by whitespace,
 818                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 819                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 820                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 821                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 822
 823
 824                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 825                 static $image = FALSE;
 826                 static $special = FALSE;
 827                 static $media = FALSE;
 828                 static $category = FALSE;
 829                 if ( !$image ) { $image = Namespace::getImage(); }
 830                 if ( !$special ) { $special = Namespace::getSpecial(); }
 831                 if ( !$media ) { $media = Namespace::getMedia(); }
 832                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 833
 834                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 835
 836                 wfProfileOut( "$fname-setup" );
 837                 $s = "";
 838
 839                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 840                         $text = $m[2];
 841                         $trail = $m[3];
 842                 } else { # Invalid form; output directly
 843                         $s .= $prefix . "[[" . $line ;
 844                         return $s;
 845                 }
 846
 847                 /* Valid link forms:
 848                 Foobar -- normal
 849                 :Foobar -- override special treatment of prefix (images, language links)
 850                 /Foobar -- convert to CurrentPage/Foobar
 851                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 852                 */
 853                 $c = substr($m[1],0,1);
 854                 $noforce = ($c != ":");
 855                 if( $c == "/" ) { # subpage
 856                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 857                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 858                                 $noslash=$m[1];
 859                         } else {
 860                                 $noslash=substr($m[1],1);
 861                         }
 862                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 863                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 864                                 if( "" == $text ) {
 865                                         $text= $m[1];
 866                                 } # this might be changed for ugliness reasons
 867                         } else {
 868                                 $link = $noslash; # no subpage allowed, use standard link
 869                         }
 870                 } elseif( $noforce ) { # no subpage
 871                         $link = $m[1];
 872                 } else {
 873                         $link = substr( $m[1], 1 );
 874                 }
 875                 if( "" == $text )
 876                         $text = $link;
 877
 878                 $nt = Title::newFromText( $link );
 879                 if( !$nt ) {
 880                         $s .= $prefix . "[[" . $line;
 881                         return $s;
 882                 }
 883                 $ns = $nt->getNamespace();
 884                 $iw = $nt->getInterWiki();
 885                 if( $noforce ) {
 886                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 887                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 888                                 return (trim($s) == '')? '': $s;
 889                         }
 890                         if( $ns == $image ) {
 891                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 892                                 $wgLinkCache->addImageLinkObj( $nt );
 893                                 return $s;
 894                         }
 895                 }
 896                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 897                     ( strpos( $link, "#" ) == FALSE ) ) {
 898                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 899                         return $s;
 900                 }
 901
 902                 # Category feature
 903                 $catns = strtoupper ( $nt->getDBkey () ) ;
 904                 $catns = explode ( ":" , $catns ) ;
 905                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 906                 else $catns = "" ;
 907                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 908                         $t = explode ( ":" , $nt->getText() ) ;
 909                         array_shift ( $t ) ;
 910                         $t = implode ( ":" , $t ) ;
 911                         $t = $wgLang->ucFirst ( $t ) ;
 912                         $nnt = Title::newFromText ( $category.":".$t ) ;
 913                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 914                         $this->mOutput->mCategoryLinks[] = $t ;
 915                         $s .= $prefix . $trail ;
 916                         return $s ;
 917                 }
 918
 919                 if( $ns == $media ) {
 920                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 921                         $wgLinkCache->addImageLinkObj( $nt );
 922                         return $s;
 923                 } elseif( $ns == $special ) {
 924                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 925                         return $s;
 926                 }
 927                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 928
 929                 wfProfileOut( $fname );
 930                 return $s;
 931         }
 932
 933         # Some functions here used by doBlockLevels()
 934         #
 935         /* private */ function closeParagraph()
 936         {
 937                 $result = "";
 938                 if ( '' != $this->mLastSection ) {
 939                         $result = "</" . $this->mLastSection  . ">\n";
 940                 }
 941                 $this->mInPre = false;
 942                 $this->mLastSection = "";
 943                 return $result;
 944         }
 945         # getCommon() returns the length of the longest common substring
 946         # of both arguments, starting at the beginning of both.
 947         #
 948         /* private */ function getCommon( $st1, $st2 )
 949         {
 950                 $fl = strlen( $st1 );
 951                 $shorter = strlen( $st2 );
 952                 if ( $fl < $shorter ) { $shorter = $fl; }
 953
 954                 for ( $i = 0; $i < $shorter; ++$i ) {
 955                         if ( $st1{$i} != $st2{$i} ) { break; }
 956                 }
 957                 return $i;
 958         }
 959         # These next three functions open, continue, and close the list
 960         # element appropriate to the prefix character passed into them.
 961         #
 962         /* private */ function openList( $char )
 963     {
 964                 $result = $this->closeParagraph();
 965
 966                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 967                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 968                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 969                 else if ( ";" == $char ) {
 970                         $result .= "<dl><dt>";
 971                         $this->mDTopen = true;
 972                 }
 973                 else { $result = "<!-- ERR 1 -->"; }
 974
 975                 return $result;
 976         }
 977
 978         /* private */ function nextItem( $char )
 979         {
 980                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 981                 else if ( ":" == $char || ";" == $char ) {
 982                         $close = "</dd>";
 983                         if ( $this->mDTopen ) { $close = "</dt>"; }
 984                         if ( ";" == $char ) {
 985                                 $this->mDTopen = true;
 986                                 return $close . "<dt>";
 987                         } else {
 988                                 $this->mDTopen = false;
 989                                 return $close . "<dd>";
 990                         }
 991                 }
 992                 return "<!-- ERR 2 -->";
 993         }
 994
 995         /* private */function closeList( $char )
 996         {
 997                 if ( "*" == $char ) { $text = "</li></ul>"; }
 998                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 999                 else if ( ":" == $char ) {
1000                         if ( $this->mDTopen ) {
1001                                 $this->mDTopen = false;
1002                                 $text = "</dt></dl>";
1003                         } else {
1004                                 $text = "</dd></dl>";
1005                         }
1006                 }
1007                 else {  return "<!-- ERR 3 -->"; }
1008                 return $text."\n";
1009         }
1010
1011         /* private */ function doBlockLevels( $text, $linestart )
1012         {
1013                 $fname = "Parser::doBlockLevels";
1014                 wfProfileIn( $fname );
1015                 # Parsing through the text line by line.  The main thing
1016                 # happening here is handling of block-level elements p, pre,
1017                 # and making lists from lines starting with * # : etc.
1018                 #
1019                 $a = explode( "\n", $text );
1020
1021                 $lastPref = $text = $lastLine = '';
1022                 $this->mDTopen = $inBlockElem = false;
1023                 $npl = 0;
1024                 $pstack = false;
1025
1026                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1027                 foreach ( $a as $t ) {
1028                         $oLine = $t;
1029                         $preCloseMatch = preg_match("/<\\/pre/i", $t );
1030                         $preOpenMatch = preg_match("/<pre/i", $t );
1031                         if (!$this->mInPre) {
1032                                 $this->mInPre = ($preOpenMatch)? true : false;
1033                         }
1034                         if ( !$this->mInPre ) {
1035                                 $opl = strlen( $lastPref );
1036                                 $npl = strspn( $t, "*#:;" );
1037                                 $pref = substr( $t, 0, $npl );
1038                                 $pref2 = str_replace( ";", ":", $pref );
1039                                 $t = substr( $t, $npl );
1040                                 // list generation
1041                                 if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1042                                         $text .= $this->nextItem( substr( $pref, -1 ) );
1043                                         if ( $pstack ) { $pstack = false; }
1044
1045                                         if ( ";" == substr( $pref, -1 ) ) {
1046                                                 $cpos = strpos( $t, ":" );
1047                                                 if ( false !== $cpos ) {
1048                                                         $term = substr( $t, 0, $cpos );
1049                                                         $text .= $term . $this->nextItem( ":" );
1050                                                         $t = substr( $t, $cpos + 1 );
1051                                                 }
1052                                         }
1053                                 } else if (0 != $npl || 0 != $opl) {
1054                                         $cpl = $this->getCommon( $pref, $lastPref );
1055                                         if ( $pstack ) { $pstack = false; }
1056
1057                                         while ( $cpl < $opl ) {
1058                                                 $text .= $this->closeList( $lastPref{$opl-1} );
1059                                                 --$opl;
1060                                         }
1061                                         if ( $npl <= $cpl && $cpl > 0 ) {
1062                                                 $text .= $this->nextItem( $pref{$cpl-1} );
1063                                         }
1064                                         while ( $npl > $cpl ) {
1065                                                 $char = substr( $pref, $cpl, 1 );
1066                                                 $text .= $this->openList( $char );
1067
1068                                                 if ( ";" == $char ) {
1069                                                         $cpos = strpos( $t, ":" );
1070                                                         if ( ! ( false === $cpos ) ) {
1071                                                                 $term = substr( $t, 0, $cpos );
1072                                                                 $text .= $term . $this->nextItem( ":" );
1073                                                                 $t = substr( $t, $cpos + 1 );
1074                                                         }
1075                                                 }
1076                                                 ++$cpl;
1077                                         }
1078                                         $lastPref = $pref2;
1079                                 }
1080                         }
1081                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1082                                 $uniq_prefix = UNIQ_PREFIX;
1083                                 // XXX: use a stack for nestable elements like span, table and div
1084                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1085                                 $closematch = preg_match(
1086                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1087                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1088                                 if ( $openmatch or $closematch ) {
1089                                         if ( $pstack ) { $pstack = false; }
1090                                         $text .= $this->closeParagraph();
1091                                         if($preOpenMatch and !$preCloseMatch) {
1092                                                 $this->mInPre = true;
1093                                         }
1094                                         if ( $closematch  ) {
1095                                                 $inBlockElem = false;
1096                                         } else {
1097                                                 $inBlockElem = true;
1098                                         }
1099                                 } else if ( !$inBlockElem ) {
1100                                         if ( " " == $t{0} ) {
1101                                                 // pre
1102                                                 if ($this->mLastSection != 'pre') {
1103                                                         $pstack = false;
1104                                                         $text .= $this->closeParagraph().'<pre>';
1105                                                         $this->mLastSection = 'pre';
1106                                                 }
1107                                         } else {
1108                                                 // paragraph
1109                                                 if ( '' == trim($t) ) {
1110                                                         if ( $pstack ) {
1111                                                                 $text .= $pstack.'<br/>';
1112                                                                 $pstack = false;
1113                                                                 $this->mLastSection = 'p';
1114                                                         } else {
1115                                                                 if ($this->mLastSection != 'p' ) {
1116                                                                         $text .= $this->closeParagraph();
1117                                                                         $this->mLastSection = '';
1118                                                                         $pstack = "<p>";
1119                                                                 } else {
1120                                                                         $pstack = '</p><p>';
1121                                                                 }
1122                                                         }
1123                                                 } else {
1124                                                         if ( $pstack ) {
1125                                                                 $text .= $pstack;
1126                                                                 $pstack = false;
1127                                                                 $this->mLastSection = 'p';
1128                                                         } else if ($this->mLastSection != 'p') {
1129                                                                 $text .= $this->closeParagraph().'<p>';
1130                                                                 $this->mLastSection = 'p';
1131                                                         }
1132                                                 }
1133                                         }
1134                                 }
1135                         }
1136                         if ($pstack === false) {
1137                                 $text .= $t."\n";
1138                         }
1139                 }
1140                 while ( $npl ) {
1141                         $text .= $this->closeList( $pref2{$npl-1} );
1142                         --$npl;
1143                 }
1144                 if ( "" != $this->mLastSection ) {
1145                         $text .= "</" . $this->mLastSection . ">";
1146                         $this->mLastSection = "";
1147                 }
1148
1149                 wfProfileOut( $fname );
1150                 return $text;
1151         }
1152
1153         function getVariableValue( $index ) {
1154                 global $wgLang, $wgSitename, $wgServer;
1155
1156                 switch ( $index ) {
1157                         case MAG_CURRENTMONTH:
1158                                 return date( "m" );
1159                         case MAG_CURRENTMONTHNAME:
1160                                 return $wgLang->getMonthName( date("n") );
1161                         case MAG_CURRENTMONTHNAMEGEN:
1162                                 return $wgLang->getMonthNameGen( date("n") );
1163                         case MAG_CURRENTDAY:
1164                                 return date("j");
1165                         case MAG_CURRENTDAYNAME:
1166                                 return $wgLang->getWeekdayName( date("w")+1 );
1167                         case MAG_CURRENTYEAR:
1168                                 return date( "Y" );
1169                         case MAG_CURRENTTIME:
1170                                 return $wgLang->time( wfTimestampNow(), false );
1171                         case MAG_NUMBEROFARTICLES:
1172                                 return wfNumberOfArticles();
1173                         case MAG_SITENAME:
1174                                 return $wgSitename;
1175                         case MAG_SERVER:
1176                                 return $wgServer;
1177                         default:
1178                                 return NULL;
1179                 }
1180         }
1181
1182         function initialiseVariables()
1183         {
1184                 global $wgVariableIDs;
1185                 $this->mVariables = array();
1186                 foreach ( $wgVariableIDs as $id ) {
1187                         $mw =& MagicWord::get( $id );
1188                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1189                 }
1190         }
1191
1192         /* private */ function replaceVariables( $text, $args = array() )
1193         {
1194                 global $wgLang, $wgScript, $wgArticlePath;
1195
1196                 $fname = "Parser::replaceVariables";
1197                 wfProfileIn( $fname );
1198
1199                 $bail = false;
1200                 if ( !$this->mVariables ) {
1201                         $this->initialiseVariables();
1202                 }
1203                 $titleChars = Title::legalChars();
1204                 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1205
1206                 # This function is called recursively. To keep track of arguments we need a stack:
1207                 array_push( $this->mArgStack, $args );
1208
1209                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1210                 $GLOBALS['wgCurParser'] =& $this;
1211                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1212
1213                 array_pop( $this->mArgStack );
1214
1215                 return $text;
1216         }
1217
1218         function braceSubstitution( $matches )
1219         {
1220                 global $wgLinkCache, $wgLang;
1221                 $fname = "Parser::braceSubstitution";
1222                 $found = false;
1223                 $nowiki = false;
1224                 $title = NULL;
1225
1226                 # $newline is an optional newline character before the braces
1227                 # $part1 is the bit before the first |, and must contain only title characters
1228                 # $args is a list of arguments, starting from index 0, not including $part1
1229
1230                 $newline = $matches[1];
1231                 $part1 = $matches[2];
1232                 # If the third subpattern matched anything, it will start with |
1233                 if ( $matches[3] !== "" ) {
1234                         $args = explode( "|", substr( $matches[3], 1 ) );
1235                 } else {
1236                         $args = array();
1237                 }
1238                 $argc = count( $args );
1239
1240                 # SUBST
1241                 $mwSubst =& MagicWord::get( MAG_SUBST );
1242                 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1243                         if ( $this->mOutputType != OT_WIKI ) {
1244                                 # Invalid SUBST not replaced at PST time
1245                                 # Return without further processing
1246                                 $text = $matches[0];
1247                                 $found = true;
1248                         }
1249                 } elseif ( $this->mOutputType == OT_WIKI ) {
1250                         # SUBST not found in PST pass, do nothing
1251                         $text = $matches[0];
1252                         $found = true;
1253                 }
1254
1255                 # MSG, MSGNW and INT
1256                 if ( !$found ) {
1257                         # Check for MSGNW:
1258                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1259                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1260                                 $nowiki = true;
1261                         } else {
1262                                 # Remove obsolete MSG:
1263                                 $mwMsg =& MagicWord::get( MAG_MSG );
1264                                 $mwMsg->matchStartAndRemove( $part1 );
1265                         }
1266
1267                         # Check if it is an internal message
1268                         $mwInt =& MagicWord::get( MAG_INT );
1269                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1270                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1271                                         $text = wfMsgReal( $part1, $args, true );
1272                                         $found = true;
1273                                 }
1274                         }
1275                 }
1276
1277                 # NS
1278                 if ( !$found ) {
1279                         # Check for NS: (namespace expansion)
1280                         $mwNs = MagicWord::get( MAG_NS );
1281                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1282                                 if ( intval( $part1 ) ) {
1283                                         $text = $wgLang->getNsText( intval( $part1 ) );
1284                                         $found = true;
1285                                 } else {
1286                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1287                                         if ( !is_null( $index ) ) {
1288                                                 $text = $wgLang->getNsText( $index );
1289                                                 $found = true;
1290                                         }
1291                                 }
1292                         }
1293                 }
1294
1295                 # LOCALURL and LOCALURLE
1296                 if ( !$found ) {
1297                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1298                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1299
1300                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1301                                 $func = 'getLocalURL';
1302                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1303                                 $func = 'escapeLocalURL';
1304                         } else {
1305                                 $func = '';
1306                         }
1307
1308                         if ( $func !== '' ) {
1309                                 $title = Title::newFromText( $part1 );
1310                                 if ( !is_null( $title ) ) {
1311                                         if ( $argc > 0 ) {
1312                                                 $text = $title->$func( $args[0] );
1313                                         } else {
1314                                                 $text = $title->$func();
1315                                         }
1316                                         $found = true;
1317                                 }
1318                         }
1319                 }
1320
1321                 # Internal variables
1322                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1323                         $text = $this->mVariables[$part1];
1324                         $found = true;
1325                         $this->mOutput->mContainsOldMagic = true;
1326                 }
1327
1328                 # Arguments input from the caller
1329                 $inputArgs = end( $this->mArgStack );
1330                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1331                         $text = $inputArgs[$part1];
1332                         $found = true;
1333                 }
1334
1335                 # Load from database
1336                 if ( !$found ) {
1337                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1338                         if ( !is_null( $title ) && !$title->isExternal() ) {
1339                                 # Check for excessive inclusion
1340                                 $dbk = $title->getPrefixedDBkey();
1341                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1342                                         $article = new Article( $title );
1343                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1344                                         if ( $articleContent !== false ) {
1345                                                 $found = true;
1346                                                 $text = $articleContent;
1347
1348                                         }
1349                                 }
1350
1351                                 # If the title is valid but undisplayable, make a link to it
1352                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1353                                         $text = "[[" . $title->getPrefixedText() . "]]";
1354                                         $found = true;
1355                                 }
1356                         }
1357                 }
1358
1359                 # Recursive parsing, escaping and link table handling
1360                 # Only for HTML output
1361                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1362                         $text = wfEscapeWikiText( $text );
1363                 } elseif ( $this->mOutputType == OT_HTML && $found ) {
1364                         # Clean up argument array
1365                         $assocArgs = array();
1366                         $index = 1;
1367                         foreach( $args as $arg ) {
1368                                 $eqpos = strpos( $arg, "=" );
1369                                 if ( $eqpos === false ) {
1370                                         $assocArgs[$index++] = $arg;
1371                                 } else {
1372                                         $name = trim( substr( $arg, 0, $eqpos ) );
1373                                         $value = trim( substr( $arg, $eqpos+1 ) );
1374                                         if ( $value === false ) {
1375                                                 $value = "";
1376                                         }
1377                                         if ( $name !== false ) {
1378                                                 $assocArgs[$name] = $value;
1379                                         }
1380                                 }
1381                         }
1382
1383                         # Do not enter included links in link table
1384                         if ( !is_null( $title ) ) {
1385                                 $wgLinkCache->suspend();
1386                         }
1387
1388                         # Run full parser on the included text
1389                         $text = $this->strip( $text, $this->mStripState );
1390                         $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
1391
1392                         # Add the result to the strip state for re-inclusion after
1393                         # the rest of the processing
1394                         $text = $this->insertStripItem( $text, $this->mStripState );
1395
1396                         # Resume the link cache and register the inclusion as a link
1397                         if ( !is_null( $title ) ) {
1398                                 $wgLinkCache->resume();
1399                                 $wgLinkCache->addLinkObj( $title );
1400                         }
1401                 }
1402
1403                 if ( !$found ) {
1404                         return $matches[0];
1405                 } else {
1406                         return $newline . $text;
1407                 }
1408         }
1409
1410         # Returns true if the function is allowed to include this entity
1411         function incrementIncludeCount( $dbk )
1412         {
1413                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1414                         $this->mIncludeCount[$dbk] = 0;
1415                 }
1416                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1417                         return true;
1418                 } else {
1419                         return false;
1420                 }
1421         }
1422
1423
1424         # Cleans up HTML, removes dangerous tags and attributes
             #
             # Tags named in the lists below are kept, with their attributes run through
             # fixTagAttributes(). Every other "<" is emitted as "&lt;", and ">" in the
             # text that follows a tag as "&gt;". HTML comments are removed first, and
             # tags still open at the end of the text are closed.
             #
             # $text    markup to clean
             # Returns  the cleaned markup
1425         /* private */ function removeHTMLtags( $text )
1426         {
1427                 $fname = "Parser::removeHTMLtags";
1428                 wfProfileIn( $fname );
1429                 $htmlpairs = array( # Tags that must be closed
1430                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1431                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1432                         "strike", "strong", "tt", "var", "div", "center",
1433                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1434                         "ruby", "rt" , "rb" , "rp", "p"
1435                 );
1436                 $htmlsingle = array(
1437                         "br", "hr", "li", "dt", "dd"
1438                 );
1439                 $htmlnest = array( # Tags that can be nested--??
1440                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1441                         "dl", "font", "big", "small", "sub", "sup"
1442                 );
1443                 $tabletags = array( # Can only appear inside table
1444                         "td", "th", "tr"
1445                 );
1446
                     # Table cell/row tags are treated like single tags: they are never
                     # pushed on the tag stack. $htmlelements is the full whitelist.
1447                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1448                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1449
                     # NOTE(review): $htmlattrs is not read again within this function
1450                 $htmlattrs = $this->getHTMLattrs () ;
1451
1452                 # Remove HTML comments
1453                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1454
                     # Split on "<": the first piece is plain text, every later piece starts
                     # where a tag may begin.
                     # $tagstack holds the currently open paired tags; $tablestack holds the
                     # tag stacks that were saved when a table was opened.
1455                 $bits = explode( "<", $text );
1456                 $text = array_shift( $bits );
1457                 $tagstack = array(); $tablestack = array();
1458
1459                 foreach ( $bits as $x ) {
                             # Notices and warnings are silenced around list(), presumably because
                             # $regs has no elements to unpack when the pattern does not match.
                             # Pieces: optional "/", tag name, attribute text, closing bracket,
                             # and the text up to the next "<".
1460                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1461                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1462                           $x, $regs );
1463                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1464                         error_reporting( $prev );
1465
1466                         $badtag = 0 ;
1467                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1468                                 # Check our stack
1469                                 if ( $slash ) {
1470                                         # Closing a tag...
                                             # A paired tag must close the tag on top of the stack;
                                             # otherwise the popped entry is pushed back and the tag
                                             # is rejected.
1471                                         if ( ! in_array( $t, $htmlsingle ) &&
1472                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1473                                                 array_push( $tagstack, $ot );
1474                                                 $badtag = 1;
1475                                         } else {
                                                     # Closing a table restores the tag stack that was
                                                     # saved when the table was opened
1476                                                 if ( $t == "table" ) {
1477                                                         $tagstack = array_pop( $tablestack );
1478                                                 }
1479                                                 $newparams = "";
1480                                         }
1481                                 } else {
1482                                         # Keep track for later
                                             # Rejected: table-only tags outside a table, and a tag
                                             # that is already open unless it is listed in $htmlnest.
1483                                         if ( in_array( $t, $tabletags ) &&
1484                                           ! in_array( "table", $tagstack ) ) {
1485                                                 $badtag = 1;
1486                                         } else if ( in_array( $t, $tagstack ) &&
1487                                           ! in_array ( $t , $htmlnest ) ) {
1488                                                 $badtag = 1 ;
1489                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
                                                     # Opening a table saves the current stack and
                                                     # starts a fresh one for the table contents
1490                                                 if ( $t == "table" ) {
1491                                                         array_push( $tablestack, $tagstack );
1492                                                         $tagstack = array();
1493                                                 }
1494                                                 array_push( $tagstack, $t );
1495                                         }
1496                                         # Strip non-approved attributes from the tag
1497                                         $newparams = $this->fixTagAttributes($params);
1498
1499                                 }
1500                                 if ( ! $badtag ) {
                                             # Accepted tag: re-emit it in lower case with the cleaned
                                             # attributes, escaping ">" in the text that follows
1501                                         $rest = str_replace( ">", "&gt;", $rest );
1502                                         $text .= "<$slash$t $newparams$brace$rest";
1503                                         continue;
1504                                 }
1505                         }
                             # Unknown or rejected tag: output it as literal text
1506                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1507                 }
1508                 # Close off any remaining tags
1509                 while ( $t = array_pop( $tagstack ) ) {
1510                         $text .= "</$t>\n";
1511                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1512                 }
1513                 wfProfileOut( $fname );
1514                 return $text;
1515         }
1516
1517 /*
1518  *
1519  * This function accomplishes several tasks:
1520  * 1) Auto-number headings if that option is enabled
1521  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1522  * 3) Add a Table of contents on the top for users who have enabled the option
1523  * 4) Auto-anchor headings
1524  *
1525  * It loops through all headlines, collects the necessary data, then splits up the
1526  * string and re-inserts the newly formatted headlines.
1527  *
      * $text is HTML that already contains <h1>..<h6> elements; the return value is
      * the same HTML with each headline replaced by its formatted version and, when
      * enabled, the table of contents inserted after the text that precedes the
      * first headline.
      *
1528  */
1529
1530         /* private */ function formatHeadings( $text )
1531         {
1532                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1533                 $doShowToc = $this->mOptions->getShowToc();
                     # Edit links and the right-click hook are only offered when the title
                     # reports that the user can edit it
1534                 if( !$this->mTitle->userCanEdit() ) {
1535                         $showEditLink = 0;
1536                         $rightClickHack = 0;
1537                 } else {
1538                         $showEditLink = $this->mOptions->getEditSection();
1539                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1540                 }
1541
1542                 # Inhibit editsection links if requested in the page
1543                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1544                 if( $esw->matchAndRemove( $text ) ) {
1545                         $showEditLink = 0;
1546                 }
1547                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1548                 # do not add TOC
1549                 $mw =& MagicWord::get( MAG_NOTOC );
1550                 if( $mw->matchAndRemove( $text ) ) {
1551                         $doShowToc = 0;
1552                 }
1553
1554                 # never add the TOC to the Main Page. This is an entry page that should not
1555                 # be more than 1-2 screens large anyway
1556                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1557                         $doShowToc = 0;
1558                 }
1559
1560                 # Get all headlines for numbering them and adding funky stuff like [edit]
1561                 # links - this is for later, but we need the number of headlines right now
                     # $matches[1] = heading level, $matches[2] = rest of the opening tag
                     # including its closing bracket, $matches[3] = headline contents.
                     # NOTE(review): the pattern string is split before the closing bracket,
                     # presumably to keep a question mark followed by that bracket out of the
                     # source text.
1562                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1563
1564                 # if there are fewer than 4 headlines in the article, do not show TOC
1565                 if( $numMatches < 4 ) {
1566                         $doShowToc = 0;
1567                 }
1568
1569                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1570                 # override above conditions and always show TOC
1571                 $mw =& MagicWord::get( MAG_FORCETOC );
1572                 if ($mw->matchAndRemove( $text ) ) {
1573                         $doShowToc = 1;
1574                 }
1575
1576
1577                 # We need this to perform operations on the HTML
1578                 $sk =& $this->mOptions->getSkin();
1579
1580                 # headline counter
1581                 $headlineCount = 0;
1582
1583                 # Ugh .. the TOC should have neat indentation levels which can be
1584                 # passed to the skin functions. These are determined here
                     # $toc collects the TOC markup, $full the final output, $head[n] the
                     # formatted markup for headline n, and $sublevelCount[level] the running
                     # count of headlines at each level used for numbering.
1585                 $toclevel = 0;
1586                 $toc = "";
1587                 $full = "";
1588                 $head = array();
1589                 $sublevelCount = array();
1590                 $level = 0;
1591                 $prevlevel = 0;
1592                 foreach( $matches[3] as $headline ) {
1593                         $numbering = "";
                             # $level is still 0 on the first pass, so $prevlevel stays 0 there
1594                         if( $level ) {
1595                                 $prevlevel = $level;
1596                         }
1597                         $level = $matches[1][$headlineCount];
1598                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1599                                 # reset when we enter a new level
1600                                 $sublevelCount[$level] = 0;
1601                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1602                                 $toclevel += $level - $prevlevel;
1603                         }
1604                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1605                                 # reset when we step back a level
1606                                 $sublevelCount[$level+1]=0;
1607                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1608                                 $toclevel -= $prevlevel - $level;
1609                         }
1610                         # count number of headlines for each level
                             # (@ hides the notice raised when the entry does not exist yet)
1611                         @$sublevelCount[$level]++;
                             # Build the dotted number from the non-empty counters of levels 1
                             # up to the current level
1612                         if( $doNumberHeadings || $doShowToc ) {
1613                                 $dot = 0;
1614                                 for( $i = 1; $i <= $level; $i++ ) {
1615                                         if( !empty( $sublevelCount[$i] ) ) {
1616                                                 if( $dot ) {
1617                                                         $numbering .= ".";
1618                                                 }
1619                                                 $numbering .= $sublevelCount[$i];
1620                                                 $dot = 1;
1621                                         }
1622                                 }
1623                         }
1624
1625                         # The canonized header is a version of the header text safe to use for links
1626                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1627                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1628
1629                         # strip out HTML
1630                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                             # $tocline keeps the readable text; the anchor form has entities
                             # decoded and each run of the listed characters replaced by "_".
                             # NOTE(review): a double quote is not among the replaced characters
                             # and the anchor is later placed inside name="..." -- confirm this
                             # cannot produce broken markup.
1631                         $tocline = trim( $canonized_headline );
1632                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
1633                         $refer[$headlineCount] = $canonized_headline;
1634
1635                         # count how many in assoc. array so we can track dupes in anchors
                             # ($refer, $refers and $refcount are not initialised before this loop)
1636                         @$refers[$canonized_headline]++;
1637                         $refcount[$headlineCount]=$refers[$canonized_headline];
1638
1639                         # Prepend the number to the heading text
1640
1641                         if( $doNumberHeadings || $doShowToc ) {
1642                                 $tocline = $numbering . " " . $tocline;
1643
1644                                 # Don't number the heading if it is the only one (looks silly)
1645                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1646                                         # the two are different if the line contains a link
1647                                         $headline=$numbering . " " . $headline;
1648                                 }
1649                         }
1650
1651                         # Create the anchor for linking from the TOC to the section
                             # A repeated headline gets "_N" appended, N being its occurrence count
1652                         $anchor = $canonized_headline;
1653                         if($refcount[$headlineCount] > 1 ) {
1654                                 $anchor .= "_" . $refcount[$headlineCount];
1655                         }
1656                         if( $doShowToc ) {
1657                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1658                         }
1659                         if( $showEditLink ) {
1660                                 if ( empty( $head[$headlineCount] ) ) {
1661                                         $head[$headlineCount] = "";
1662                                 }
1663                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1664                         }
1665
1666                         # Add the edit section span
1667                         if( $rightClickHack ) {
1668                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1669                         }
1670
1671                         # give headline the correct <h#> tag
                             # $matches[2] already ends with the opening tag's closing bracket
1672                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1673
1674                         $headlineCount++;
1675                 }
1676
1677                 if( $doShowToc ) {
                             # NOTE(review): $toclines is assigned here but not read in this function
1678                         $toclines = $headlineCount;
1679                         $toc .= $sk->tocUnindent( $toclevel );
1680                         $toc = $sk->tocTable( $toc );
1681                 }
1682
1683                 # split up and insert constructed headlines
                     # The split pattern matches the same headline elements as the
                     # preg_match_all() above, so $blocks[$i] is the text in front of
                     # headline $i and $head[$i] is appended right after it.
1684
1685                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1686                 $i = 0;
1687
1688                 foreach( $blocks as $block ) {
1689                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1690                             # This is the [edit] link that appears for the top block of text when
1691                                 # section editing is enabled
1692
1693                                 # Disabled because it broke block formatting
1694                                 # For example, a bullet point in the top line
1695                                 # $full .= $sk->editSectionLink(0);
1696                         }
1697                         $full .= $block;
                             # The TOC goes directly after the first block
1698                         if( $doShowToc && !$i) {
1699                         # Top anchor now in skin
1700                                 $full = $full.$toc;
1701                         }
1702
1703                         if( !empty( $head[$i] ) ) {
1704                                 $full .= $head[$i];
1705                         }
1706                         $i++;
1707                 }
1708
1709                 return $full;
1710         }
1711
1712         /* private */ function doMagicISBN( &$tokenizer )
1713         {
1714                 global $wgLang;
1715
1716                 # Check whether next token is a text token
1717                 # If yes, fetch it and convert the text into a
1718                 # Special::BookSources link
1719                 $token = $tokenizer->previewToken();
1720                 while ( $token["type"] == "" )
1721                 {
1722                         $tokenizer->nextToken();
1723                         $token = $tokenizer->previewToken();
1724                 }
1725                 if ( $token["type"] == "text" )
1726                 {
1727                         $token = $tokenizer->nextToken();
1728                         $x = $token["text"];
1729                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1730
1731                         $isbn = $blank = "" ;
1732                         while ( " " == $x{0} ) {
1733                                 $blank .= " ";
1734                                 $x = substr( $x, 1 );
1735                         }
1736                         while ( strstr( $valid, $x{0} ) != false ) {
1737                                 $isbn .= $x{0};
1738                                 $x = substr( $x, 1 );
1739                         }
1740                         $num = str_replace( "-", "", $isbn );
1741                         $num = str_replace( " ", "", $num );
1742
1743                         if ( "" == $num ) {
1744                                 $text = "ISBN $blank$x";
1745                         } else {
1746                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1747                                 $text = "<a href=\"" .
1748                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1749                                         "\" class=\"internal\">ISBN $isbn</a>";
1750                                 $text .= $x;
1751                         }
1752                 } else {
1753                         $text = "ISBN ";
1754                 }
1755                 return $text;
1756         }
1757         /* private */ function doMagicRFC( &$tokenizer )
1758         {
1759                 global $wgLang;
1760
1761                 # Check whether next token is a text token
1762                 # If yes, fetch it and convert the text into a
1763                 # link to an RFC source
1764                 $token = $tokenizer->previewToken();
1765                 while ( $token["type"] == "" )
1766                 {
1767                         $tokenizer->nextToken();
1768                         $token = $tokenizer->previewToken();
1769                 }
1770                 if ( $token["type"] == "text" )
1771                 {
1772                         $token = $tokenizer->nextToken();
1773                         $x = $token["text"];
1774                         $valid = "0123456789";
1775
1776                         $rfc = $blank = "" ;
1777                         while ( " " == $x{0} ) {
1778                                 $blank .= " ";
1779                                 $x = substr( $x, 1 );
1780                         }
1781                         while ( strstr( $valid, $x{0} ) != false ) {
1782                                 $rfc .= $x{0};
1783                                 $x = substr( $x, 1 );
1784                         }
1785
1786                         if ( "" == $rfc ) {
1787                                 $text .= "RFC $blank$x";
1788                         } else {
1789                                 $url = wfmsg( "rfcurl" );
1790                                 $url = str_replace( "$1", $rfc, $url);
1791                                 $sk =& $this->mOptions->getSkin();
1792                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1793                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1794                         }
1795                 } else {
1796                         $text = "RFC ";
1797                 }
1798                 return $text;
1799         }
1800
1801         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1802         {
1803                 $this->mOptions = $options;
1804                 $this->mTitle =& $title;
1805                 $this->mOutputType = OT_WIKI;
1806
1807                 if ( $clearState ) {
1808                         $this->clearState();
1809                 }
1810
1811                 $stripState = false;
1812                 $pairs = array(
1813                         "\r\n" => "\n",
1814                         );
1815                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
1816                 // now with regexes
1817                 $pairs = array(
1818                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1819                         "/<br *?>/i" => "<br/>",
1820                 );
1821                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1822                 $text = $this->strip( $text, $stripState, false );
1823                 $text = $this->pstPass2( $text, $user );
1824                 $text = $this->unstrip( $text, $stripState );
1825                 return $text;
1826         }
1827
1828         /* private */ function pstPass2( $text, &$user )
1829         {
1830                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1831
1832                 # Variable replacement
1833                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1834                 $text = $this->replaceVariables( $text );
1835
1836                 # Signatures
1837                 #
1838                 $n = $user->getName();
1839                 $k = $user->getOption( "nickname" );
1840                 if ( "" == $k ) { $k = $n; }
1841                 if(isset($wgLocaltimezone)) {
1842                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1843                 }
1844                 /* Note: this is an ugly timezone hack for the European wikis */
1845                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1846                   " (" . date( "T" ) . ")";
1847                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1848
1849                 $text = preg_replace( "/~~~~~/", $d, $text );
1850                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1851                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1852                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1853                   Namespace::getUser() ) . ":$n|$k]]", $text );
1854
1855                 # Context links: [[|name]] and [[name (context)|]]
1856                 #
1857                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1858                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1859                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1860                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1861
1862                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1863                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1864                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1865                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1866                                                                                                                 # [[ns:page (cont)|]]
1867                 $context = "";
1868                 $t = $this->mTitle->getText();
1869                 if ( preg_match( $conpat, $t, $m ) ) {
1870                         $context = $m[2];
1871                 }
1872                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1873                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1874                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1875
1876                 if ( "" == $context ) {
1877                         $text = preg_replace( $p2, "[[\\1]]", $text );
1878                 } else {
1879                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1880                 }
1881
1882                 /*
1883                 $mw =& MagicWord::get( MAG_SUBST );
1884                 $wgCurParser = $this->fork();
1885                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1886                 $this->merge( $wgCurParser );
1887                 */
1888
1889                 # Trim trailing whitespace
1890                 # MAG_END (__END__) tag allows for trailing
1891                 # whitespace to be deliberately included
1892                 $text = rtrim( $text );
1893                 $mw =& MagicWord::get( MAG_END );
1894                 $mw->matchAndRemove( $text );
1895
1896                 return $text;
1897         }
1898
1899         # Set up some variables which are usually set up in parse()
1900         # so that an external function can call some class members with confidence
1901         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1902         {
1903                 $this->mTitle =& $title;
1904                 $this->mOptions = $options;
1905                 $this->mOutputType = $outputType;
1906                 if ( $clearState ) {
1907                         $this->clearState();
1908                 }
1909         }
1910
1911         function transformMsg( $text, $options ) {
1912                 global $wgTitle;
1913                 static $executing = false;
1914
1915                 # Guard against infinite recursion
1916                 if ( $executing ) {
1917                         return $text;
1918                 }
1919                 $executing = true;
1920
1921                 $this->mTitle = $wgTitle;
1922                 $this->mOptions = $options;
1923                 $this->mOutputType = OT_MSG;
1924                 $this->clearState();
1925                 $text = $this->replaceVariables( $text );
1926
1927                 $executing = false;
1928                 return $text;
1929         }
1930 }
1931
# Container for the result of a parse: the output text together with the
# language links, category links and "contains old magic" flag that go with it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor. Every argument is optional: empty text, empty link
        # arrays and a false old-magic flag are the defaults.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        # (presumably the previous value -- confirm against wfSetVar())
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold another ParserOutput into this one: its language links and
        # category links are appended to ours and the old-magic flags are
        # OR-ed together. mText is left untouched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Take the categories from $other. Merging in our own language
                # links here dropped $other's categories and polluted the
                # category list with language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1961
# Settings that steer a parse. The values are copied from site globals and
# from the user's preferences by initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators; each returns whatever wfSetVar()/wfSetRef() returns
        # (presumably the previous value -- confirm against those helpers)
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Factory: build a ParserOptions and fill it in from $user.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() declares its parameter by reference, so no
                # "&" is needed at the call site. Call-time pass-by-reference is
                # deprecated and a fatal error from PHP 5.4.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill in every option: the site-wide switches come from the $wg*
        # globals, the rest from the user's skin and preferences.
        # When $userInput is empty a fresh User object, flagged via
        # setLoaded( true ), is used instead.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                # Site configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user preferences
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2034
# Regex callback used in Parser::replaceVariables.
# Hands the match array to braceSubstitution() on the parser currently held
# in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2041
2042 ?>