includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Converts wikitext to HTML.
   8 #
   9 # Globals used:
  10 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
  11 #
  12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  13 #
  14 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  15 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  16 #               $wgLocaltimezone
  17 #
  18 #      * only within ParserOptions
  19
  20 class Parser
  21 {
  22         # Cleared with clearState():
  23         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
  24
  25         # Temporary:
  26         var $mOptions, $mTitle;
  27
  # Constructor. Puts the parser into its initial state via clearState().
  #
  # PHP 8 no longer treats a method named after the class as a constructor,
  # so the real work lives in __construct(). It is declared before the
  # PHP 4 style constructor on purpose.
  function __construct()
  {
      $this->clearState();
  }

  # PHP 4 style constructor, kept so that PHP 4 and any code calling
  # Parser() explicitly (e.g. parent::Parser()) continue to work.
  function Parser()
  {
      $this->__construct();
  }
  32
  # Resets the members declared as "Cleared with clearState()":
  # a new ParserOutput, autonumber counter 0, empty last-section string,
  # and both mDTopen and mStripState set to false.
  function clearState()
  {
      $this->mStripState = false;
      $this->mDTopen = false;
      $this->mLastSection = "";
      $this->mAutonumber = 0;
      $this->mOutput = new ParserOutput;
  }
  41
  42         # First pass--just handle <nowiki> sections, pass the rest off
  43         # to doWikiPass2() which does all the real work.
  44         #
  45         # Returns a ParserOutput
  46         #
  # Converts wikitext to HTML and returns the parser's ParserOutput object.
  #
  #   $text        wikitext to convert
  #   $title       title object; stored by reference in $this->mTitle
  #   $options     options object; stored in $this->mOptions
  #   $linestart   passed through unchanged to doWikiPass2()
  #   $clearState  when true, clearState() is called before parsing
  #
  # Sequence: strip() replaces <nowiki>/<math>/<pre> sections by placeholders,
  # doWikiPass2() converts the remaining markup, unstrip() puts the stripped
  # sections back, and the result is stored with $this->mOutput->setText().
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
      $fname = "Parser::parse";
      wfProfileIn( $fname );

      if ( $clearState ) {
          $this->clearState();
      }

      $this->mOptions = $options;
      $this->mTitle =& $title;

      # strip() fills $this->mStripState, which unstrip() needs afterwards
      $text = $this->strip( $text, $this->mStripState, true );
      $text = $this->doWikiPass2( $text, $linestart );
      $text = $this->unstrip( $text, $this->mStripState );

      $this->mOutput->setText( $text );
      wfProfileOut( $fname );
      return $this->mOutput;
  }
  68
  # Returns a random lowercase hexadecimal string made of two
  # mt_rand() values (each in 0..0x7fffffff) converted with dechex()
  # and concatenated.
  /* static */ function getRandomString()
  {
      $firstHalf = dechex( mt_rand( 0, 0x7fffffff ) );
      $secondHalf = dechex( mt_rand( 0, 0x7fffffff ) );

      return $firstHalf . $secondHalf;
  }
  73
  74         # Strips <nowiki>, <pre> and <math>
  75         # Returns the text, and fills an array with data needed in unstrip()
  76         #
  # Replaces <nowiki>, <math> and <pre> sections by placeholder tokens and
  # returns the remaining text. <math> is only handled when
  # $this->mOptions->getUseTeX() is true.
  #
  #   $text    wikitext to process
  #   $state   overwritten with a fresh array holding, per tag kind, the
  #            random placeholder prefix (*unq), the number of sections
  #            found (*secs) and the replacement texts (*list, 1-based)
  #   $render  true: store the HTML that should finally appear
  #            false: store the section wrapped in its original tags
  #
  # A placeholder is the prefix followed by the section number as eight
  # uppercase hex digits. An opening tag without a closing tag swallows
  # the rest of the text.
  function strip( $text, &$state, $render = true )
  {
      $state = array(
          'nwlist' => array(),
          'nwsecs' => 0,
          'nwunq' => Parser::getRandomString(),
          'mathlist' => array(),
          'mathsecs' => 0,
          'mathunq' => Parser::getRandomString(),
          'prelist' => array(),
          'presecs' => 0,
          'preunq' => Parser::getRandomString()
      );

      $stripped = "";
      $stripped2 = "";
      $stripped3 = "";

      # Replace any instances of the placeholders that already occur in the
      # input (presumably so that they cannot be mistaken for real ones)
      $text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
      $text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
      $text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );

      # Pass 1: <nowiki>, $text -> $stripped
      while ( "" != $text ) {
          $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
          $stripped .= $p[0];
          if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
              $text = "";
          } else {
              $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
              ++$state['nwsecs'];

              if ( $render ) {
                  $state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
              } else {
                  $state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
              }

              $stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
              # No closing tag: nothing is left to scan (avoids reading
              # the non-existent $q[1])
              $text = ( count( $q ) < 2 ) ? "" : $q[1];
          }
      }

      # Pass 2: <math>, $stripped -> $stripped2
      if( $this->mOptions->getUseTeX() ) {
          while ( "" != $stripped ) {
              $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
              $stripped2 .= $p[0];
              if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                  $stripped = "";
              } else {
                  $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
                  ++$state['mathsecs'];

                  if ( $render ) {
                      $state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
                  } else {
                      $state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
                  }

                  $stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
                  $stripped = ( count( $q ) < 2 ) ? "" : $q[1];
              }
          }
      } else {
          $stripped2 = $stripped;
      }

      # Pass 3: <pre>, $stripped2 -> $stripped3
      while ( "" != $stripped2 ) {
          $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
          $stripped3 .= $p[0];
          if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
              $stripped2 = "";
          } else {
              $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
              ++$state['presecs'];

              if ( $render ) {
                  $state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
              } else {
                  $state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
              }

              $stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
              $stripped2 = ( count( $q ) < 2 ) ? "" : $q[1];
          }
      }
      return $stripped3;
  }
 165
 # Puts the sections removed by strip() back into $text.
 #
 #   $text   text containing placeholders
 #   $state  the state array filled by strip()
 #
 # Placeholders are substituted kind by kind in the order pre, math,
 # nowiki; within one kind in ascending section number.
 function unstrip( $text, &$state )
 {
     $kinds = array( 'pre', 'math', 'nw' );

     foreach ( $kinds as $kind ) {
         $n = 1;
         while ( $n <= $state[$kind . 'secs'] ) {
             $placeholder = $state[$kind . 'unq'] . sprintf( "%08X", $n );
             $text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
             ++$n;
         }
     }

     return $text;
 }
 181
 # Builds the HTML listing that doWikiPass2() appends to a category page:
 # a "subcategories" section and an articles section, each a comma
 # separated list of links.
 #
 # Always returns a string. It is "" when category magic is switched off
 # in the options or when the part of the title text before the first ":"
 # is not the (ucfirst'ed) "category" message.
 #
 # The skin comes from the parser options, like everywhere else in this
 # class, instead of from the global $wgUser.
 function categoryMagic ()
 {
     global $wgLang ;

     if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;

     # The ID is interpolated into the SQL below, so force it to an integer
     $id = intval ( $this->mTitle->getArticleID() ) ;
     $cat = ucfirst ( wfMsg ( "category" ) ) ;
     $ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
     if ( $cat != $ti[0] ) return "" ;
     # Text after the colon; empty when the title has no colon at all
     $catname = ( count ( $ti ) == 2 ) ? $ti[1] : "" ;

     $r = "<br break=all>\n" ;

     $articles = array() ;
     $children = array() ;

     $sk =& $this->mOptions->getSkin() ;

     # Pages linking to this category are looked up in brokenlinks
     $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;

     $res = wfQuery ( $sql, DB_READ ) ;
     while ( $x = wfFetchObject ( $res ) )
     {
         # Rebuild the prefixed title "Namespace:Title"
         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
         if ( $t != "" ) $t .= ":" ;
         $t .= $x->cur_title ;

         # Pages whose prefix equals the category prefix are subcategories
         $y = explode ( ":" , $t , 2 ) ;
         if ( count ( $y ) == 2 && $y[0] == $cat ) {
             array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
         } else {
             array_push ( $articles , $sk->makeLink ( $t ) ) ;
         }
     }
     wfFreeResult ( $res ) ;

     # Children
     if ( count ( $children ) > 0 )
     {
         asort ( $children ) ;
         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
         $r .= implode ( ", " , $children ) ;
     }

     # Articles
     if ( count ( $articles ) > 0 )
     {
         asort ( $articles ) ;
         $h =  wfMsg( "category_header", $catname );
         $r .= "<h2>{$h}</h2>\n" ;
         $r .= implode ( ", " , $articles ) ;
     }

     return $r ;
 }
 251
 # Returns the list of attribute names that are allowed on HTML tags
 # (no scripting attributes). The list is returned in a fixed order and
 # contains "width" twice, once in the general and once in the table group.
 function getHTMLattrs ()
 {
     # General attributes, BR, HR, BLOCKQUOTE/Q and FONT
     $general = array(
         "title", "align", "lang", "dir", "width", "height",
         "bgcolor", "clear", "noshade", "cite", "size", "face", "color"
     );

     # For various lists, mostly deprecated but safe
     $lists = array( "type", "start", "value", "compact" );

     # Tables
     $tables = array(
         "summary", "width", "border", "frame", "rules",
         "cellspacing", "cellpadding", "valign", "char",
         "charoff", "colgroup", "col", "span", "abbr", "axis",
         "headers", "scope", "rowspan", "colspan"
     );

     # For CSS
     $css = array( "id", "class", "name", "style" );

     return array_merge ( $general , $lists , $tables , $css ) ;
 }
 268
 # Cleans the attribute part of an HTML tag.
 #
 #   $t  the text between the tag name and the closing ">"
 #
 # Returns the trimmed attribute string containing only attributes listed
 # by getHTMLattrs(), or "" when $t is blank or a suspicious style
 # attribute is found.
 #
 # The filtering is done with preg_replace_callback(). The former
 # preg_replace() with the /e modifier evaluated the attribute value
 # inside a double-quoted PHP string, so wikitext such as
 # a={${phpinfo()}} could execute code; /e is also gone in PHP 7.
 function fixTagAttributes ( $t )
 {
     if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

     # Strip non-approved attributes from the tag
     $t = preg_replace_callback(
         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
         array( $this, "filterTagAttribute" ),
         $t );

     # Strip javascript "expression" from stylesheets. Brute force approach:
     # If anything offensive is found, all attributes of the HTML tag are dropped
     if( preg_match(
         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
         wfMungeToUtf8( $t ) ) )
     {
         $t="";
     }

     return trim ( $t ) ;
 }

 # Callback for fixTagAttributes().
 #
 #   $m  match array: $m[1] is the attribute name, $m[3] (absent when the
 #       attribute has no value) is the value including any quotes
 #
 # Returns "name", "name=value", or "" when the name is not whitelisted.
 /* private */ function filterTagAttribute ( $m )
 {
     if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() , true ) ) {
         return "" ;
     }

     # Trailing unmatched groups are left out of the match array
     $value = isset ( $m[3] ) ? $m[3] : "" ;
     if ( $value !== "" ) {
         return $m[1] . "=" . $value ;
     }
     return $m[1] ;
 }
 291
 # Converts wiki table markup to HTML, one input line at a time.
 #
 #   $t  text, lines separated by "\n"
 #
 # Recognised line starts (only "{|" is recognised outside a table):
 #   {|   opens a table, the rest of the line is its attribute list
 #   |}   closes the innermost table
 #   |-   starts a new row, the rest of the line is the row's attributes
 #   |+   caption
 #   |    data cell(s), several on one line separated by "||"
 #   !    header cell(s), separated by "!!" or "||"
 # Inside a cell, text before the first single "|" is the attribute list.
 # Attribute lists are passed through fixTagAttributes().
 #
 # Tables still open at the end of the text are closed, using the tag of
 # the cell that is actually open (TD, TH or CAPTION).
 #
 # Returns the converted text.
 function doTableStuff ( $t )
 {
     $t = explode ( "\n" , $t ) ;

     # Parallel stacks, one entry per currently open (nested) table
     $td = array () ;  # Is currently a td tag open?
     $ltd = array () ; # Was it TD or TH?
     $tr = array () ;  # Is currently a tr tag open?
     $ltr = array () ; # tr attributes

     foreach ( $t AS $k => $x )
     {
         $x = rtrim ( $x ) ;
         $fc = substr ( $x , 0 , 1 ) ;
         $start = substr ( $x , 0 , 2 ) ;

         if ( "{|" == $start )
         {
             $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $tr , false ) ;
             array_push ( $ltr , "" ) ;
         }
         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
         else if ( "|}" == $start )
         {
             $z = "</table>\n" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
         }
         else if ( "|-" == $start ) # Allows for |---------------
         {
             $x = substr ( $x , 1 ) ;
             while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
             $z = "" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
             # The <tr> itself is written when the first cell arrives
             array_push ( $tr , false ) ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
         }
         else if ( "|" == $fc || "!" == $fc ) # Cells; "|+" is a caption
         {
             if ( "|+" == $start )
             {
                 $fc = "+" ;
                 $x = substr ( $x , 1 ) ;
             }
             $after = substr ( $x , 1 ) ;
             if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
             $after = explode ( "||" , $after ) ;
             $t[$k] = "" ;
             foreach ( $after AS $theline )
             {
                 $z = "" ;
                 if ( $fc != "+" )
                 {
                     # Open the row if it is not open yet
                     $tra = array_pop ( $ltr ) ;
                     if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                     array_push ( $tr , true ) ;
                     array_push ( $ltr , "" ) ;
                 }

                 # Close the previous cell, if any
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                 if ( $fc == "|" ) $l = "TD" ;
                 else if ( $fc == "!" ) $l = "TH" ;
                 else if ( $fc == "+" ) $l = "CAPTION" ;
                 else $l = "" ;
                 array_push ( $ltd , $l ) ;
                 $y = explode ( "|" , $theline , 2 ) ;
                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                 $t[$k] .= $y ;
                 array_push ( $td , true ) ;
             }
         }
     }

     # Closing open td, tr && table
     while ( count ( $td ) > 0 )
     {
         $l = array_pop ( $ltd ) ;
         if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
         array_pop ( $ltr ) ;
         $t[] = "</table>" ;
     }

     $t = implode ( "\n" , $t ) ;
     return $t ;
 }
 391
 392         # Well, OK, it's actually about 14 passes.  But since all the
 393         # hard lifting is done inside PHP's regex code, it probably
 394         # wouldn't speed things up much to add a real parser.
 395         #
 # Runs the conversion passes on text whose <nowiki>/<math>/<pre>
 # sections have already been replaced by placeholders.
 #
 #   $text       text to convert
 #   $linestart  passed through to doBlockLevels()
 #
 # The passes run in this order: HTML tag clean-up, variables, headings,
 # block levels, optional date reformatting, external links, internal
 # links, tables, heading formatting, skin transformation, and finally
 # the category listing is appended.
 #
 # Returns the converted text. Profiled under "Parser::doWikiPass2".
 function doWikiPass2( $text, $linestart )
 {
     $fname = "Parser::doWikiPass2";
     wfProfileIn( $fname );

     $text = $this->removeHTMLtags( $text );
     $text = $this->replaceVariables( $text );

     # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
     $text = str_replace ( "<HR>", "<hr>", $text );

     $text = $this->doHeadings( $text );
     $text = $this->doBlockLevels( $text, $linestart );

     if($this->mOptions->getUseDynamicDates()) {
         global $wgDateFormatter;
         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
     }

     # External links have to be done before the internal ones
     $text = $this->replaceExternalLinks( $text );
     $text = $this->replaceInternalLinks ( $text );
     $text = $this->doTableStuff ( $text ) ;

     $text = $this->formatHeadings( $text );

     $sk =& $this->mOptions->getSkin();
     $text = $sk->transformContent( $text );
     $text .= $this->categoryMagic () ;

     wfProfileOut( $fname );
     return $text;
 }
 428
 429
 # Turns lines of the form "== text ==" into <hN>text</hN>, where N is
 # the number of "=" signs on each side (1 to 6). The longest markers are
 # handled first; the heading text may not contain "=".
 /* private */ function doHeadings( $text )
 {
     $level = 6;
     while ( $level >= 1 ) {
         $marks = str_repeat( "=", $level );
         $pattern = "/^{$marks}([^=]+){$marks}(\\s|$)/m";
         $replacement = "<h{$level}>\\1</h{$level}>\\2";
         $text = preg_replace( $pattern, $replacement, $text );
         --$level;
     }
     return $text;
 }
 439
 440         # Note: we have to do external links before the internal ones,
 441         # and otherwise take great care in the order of things here, so
 442         # that we don't end up interpreting some URLs twice.
 443
 # Converts external links for every supported protocol by calling
 # subReplaceExternalLinks() once per protocol, in the order
 # http, https, ftp, irc, gopher, news, mailto.
 #
 #   $text  text to convert
 #
 # Returns the converted text. Profiled under
 # "Parser::replaceExternalLinks".
 /* private */ function replaceExternalLinks( $text )
 {
     $fname = "Parser::replaceExternalLinks";
     wfProfileIn( $fname );

     # protocol => value of the $autonumber argument
     $protocols = array(
         "http" => true,
         "https" => true,
         "ftp" => false,
         "irc" => false,
         "gopher" => false,
         "news" => false,
         "mailto" => false
     );
     foreach ( $protocols as $protocol => $autonumber ) {
         $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
     }

     wfProfileOut( $fname );
     return $text;
 }
 458
 # Converts the external links of one protocol in $s to HTML.
 #
 #   $s           text to convert
 #   $protocol    protocol name without the colon, e.g. "http"
 #   $autonumber  true: a bracketed link without a label is shown as a
 #                running number "[n]" (counter: $this->mAutonumber);
 #                false: it is shown as its escaped URL.
 #                Free image URLs are only turned into images when this is
 #                true and the options allow external images.
 #
 # Two stages: first free URLs (not preceded by "[") are replaced by
 # regex, then the text is split at "[protocol:" and each bracketed link
 # is handled in turn.
 #
 # Returns the converted text.
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
     # Stands in for the protocol inside generated markup during the regex
     # stage and is swapped back by str_replace() below (presumably so the
     # second regex does not match URLs that were already converted)
     $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
     # Characters allowed in a URL (contents of a regex character class)
     $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

     # This is the list of separators that should be ignored if they
     # are the last character of a URL but that should be included
     # if they occur within the URL, e.g. "go to www.foo.com, where .."
     # in this case, the last comma should not become part of the URL,
     # but in "www.foo.com/123,2342,32.htm" it should.
     $sep = ",;\.:";
     # Characters allowed in the file name part of an image URL
     $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
     $images = "gif|png|jpg|jpeg";

     # PLEASE NOTE: The curly braces { } are not part of the regex,
     # they are interpreted as part of the string (used to tell PHP
     # that the content of the string should be inserted there).

     # Free image URL: \3 = path, \4 = file name, \5 = extension
     $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
       "((?i){$images})([^{$uc}]|$)/";

     # Any other free URL: \3 = everything after "protocol:"
     $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
     $sk =& $this->mOptions->getSkin();

     if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
     }
     $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
       $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
       "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
       "</a>\\5", $s );
     $s = str_replace( $unique, $protocol, $s );

     # Bracketed links. The leading blank guarantees a non-empty first
     # piece even when the text starts with "[protocol:"; it is removed
     # again by substr().
     $a = explode( "[{$protocol}:", " " . $s );
     $s = array_shift( $a );
     $s = substr( $s, 1 );

     # [url]        -> $m[1] = url, $m[2] = trailing text
     $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
     # [url label]  -> $m[1] = url, $m[2] = label, $m[3] = trailing text
     $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

     foreach ( $a as $line ) {
         if ( preg_match( $e1, $line, $m ) ) {
             $link = "{$protocol}:{$m[1]}";
             $trail = $m[2];
             if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
             else { $text = wfEscapeHTML( $link ); }
         } else if ( preg_match( $e2, $line, $m ) ) {
             $link = "{$protocol}:{$m[1]}";
             $text = $m[2];
             $trail = $m[3];
         } else {
             # Not a well-formed link: put the piece back unchanged
             $s .= "[{$protocol}:" . $line;
             continue;
         }
         # In printable mode the URL is shown in parentheses after the link
         if ( $this->mOptions->getPrintable() ) $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
         else $paren = "";
         $la = $sk->getExternalLinkAttributes( $link, $text );
         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";

     }
     return $s;
 }
 521
 # Handles a ''' token: toggles bold.
 #
 #   $state  array with the entries "em" and "strong"; each is FALSE or
 #           the position at which the tag was opened. "strong" is updated.
 #   $token  the token; $token["pos"] is stored when bold is opened
 #
 # Returns the HTML to emit. When italics were opened after the bold
 # being closed ( ''' lala ''lala ''' ), the <em> is closed and reopened
 # around the </strong>.
 /* private */ function handle3Quotes( &$state, $token )
 {
     if ( !$state["strong"] ) {
         $state["strong"] = $token["pos"];
         return "<strong>";
     }

     $emOpenedInside = ( $state["em"] && $state["em"] > $state["strong"] );
     $state["strong"] = FALSE;

     return $emOpenedInside ? "</em></strong><em>" : "</strong>";
 }
 539
 # Handles a '' token: toggles italics.
 #
 #   $state  array with the entries "em" and "strong"; each is FALSE or
 #           the position at which the tag was opened. "em" is updated.
 #   $token  the token; $token["pos"] is stored when italics are opened
 #
 # Returns the HTML to emit. When bold was opened after the italics
 # being closed ( ''lala'''lala'' ....''' ), the <strong> is closed and
 # reopened around the </em>.
 /* private */ function handle2Quotes( &$state, $token )
 {
     if ( !$state["em"] ) {
         $state["em"] = $token["pos"];
         return "<em>";
     }

     $strongOpenedInside = ( $state["strong"] && $state["strong"] > $state["em"] );
     $state["em"] = FALSE;

     return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
 }
 557
 # Handles a ''''' token: toggles bold and italics together.
 #
 #   $state  array with the entries "em" and "strong"; each is FALSE or
 #           the position at which the tag was opened. Both may be updated.
 #   $token  the token; $token["pos"] is stored for every tag opened
 #
 # Returns the HTML to emit:
 #   both open    -> both are closed, the one opened later first
 #   only em      -> </em><strong>
 #   only strong  -> </strong><em>
 #   none open    -> <strong><em>
 /* private */ function handle5Quotes( &$state, $token )
 {
     if ( $state["em"] && $state["strong"] ) {
         if ( $state["em"] < $state["strong"] ) {
             # <em> was opened first, so <strong> is the inner tag
             $s = "</strong></em>";
         } else {
             $s = "</em></strong>";
         }
         $state["strong"] = $state["em"] = FALSE;
     } elseif ( $state["em"] ) {
         $s = "</em><strong>";
         $state["em"] = FALSE;
         $state["strong"] = $token["pos"];
     } elseif ( $state["strong"] ) {
         $s = "</strong><em>";
         $state["strong"] = FALSE;
         $state["em"] = $token["pos"];
     } else { # not $em and not $strong
         $s = "<strong><em>";
         $state["strong"] = $state["em"] = $token["pos"];
     }
     return $s;
 }
 581
 582         /* private */ function replaceInternalLinks( $str )
 583         {
 584                 global $wgLang; # for language specific parser hook
 585
 586                 $tokenizer=Tokenizer::newFromString( $str );
 587                 $tokenStack = array();
 588
 589                 $s="";
 590                 $state["em"]      = FALSE;
 591                 $state["strong"]  = FALSE;
 592                 $tagIsOpen = FALSE;
 593
 594                 # The tokenizer splits the text into tokens and returns them one by one.
 595                 # Every call to the tokenizer returns a new token.
 596                 while ( $token = $tokenizer->nextToken() )
 597                 {
 598                         switch ( $token["type"] )
 599                         {
 600                                 case "text":
 601                                         # simple text with no further markup
 602                                         $txt = $token["text"];
 603                                         break;
 604                                 case "[[":
 605                                         # link opening tag.
 606                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 607                                         $tagIsOpen = TRUE;
 608                                         array_push( $tokenStack, $token );
 609                                         $txt="";
 610                                         break;
 611                                 case "]]":
 612                                         # link close tag.
 613                                         # get text from stack, glue it together, and call the code to handle a
 614                                         # link
 615                                         if ( count( $tokenStack ) == 0 )
 616                                         {
 617                                                 # stack empty. Found a ]] without an opening [[
 618                                                 $txt = "]]";
 619                                         } else {
 620                                                 $linkText = "";
 621                                                 $lastToken = array_pop( $tokenStack );
 622                                                 while ( $lastToken["type"] != "[[" )
 623                                                 {
 624                                                         $linkText = $lastToken["text"] . $linkText;
 625                                                         $lastToken = array_pop( $tokenStack );
 626                                                 }
 627                                                 $txt = $linkText ."]]";
 628                                                 $prefix = $lastToken["text"];
 629                                                 $nextToken = $tokenizer->previewToken();
 630                                                 if ( $nextToken["type"] == "text" )
 631                                                 {
 632                                                         # Preview just looks at it. Now we have to fetch it.
 633                                                         $nextToken = $tokenizer->nextToken();
 634                                                         $txt .= $nextToken["text"];
 635                                                 }
 636                                                 $txt = $this->handleInternalLink( $txt, $prefix );
 637                                         }
 638                                         $tagIsOpen = (count( $tokenStack ) != 0);
 639                                         break;
 640                                 case "----":
 641                                         $txt = "\n<hr>\n";
 642                                         break;
 643                                 case "'''":
 644                                         # This and the three next ones handle quotes
 645                                         $txt = $this->handle3Quotes( $state, $token );
 646                                         break;
 647                                 case "''":
 648                                         $txt = $this->handle2Quotes( $state, $token );
 649                                         break;
 650                                 case "'''''":
 651                                         $txt = $this->handle5Quotes( $state, $token );
 652                                         break;
 653                                 case "":
 654                                         # empty token
 655                                         $txt="";
 656                                         break;
 657                                 case "RFC ":
 658                                         if ( $tagIsOpen ) {
 659                                                 $txt = "RFC ";
 660                                         } else {
 661                                                 $txt = $this->doMagicRFC( $tokenizer );
 662                                         }
 663                                         break;
 664                                 case "ISBN ":
 665                                         if ( $tagIsOpen ) {
 666                                                 $txt = "ISBN ";
 667                                         } else {
 668                                                 $txt = $this->doMagicISBN( $tokenizer );
 669                                         }
 670                                         break;
 671                                 default:
 672                                         # Call language specific Hook.
 673                                         $txt = $wgLang->processToken( $token, $tokenStack );
 674                                         if ( NULL == $txt ) {
 675                                                 # An unknown token. Highlight.
 676                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 677                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 678                                         }
 679                                         break;
 680                         }
 681                         # If we're parsing the interior of a link, don't append the interior to $s,
 682                         # but push it to the stack so it can be processed when a ]] token is found.
 683                         if ( $tagIsOpen  && $txt != "" ) {
 684                                 $token["type"] = "text";
 685                                 $token["text"] = $txt;
 686                                 array_push( $tokenStack, $token );
 687                         } else {
 688                                 $s .= $txt;
 689                         }
 690                 } #end while
 691                 if ( count( $tokenStack ) != 0 )
 692                 {
 693                         # still objects on stack. opened [[ tag without closing ]] tag.
 694                         $txt = "";
 695                         while ( $lastToken = array_pop( $tokenStack ) )
 696                         {
 697                                 if ( $lastToken["type"] == "text" )
 698                                 {
 699                                         $txt = $lastToken["text"] . $txt;
 700                                 } else {
 701                                         $txt = $lastToken["type"] . $txt;
 702                                 }
 703                         }
 704                         $s .= $txt;
 705                 }
 706                 return $s;
 707         }
 708
 709         /* private */ function handleInternalLink( $line, $prefix )
 710         {
 711                 global $wgLang, $wgLinkCache;
 712                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 713                 static $fname = "OutputPage::replaceInternalLinks" ;
 714                 wfProfileIn( $fname );
 715
 716                 wfProfileIn( "$fname-setup" );
 717                 static $tc = FALSE;
 718                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 719                 $sk =& $this->mOptions->getSkin();
 720
 721                 # Match a link having the form [[namespace:link|alternate]]trail
 722                 static $e1 = FALSE;
 723                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 724                 # Match the end of a line for a word that's not followed by whitespace,
 725                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 726                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 727                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 728                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 729
 730
 731                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 732                 static $image = FALSE;
 733                 static $special = FALSE;
 734                 static $media = FALSE;
 735                 static $category = FALSE;
 736                 if ( !$image ) { $image = Namespace::getImage(); }
 737                 if ( !$special ) { $special = Namespace::getSpecial(); }
 738                 if ( !$media ) { $media = Namespace::getMedia(); }
 739                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 740
 741                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 742
 743                 wfProfileOut( "$fname-setup" );
 744
 745                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 746                         $text = $m[2];
 747                         $trail = $m[3];
 748                 } else { # Invalid form; output directly
 749                         $s .= $prefix . "[[" . $line ;
 750                         return $s;
 751                 }
 752
 753                 /* Valid link forms:
 754                 Foobar -- normal
 755                 :Foobar -- override special treatment of prefix (images, language links)
 756                 /Foobar -- convert to CurrentPage/Foobar
 757                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 758                 */
 759                 $c = substr($m[1],0,1);
 760                 $noforce = ($c != ":");
 761                 if( $c == "/" ) { # subpage
 762                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 763                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 764                                 $noslash=$m[1];
 765                         } else {
 766                                 $noslash=substr($m[1],1);
 767                         }
 768                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 769                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 770                                 if( "" == $text ) {
 771                                         $text= $m[1];
 772                                 } # this might be changed for ugliness reasons
 773                         } else {
 774                                 $link = $noslash; # no subpage allowed, use standard link
 775                         }
 776                 } elseif( $noforce ) { # no subpage
 777                         $link = $m[1];
 778                 } else {
 779                         $link = substr( $m[1], 1 );
 780                 }
 781                 if( "" == $text )
 782                         $text = $link;
 783
 784                 $nt = Title::newFromText( $link );
 785                 if( !$nt ) {
 786                         $s .= $prefix . "[[" . $line;
 787                         return $s;
 788                 }
 789                 $ns = $nt->getNamespace();
 790                 $iw = $nt->getInterWiki();
 791                 if( $noforce ) {
 792                         if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 793                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 794                                 $s .= $prefix . $trail;
 795                                 return $s;
 796                         }
 797                         if( $ns == $image ) {
 798                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 799                                 $wgLinkCache->addImageLinkObj( $nt );
 800                                 return $s;
 801                         }
 802                 }
 803                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 804                     ( strpos( $link, "#" ) == FALSE ) ) {
 805                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 806                         return $s;
 807                 }
 808                 if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
 809                         $t = explode ( ":" , $nt->getText() ) ;
 810                         array_shift ( $t ) ;
 811                         $t = implode ( ":" , $t ) ;
 812                         $t = $wgLang->ucFirst ( $t ) ;
 813 #                       $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
 814                         $nnt = Title::newFromText ( $category.":".$t ) ;
 815                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 816                         $this->mCategoryLinks[] = $t ;
 817                         $s .= $prefix . $trail ;
 818                         return $s ;
 819                 }
 820                 if( $ns == $media ) {
 821                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 822                         $wgLinkCache->addImageLinkObj( $nt );
 823                         return $s;
 824                 } elseif( $ns == $special ) {
 825                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 826                         return $s;
 827                 }
 828                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 829
 830                 wfProfileOut( $fname );
 831                 return $s;
 832         }
 833
 834         # Some functions here used by doBlockLevels()
 835         #
 836         /* private */ function closeParagraph()
 837         {
 838                 $result = "";
 839                 if ( 0 != strcmp( "p", $this->mLastSection ) &&
 840                   0 != strcmp( "", $this->mLastSection ) ) {
 841                         $result = "</" . $this->mLastSection  . ">";
 842                 }
 843                 $this->mLastSection = "";
 844                 return $result."\n";
 845         }
 846         # getCommon() returns the length of the longest common substring
 847         # of both arguments, starting at the beginning of both.
 848         #
 849         /* private */ function getCommon( $st1, $st2 )
 850         {
 851                 $fl = strlen( $st1 );
 852                 $shorter = strlen( $st2 );
 853                 if ( $fl < $shorter ) { $shorter = $fl; }
 854
 855                 for ( $i = 0; $i < $shorter; ++$i ) {
 856                         if ( $st1{$i} != $st2{$i} ) { break; }
 857                 }
 858                 return $i;
 859         }
 860         # These next three functions open, continue, and close the list
 861         # element appropriate to the prefix character passed into them.
 862         #
 863         /* private */ function openList( $char )
 864     {
 865                 $result = $this->closeParagraph();
 866
 867                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 868                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 869                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 870                 else if ( ";" == $char ) {
 871                         $result .= "<dl><dt>";
 872                         $this->mDTopen = true;
 873                 }
 874                 else { $result = "<!-- ERR 1 -->"; }
 875
 876                 return $result;
 877         }
 878
 879         /* private */ function nextItem( $char )
 880         {
 881                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 882                 else if ( ":" == $char || ";" == $char ) {
 883                         $close = "</dd>";
 884                         if ( $this->mDTopen ) { $close = "</dt>"; }
 885                         if ( ";" == $char ) {
 886                                 $this->mDTopen = true;
 887                                 return $close . "<dt>";
 888                         } else {
 889                                 $this->mDTopen = false;
 890                                 return $close . "<dd>";
 891                         }
 892                 }
 893                 return "<!-- ERR 2 -->";
 894         }
 895
 896         /* private */function closeList( $char )
 897         {
 898                 if ( "*" == $char ) { $text = "</li></ul>"; }
 899                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 900                 else if ( ":" == $char ) {
 901                         if ( $this->mDTopen ) {
 902                                 $this->mDTopen = false;
 903                                 $text = "</dt></dl>";
 904                         } else {
 905                                 $text = "</dd></dl>";
 906                         }
 907                 }
 908                 else {  return "<!-- ERR 3 -->"; }
 909                 return $text."\n";
 910         }
 911
 912         /* private */ function doBlockLevels( $text, $linestart )
 913         {
 914                 $fname = "OutputPage::doBlockLevels";
 915                 wfProfileIn( $fname );
 916                 # Parsing through the text line by line.  The main thing
 917                 # happening here is handling of block-level elements p, pre,
 918                 # and making lists from lines starting with * # : etc.
 919                 #
 920                 $a = explode( "\n", $text );
 921                 $text = $lastPref = "";
 922                 $this->mDTopen = $inBlockElem = false;
 923
 924                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 925                 foreach ( $a as $t ) {
 926                         if ( "" != $text ) { $text .= "\n"; }
 927
 928                         $oLine = $t;
 929                         $opl = strlen( $lastPref );
 930                         $npl = strspn( $t, "*#:;" );
 931                         $pref = substr( $t, 0, $npl );
 932                         $pref2 = str_replace( ";", ":", $pref );
 933                         $t = substr( $t, $npl );
 934
 935                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 936                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 937
 938                                 if ( ";" == substr( $pref, -1 ) ) {
 939                                         $cpos = strpos( $t, ":" );
 940                                         if ( ! ( false === $cpos ) ) {
 941                                                 $term = substr( $t, 0, $cpos );
 942                                                 $text .= $term . $this->nextItem( ":" );
 943                                                 $t = substr( $t, $cpos + 1 );
 944                                         }
 945                                 }
 946                         } else if (0 != $npl || 0 != $opl) {
 947                                 $cpl = $this->getCommon( $pref, $lastPref );
 948
 949                                 while ( $cpl < $opl ) {
 950                                         $text .= $this->closeList( $lastPref{$opl-1} );
 951                                         --$opl;
 952                                 }
 953                                 if ( $npl <= $cpl && $cpl > 0 ) {
 954                                         $text .= $this->nextItem( $pref{$cpl-1} );
 955                                 }
 956                                 while ( $npl > $cpl ) {
 957                                         $char = substr( $pref, $cpl, 1 );
 958                                         $text .= $this->openList( $char );
 959
 960                                         if ( ";" == $char ) {
 961                                                 $cpos = strpos( $t, ":" );
 962                                                 if ( ! ( false === $cpos ) ) {
 963                                                         $term = substr( $t, 0, $cpos );
 964                                                         $text .= $term . $this->nextItem( ":" );
 965                                                         $t = substr( $t, $cpos + 1 );
 966                                                 }
 967                                         }
 968                                         ++$cpl;
 969                                 }
 970                                 $lastPref = $pref2;
 971                         }
 972                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
 973                                 if ( preg_match(
 974                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
 975                                         $text .= $this->closeParagraph();
 976                                         $inBlockElem = true;
 977                                 }
 978                                 if ( ! $inBlockElem ) {
 979                                         if ( " " == $t{0} ) {
 980                                                 $newSection = "pre";
 981                                                 # $t = wfEscapeHTML( $t );
 982                                         }
 983                                         else { $newSection = "p"; }
 984
 985                                         if ( 0 == strcmp( "", trim( $oLine ) ) ) {
 986                                                 $text .= $this->closeParagraph();
 987                                                 $text .= "<" . $newSection . ">";
 988                                         } else if ( 0 != strcmp( $this->mLastSection,
 989                                           $newSection ) ) {
 990                                                 $text .= $this->closeParagraph();
 991                                                 if ( 0 != strcmp( "p", $newSection ) ) {
 992                                                         $text .= "<" . $newSection . ">";
 993                                                 }
 994                                         }
 995                                         $this->mLastSection = $newSection;
 996                                 }
 997                                 if ( $inBlockElem &&
 998                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
 999                                         $inBlockElem = false;
1000                                 }
1001                         }
1002                         $text .= $t;
1003                 }
1004                 while ( $npl ) {
1005                         $text .= $this->closeList( $pref2{$npl-1} );
1006                         --$npl;
1007                 }
1008                 if ( "" != $this->mLastSection ) {
1009                         if ( "p" != $this->mLastSection ) {
1010                                 $text .= "</" . $this->mLastSection . ">";
1011                         }
1012                         $this->mLastSection = "";
1013                 }
1014                 wfProfileOut( $fname );
1015                 return $text;
1016         }
1017
1018         /* private */ function replaceVariables( $text )
1019         {
1020                 global $wgLang, $wgCurOut;
1021                 $fname = "OutputPage::replaceVariables";
1022                 wfProfileIn( $fname );
1023
1024                 $magic = array();
1025
1026                 # Basic variables
1027                 # See Language.php for the definition of each magic word
1028                 # As with sigs, this uses the server's local time -- ensure
1029                 # this is appropriate for your audience!
1030
1031                 $magic[MAG_CURRENTMONTH] = date( "m" );
1032                 $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
1033                 $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
1034                 $magic[MAG_CURRENTDAY] = date("j");
1035                 $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
1036                 $magic[MAG_CURRENTYEAR] = date( "Y" );
1037                 $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
1038
1039                 $this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
1040
1041                 $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
1042                 if ( $mw->match( $text ) ) {
1043                         $v = wfNumberOfArticles();
1044                         $text = $mw->replace( $v, $text );
1045                         if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
1046                 }
1047
1048                 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
1049                 # The callbacks are at the bottom of this file
1050                 $wgCurOut = $this;
1051                 $mw =& MagicWord::get( MAG_MSG );
1052                 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
1053                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1054
1055                 $mw =& MagicWord::get( MAG_MSGNW );
1056                 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
1057                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1058
1059                 wfProfileOut( $fname );
1060                 return $text;
1061         }
1062
1063         # Cleans up HTML, removes dangerous tags and attributes
1064         /* private */ function removeHTMLtags( $text )
1065         {
1066                 $fname = "OutputPage::removeHTMLtags";
1067                 wfProfileIn( $fname );
1068                 $htmlpairs = array( # Tags that must be closed
1069                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1070                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1071                         "strike", "strong", "tt", "var", "div", "center",
1072                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1073                         "ruby", "rt" , "rb" , "rp"
1074                 );
1075                 $htmlsingle = array(
1076                         "br", "p", "hr", "li", "dt", "dd"
1077                 );
1078                 $htmlnest = array( # Tags that can be nested--??
1079                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1080                         "dl", "font", "big", "small", "sub", "sup"
1081                 );
1082                 $tabletags = array( # Can only appear inside table
1083                         "td", "th", "tr"
1084                 );
1085
1086                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1087                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1088
1089                 $htmlattrs = $this->getHTMLattrs () ;
1090
1091                 # Remove HTML comments
1092                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1093
1094                 $bits = explode( "<", $text );
1095                 $text = array_shift( $bits );
1096                 $tagstack = array(); $tablestack = array();
1097
1098                 foreach ( $bits as $x ) {
1099                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1100                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1101                           $x, $regs );
1102                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1103                         error_reporting( $prev );
1104
1105                         $badtag = 0 ;
1106                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1107                                 # Check our stack
1108                                 if ( $slash ) {
1109                                         # Closing a tag...
1110                                         if ( ! in_array( $t, $htmlsingle ) &&
1111                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1112                                                 array_push( $tagstack, $ot );
1113                                                 $badtag = 1;
1114                                         } else {
1115                                                 if ( $t == "table" ) {
1116                                                         $tagstack = array_pop( $tablestack );
1117                                                 }
1118                                                 $newparams = "";
1119                                         }
1120                                 } else {
1121                                         # Keep track for later
1122                                         if ( in_array( $t, $tabletags ) &&
1123                                           ! in_array( "table", $tagstack ) ) {
1124                                                 $badtag = 1;
1125                                         } else if ( in_array( $t, $tagstack ) &&
1126                                           ! in_array ( $t , $htmlnest ) ) {
1127                                                 $badtag = 1 ;
1128                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1129                                                 if ( $t == "table" ) {
1130                                                         array_push( $tablestack, $tagstack );
1131                                                         $tagstack = array();
1132                                                 }
1133                                                 array_push( $tagstack, $t );
1134                                         }
1135                                         # Strip non-approved attributes from the tag
1136                                         $newparams = $this->fixTagAttributes($params);
1137
1138                                 }
1139                                 if ( ! $badtag ) {
1140                                         $rest = str_replace( ">", "&gt;", $rest );
1141                                         $text .= "<$slash$t $newparams$brace$rest";
1142                                         continue;
1143                                 }
1144                         }
1145                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1146                 }
1147                 # Close off any remaining tags
1148                 while ( $t = array_pop( $tagstack ) ) {
1149                         $text .= "</$t>\n";
1150                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1151                 }
1152                 wfProfileOut( $fname );
1153                 return $text;
1154         }
1155
1156 /*
1157  *
1158  * This function accomplishes several tasks:
1159  * 1) Auto-number headings if that option is enabled
1160  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1161  * 3) Add a Table of contents on the top for users who have enabled the option
1162  * 4) Auto-anchor headings
1163  *
1164  * It loops through all headlines, collects the necessary data, then splits up the
1165  * string and re-inserts the newly formatted headlines.
1166  *
1167  * */
1168         /* private */ function formatHeadings( $text )
1169         {
1170                 $nh=$this->mOptions->getNumberHeadings();
1171                 $st=$this->mOptions->getShowToc();
1172                 if(!$this->mTitle->userCanEdit()) {
1173                         $es=0;
1174                         $esr=0;
1175                 } else {
1176                         $es=$this->mOptions->getEditSection();
1177                         $esr=$this->mOptions->getEditSectionOnRightClick();
1178                 }
1179
1180                 # Inhibit editsection links if requested in the page
1181                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1182                 if ($esw->matchAndRemove( $text )) {
1183                         $es=0;
1184                 }
1185                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1186                 # do not add TOC
1187                 $mw =& MagicWord::get( MAG_NOTOC );
1188                 if ($mw->matchAndRemove( $text ))
1189                 {
1190                         $st = 0;
1191                 }
1192
1193                 # never add the TOC to the Main Page. This is an entry page that should not
1194                 # be more than 1-2 screens large anyway
1195                 if($this->mTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1196
1197                 # We need this to perform operations on the HTML
1198                 $sk =& $this->mOptions->getSkin();
1199
1200                 # Get all headlines for numbering them and adding funky stuff like [edit]
1201                 # links
1202                 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1203
1204                 # headline counter
1205                 $c=0;
1206
1207                 # Ugh .. the TOC should have neat indentation levels which can be
1208                 # passed to the skin functions. These are determined here
1209                 foreach($matches[3] as $headline) {
1210                         if($level) { $prevlevel=$level;}
1211                         $level=$matches[1][$c];
1212                         if(($nh||$st) && $prevlevel && $level>$prevlevel) {
1213
1214                                 $h[$level]=0; // reset when we enter a new level
1215                                 $toc.=$sk->tocIndent($level-$prevlevel);
1216                                 $toclevel+=$level-$prevlevel;
1217
1218                         }
1219                         if(($nh||$st) && $level<$prevlevel) {
1220                                 $h[$level+1]=0; // reset when we step back a level
1221                                 $toc.=$sk->tocUnindent($prevlevel-$level);
1222                                 $toclevel-=$prevlevel-$level;
1223
1224                         }
1225                         $h[$level]++; // count number of headlines for each level
1226
1227                         if($nh||$st) {
1228                                 for($i=1;$i<=$level;$i++) {
1229                                         if($h[$i]) {
1230                                                 if($dot) {$numbering.=".";}
1231                                                 $numbering.=$h[$i];
1232                                                 $dot=1;
1233                                         }
1234                                 }
1235                         }
1236
1237                         // The canonized header is a version of the header text safe to use for links
1238                         // Avoid insertion of weird stuff like <math> by expanding the relevant sections
1239                         $canonized_headline=Parser::unstrip( $headline, $this->mStripState );
1240                         $canonized_headline=preg_replace("/<.*?>/","",$canonized_headline); // strip out HTML
1241                         $tocline = trim( $canonized_headline );
1242                         $canonized_headline=str_replace('"',"",$canonized_headline);
1243                         $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1244                         $refer[$c]=$canonized_headline;
1245                         $refers[$canonized_headline]++;  // count how many in assoc. array so we can track dupes in anchors
1246                         $refcount[$c]=$refers[$canonized_headline];
1247
1248             // Prepend the number to the heading text
1249
1250                         if($nh||$st) {
1251                                 $tocline=$numbering ." ". $tocline;
1252
1253                                 // Don't number the heading if it is the only one (looks silly)
1254                                 if($nh && count($matches[3]) > 1) {
1255                                         $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1256                                 }
1257                         }
1258
1259                         // Create the anchor for linking from the TOC to the section
1260
1261                         $anchor=$canonized_headline;
1262                         if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1263                         if($st) {
1264                                 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
1265                         }
1266                         if($es) {
1267                                 $head[$c].=$sk->editSectionLink($c+1);
1268                         }
1269
1270                         // Put it all together
1271
1272                         $head[$c].="<h".$level.$matches[2][$c]
1273                          ."<a name=\"".$anchor."\">"
1274                          .$headline
1275                          ."</a>"
1276                          ."</h".$level.">";
1277
1278                         // Add the edit section link
1279
1280                         if($esr) {
1281                                 $head[$c]=$sk->editSectionScript($c+1,$head[$c]);
1282                         }
1283
1284                         $numbering="";
1285                         $c++;
1286                         $dot=0;
1287                 }
1288
1289                 if($st) {
1290                         $toclines=$c;
1291                         $toc.=$sk->tocUnindent($toclevel);
1292                         $toc=$sk->tocTable($toc);
1293                 }
1294
1295                 // split up and insert constructed headlines
1296
1297                 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1298                 $i=0;
1299
1300                 foreach($blocks as $block) {
1301                         if(($es) && $c>0 && $i==0) {
1302                             # This is the [edit] link that appears for the top block of text when
1303                                 # section editing is enabled
1304                                 $full.=$sk->editSectionLink(0);
1305                         }
1306                         $full.=$block;
1307                         if($st && $toclines>3 && !$i) {
1308                                 # Let's add a top anchor just in case we want to link to the top of the page
1309                                 $full="<a name=\"top\"></a>".$full.$toc;
1310                         }
1311
1312                         $full.=$head[$i];
1313                         $i++;
1314                 }
1315
1316                 return $full;
1317         }
1318
1319         /* private */ function doMagicISBN( &$tokenizer )
1320         {
1321                 global $wgLang;
1322
1323                 # Check whether next token is a text token
1324                 # If yes, fetch it and convert the text into a
1325                 # Special::BookSources link
1326                 $token = $tokenizer->previewToken();
1327                 while ( $token["type"] == "" )
1328                 {
1329                         $tokenizer->nextToken();
1330                         $token = $tokenizer->previewToken();
1331                 }
1332                 if ( $token["type"] == "text" )
1333                 {
1334                         $token = $tokenizer->nextToken();
1335                         $x = $token["text"];
1336                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1337
1338                         $isbn = $blank = "" ;
1339                         while ( " " == $x{0} ) {
1340                                 $blank .= " ";
1341                                 $x = substr( $x, 1 );
1342                         }
1343                         while ( strstr( $valid, $x{0} ) != false ) {
1344                                 $isbn .= $x{0};
1345                                 $x = substr( $x, 1 );
1346                         }
1347                         $num = str_replace( "-", "", $isbn );
1348                         $num = str_replace( " ", "", $num );
1349
1350                         if ( "" == $num ) {
1351                                 $text .= "ISBN $blank$x";
1352                         } else {
1353                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1354                                 $text .= "<a href=\"" .
1355                                 $titleObj->getUrl( "isbn={$num}", false, true ) .
1356                                         "\" class=\"internal\">ISBN $isbn</a>";
1357                                 $text .= $x;
1358                         }
1359                 } else {
1360                         $text = "ISBN ";
1361                 }
1362                 return $text;
1363         }
1364         /* private */ function doMagicRFC( &$tokenizer )
1365         {
1366                 global $wgLang;
1367
1368                 # Check whether next token is a text token
1369                 # If yes, fetch it and convert the text into a
1370                 # link to an RFC source
1371                 $token = $tokenizer->previewToken();
1372                 while ( $token["type"] == "" )
1373                 {
1374                         $tokenizer->nextToken();
1375                         $token = $tokenizer->previewToken();
1376                 }
1377                 if ( $token["type"] == "text" )
1378                 {
1379                         $token = $tokenizer->nextToken();
1380                         $x = $token["text"];
1381                         $valid = "0123456789";
1382
1383                         $rfc = $blank = "" ;
1384                         while ( " " == $x{0} ) {
1385                                 $blank .= " ";
1386                                 $x = substr( $x, 1 );
1387                         }
1388                         while ( strstr( $valid, $x{0} ) != false ) {
1389                                 $rfc .= $x{0};
1390                                 $x = substr( $x, 1 );
1391                         }
1392
1393                         if ( "" == $rfc ) {
1394                                 $text .= "RFC $blank$x";
1395                         } else {
1396                                 $url = wfmsg( "rfcurl" );
1397                                 $url = str_replace( "$1", $rfc, $url);
1398                                 $sk =& $this->mOptions->getSkin();
1399                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1400                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1401                         }
1402                 } else {
1403                         $text = "RFC ";
1404                 }
1405                 return $text;
1406         }
1407
1408         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1409         {
1410                 $this->mOptions = $options;
1411                 $this->mTitle = $title;
1412                 if ( $clearState ) {
1413                         $this->clearState;
1414                 }
1415
1416                 $stripState = false;
1417                 $text = $this->strip( $text, $stripState, false );
1418                 $text = $this->pstPass2( $text, $user );
1419                 $text = $this->unstrip( $text, $stripState );
1420                 return $text;
1421         }
1422
1423         /* private */ function pstPass2( $text, &$user )
1424         {
1425                 global $wgLang, $wgLocaltimezone;
1426
1427                 # Signatures
1428                 #
1429                 $n = $user->getName();
1430                 $k = $user->getOption( "nickname" );
1431                 if ( "" == $k ) { $k = $n; }
1432                 if(isset($wgLocaltimezone)) {
1433                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1434                 }
1435                 /* Note: this is an ugly timezone hack for the European wikis */
1436                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1437                   " (" . date( "T" ) . ")";
1438                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1439
1440                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1441                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1442                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1443                   Namespace::getUser() ) . ":$n|$k]]", $text );
1444
1445                 # Context links: [[|name]] and [[name (context)|]]
1446                 #
1447                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1448                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1449                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1450                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1451
1452                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1453                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1454                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1455                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1456                                                                                                                 # [[ns:page (cont)|]]
1457                 $context = "";
1458                 $t = $this->mTitle->getText();
1459                 if ( preg_match( $conpat, $t, $m ) ) {
1460                         $context = $m[2];
1461                 }
1462                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1463                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1464                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1465
1466                 if ( "" == $context ) {
1467                         $text = preg_replace( $p2, "[[\\1]]", $text );
1468                 } else {
1469                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1470                 }
1471
1472                 # {{SUBST:xxx}} variables
1473                 #
1474                 $mw =& MagicWord::get( MAG_SUBST );
1475                 $text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );
1476
1477                 # Trim trailing whitespace
1478                 # MAG_END (__END__) tag allows for trailing
1479                 # whitespace to be deliberately included
1480                 $text = rtrim( $text );
1481                 $mw =& MagicWord::get( MAG_END );
1482                 $mw->matchAndRemove( $text );
1483
1484                 return $text;
1485         }
1486
1487
1488 }
1489
# Value holder with four members -- text, language links, category links and
# an "old magic" flag -- each with a getter and a setter.
class ParserOutput
{
        var $mText;
        var $mLanguageLinks;
        var $mCategoryLinks;
        var $mContainsOldMagic;

        # Constructor: every argument is optional and is stored as given.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCategoryLinks = $categoryLinks;
                $this->mLanguageLinks = $languageLinks;
                $this->mText = $text;
        }

        # Getters: return the stored member unchanged

        function getText()
        {
                return $this->mText;
        }

        function getLanguageLinks()
        {
                return $this->mLanguageLinks;
        }

        function getCategoryLinks()
        {
                return $this->mCategoryLinks;
        }

        function containsOldMagic()
        {
                return $this->mContainsOldMagic;
        }

        # Setters: hand the member and the new value to wfSetVar() and
        # return whatever wfSetVar() returns

        function setText( $text )
        {
                return wfSetVar( $this->mText, $text );
        }

        function setLanguageLinks( $ll )
        {
                return wfSetVar( $this->mLanguageLinks, $ll );
        }

        function setCategoryLinks( $cl )
        {
                return wfSetVar( $this->mCategoryLinks, $cl );
        }

        function setContainsOldMagic( $com )
        {
                return wfSetVar( $this->mContainsOldMagic, $com );
        }
}
1512
# Bundle of settings consulted while parsing. Each setting has a getter and a
# setter; initialiseFromUser() fills all of them from the site-wide globals
# and from a user's preferences.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mPrintable;                 # Generate printable output
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Getters: return the stored setting unchanged
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getPrintable() { return $this->mPrintable; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Setters: delegate to wfSetVar() (wfSetRef() for the skin) and return
        # that helper's result
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Create a ParserOptions object initialised from $user.
        #
        # $user: user object, or a false value to use a fresh User
        #
        # Returns the new ParserOptions.
        #
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() declares its parameter by reference already;
                # the former call-time "&$user" is a fatal error as of PHP 5.4
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill every setting: the first five come from the $wg* globals, the
        # skin and the remaining preferences from the user, and "printable"
        # is always reset to false.
        #
        # $userInput: user object; when it evaluates to false a new User is
        #             created and used instead
        #
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mPrintable = false;
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
1588
1589 # Regex callbacks, used in OutputPage::replaceVariables
1590
# Expand a message reference, keeping only the harmless markup.
# The message named by $matches[1] is fetched with wfMsg() and cleaned with
# removeHTMLtags(); this is needed because replaceVariables runs after
# removeHTMLtags, and message text can come from any user.
# Internal links in the message are replaced while the link cache is
# suspended; afterwards the message's own MediaWiki-namespace title is
# added to the link cache.
#
# $matches: regex match array; element 1 is the message name
#
# Returns the processed message text.
function wfReplaceMsgVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        $msgName = $matches[1];
        $html = $wgCurOut->removeHTMLtags( wfMsg( $msgName ) );

        $wgLinkCache->suspend();
        $html = $wgCurOut->replaceInternalLinks( $html );
        $wgLinkCache->resume();

        $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgName );
        $wgLinkCache->addLinkObj( $msgTitle );

        return $html;
}
1603
# Effective <nowiki></nowiki>
# Not real <nowiki> because this is called after nowiki sections are processed
#
# The message named by $matches[1] is fetched with wfMsg() and passed through
# wfEscapeWikiText(); the message's MediaWiki-namespace title is added to the
# link cache.
#
# $matches: regex match array; element 1 is the message name
#
# Returns the escaped message text.
function wfReplaceMsgnwVar( $matches ) {
        # Only the link cache is needed here; the unused $wgCurOut import was dropped
        global $wgLinkCache;
        $text = wfEscapeWikiText( wfMsg( $matches[1] ) );
        $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
        return $text;
}
1612
1613
1614
1615 ?>