includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Converts wikitext to HTML.
   8 #
   9 # Globals used:
  10 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
  11 #
  12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  13 #
  14 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  15 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  16 #               $wgLocaltimezone
  17 #
  18 #      * only within ParserOptions
  19
  20 class Parser
  21 {
  22         # Cleared with clearState():
  23         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
  24
  25         # Temporary:
  26         var $mOptions, $mTitle;
  27
  28         function Parser()
  29         {
  30                 $this->clearState();
  31         }
  32
  33         function clearState()
  34         {
  35                 $this->mOutput = new ParserOutput;
  36                 $this->mAutonumber = 0;
  37                 $this->mLastSection = "";
  38                 $this->mDTopen = false;
  39                 $this->mStripState = false;
  40         }
  41
  42         # First pass--just handle <nowiki> sections, pass the rest off
  43         # to doWikiPass2() which does all the real work.
  44         #
  45         # Returns a ParserOutput
  46         #
  47         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  48         {
  49                 $fname = "Parser::parse";
  50                 wfProfileIn( $fname );
  51
  52                 if ( $clearState ) {
  53                         $this->clearState();
  54                 }
  55
  56                 $this->mOptions = $options;
  57                 $this->mTitle =& $title;
  58
  59                 $stripState = NULL;
  60                 $text = $this->strip( $text, $this->mStripState, true );
  61                 $text = $this->doWikiPass2( $text, $linestart );
  62                 $text = $this->unstrip( $text, $this->mStripState );
  63
  64                 $this->mOutput->setText( $text );
  65                 wfProfileOut( $fname );
  66                 return $this->mOutput;
  67         }
  68
  69         /* static */ function getRandomString()
  70         {
  71                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
  72         }
  73
  74         # Strips <nowiki>, <pre> and <math>
  75         # Returns the text, and fills an array with data needed in unstrip()
  76         #
  77         function strip( $text, &$state, $render = true )
  78         {
  79                 $state = array(
  80                         'nwlist' => array(),
  81                         'nwsecs' => 0,
  82                         'nwunq' => Parser::getRandomString(),
  83                         'mathlist' => array(),
  84                         'mathsecs' => 0,
  85                         'mathunq' => Parser::getRandomString(),
  86                         'prelist' => array(),
  87                         'presecs' => 0,
  88                         'preunq' => Parser::getRandomString()
  89                 );
  90
  91                 $stripped = "";
  92                 $stripped2 = "";
  93                 $stripped3 = "";
  94
  95                 # Replace any instances of the placeholders
  96                 $text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
  97                 $text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
  98                 $text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );
  99
 100                 while ( "" != $text ) {
 101                         $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
 102                         $stripped .= $p[0];
 103                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 104                                 $text = "";
 105                         } else {
 106                                 $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
 107                                 ++$state['nwsecs'];
 108
 109                                 if ( $render ) {
 110                                         $state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
 111                                 } else {
 112                                         $state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
 113                                 }
 114
 115                                 $stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
 116                                 $text = $q[1];
 117                         }
 118                 }
 119
 120                 if( $this->mOptions->getUseTeX() ) {
 121                         while ( "" != $stripped ) {
 122                                 $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
 123                                 $stripped2 .= $p[0];
 124                                 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 125                                         $stripped = "";
 126                                 } else {
 127                                         $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
 128                                         ++$state['mathsecs'];
 129
 130                                         if ( $render ) {
 131                                                 $state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
 132                                         } else {
 133                                                 $state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
 134                                         }
 135
 136                                         $stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
 137                                         $stripped = $q[1];
 138                                 }
 139                         }
 140                 } else {
 141                         $stripped2 = $stripped;
 142                 }
 143
 144                 while ( "" != $stripped2 ) {
 145                         $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
 146                         $stripped3 .= $p[0];
 147                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 148                                 $stripped2 = "";
 149                         } else {
 150                                 $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
 151                                 ++$state['presecs'];
 152
 153                                 if ( $render ) {
 154                                         $state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
 155                                 } else {
 156                                         $state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
 157                                 }
 158
 159                                 $stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
 160                                 $stripped2 = $q[1];
 161                         }
 162                 }
 163                 return $stripped3;
 164         }
 165
 166         function unstrip( $text, &$state )
 167         {
 168                 for ( $i = 1; $i <= $state['presecs']; ++$i ) {
 169                         $text = str_replace( $state['preunq'] . sprintf("%08X", $i), $state['prelist'][$i], $text );
 170                 }
 171
 172                 for ( $i = 1; $i <= $state['mathsecs']; ++$i ) {
 173                         $text = str_replace( $state['mathunq'] . sprintf("%08X", $i), $state['mathlist'][$i], $text );
 174                 }
 175
 176                 for ( $i = 1; $i <= $state['nwsecs']; ++$i ) {
 177                         $text = str_replace( $state['nwunq'] . sprintf("%08X", $i), $state['nwlist'][$i], $text );
 178                 }
 179                 return $text;
 180         }
 181
 182         function categoryMagic ()
 183         {
 184                 global $wgLang , $wgUser ;
 185                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 186                 $id = $this->mTitle->getArticleID() ;
 187                 $cat = ucfirst ( wfMsg ( "category" ) ) ;
 188                 $ti = $this->mTitle->getText() ;
 189                 $ti = explode ( ":" , $ti , 2 ) ;
 190                 if ( $cat != $ti[0] ) return "" ;
 191                 $r = "<br break=all>\n" ;
 192
 193                 $articles = array() ;
 194                 $parents = array () ;
 195                 $children = array() ;
 196
 197
 198 #               $sk =& $this->mGetSkin();
 199                 $sk =& $wgUser->getSkin() ;
 200
 201                 $doesexist = false ;
 202                 if ( $doesexist ) {
 203                         $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
 204                 } else {
 205                         $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 206                 }
 207
 208                 $res = wfQuery ( $sql, DB_READ ) ;
 209                 while ( $x = wfFetchObject ( $res ) )
 210                 {
 211                 #  $t = new Title ;
 212                 #  $t->newFromDBkey ( $x->l_from ) ;
 213                 #  $t = $t->getText() ;
 214                         if ( $doesexist ) {
 215                                 $t = $x->l_from ;
 216                         } else {
 217                                 $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 218                                 if ( $t != "" ) $t .= ":" ;
 219                                 $t .= $x->cur_title ;
 220                         }
 221
 222                         $y = explode ( ":" , $t , 2 ) ;
 223                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 224                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 225                         } else {
 226                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 227                         }
 228                 }
 229                 wfFreeResult ( $res ) ;
 230
 231                 # Children
 232                 if ( count ( $children ) > 0 )
 233                 {
 234                         asort ( $children ) ;
 235                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 236                         $r .= implode ( ", " , $children ) ;
 237                 }
 238
 239                 # Articles
 240                 if ( count ( $articles ) > 0 )
 241                 {
 242                         asort ( $articles ) ;
 243                         $h =  wfMsg( "category_header", $ti[1] );
 244                         $r .= "<h2>{$h}</h2>\n" ;
 245                         $r .= implode ( ", " , $articles ) ;
 246                 }
 247
 248
 249                 return $r ;
 250         }
 251
 252         function getHTMLattrs ()
 253         {
 254                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 255                                 "title", "align", "lang", "dir", "width", "height",
 256                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 257                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 258                                 /* FONT */ "type", "start", "value", "compact",
 259                                 /* For various lists, mostly deprecated but safe */
 260                                 "summary", "width", "border", "frame", "rules",
 261                                 "cellspacing", "cellpadding", "valign", "char",
 262                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 263                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 264                                 "id", "class", "name", "style" /* For CSS */
 265                                 );
 266                 return $htmlattrs ;
 267         }
 268
 269         function fixTagAttributes ( $t )
 270         {
 271                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 272                 $htmlattrs = $this->getHTMLattrs() ;
 273
 274                 # Strip non-approved attributes from the tag
 275                 $t = preg_replace(
 276                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 277                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 278                         $t);
 279                 # Strip javascript "expression" from stylesheets. Brute force approach:
 280                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 281
 282                 if( preg_match(
 283                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 284                         wfMungeToUtf8( $t ) ) )
 285                 {
 286                         $t="";
 287                 }
 288
 289                 return trim ( $t ) ;
 290         }
 291
 292         function doTableStuff ( $t )
 293         {
 294                 $t = explode ( "\n" , $t ) ;
 295                 $td = array () ; # Is currently a td tag open?
 296                         $ltd = array () ; # Was it TD or TH?
 297                         $tr = array () ; # Is currently a tr tag open?
 298                         $ltr = array () ; # tr attributes
 299                         foreach ( $t AS $k => $x )
 300                         {
 301                                 $x = rtrim ( $x ) ;
 302                                 $fc = substr ( $x , 0 , 1 ) ;
 303                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 304                                 {
 305                                         $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 306                                         array_push ( $td , false ) ;
 307                                         array_push ( $ltd , "" ) ;
 308                                         array_push ( $tr , false ) ;
 309                                         array_push ( $ltr , "" ) ;
 310                                 }
 311                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 312                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 313                                 {
 314                                         $z = "</table>\n" ;
 315                                         $l = array_pop ( $ltd ) ;
 316                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 317                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 318                                         array_pop ( $ltr ) ;
 319                                         $t[$k] = $z ;
 320                                 }
 321                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 322                                                 {
 323                                                 $z = trim ( substr ( $x , 2 ) ) ;
 324                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 325                                                 }*/
 326                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 327                                 {
 328                                         $x = substr ( $x , 1 ) ;
 329                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 330                                         $z = "" ;
 331                                         $l = array_pop ( $ltd ) ;
 332                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 333                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 334                                         array_pop ( $ltr ) ;
 335                                         $t[$k] = $z ;
 336                                         array_push ( $tr , false ) ;
 337                                         array_push ( $td , false ) ;
 338                                         array_push ( $ltd , "" ) ;
 339                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 340                                 }
 341                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 342                                 {
 343                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 344                                         {
 345                                                 $fc = "+" ;
 346                                                 $x = substr ( $x , 1 ) ;
 347                                         }
 348                                         $after = substr ( $x , 1 ) ;
 349                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 350                                         $after = explode ( "||" , $after ) ;
 351                                         $t[$k] = "" ;
 352                                         foreach ( $after AS $theline )
 353                                         {
 354                                                 $z = "" ;
 355                                                 if ( $fc != "+" )
 356                                                 {
 357                                                         $tra = array_pop ( $ltr ) ;
 358                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 359                                                         array_push ( $tr , true ) ;
 360                                                         array_push ( $ltr , "" ) ;
 361                                                 }
 362
 363                                                 $l = array_pop ( $ltd ) ;
 364                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 365                                                 if ( $fc == "|" ) $l = "TD" ;
 366                                                 else if ( $fc == "!" ) $l = "TH" ;
 367                                                 else if ( $fc == "+" ) $l = "CAPTION" ;
 368                                                 else $l = "" ;
 369                                                 array_push ( $ltd , $l ) ;
 370                                                 $y = explode ( "|" , $theline , 2 ) ;
 371                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 372                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 373                                                 $t[$k] .= $y ;
 374                                                 array_push ( $td , true ) ;
 375                                         }
 376                                 }
 377                         }
 378
 379                 # Closing open td, tr && table
 380                 while ( count ( $td ) > 0 )
 381                 {
 382                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 383                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 384                         $t[] = "</table>" ;
 385                 }
 386
 387                 $t = implode ( "\n" , $t ) ;
 388                 #               $t = $this->removeHTMLtags( $t );
 389                 return $t ;
 390         }
 391
 392         # Well, OK, it's actually about 14 passes.  But since all the
 393         # hard lifting is done inside PHP's regex code, it probably
 394         # wouldn't speed things up much to add a real parser.
 395         #
 396         function doWikiPass2( $text, $linestart )
 397         {
 398                 $fname = "OutputPage::doWikiPass2";
 399                 wfProfileIn( $fname );
 400
 401                 $text = $this->removeHTMLtags( $text );
 402                 $text = $this->replaceVariables( $text );
 403
 404                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 405                 $text = str_replace ( "<HR>", "<hr>", $text );
 406
 407                 $text = $this->doHeadings( $text );
 408                 $text = $this->doBlockLevels( $text, $linestart );
 409
 410                 if($this->mOptions->getUseDynamicDates()) {
 411                         global $wgDateFormatter;
 412                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 413                 }
 414
 415                 $text = $this->replaceExternalLinks( $text );
 416                 $text = $this->replaceInternalLinks ( $text );
 417                 $text = $this->doTableStuff ( $text ) ;
 418
 419                 $text = $this->formatHeadings( $text );
 420
 421                 $sk =& $this->mOptions->getSkin();
 422                 $text = $sk->transformContent( $text );
 423                 $text .= $this->categoryMagic () ;
 424
 425                 wfProfileOut( $fname );
 426                 return $text;
 427         }
 428
 429
 430         /* private */ function doHeadings( $text )
 431         {
 432                 for ( $i = 6; $i >= 1; --$i ) {
 433                         $h = substr( "======", 0, $i );
 434                         $text = preg_replace( "/^{$h}([^=]+){$h}(\\s|$)/m",
 435                           "<h{$i}>\\1</h{$i}>\\2", $text );
 436                 }
 437                 return $text;
 438         }
 439
 440         # Note: we have to do external links before the internal ones,
 441         # and otherwise take great care in the order of things here, so
 442         # that we don't end up interpreting some URLs twice.
 443
 444         /* private */ function replaceExternalLinks( $text )
 445         {
 446                 $fname = "OutputPage::replaceExternalLinks";
 447                 wfProfileIn( $fname );
 448                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 449                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 450                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 451                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 452                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 453                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 454                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 455                 wfProfileOut( $fname );
 456                 return $text;
 457         }
 458
 459         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 460         {
 461                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 462                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 463
 464                 # this is  the list of separators that should be ignored if they
 465                 # are the last character of an URL but that should be included
 466                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 467                 # in this case, the last comma should not become part of the URL,
 468                 # but in "www.foo.com/123,2342,32.htm" it should.
 469                 $sep = ",;\.:";
 470                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 471                 $images = "gif|png|jpg|jpeg";
 472
 473                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 474                 # they are interpreted as part of the string (used to tell PHP
 475                 # that the content of the string should be inserted there).
 476                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 477                   "((?i){$images})([^{$uc}]|$)/";
 478
 479                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 480                 $sk =& $this->mOptions->getSkin();
 481
 482                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 483                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 484                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 485                 }
 486                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 487                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 488                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 489                   "</a>\\5", $s );
 490                 $s = str_replace( $unique, $protocol, $s );
 491
 492                 $a = explode( "[{$protocol}:", " " . $s );
 493                 $s = array_shift( $a );
 494                 $s = substr( $s, 1 );
 495
 496                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 497                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 498
 499                 foreach ( $a as $line ) {
 500                         if ( preg_match( $e1, $line, $m ) ) {
 501                                 $link = "{$protocol}:{$m[1]}";
 502                                 $trail = $m[2];
 503                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 504                                 else { $text = wfEscapeHTML( $link ); }
 505                         } else if ( preg_match( $e2, $line, $m ) ) {
 506                                 $link = "{$protocol}:{$m[1]}";
 507                                 $text = $m[2];
 508                                 $trail = $m[3];
 509                         } else {
 510                                 $s .= "[{$protocol}:" . $line;
 511                                 continue;
 512                         }
 513                         if ( $this->mOptions->getPrintable() ) $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
 514                         else $paren = "";
 515                         $la = $sk->getExternalLinkAttributes( $link, $text );
 516                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 517
 518                 }
 519                 return $s;
 520         }
 521
 522         /* private */ function handle3Quotes( &$state, $token )
 523         {
 524                 if ( $state["strong"] ) {
 525                         if ( $state["em"] && $state["em"] > $state["strong"] )
 526                         {
 527                                 # ''' lala ''lala '''
 528                                 $s = "</em></strong><em>";
 529                         } else {
 530                                 $s = "</strong>";
 531                         }
 532                         $state["strong"] = FALSE;
 533                 } else {
 534                         $s = "<strong>";
 535                         $state["strong"] = $token["pos"];
 536                 }
 537                 return $s;
 538         }
 539
 540         /* private */ function handle2Quotes( &$state, $token )
 541         {
 542                 if ( $state["em"] ) {
 543                         if ( $state["strong"] && $state["strong"] > $state["em"] )
 544                         {
 545                                 # ''lala'''lala'' ....'''
 546                                 $s = "</strong></em><strong>";
 547                         } else {
 548                                 $s = "</em>";
 549                         }
 550                         $state["em"] = FALSE;
 551                 } else {
 552                         $s = "<em>";
 553                         $state["em"] = $token["pos"];
 554                 }
 555                 return $s;
 556         }
 557
 558         /* private */ function handle5Quotes( &$state, $token )
 559         {
 560                 if ( $state["em"] && $state["strong"] ) {
 561                         if ( $state["em"] < $state["strong"] ) {
 562                                 $s .= "</strong></em>";
 563                         } else {
 564                                 $s .= "</em></strong>";
 565                         }
 566                         $state["strong"] = $state["em"] = FALSE;
 567                 } elseif ( $state["em"] ) {
 568                         $s .= "</em><strong>";
 569                         $state["em"] = FALSE;
 570                         $state["strong"] = $token["pos"];
 571                 } elseif ( $state["strong"] ) {
 572                         $s .= "</strong><em>";
 573                         $state["strong"] = FALSE;
 574                         $state["em"] = $token["pos"];
 575                 } else { # not $em and not $strong
 576                         $s .= "<strong><em>";
 577                         $state["strong"] = $state["em"] = $token["pos"];
 578                 }
 579                 return $s;
 580         }
 581
 582         /* private */ function replaceInternalLinks( $str )
 583         {
 584                 global $wgLang; # for language specific parser hook
 585
 586                 $tokenizer=Tokenizer::newFromString( $str );
 587                 $tokenStack = array();
 588
 589                 $s="";
 590                 $state["em"]      = FALSE;
 591                 $state["strong"]  = FALSE;
 592                 $tagIsOpen = FALSE;
 593
 594                 # The tokenizer splits the text into tokens and returns them one by one.
 595                 # Every call to the tokenizer returns a new token.
 596                 while ( $token = $tokenizer->nextToken() )
 597                 {
 598                         switch ( $token["type"] )
 599                         {
 600                                 case "text":
 601                                         # simple text with no further markup
 602                                         $txt = $token["text"];
 603                                         break;
 604                                 case "[[":
 605                                         # link opening tag.
 606                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 607                                         $tagIsOpen = TRUE;
 608                                         array_push( $tokenStack, $token );
 609                                         $txt="";
 610                                         break;
 611                                 case "]]":
 612                                         # link close tag.
 613                                         # get text from stack, glue it together, and call the code to handle a
 614                                         # link
 615                                         if ( count( $tokenStack ) == 0 )
 616                                         {
 617                                                 # stack empty. Found a ]] without an opening [[
 618                                                 $txt = "]]";
 619                                         } else {
 620                                                 $linkText = "";
 621                                                 $lastToken = array_pop( $tokenStack );
 622                                                 while ( $lastToken["type"] != "[[" )
 623                                                 {
 624                                                         $linkText = $lastToken["text"] . $linkText;
 625                                                         $lastToken = array_pop( $tokenStack );
 626                                                 }
 627                                                 $txt = $linkText ."]]";
 628                                                 $prefix = $lastToken["text"];
 629                                                 $nextToken = $tokenizer->previewToken();
 630                                                 if ( $nextToken["type"] == "text" )
 631                                                 {
 632                                                         # Preview just looks at it. Now we have to fetch it.
 633                                                         $nextToken = $tokenizer->nextToken();
 634                                                         $txt .= $nextToken["text"];
 635                                                 }
 636                                                 $txt = $this->handleInternalLink( $txt, $prefix );
 637                                         }
 638                                         $tagIsOpen = (count( $tokenStack ) != 0);
 639                                         break;
 640                                 case "----":
 641                                         $txt = "\n<hr>\n";
 642                                         break;
 643                                 case "'''":
 644                                         # This and the three next ones handle quotes
 645                                         $txt = $this->handle3Quotes( $state, $token );
 646                                         break;
 647                                 case "''":
 648                                         $txt = $this->handle2Quotes( $state, $token );
 649                                         break;
 650                                 case "'''''":
 651                                         $txt = $this->handle5Quotes( $state, $token );
 652                                         break;
 653                                 case "":
 654                                         # empty token
 655                                         $txt="";
 656                                         break;
 657                                 case "RFC ":
 658                                         $txt = $this->doMagicRFC( $tokenizer );
 659                                         break;
 660                                 case "ISBN ":
 661                                         $txt = $this->doMagicISBN( $tokenizer );
 662                                         break;
 663                                 default:
 664                                         # Call language specific Hook.
 665                                         $txt = $wgLang->processToken( $token, $tokenStack );
 666                                         if ( NULL == $txt ) {
 667                                                 # An unkown token. Highlight.
 668                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 669                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 670                                         }
 671                                         break;
 672                         }
 673                         # If we're parsing the interior of a link, don't append the interior to $s,
 674                         # but push it to the stack so it can be processed when a ]] token is found.
 675                         if ( $tagIsOpen  && $txt != "" ) {
 676                                 $token["type"] = "text";
 677                                 $token["text"] = $txt;
 678                                 array_push( $tokenStack, $token );
 679                         } else {
 680                                 $s .= $txt;
 681                         }
 682                 } #end while
 683                 if ( count( $tokenStack ) != 0 )
 684                 {
 685                         # still objects on stack. opened [[ tag without closing ]] tag.
 686                         $txt = "";
 687                         while ( $lastToken = array_pop( $tokenStack ) )
 688                         {
 689                                 if ( $lastToken["type"] == "text" )
 690                                 {
 691                                         $txt = $lastToken["text"] . $txt;
 692                                 } else {
 693                                         $txt = $lastToken["type"] . $txt;
 694                                 }
 695                         }
 696                         $s .= $txt;
 697                 }
 698                 return $s;
 699         }
 700
 701         /* private */ function handleInternalLink( $line, $prefix )
 702         {
 703                 global $wgLang, $wgLinkCache;
 704                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 705                 static $fname = "OutputPage::replaceInternalLinks" ;
 706                 wfProfileIn( $fname );
 707
 708                 wfProfileIn( "$fname-setup" );
 709                 static $tc = FALSE;
 710                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 711                 $sk =& $this->mOptions->getSkin();
 712
 713                 # Match a link having the form [[namespace:link|alternate]]trail
 714                 static $e1 = FALSE;
 715                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 716                 # Match the end of a line for a word that's not followed by whitespace,
 717                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 718                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 719                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 720                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 721
 722
 723                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 724                 static $image = FALSE;
 725                 static $special = FALSE;
 726                 static $media = FALSE;
 727                 static $category = FALSE;
 728                 if ( !$image ) { $image = Namespace::getImage(); }
 729                 if ( !$special ) { $special = Namespace::getSpecial(); }
 730                 if ( !$media ) { $media = Namespace::getMedia(); }
 731                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 732
 733                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 734
 735                 wfProfileOut( "$fname-setup" );
 736
 737                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 738                         $text = $m[2];
 739                         $trail = $m[3];
 740                 } else { # Invalid form; output directly
 741                         $s .= $prefix . "[[" . $line ;
 742                         return $s;
 743                 }
 744
 745                 /* Valid link forms:
 746                 Foobar -- normal
 747                 :Foobar -- override special treatment of prefix (images, language links)
 748                 /Foobar -- convert to CurrentPage/Foobar
 749                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 750                 */
 751                 $c = substr($m[1],0,1);
 752                 $noforce = ($c != ":");
 753                 if( $c == "/" ) { # subpage
 754                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 755                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 756                                 $noslash=$m[1];
 757                         } else {
 758                                 $noslash=substr($m[1],1);
 759                         }
 760                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 761                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 762                                 if( "" == $text ) {
 763                                         $text= $m[1];
 764                                 } # this might be changed for ugliness reasons
 765                         } else {
 766                                 $link = $noslash; # no subpage allowed, use standard link
 767                         }
 768                 } elseif( $noforce ) { # no subpage
 769                         $link = $m[1];
 770                 } else {
 771                         $link = substr( $m[1], 1 );
 772                 }
 773                 if( "" == $text )
 774                         $text = $link;
 775
 776                 $nt = Title::newFromText( $link );
 777                 if( !$nt ) {
 778                         $s .= $prefix . "[[" . $line;
 779                         return $s;
 780                 }
 781                 $ns = $nt->getNamespace();
 782                 $iw = $nt->getInterWiki();
 783                 if( $noforce ) {
 784                         if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 785                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 786                                 $s .= $prefix . $trail;
 787                                 return $s;
 788                         }
 789                         if( $ns == $image ) {
 790                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 791                                 $wgLinkCache->addImageLinkObj( $nt );
 792                                 return $s;
 793                         }
 794                 }
 795                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 796                     ( strpos( $link, "#" ) == FALSE ) ) {
 797                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 798                         return $s;
 799                 }
 800                 if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
 801                         $t = explode ( ":" , $nt->getText() ) ;
 802                         array_shift ( $t ) ;
 803                         $t = implode ( ":" , $t ) ;
 804                         $t = $wgLang->ucFirst ( $t ) ;
 805 #                       $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
 806                         $nnt = Title::newFromText ( $category.":".$t ) ;
 807                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 808                         $this->mCategoryLinks[] = $t ;
 809                         $s .= $prefix . $trail ;
 810                         return $s ;
 811                 }
 812                 if( $ns == $media ) {
 813                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 814                         $wgLinkCache->addImageLinkObj( $nt );
 815                         return $s;
 816                 } elseif( $ns == $special ) {
 817                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 818                         return $s;
 819                 }
 820                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 821
 822                 wfProfileOut( $fname );
 823                 return $s;
 824         }
 825
 826         # Some functions here used by doBlockLevels()
 827         #
 828         /* private */ function closeParagraph()
 829         {
 830                 $result = "";
 831                 if ( 0 != strcmp( "p", $this->mLastSection ) &&
 832                   0 != strcmp( "", $this->mLastSection ) ) {
 833                         $result = "</" . $this->mLastSection  . ">";
 834                 }
 835                 $this->mLastSection = "";
 836                 return $result."\n";
 837         }
        # getCommon() returns the length of the longest common prefix
        # of both arguments, i.e. the number of leading characters they share.
        #
 841         /* private */ function getCommon( $st1, $st2 )
 842         {
 843                 $fl = strlen( $st1 );
 844                 $shorter = strlen( $st2 );
 845                 if ( $fl < $shorter ) { $shorter = $fl; }
 846
 847                 for ( $i = 0; $i < $shorter; ++$i ) {
 848                         if ( $st1{$i} != $st2{$i} ) { break; }
 849                 }
 850                 return $i;
 851         }
 852         # These next three functions open, continue, and close the list
 853         # element appropriate to the prefix character passed into them.
 854         #
 855         /* private */ function openList( $char )
 856     {
 857                 $result = $this->closeParagraph();
 858
 859                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 860                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 861                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 862                 else if ( ";" == $char ) {
 863                         $result .= "<dl><dt>";
 864                         $this->mDTopen = true;
 865                 }
 866                 else { $result = "<!-- ERR 1 -->"; }
 867
 868                 return $result;
 869         }
 870
 871         /* private */ function nextItem( $char )
 872         {
 873                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 874                 else if ( ":" == $char || ";" == $char ) {
 875                         $close = "</dd>";
 876                         if ( $this->mDTopen ) { $close = "</dt>"; }
 877                         if ( ";" == $char ) {
 878                                 $this->mDTopen = true;
 879                                 return $close . "<dt>";
 880                         } else {
 881                                 $this->mDTopen = false;
 882                                 return $close . "<dd>";
 883                         }
 884                 }
 885                 return "<!-- ERR 2 -->";
 886         }
 887
 888         /* private */function closeList( $char )
 889         {
 890                 if ( "*" == $char ) { $text = "</li></ul>"; }
 891                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 892                 else if ( ":" == $char ) {
 893                         if ( $this->mDTopen ) {
 894                                 $this->mDTopen = false;
 895                                 $text = "</dt></dl>";
 896                         } else {
 897                                 $text = "</dd></dl>";
 898                         }
 899                 }
 900                 else {  return "<!-- ERR 3 -->"; }
 901                 return $text."\n";
 902         }
 903
 904         /* private */ function doBlockLevels( $text, $linestart )
 905         {
 906                 $fname = "OutputPage::doBlockLevels";
 907                 wfProfileIn( $fname );
 908                 # Parsing through the text line by line.  The main thing
 909                 # happening here is handling of block-level elements p, pre,
 910                 # and making lists from lines starting with * # : etc.
 911                 #
 912                 $a = explode( "\n", $text );
 913                 $text = $lastPref = "";
 914                 $this->mDTopen = $inBlockElem = false;
 915
 916                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 917                 foreach ( $a as $t ) {
 918                         if ( "" != $text ) { $text .= "\n"; }
 919
 920                         $oLine = $t;
 921                         $opl = strlen( $lastPref );
 922                         $npl = strspn( $t, "*#:;" );
 923                         $pref = substr( $t, 0, $npl );
 924                         $pref2 = str_replace( ";", ":", $pref );
 925                         $t = substr( $t, $npl );
 926
 927                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 928                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 929
 930                                 if ( ";" == substr( $pref, -1 ) ) {
 931                                         $cpos = strpos( $t, ":" );
 932                                         if ( ! ( false === $cpos ) ) {
 933                                                 $term = substr( $t, 0, $cpos );
 934                                                 $text .= $term . $this->nextItem( ":" );
 935                                                 $t = substr( $t, $cpos + 1 );
 936                                         }
 937                                 }
 938                         } else if (0 != $npl || 0 != $opl) {
 939                                 $cpl = $this->getCommon( $pref, $lastPref );
 940
 941                                 while ( $cpl < $opl ) {
 942                                         $text .= $this->closeList( $lastPref{$opl-1} );
 943                                         --$opl;
 944                                 }
 945                                 if ( $npl <= $cpl && $cpl > 0 ) {
 946                                         $text .= $this->nextItem( $pref{$cpl-1} );
 947                                 }
 948                                 while ( $npl > $cpl ) {
 949                                         $char = substr( $pref, $cpl, 1 );
 950                                         $text .= $this->openList( $char );
 951
 952                                         if ( ";" == $char ) {
 953                                                 $cpos = strpos( $t, ":" );
 954                                                 if ( ! ( false === $cpos ) ) {
 955                                                         $term = substr( $t, 0, $cpos );
 956                                                         $text .= $term . $this->nextItem( ":" );
 957                                                         $t = substr( $t, $cpos + 1 );
 958                                                 }
 959                                         }
 960                                         ++$cpl;
 961                                 }
 962                                 $lastPref = $pref2;
 963                         }
 964                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
 965                                 if ( preg_match(
 966                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
 967                                         $text .= $this->closeParagraph();
 968                                         $inBlockElem = true;
 969                                 }
 970                                 if ( ! $inBlockElem ) {
 971                                         if ( " " == $t{0} ) {
 972                                                 $newSection = "pre";
 973                                                 # $t = wfEscapeHTML( $t );
 974                                         }
 975                                         else { $newSection = "p"; }
 976
 977                                         if ( 0 == strcmp( "", trim( $oLine ) ) ) {
 978                                                 $text .= $this->closeParagraph();
 979                                                 $text .= "<" . $newSection . ">";
 980                                         } else if ( 0 != strcmp( $this->mLastSection,
 981                                           $newSection ) ) {
 982                                                 $text .= $this->closeParagraph();
 983                                                 if ( 0 != strcmp( "p", $newSection ) ) {
 984                                                         $text .= "<" . $newSection . ">";
 985                                                 }
 986                                         }
 987                                         $this->mLastSection = $newSection;
 988                                 }
 989                                 if ( $inBlockElem &&
 990                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
 991                                         $inBlockElem = false;
 992                                 }
 993                         }
 994                         $text .= $t;
 995                 }
 996                 while ( $npl ) {
 997                         $text .= $this->closeList( $pref2{$npl-1} );
 998                         --$npl;
 999                 }
1000                 if ( "" != $this->mLastSection ) {
1001                         if ( "p" != $this->mLastSection ) {
1002                                 $text .= "</" . $this->mLastSection . ">";
1003                         }
1004                         $this->mLastSection = "";
1005                 }
1006                 wfProfileOut( $fname );
1007                 return $text;
1008         }
1009
1010         /* private */ function replaceVariables( $text )
1011         {
1012                 global $wgLang, $wgCurOut;
1013                 $fname = "OutputPage::replaceVariables";
1014                 wfProfileIn( $fname );
1015
1016                 $magic = array();
1017
1018                 # Basic variables
1019                 # See Language.php for the definition of each magic word
1020                 # As with sigs, this uses the server's local time -- ensure
1021                 # this is appropriate for your audience!
1022
1023                 $magic[MAG_CURRENTMONTH] = date( "m" );
1024                 $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
1025                 $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
1026                 $magic[MAG_CURRENTDAY] = date("j");
1027                 $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
1028                 $magic[MAG_CURRENTYEAR] = date( "Y" );
1029                 $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
1030
1031                 $this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
1032
1033                 $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
1034                 if ( $mw->match( $text ) ) {
1035                         $v = wfNumberOfArticles();
1036                         $text = $mw->replace( $v, $text );
1037                         if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
1038                 }
1039
1040                 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
1041                 # The callbacks are at the bottom of this file
1042                 $wgCurOut = $this;
1043                 $mw =& MagicWord::get( MAG_MSG );
1044                 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
1045                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1046
1047                 $mw =& MagicWord::get( MAG_MSGNW );
1048                 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
1049                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1050
1051                 wfProfileOut( $fname );
1052                 return $text;
1053         }
1054
1055         # Cleans up HTML, removes dangerous tags and attributes
1056         /* private */ function removeHTMLtags( $text )
1057         {
1058                 $fname = "OutputPage::removeHTMLtags";
1059                 wfProfileIn( $fname );
1060                 $htmlpairs = array( # Tags that must be closed
1061                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1062                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1063                         "strike", "strong", "tt", "var", "div", "center",
1064                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1065                         "ruby", "rt" , "rb" , "rp"
1066                 );
1067                 $htmlsingle = array(
1068                         "br", "p", "hr", "li", "dt", "dd"
1069                 );
1070                 $htmlnest = array( # Tags that can be nested--??
1071                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1072                         "dl", "font", "big", "small", "sub", "sup"
1073                 );
1074                 $tabletags = array( # Can only appear inside table
1075                         "td", "th", "tr"
1076                 );
1077
1078                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1079                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1080
1081                 $htmlattrs = $this->getHTMLattrs () ;
1082
1083                 # Remove HTML comments
1084                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1085
1086                 $bits = explode( "<", $text );
1087                 $text = array_shift( $bits );
1088                 $tagstack = array(); $tablestack = array();
1089
1090                 foreach ( $bits as $x ) {
1091                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1092                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1093                           $x, $regs );
1094                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1095                         error_reporting( $prev );
1096
1097                         $badtag = 0 ;
1098                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1099                                 # Check our stack
1100                                 if ( $slash ) {
1101                                         # Closing a tag...
1102                                         if ( ! in_array( $t, $htmlsingle ) &&
1103                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1104                                                 array_push( $tagstack, $ot );
1105                                                 $badtag = 1;
1106                                         } else {
1107                                                 if ( $t == "table" ) {
1108                                                         $tagstack = array_pop( $tablestack );
1109                                                 }
1110                                                 $newparams = "";
1111                                         }
1112                                 } else {
1113                                         # Keep track for later
1114                                         if ( in_array( $t, $tabletags ) &&
1115                                           ! in_array( "table", $tagstack ) ) {
1116                                                 $badtag = 1;
1117                                         } else if ( in_array( $t, $tagstack ) &&
1118                                           ! in_array ( $t , $htmlnest ) ) {
1119                                                 $badtag = 1 ;
1120                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1121                                                 if ( $t == "table" ) {
1122                                                         array_push( $tablestack, $tagstack );
1123                                                         $tagstack = array();
1124                                                 }
1125                                                 array_push( $tagstack, $t );
1126                                         }
1127                                         # Strip non-approved attributes from the tag
1128                                         $newparams = $this->fixTagAttributes($params);
1129
1130                                 }
1131                                 if ( ! $badtag ) {
1132                                         $rest = str_replace( ">", "&gt;", $rest );
1133                                         $text .= "<$slash$t $newparams$brace$rest";
1134                                         continue;
1135                                 }
1136                         }
1137                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1138                 }
1139                 # Close off any remaining tags
1140                 while ( $t = array_pop( $tagstack ) ) {
1141                         $text .= "</$t>\n";
1142                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1143                 }
1144                 wfProfileOut( $fname );
1145                 return $text;
1146         }
1147
1148 /*
1149  *
1150  * This function accomplishes several tasks:
1151  * 1) Auto-number headings if that option is enabled
1152  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1153  * 3) Add a Table of contents on the top for users who have enabled the option
1154  * 4) Auto-anchor headings
1155  *
1156  * It loops through all headlines, collects the necessary data, then splits up the
1157  * string and re-inserts the newly formatted headlines.
1158  *
1159  * */
1160         /* private */ function formatHeadings( $text )
1161         {
1162                 $nh=$this->mOptions->getNumberHeadings();
1163                 $st=$this->mOptions->getShowToc();
1164                 if(!$this->mTitle->userCanEdit()) {
1165                         $es=0;
1166                         $esr=0;
1167                 } else {
1168                         $es=$this->mOptions->getEditSection();
1169                         $esr=$this->mOptions->getEditSectionOnRightClick();
1170                 }
1171
1172                 # Inhibit editsection links if requested in the page
1173                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1174                 if ($esw->matchAndRemove( $text )) {
1175                         $es=0;
1176                 }
1177                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1178                 # do not add TOC
1179                 $mw =& MagicWord::get( MAG_NOTOC );
1180                 if ($mw->matchAndRemove( $text ))
1181                 {
1182                         $st = 0;
1183                 }
1184
1185                 # never add the TOC to the Main Page. This is an entry page that should not
1186                 # be more than 1-2 screens large anyway
1187                 if($this->mTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1188
1189                 # We need this to perform operations on the HTML
1190                 $sk =& $this->mOptions->getSkin();
1191
1192                 # Get all headlines for numbering them and adding funky stuff like [edit]
1193                 # links
1194                 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1195
1196                 # headline counter
1197                 $c=0;
1198
1199                 # Ugh .. the TOC should have neat indentation levels which can be
1200                 # passed to the skin functions. These are determined here
1201                 foreach($matches[3] as $headline) {
1202                         if($level) { $prevlevel=$level;}
1203                         $level=$matches[1][$c];
1204                         if(($nh||$st) && $prevlevel && $level>$prevlevel) {
1205
1206                                 $h[$level]=0; // reset when we enter a new level
1207                                 $toc.=$sk->tocIndent($level-$prevlevel);
1208                                 $toclevel+=$level-$prevlevel;
1209
1210                         }
1211                         if(($nh||$st) && $level<$prevlevel) {
1212                                 $h[$level+1]=0; // reset when we step back a level
1213                                 $toc.=$sk->tocUnindent($prevlevel-$level);
1214                                 $toclevel-=$prevlevel-$level;
1215
1216                         }
1217                         $h[$level]++; // count number of headlines for each level
1218
1219                         if($nh||$st) {
1220                                 for($i=1;$i<=$level;$i++) {
1221                                         if($h[$i]) {
1222                                                 if($dot) {$numbering.=".";}
1223                                                 $numbering.=$h[$i];
1224                                                 $dot=1;
1225                                         }
1226                                 }
1227                         }
1228
1229                         // The canonized header is a version of the header text safe to use for links
1230                         // Avoid insertion of weird stuff like <math> by expanding the relevant sections
1231                         $canonized_headline=Parser::unstrip( $headline, $this->mStripState );
1232                         $canonized_headline=preg_replace("/<.*?>/","",$canonized_headline); // strip out HTML
1233                         $tocline = trim( $canonized_headline );
1234                         $canonized_headline=str_replace('"',"",$canonized_headline);
1235                         $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1236                         $refer[$c]=$canonized_headline;
1237                         $refers[$canonized_headline]++;  // count how many in assoc. array so we can track dupes in anchors
1238                         $refcount[$c]=$refers[$canonized_headline];
1239
1240             // Prepend the number to the heading text
1241
1242                         if($nh||$st) {
1243                                 $tocline=$numbering ." ". $tocline;
1244
1245                                 // Don't number the heading if it is the only one (looks silly)
1246                                 if($nh && count($matches[3]) > 1) {
1247                                         $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1248                                 }
1249                         }
1250
1251                         // Create the anchor for linking from the TOC to the section
1252
1253                         $anchor=$canonized_headline;
1254                         if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1255                         if($st) {
1256                                 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
1257                         }
1258                         if($es) {
1259                                 $head[$c].=$sk->editSectionLink($c+1);
1260                         }
1261
1262                         // Put it all together
1263
1264                         $head[$c].="<h".$level.$matches[2][$c]
1265                          ."<a name=\"".$anchor."\">"
1266                          .$headline
1267                          ."</a>"
1268                          ."</h".$level.">";
1269
1270                         // Add the edit section link
1271
1272                         if($esr) {
1273                                 $head[$c]=$sk->editSectionScript($c+1,$head[$c]);
1274                         }
1275
1276                         $numbering="";
1277                         $c++;
1278                         $dot=0;
1279                 }
1280
1281                 if($st) {
1282                         $toclines=$c;
1283                         $toc.=$sk->tocUnindent($toclevel);
1284                         $toc=$sk->tocTable($toc);
1285                 }
1286
1287                 // split up and insert constructed headlines
1288
1289                 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1290                 $i=0;
1291
1292                 foreach($blocks as $block) {
1293                         if(($es) && $c>0 && $i==0) {
1294                             # This is the [edit] link that appears for the top block of text when
1295                                 # section editing is enabled
1296                                 $full.=$sk->editSectionLink(0);
1297                         }
1298                         $full.=$block;
1299                         if($st && $toclines>3 && !$i) {
1300                                 # Let's add a top anchor just in case we want to link to the top of the page
1301                                 $full="<a name=\"top\"></a>".$full.$toc;
1302                         }
1303
1304                         $full.=$head[$i];
1305                         $i++;
1306                 }
1307
1308                 return $full;
1309         }
1310
# Convert the text that follows an "ISBN" magic word into a link to the
# Booksources special page.
#
# Tokens with an empty type are skipped. If the next token is a text token it
# is consumed: leading blanks are set aside, then the longest run of digits,
# dashes and upper-case ASCII letters is taken as the ISBN. When that run
# holds nothing but dashes (or is empty) no link is made and the consumed
# text is returned unchanged behind a plain "ISBN ".
#
# $tokenizer: object providing previewToken() and nextToken(); tokens are
#             arrays with at least the keys "type" and "text"
# Returns the HTML/text fragment to emit in place of the magic word.
#
/* private */ function doMagicISBN( &$tokenizer )
{
        # Characters accepted as part of an ISBN
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        # Skip empty-typed tokens. The is_array() guard stops the loop if the
        # tokenizer hands back a non-array value (presumably at end of input --
        # TODO confirm against Tokenizer), which would otherwise compare equal
        # to "" forever.
        $token = $tokenizer->previewToken();
        while ( is_array( $token ) && $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }

        if ( !is_array( $token ) || $token["type"] != "text" ) {
                # Nothing usable follows; leave the following token untouched
                return "ISBN ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];

        # Split off leading blanks, then the candidate ISBN. strspn() copes
        # with running into the end of the string, unlike a per-character
        # loop. The casts cover PHP versions where substr() returns false
        # at the end of the string.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        $isbnLen = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $isbnLen );
        $x = (string)substr( $x, $isbnLen );

        # The number passed to Booksources carries no dashes
        $num = str_replace( "-", "", $isbn );

        if ( $num === "" ) {
                # No digits or letters: put back everything that was consumed,
                # including a dash-only run, so no source text is lost.
                return "ISBN $blank$isbn$x";
        }

        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
        return "<a href=\"" .
                $titleObj->getUrl( "isbn={$num}", false, true ) .
                "\" class=\"internal\">ISBN $isbn</a>" .
                $x;
}
# Convert the text that follows an "RFC" magic word into an external link to
# that RFC.
#
# Tokens with an empty type are skipped. If the next token is a text token it
# is consumed: leading blanks are set aside and the following run of decimal
# digits is taken as the RFC number. The link target is the "rfcurl" message
# with "$1" replaced by the number. Without any digits the consumed text is
# returned unchanged behind a plain "RFC ".
#
# $tokenizer: object providing previewToken() and nextToken(); tokens are
#             arrays with at least the keys "type" and "text"
# Returns the HTML/text fragment to emit in place of the magic word.
#
/* private */ function doMagicRFC( &$tokenizer )
{
        # Characters accepted as part of an RFC number
        $valid = "0123456789";

        # Skip empty-typed tokens. The is_array() guard stops the loop if the
        # tokenizer hands back a non-array value (presumably at end of input --
        # TODO confirm against Tokenizer), which would otherwise compare equal
        # to "" forever.
        $token = $tokenizer->previewToken();
        while ( is_array( $token ) && $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }

        if ( !is_array( $token ) || $token["type"] != "text" ) {
                # Nothing usable follows; leave the following token untouched
                return "RFC ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];

        # Split off leading blanks, then the digits. strspn() copes with
        # running into the end of the string, unlike a per-character loop.
        # The casts cover PHP versions where substr() returns false at the
        # end of the string.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        $rfcLen = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $rfcLen );
        $x = (string)substr( $x, $rfcLen );

        if ( $rfc === "" ) {
                return "RFC $blank$x";
        }

        $url = str_replace( '$1', $rfc, wfMsg( "rfcurl" ) );
        $sk =& $this->mOptions->getSkin();
        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
        return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1399
# Transform wikitext before it is stored.
#
# The text is run through strip(), then pstPass2() (signatures, context
# links, SUBST variables, trailing whitespace), then unstrip(). A strip
# state local to this call is used; $this->mStripState is not touched here.
#
# $text:       wikitext to transform
# $title:      title of the page being saved, stored in $this->mTitle
# $user:       user performing the save, handed to pstPass2()
# $options:    options object, stored in $this->mOptions
# $clearState: when true, the parser state is reset via clearState()
# Returns the transformed wikitext.
#
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOptions = $options;
        $this->mTitle = $title;
        if ( $clearState ) {
                # Must be a method call; a bare "$this->clearState" is only a
                # property read and resets nothing.
                $this->clearState();
        }

        $stripState = false;
        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $text;
}
1414
# Second pass of the pre-save transform; preSaveTransform() calls it between
# strip() and unstrip().
#
# In order, it:
#   - expands ~~~~ to a user link plus timestamp and ~~~ to a user link
#   - completes context links such as [[page (context)|]], [[ns:page|]]
#     and [[|page]]
#   - substitutes {{SUBST:xxx}} variables through wfReplaceSubstVar
#   - trims trailing whitespace and removes the MAG_END magic word
#
# $text: wikitext to transform
# $user: saving user; getName() and getOption( "nickname" ) are read
# Returns the transformed wikitext.
#
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone;

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }

        /* Note: this is an ugly timezone hack for the European wikis */
        if ( isset( $wgLocaltimezone ) ) {
                $oldtz = getenv( "TZ" );
                putenv( "TZ=$wgLocaltimezone" );
        }
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if ( isset( $wgLocaltimezone ) ) {
                putenv( "TZ=$oldtz" );
        }

        # The tilde patterns are plain literals, so str_replace() is used.
        # With preg_replace() the user name and nickname would become part of
        # the replacement string, where sequences such as "$1" or "\\1" are
        # interpreted as backreferences and would corrupt the signature.
        # The four-tilde form has to be handled before the three-tilde form.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # The context is the parenthesised suffix of the current page title,
        # if it has one. It can only contain characters from $tc, none of
        # which is special in a preg replacement string.
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }

        # The most specific pattern goes first so that the shorter ones do
        # not match inside it.
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # {{SUBST:xxx}} variables
        #
        $mw =& MagicWord::get( MAG_SUBST );
        $text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1478
1479
1480 }
1481
# Result holder for a parse: the produced text plus the language links,
# category links and "contains old magic" flag that go with it.
#
# Each setter passes the member and the new value to wfSetVar() and returns
# whatever wfSetVar() returns (presumably the previous value -- confirm
# against its definition).
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor.
        #
        # $text:             output text
        # $languageLinks:    array of language links
        # $categoryLinks:    array of category links
        # $containsOldMagic: value of the "contains old magic" flag
        function __construct( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # PHP 4 style constructor, kept for engines that do not know
        # __construct() and for code that calls it by name. Engines that no
        # longer treat a class-named method as the constructor use
        # __construct() above, so the members are initialised either way.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->__construct( $text, $languageLinks, $categoryLinks, $containsOldMagic );
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
}
1504
# Bundle of settings that steer a parse. The values are filled in from site
# globals and from a user's preferences by initialiseFromUser(); a fresh
# object has all members unset until then.
#
# Each setter passes the member and the new value to wfSetVar() (wfSetRef()
# for the skin) and returns whatever that helper returns.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mPrintable;                 # Generate printable output
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getPrintable() { return $this->mPrintable; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Create a ParserOptions object initialised from the given user.
        #
        # $user: user object, or a false value to fall back to a new User
        # Returns the new ParserOptions object.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() declares its parameter by reference, so
                # no "&" is needed (or permitted by current PHP) at the call.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill in all members from the site configuration globals and from
        # the user's skin and preferences. mPrintable always starts as false.
        #
        # $userInput: user object; if it evaluates to false a new User
        #             object is used instead
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                } else {
                        $user =& $userInput;
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user settings
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mPrintable = false;
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }
}
1580
1581 # Regex callbacks, used in OutputPage::replaceVariables
1582
# Callback that expands a message variable to sanitised, link-expanded text.
# Message text can come from any user and replaceVariables runs after
# removeHTMLtags, so the dangerous HTML has to be removed here.
#
# $matches: regex match array; element 1 is the message name
# Returns the processed message text.
function wfReplaceMsgVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        $msgName = $matches[1];
        $cleaned = $wgCurOut->removeHTMLtags( wfMsg( $msgName ) );

        # Internal links are replaced while the link cache is suspended
        $wgLinkCache->suspend();
        $expanded = $wgCurOut->replaceInternalLinks( $cleaned );
        $wgLinkCache->resume();

        # Register the MediaWiki-namespace page behind the message itself
        $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $msgName ) );

        return $expanded;
}
1595
# Callback that expands a message variable with its wiki markup escaped,
# which has the effect of <nowiki></nowiki>. A real <nowiki> cannot be used
# because nowiki sections have already been processed by the time this runs.
#
# $matches: regex match array; element 1 is the message name
# Returns the escaped message text.
function wfReplaceMsgnwVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        $msgName = $matches[1];
        $escaped = wfEscapeWikiText( wfMsg( $msgName ) );

        # Register the MediaWiki-namespace page behind the message itself
        $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $msgName ) );

        return $escaped;
}
1604
1605
1606
1607 ?>