includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Converts wikitext to HTML.
   8 #
   9 # Globals used:
  10 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
  11 #
  12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  13 #
  14 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  15 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  16 #               $wgLocaltimezone
  17 #
  18 #      * only within ParserOptions
  19
  20 class Parser
  21 {
  22         # Cleared with clearState():
  23         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
  24
  25         # Temporary:
  26         var $mOptions, $mTitle;
  27
  # Constructor (old style: a method named after the class). A new
  # parser begins with freshly cleared per-parse state.
  function Parser()
  {
      $this->clearState();
  }
  32
  # Resets the members listed under "Cleared with clearState()":
  # mOutput becomes a new ParserOutput, mAutonumber (the counter used
  # for numbered external links) goes back to 0, mLastSection is
  # emptied, and mDTopen and mStripState are set to false.
  function clearState()
  {
      $this->mOutput = new ParserOutput();
      $this->mStripState = false;
      $this->mDTopen = false;
      $this->mLastSection = "";
      $this->mAutonumber = 0;
  }
  41
  # Main entry point: strip() takes out the <nowiki>, <pre> and (when
  # TeX is enabled) <math> sections, doWikiPass2() does all the real
  # work on what is left, and unstrip() puts the sections back.
  #
  # Returns a ParserOutput
  #
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
      # $text       - wikitext to convert
      # $title      - kept by reference in $this->mTitle for the helpers
      # $options    - kept in $this->mOptions; the helpers query it for
      #               settings and for the skin
      # $linestart  - handed through to doWikiPass2()
      # $clearState - when true, clearState() runs first, so the result
      #               is a fresh ParserOutput
      #
      # The returned object is $this->mOutput with its text set to the
      # converted markup.
      $fname = "Parser::parse";
      wfProfileIn( $fname );

      if ( $clearState ) {
          $this->clearState();
      }

      $this->mOptions = $options;
      $this->mTitle =& $title;

      # strip() fills $this->mStripState; unstrip() reads it back
      $text = $this->strip( $text, $this->mStripState, true );
      $text = $this->doWikiPass2( $text, $linestart );
      $text = $this->unstrip( $text, $this->mStripState );

      $this->mOutput->setText( $text );
      wfProfileOut( $fname );
      return $this->mOutput;
  }
  68
  # Returns a random string of lowercase hex digits: the hex forms of
  # two mt_rand() draws from 0..0x7fffffff, joined together. strip()
  # uses these strings as placeholder prefixes.
  /* static */ function getRandomString()
  {
      $max = 0x7fffffff;
      $first = dechex( mt_rand( 0, $max ) );
      $second = dechex( mt_rand( 0, $max ) );
      return $first . $second;
  }
  73
  74         # Strips <nowiki>, <pre> and <math>
  75         # Returns the text, and fills an array with data needed in unstrip()
  76         #
  function strip( $text, &$state, $render = true )
  {
      # $text   - wikitext to process
      # $state  - overwritten with the section lists, section counts and
      #           random placeholder prefixes that unstrip() reads
      # $render - true: store each section in its output form (passed
      #           through wfEscapeHTMLTagsOnly() or renderMath());
      #           false: store it verbatim, wrapped in its tag pair
      #
      # Every stripped section is replaced in the returned text by its
      # placeholder prefix followed by the section number (counted from
      # 1) as eight uppercase hex digits. A section whose closing tag is
      # missing runs to the end of the text.
      $state = array(
          'nwlist' => array(),
          'nwsecs' => 0,
          'nwunq' => Parser::getRandomString(),
          'mathlist' => array(),
          'mathsecs' => 0,
          'mathunq' => Parser::getRandomString(),
          'prelist' => array(),
          'presecs' => 0,
          'preunq' => Parser::getRandomString()
      );

      $stripped = "";
      $stripped2 = "";
      $stripped3 = "";

      # Replace any instances of the placeholders
      $text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
      $text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
      $text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );

      # Pass 1: <nowiki> ( $text -> $stripped )
      while ( "" != $text ) {
          $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
          $stripped .= $p[0];
          if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
              $text = "";
          } else {
              $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
              ++$state['nwsecs'];

              if ( $render ) {
                  $state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
              } else {
                  $state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
              }

              $stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
              # Without a closing tag there is no second piece
              $text = isset( $q[1] ) ? $q[1] : "";
          }
      }

      # Pass 2: <math>, only when TeX is enabled ( $stripped -> $stripped2 )
      if( $this->mOptions->getUseTeX() ) {
          while ( "" != $stripped ) {
              $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
              $stripped2 .= $p[0];
              if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                  $stripped = "";
              } else {
                  $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
                  ++$state['mathsecs'];

                  if ( $render ) {
                      $state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
                  } else {
                      $state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
                  }

                  $stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
                  $stripped = isset( $q[1] ) ? $q[1] : "";
              }
          }
      } else {
          $stripped2 = $stripped;
      }

      # Pass 3: <pre> ( $stripped2 -> $stripped3 )
      while ( "" != $stripped2 ) {
          $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
          $stripped3 .= $p[0];
          if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
              $stripped2 = "";
          } else {
              $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
              ++$state['presecs'];

              if ( $render ) {
                  $state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
              } else {
                  $state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
              }

              $stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
              $stripped2 = isset( $q[1] ) ? $q[1] : "";
          }
      }
      return $stripped3;
  }
 165
 # Puts the sections removed by strip() back into $text. $state is the
 # array strip() filled in. The kinds are restored as <pre>, then
 # <math>, then <nowiki> -- the reverse of the order in which strip()
 # removed them, so a placeholder sitting inside a restored section is
 # still replaced by a later round.
 function unstrip( $text, &$state )
 {
     foreach ( array( 'pre', 'math', 'nw' ) as $kind ) {
         $total = $state["{$kind}secs"];
         for ( $n = 1; $n <= $total; ++$n ) {
             $placeholder = $state["{$kind}unq"] . sprintf( "%08X", $n );
             $text = str_replace( $placeholder, $state["{$kind}list"][$n], $text );
         }
     }
     return $text;
 }
 181
 # Builds the HTML appended to a category page: a "subcategories"
 # section and a section listing the other pages returned by the
 # query below.
 #
 # Returns "" when category magic is switched off in the parser
 # options, or when the part of the title text before the first ":"
 # is not the localized "category" message.
 function categoryMagic ()
 {
     global $wgLang;
     if ( !$this->mOptions->getUseCategoryMagic() ) return "";
     $id = $this->mTitle->getArticleID();
     $cat = ucfirst( wfMsg( "category" ) );
     $ti = explode( ":", $this->mTitle->getText(), 2 );
     if ( $cat != $ti[0] ) return "";
     # A title that is just the category word has no second part
     $catName = isset( $ti[1] ) ? $ti[1] : "";
     $r = "<br clear=\"all\">\n";

     $articles = array();
     $children = array();

     # The skin comes from the parser options, as in the other methods
     # of this class, so that $wgUser stays out of the parser.
     $sk =& $this->mOptions->getSkin();

     # $id is interpolated as-is; presumably getArticleID() yields an
     # integer -- TODO confirm
     $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id";

     $res = wfQuery( $sql, DB_READ );
     while ( $row = wfFetchObject( $res ) ) {
         # Rebuild the prefixed title: "Namespace:Title", or just the
         # title when the namespace text is empty
         $t = $wgLang->getNsText( $row->cur_namespace );
         if ( $t != "" ) $t .= ":";
         $t .= $row->cur_title;

         # Pages whose own prefix is the category word are subcategories
         # and are shown without that prefix
         $y = explode( ":", $t, 2 );
         if ( count( $y ) == 2 && $y[0] == $cat ) {
             array_push( $children, $sk->makeLink( $t, $y[1] ) );
         } else {
             array_push( $articles, $sk->makeLink( $t ) );
         }
     }
     wfFreeResult( $res );

     # Children
     if ( count( $children ) > 0 ) {
         asort( $children );
         $r .= "<h2>" . wfMsg( "subcategories" ) . "</h2>\n";
         $r .= implode( ", ", $children );
     }

     # Articles
     if ( count( $articles ) > 0 ) {
         asort( $articles );
         $h = wfMsg( "category_header", $catName );
         $r .= "<h2>{$h}</h2>\n";
         $r .= implode( ", ", $articles );
     }

     return $r;
 }
 251
 # Returns the whitelist of attribute names (lowercase) that
 # fixTagAttributes() lets through. No scripting attributes are listed.
 function getHTMLattrs ()
 {
     $htmlattrs = array( # Allowed attributes--no scripting, etc.
         "title", "align", "lang", "dir", "width", "height",
         "bgcolor", "clear", /* BR */ "noshade", /* HR */
         "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
         /* FONT */ "type", "start", "value", "compact",
         /* For various lists, mostly deprecated but safe */
         "summary", "border", "frame", "rules",
         "cellspacing", "cellpadding", "valign", "char",
         "charoff", "colgroup", "col", "span", "abbr", "axis",
         "headers", "scope", "rowspan", "colspan", /* Tables */
         "id", "class", "name", "style" /* For CSS */
     );
     return $htmlattrs;
 }
 268
 # Filters the attribute part of an HTML tag.
 #
 # $t is the text between the tag name and the closing ">". Each
 # name or name=value pair is kept only if the lowercased name is in
 # getHTMLattrs(); kept pairs are rewritten as name=value without
 # spaces around "=". Text between the pairs is left as it is. If a
 # suspicious style attribute remains, the whole result is dropped.
 #
 # Returns the filtered, trimmed attribute string (possibly "").
 function fixTagAttributes ( $t )
 {
     if ( trim( $t ) == "" ) return ""; # Saves runtime ;-)
     $htmlattrs = $this->getHTMLattrs();

     # Strip non-approved attributes from the tag. The result is pieced
     # together by hand from the match offsets: attribute values come
     # from wikitext and must never be evaluated as PHP code.
     $pattern = "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/";
     preg_match_all( $pattern, $t, $found, PREG_SET_ORDER | PREG_OFFSET_CAPTURE );
     $clean = "";
     $pos = 0;
     foreach ( $found as $m ) {
         $start = $m[0][1];
         # Carry over whatever sits between two matches
         $clean .= substr( $t, $pos, $start - $pos );
         $pos = $start + strlen( $m[0][0] );

         $name = $m[1][0];
         if ( in_array( strtolower( $name ), $htmlattrs, true ) ) {
             $clean .= $name;
             # Group 3 exists only when a value was given
             if ( isset( $m[3] ) && $m[3][0] !== "" ) {
                 $clean .= "=" . $m[3][0];
             }
         }
     }
     $clean .= substr( $t, $pos );
     $t = $clean;

     # Strip javascript "expression" from stylesheets. Brute force approach:
     # If anything offensive is found (the word "expression", a
     # "tp://" or "tps://" URL, or "url("), all attributes of the HTML
     # tag are dropped
     if( preg_match(
         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
         wfMungeToUtf8( $t ) ) )
     {
         $t = "";
     }

     return trim( $t );
 }
 291
 # Converts wiki table markup to HTML, one input line at a time:
 #   {| attrs    opens a table          |}        closes it
 #   |- attrs    starts a new row       |+ text   caption
 #   | a || b    data cells             ! a !! b  header cells
 # A cell written as "attrs | text" has its attributes filtered by
 # fixTagAttributes(). Lines outside any table are left untouched, and
 # tables still open at the end of the text are closed.
 #
 # Takes and returns the whole text as one string.
 function doTableStuff ( $t )
 {
     $t = explode( "\n", $t );
     # Parallel stacks with one entry per currently open (nested) table
     $td = array();  # Is currently a td tag open?
     $ltd = array(); # Was it TD or TH?
     $tr = array();  # Is currently a tr tag open?
     $ltr = array(); # tr attributes
     foreach ( $t as $k => $x ) {
         $x = rtrim( $x );
         $fc = substr( $x, 0, 1 );
         if ( "{|" == substr( $x, 0, 2 ) ) {
             # NOTE(review): offset 3 also skips the character right
             # after "{|" (normally a space), so "{|border=1" loses
             # its "b" -- confirm this is intended.
             $t[$k] = "<table " . $this->fixTagAttributes( substr( $x, 3 ) ) . ">";
             array_push( $td, false );
             array_push( $ltd, "" );
             array_push( $tr, false );
             array_push( $ltr, "" );
         } else if ( count( $td ) == 0 ) {
             # Not inside a table: don't do any of the following
         } else if ( "|}" == substr( $x, 0, 2 ) ) {
             # Close the open cell and row, then the table itself
             $z = "</table>\n";
             $l = array_pop( $ltd );
             if ( array_pop( $tr ) ) $z = "</tr>" . $z;
             if ( array_pop( $td ) ) $z = "</{$l}>" . $z;
             array_pop( $ltr );
             $t[$k] = $z;
         } else if ( "|-" == substr( $x, 0, 2 ) ) { # Allows for |---------------
             $x = substr( $x, 1 );
             while ( $x != "" && substr( $x, 0, 1 ) == '-' ) $x = substr( $x, 1 );
             $z = "";
             $l = array_pop( $ltd );
             if ( array_pop( $tr ) ) $z = "</tr>" . $z;
             if ( array_pop( $td ) ) $z = "</{$l}>" . $z;
             array_pop( $ltr );
             $t[$k] = $z;
             # The <tr> itself is written by the first cell that follows;
             # only its attributes are remembered here
             array_push( $tr, false );
             array_push( $td, false );
             array_push( $ltd, "" );
             array_push( $ltr, $this->fixTagAttributes( $x ) );
         } else if ( "|" == $fc || "!" == $fc || "|+" == substr( $x, 0, 2 ) ) {
             if ( "|+" == substr( $x, 0, 2 ) ) { # Caption
                 $fc = "+";
                 $x = substr( $x, 1 );
             }
             $after = substr( $x, 1 );
             if ( $fc == "!" ) $after = str_replace( "!!", "||", $after );
             $after = explode( "||", $after );
             $t[$k] = "";
             foreach ( $after as $theline ) {
                 $z = "";
                 if ( $fc != "+" ) {
                     # Cells need a row; open one if none is open yet
                     $tra = array_pop( $ltr );
                     if ( !array_pop( $tr ) ) $z = "<tr {$tra}>\n";
                     array_push( $tr, true );
                     array_push( $ltr, "" );
                 }

                 # Close the previous cell before opening the next one
                 $l = array_pop( $ltd );
                 if ( array_pop( $td ) ) $z = "</{$l}>" . $z;
                 if ( $fc == "|" ) $l = "TD";
                 else if ( $fc == "!" ) $l = "TH";
                 else if ( $fc == "+" ) $l = "CAPTION";
                 else $l = "";
                 array_push( $ltd, $l );
                 $y = explode( "|", $theline, 2 );
                 if ( count( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}";
                 else $y = "{$z}<{$l} " . $this->fixTagAttributes( $y[0] ) . ">{$y[1]}";
                 $t[$k] .= $y;
                 array_push( $td, true );
             }
         }
     }

     # Closing open td, tr && table. The open cell may be a TH or a
     # CAPTION, so it is closed with the tag name it was opened with.
     while ( count( $td ) > 0 ) {
         $l = array_pop( $ltd );
         if ( array_pop( $td ) ) $t[] = "</{$l}>";
         if ( array_pop( $tr ) ) $t[] = "</tr>";
         array_pop( $ltr );
         $t[] = "</table>";
     }

     $t = implode( "\n", $t );
     return $t;
 }
 391
 392         # Well, OK, it's actually about 14 passes.  But since all the
 393         # hard lifting is done inside PHP's regex code, it probably
 394         # wouldn't speed things up much to add a real parser.
 395         #
 function doWikiPass2( $text, $linestart )
 {
     # Runs the conversion passes over $text in a fixed order and
     # returns the result. $linestart is handed to doBlockLevels().
     # The order matters: external links are replaced before internal
     # ones.
     $fname = "Parser::doWikiPass2";
     wfProfileIn( $fname );

     $text = $this->removeHTMLtags( $text );
     $text = $this->replaceVariables( $text );

     $text = str_replace( "<HR>", "<hr>", $text );

     $text = $this->doHeadings( $text );
     $text = $this->doBlockLevels( $text, $linestart );

     if ( $this->mOptions->getUseDynamicDates() ) {
         global $wgDateFormatter;
         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
     }

     $text = $this->replaceExternalLinks( $text );
     $text = $this->replaceInternalLinks( $text );
     $text = $this->doTableStuff( $text );

     # magicISBN() is currently switched off:
     #$text = $this->magicISBN( $text );
     $text = $this->magicRFC( $text );
     $text = $this->formatHeadings( $text );

     $sk =& $this->mOptions->getSkin();
     $text = $sk->transformContent( $text );
     $text .= $this->categoryMagic();

     wfProfileOut( $fname );
     return $text;
 }
 430
 431
 # Turns heading lines into <h1>..<h6>: a line starting with N "="
 # signs, then text without "=", then N "=" signs followed by
 # whitespace or the end of the line. Level 6 is tried first and
 # level 1 last, so longer runs of "=" win.
 /* private */ function doHeadings( $text )
 {
     $level = 6;
     while ( $level >= 1 ) {
         $marks = substr( "======", 0, $level );
         $pattern = "/^{$marks}([^=]+){$marks}(\\s|$)/m";
         $replacement = "<h{$level}>\\1</h{$level}>\\2";
         $text = preg_replace( $pattern, $replacement, $text );
         --$level;
     }
     return $text;
 }
 441
 442         # Note: we have to do external links before the internal ones,
 443         # and otherwise take great care in the order of things here, so
 444         # that we don't end up interpreting some URLs twice.
 445
 /* private */ function replaceExternalLinks( $text )
 {
     # Runs subReplaceExternalLinks() once per supported protocol, in
     # the order listed. The flag is that method's $autonumber
     # argument; it is set for http and https only.
     $fname = "Parser::replaceExternalLinks";
     wfProfileIn( $fname );
     $protocols = array(
         "http" => true,
         "https" => true,
         "ftp" => false,
         "irc" => false,
         "gopher" => false,
         "news" => false,
         "mailto" => false
     );
     foreach ( $protocols as $protocol => $autonumber ) {
         $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
     }
     wfProfileOut( $fname );
     return $text;
 }
 460
 # Replaces the external links of one protocol in $s.
 #
 # $protocol   - scheme name without the colon, e.g. "http"
 # $autonumber - true: a bracketed link without a label is shown as a
 #               running number ("[1]", "[2]", ...), and bare image
 #               URLs become images if the options allow external
 #               images; false: such a link shows its own URL
 #
 # Bare URLs are handled first, then "[url]" and "[url label]".
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
     # Stands in for the protocol name inside generated markup, so that
     # the free-link pattern cannot match the same URL a second time
     $marker = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
     $urlChars = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

     # Separators that belong to the URL when they occur inside it but
     # not when they are its last character: in "go to www.foo.com,
     # where .." the comma stays outside, in
     # "www.foo.com/123,2342,32.htm" the commas are part of the URL.
     $sepChars = ",;\.:";
     $fileChars = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
     $imageExts = "gif|png|jpg|jpeg";

     # The curly braces below are PHP string interpolation, not regex.
     $imageRe = "/(^|[^\\[])({$protocol}:)([{$urlChars}{$sepChars}]+)\\/([{$fileChars}]+)\\." .
         "((?i){$imageExts})([^{$urlChars}]|$)/";
     $freeLinkRe = "/(^|[^\\[])({$protocol}:)(([{$urlChars}]|[{$sepChars}][{$urlChars}])+)" .
         "([^{$urlChars}{$sepChars}]|[{$sepChars}]|$)/";
     $sk =& $this->mOptions->getSkin();

     # Use img tags only for HTTP urls
     if ( $autonumber && $this->mOptions->getAllowExternalImages() ) {
         $imageHtml = $sk->makeImage( "{$marker}:\\3/\\4.\\5", "\\4.\\5" );
         $s = preg_replace( $imageRe, "\\1" . $imageHtml . "\\6", $s );
     }

     $markedUrl = "{$marker}:\\3";
     $linkAttrs = $sk->getExternalLinkAttributes( $markedUrl, wfEscapeHTML( $markedUrl ) );
     $linkLabel = wfEscapeHTML( $markedUrl );
     $s = preg_replace( $freeLinkRe,
         "\\1<a href=\"{$markedUrl}\"{$linkAttrs}>{$linkLabel}</a>\\5", $s );
     $s = str_replace( $marker, $protocol, $s );

     # The leading space makes sure the first piece is never empty; it
     # is cut off again right away
     $pieces = explode( "[{$protocol}:", " " . $s );
     $s = substr( array_shift( $pieces ), 1 );

     $bareRe = "/^([{$urlChars}{$sepChars}]+)](.*)\$/sD";
     $labelledRe = "/^([{$urlChars}{$sepChars}]+)\\s+([^\\]]+)](.*)\$/sD";

     foreach ( $pieces as $piece ) {
         if ( preg_match( $bareRe, $piece, $m ) ) {
             # [url]
             $link = "{$protocol}:{$m[1]}";
             $trail = $m[2];
             $text = $autonumber
                 ? "[" . ++$this->mAutonumber . "]"
                 : wfEscapeHTML( $link );
         } else if ( preg_match( $labelledRe, $piece, $m ) ) {
             # [url label]
             $link = "{$protocol}:{$m[1]}";
             $text = $m[2];
             $trail = $m[3];
         } else {
             # Not a link after all: put the text back unchanged
             $s .= "[{$protocol}:" . $piece;
             continue;
         }

         $paren = "";
         if ( $this->mOptions->getPrintable() ) {
             $paren = " (<i>" . htmlspecialchars( $link ) . "</i>)";
         }
         $la = $sk->getExternalLinkAttributes( $link, $text );
         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
     }
     return $s;
 }
 523
 # Handles a ''' token: opens <strong> if none is open, otherwise
 # closes it. $state["strong"] holds the position of the opening token
 # while open and FALSE otherwise. Returns the HTML for the token.
 #
 # NOTE(review): an opening token at position 0 would store 0, which
 # the truth test below reads as "not open" -- confirm the tokenizer
 # never reports position 0.
 /* private */ function handle3Quotes( &$state, $token )
 {
     if ( !$state["strong"] ) {
         $state["strong"] = $token["pos"];
         return "<strong>";
     }

     # ''' lala ''lala ''' : the <em> was opened inside the <strong>,
     # so it is closed first and reopened after it
     $emIsInside = $state["em"] && $state["em"] > $state["strong"];
     $state["strong"] = FALSE;
     return $emIsInside ? "</em></strong><em>" : "</strong>";
 }
 541
 # Handles a '' token: opens <em> if none is open, otherwise closes it.
 # $state["em"] holds the position of the opening token while open and
 # FALSE otherwise. Returns the HTML for the token.
 /* private */ function handle2Quotes( &$state, $token )
 {
     if ( !$state["em"] ) {
         $state["em"] = $token["pos"];
         return "<em>";
     }

     # ''lala'''lala'' ....''' : the <strong> was opened inside the
     # <em>, so it is closed first and reopened after it
     $strongIsInside = $state["strong"] && $state["strong"] > $state["em"];
     $state["em"] = FALSE;
     return $strongIsInside ? "</strong></em><strong>" : "</em>";
 }
 559
 # Handles a ''''' token, which toggles <strong> and <em> together.
 # $state["em"] and $state["strong"] each hold the position of their
 # opening token while open and FALSE otherwise.
 #
 #   both open    -> close both, innermost (opened later) first
 #   only em open -> close em, open strong
 #   only strong  -> close strong, open em
 #   neither open -> open strong, then em
 #
 # Returns the HTML for the token.
 /* private */ function handle5Quotes( &$state, $token )
 {
     if ( $state["em"] && $state["strong"] ) {
         if ( $state["em"] < $state["strong"] ) {
             $s = "</strong></em>";
         } else {
             $s = "</em></strong>";
         }
         $state["strong"] = $state["em"] = FALSE;
     } elseif ( $state["em"] ) {
         $s = "</em><strong>";
         $state["em"] = FALSE;
         $state["strong"] = $token["pos"];
     } elseif ( $state["strong"] ) {
         $s = "</strong><em>";
         $state["strong"] = FALSE;
         $state["em"] = $token["pos"];
     } else { # not $em and not $strong
         $s = "<strong><em>";
         $state["strong"] = $state["em"] = $token["pos"];
     }
     return $s;
 }
 583
 584         /* private */ function replaceInternalLinks( $str )
 585         {
 586                 global $wgLang; # for language specific parser hook
 587
 588                 $tokenizer=Tokenizer::newFromString( $str );
 589                 $tokenStack = array();
 590
 591                 $s="";
 592                 $state["em"]      = FALSE;
 593                 $state["strong"]  = FALSE;
 594                 $tagIsOpen = FALSE;
 595
 596                 # The tokenizer splits the text into tokens and returns them one by one.
 597                 # Every call to the tokenizer returns a new token.
 598                 while ( $token = $tokenizer->nextToken() )
 599                 {
 600                         switch ( $token["type"] )
 601                         {
 602                                 case "text":
 603                                         # simple text with no further markup
 604                                         $txt = $token["text"];
 605                                         break;
 606                                 case "[[":
 607                                         # link opening tag.
 608                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 609                                         $tagIsOpen = TRUE;
 610                                         array_push( $tokenStack, $token );
 611                                         $txt="";
 612                                         break;
 613                                 case "]]":
 614                                         # link close tag.
 615                                         # get text from stack, glue it together, and call the code to handle a
 616                                         # link
 617                                         if ( count( $tokenStack ) == 0 )
 618                                         {
 619                                                 # stack empty. Found a ]] without an opening [[
 620                                                 $txt = "]]";
 621                                         } else {
 622                                                 $linkText = "";
 623                                                 $lastToken = array_pop( $tokenStack );
 624                                                 while ( $lastToken["type"] != "[[" )
 625                                                 {
 626                                                         $linkText = $lastToken["text"] . $linkText;
 627                                                         $lastToken = array_pop( $tokenStack );
 628                                                 }
 629                                                 $txt = $linkText ."]]";
 630                                                 $prefix = $lastToken["text"];
 631                                                 $nextToken = $tokenizer->previewToken();
 632                                                 if ( $nextToken["type"] == "text" )
 633                                                 {
 634                                                         # Preview just looks at it. Now we have to fetch it.
 635                                                         $nextToken = $tokenizer->nextToken();
 636                                                         $txt .= $nextToken["text"];
 637                                                 }
 638                                                 $txt = $this->handleInternalLink( $txt, $prefix );
 639                                         }
 640                                         $tagIsOpen = (count( $tokenStack ) != 0);
 641                                         break;
 642                                 case "----":
 643                                         $txt = "\n<hr>\n";
 644                                         break;
 645                                 case "'''":
 646                                         # This and the three next ones handle quotes
 647                                         $txt = $this->handle3Quotes( $state, $token );
 648                                         break;
 649                                 case "''":
 650                                         $txt = $this->handle2Quotes( $state, $token );
 651                                         break;
 652                                 case "'''''":
 653                                         $txt = $this->handle5Quotes( $state, $token );
 654                                         break;
 655                                 case "":
 656                                         # empty token
 657                                         $txt="";
 658                                         break;
 659                                 case "ISBN ":
 660                                         $txt = $this->doMagicISBN( $tokenizer );
 661                                         break;
 662                                 default:
 663                                         # Call language specific Hook.
 664                                         $txt = $wgLang->processToken( $token, $tokenStack );
 665                                         if ( NULL == $txt ) {
                                                # An unknown token. Highlight.
 667                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 668                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 669                                         }
 670                                         break;
 671                         }
 672                         # If we're parsing the interior of a link, don't append the interior to $s,
 673                         # but push it to the stack so it can be processed when a ]] token is found.
 674                         if ( $tagIsOpen  && $txt != "" ) {
 675                                 $token["type"] = "text";
 676                                 $token["text"] = $txt;
 677                                 array_push( $tokenStack, $token );
 678                         } else {
 679                                 $s .= $txt;
 680                         }
 681                 } #end while
 682                 if ( count( $tokenStack ) != 0 )
 683                 {
 684                         # still objects on stack. opened [[ tag without closing ]] tag.
 685                         $txt = "";
 686                         while ( $lastToken = array_pop( $tokenStack ) )
 687                         {
 688                                 if ( $lastToken["type"] == "text" )
 689                                 {
 690                                         $txt = $lastToken["text"] . $txt;
 691                                 } else {
 692                                         $txt = $lastToken["type"] . $txt;
 693                                 }
 694                         }
 695                         $s .= $txt;
 696                 }
 697                 return $s;
 698         }
 699
 700         /* private */ function handleInternalLink( $line, $prefix )
 701         {
 702                 global $wgLang, $wgLinkCache;
 703                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 704                 static $fname = "OutputPage::replaceInternalLinks" ;
 705                 wfProfileIn( $fname );
 706
 707                 wfProfileIn( "$fname-setup" );
 708                 static $tc = FALSE;
 709                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 710                 $sk =& $this->mOptions->getSkin();
 711
 712                 # Match a link having the form [[namespace:link|alternate]]trail
 713                 static $e1 = FALSE;
 714                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 715                 # Match the end of a line for a word that's not followed by whitespace,
 716                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 717                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 718                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 719                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 720
 721
 722                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 723                 static $image = FALSE;
 724                 static $special = FALSE;
 725                 static $media = FALSE;
 726                 static $category = FALSE;
 727                 if ( !$image ) { $image = Namespace::getImage(); }
 728                 if ( !$special ) { $special = Namespace::getSpecial(); }
 729                 if ( !$media ) { $media = Namespace::getMedia(); }
 730                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 731
 732                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 733
 734                 wfProfileOut( "$fname-setup" );
 735
 736                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 737                         $text = $m[2];
 738                         $trail = $m[3];
 739                 } else { # Invalid form; output directly
 740                         $s .= $prefix . "[[" . $line ;
 741                         return $s;
 742                 }
 743
 744                 /* Valid link forms:
 745                 Foobar -- normal
 746                 :Foobar -- override special treatment of prefix (images, language links)
 747                 /Foobar -- convert to CurrentPage/Foobar
 748                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 749                 */
 750                 $c = substr($m[1],0,1);
 751                 $noforce = ($c != ":");
 752                 if( $c == "/" ) { # subpage
 753                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 754                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 755                                 $noslash=$m[1];
 756                         } else {
 757                                 $noslash=substr($m[1],1);
 758                         }
 759                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 760                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 761                                 if( "" == $text ) {
 762                                         $text= $m[1];
 763                                 } # this might be changed for ugliness reasons
 764                         } else {
 765                                 $link = $noslash; # no subpage allowed, use standard link
 766                         }
 767                 } elseif( $noforce ) { # no subpage
 768                         $link = $m[1];
 769                 } else {
 770                         $link = substr( $m[1], 1 );
 771                 }
 772                 if( "" == $text )
 773                         $text = $link;
 774
 775                 $nt = Title::newFromText( $link );
 776                 if( !$nt ) {
 777                         $s .= $prefix . "[[" . $line;
 778                         return $s;
 779                 }
 780                 $ns = $nt->getNamespace();
 781                 $iw = $nt->getInterWiki();
 782                 if( $noforce ) {
 783                         if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 784                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 785                                 $s .= $prefix . $trail;
 786                                 return $s;
 787                         }
 788                         if( $ns == $image ) {
 789                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 790                                 $wgLinkCache->addImageLinkObj( $nt );
 791                                 return $s;
 792                         }
 793                 }
 794                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 795                     ( strpos( $link, "#" ) == FALSE ) ) {
 796                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 797                         return $s;
 798                 }
 799                 if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
 800                         $t = explode ( ":" , $nt->getText() ) ;
 801                         array_shift ( $t ) ;
 802                         $t = implode ( ":" , $t ) ;
 803                         $t = $wgLang->ucFirst ( $t ) ;
 804 #                       $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
 805                         $nnt = Title::newFromText ( $category.":".$t ) ;
 806                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 807                         $this->mCategoryLinks[] = $t ;
 808                         $s .= $prefix . $trail ;
 809                         return $s ;
 810                 }
 811                 if( $ns == $media ) {
 812                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 813                         $wgLinkCache->addImageLinkObj( $nt );
 814                         return $s;
 815                 } elseif( $ns == $special ) {
 816                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 817                         return $s;
 818                 }
 819                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 820
 821                 wfProfileOut( $fname );
 822                 return $s;
 823         }
 824
 825         # Some functions here used by doBlockLevels()
 826         #
 827         /* private */ function closeParagraph()
 828         {
 829                 $result = "";
 830                 if ( 0 != strcmp( "p", $this->mLastSection ) &&
 831                   0 != strcmp( "", $this->mLastSection ) ) {
 832                         $result = "</" . $this->mLastSection  . ">";
 833                 }
 834                 $this->mLastSection = "";
 835                 return $result."\n";
 836         }
        # getCommon() returns the length of the longest common prefix of
        # both arguments, i.e. the number of leading characters they share.
 839         #
 840         /* private */ function getCommon( $st1, $st2 )
 841         {
 842                 $fl = strlen( $st1 );
 843                 $shorter = strlen( $st2 );
 844                 if ( $fl < $shorter ) { $shorter = $fl; }
 845
 846                 for ( $i = 0; $i < $shorter; ++$i ) {
 847                         if ( $st1{$i} != $st2{$i} ) { break; }
 848                 }
 849                 return $i;
 850         }
 851         # These next three functions open, continue, and close the list
 852         # element appropriate to the prefix character passed into them.
 853         #
 854         /* private */ function openList( $char )
 855     {
 856                 $result = $this->closeParagraph();
 857
 858                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 859                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 860                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 861                 else if ( ";" == $char ) {
 862                         $result .= "<dl><dt>";
 863                         $this->mDTopen = true;
 864                 }
 865                 else { $result = "<!-- ERR 1 -->"; }
 866
 867                 return $result;
 868         }
 869
 870         /* private */ function nextItem( $char )
 871         {
 872                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 873                 else if ( ":" == $char || ";" == $char ) {
 874                         $close = "</dd>";
 875                         if ( $this->mDTopen ) { $close = "</dt>"; }
 876                         if ( ";" == $char ) {
 877                                 $this->mDTopen = true;
 878                                 return $close . "<dt>";
 879                         } else {
 880                                 $this->mDTopen = false;
 881                                 return $close . "<dd>";
 882                         }
 883                 }
 884                 return "<!-- ERR 2 -->";
 885         }
 886
 887         /* private */function closeList( $char )
 888         {
 889                 if ( "*" == $char ) { $text = "</li></ul>"; }
 890                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 891                 else if ( ":" == $char ) {
 892                         if ( $this->mDTopen ) {
 893                                 $this->mDTopen = false;
 894                                 $text = "</dt></dl>";
 895                         } else {
 896                                 $text = "</dd></dl>";
 897                         }
 898                 }
 899                 else {  return "<!-- ERR 3 -->"; }
 900                 return $text."\n";
 901         }
 902
 903         /* private */ function doBlockLevels( $text, $linestart )
 904         {
 905                 $fname = "OutputPage::doBlockLevels";
 906                 wfProfileIn( $fname );
 907                 # Parsing through the text line by line.  The main thing
 908                 # happening here is handling of block-level elements p, pre,
 909                 # and making lists from lines starting with * # : etc.
 910                 #
 911                 $a = explode( "\n", $text );
 912                 $text = $lastPref = "";
 913                 $this->mDTopen = $inBlockElem = false;
 914
 915                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 916                 foreach ( $a as $t ) {
 917                         if ( "" != $text ) { $text .= "\n"; }
 918
 919                         $oLine = $t;
 920                         $opl = strlen( $lastPref );
 921                         $npl = strspn( $t, "*#:;" );
 922                         $pref = substr( $t, 0, $npl );
 923                         $pref2 = str_replace( ";", ":", $pref );
 924                         $t = substr( $t, $npl );
 925
 926                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 927                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 928
 929                                 if ( ";" == substr( $pref, -1 ) ) {
 930                                         $cpos = strpos( $t, ":" );
 931                                         if ( ! ( false === $cpos ) ) {
 932                                                 $term = substr( $t, 0, $cpos );
 933                                                 $text .= $term . $this->nextItem( ":" );
 934                                                 $t = substr( $t, $cpos + 1 );
 935                                         }
 936                                 }
 937                         } else if (0 != $npl || 0 != $opl) {
 938                                 $cpl = $this->getCommon( $pref, $lastPref );
 939
 940                                 while ( $cpl < $opl ) {
 941                                         $text .= $this->closeList( $lastPref{$opl-1} );
 942                                         --$opl;
 943                                 }
 944                                 if ( $npl <= $cpl && $cpl > 0 ) {
 945                                         $text .= $this->nextItem( $pref{$cpl-1} );
 946                                 }
 947                                 while ( $npl > $cpl ) {
 948                                         $char = substr( $pref, $cpl, 1 );
 949                                         $text .= $this->openList( $char );
 950
 951                                         if ( ";" == $char ) {
 952                                                 $cpos = strpos( $t, ":" );
 953                                                 if ( ! ( false === $cpos ) ) {
 954                                                         $term = substr( $t, 0, $cpos );
 955                                                         $text .= $term . $this->nextItem( ":" );
 956                                                         $t = substr( $t, $cpos + 1 );
 957                                                 }
 958                                         }
 959                                         ++$cpl;
 960                                 }
 961                                 $lastPref = $pref2;
 962                         }
 963                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
 964                                 if ( preg_match(
 965                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
 966                                         $text .= $this->closeParagraph();
 967                                         $inBlockElem = true;
 968                                 }
 969                                 if ( ! $inBlockElem ) {
 970                                         if ( " " == $t{0} ) {
 971                                                 $newSection = "pre";
 972                                                 # $t = wfEscapeHTML( $t );
 973                                         }
 974                                         else { $newSection = "p"; }
 975
 976                                         if ( 0 == strcmp( "", trim( $oLine ) ) ) {
 977                                                 $text .= $this->closeParagraph();
 978                                                 $text .= "<" . $newSection . ">";
 979                                         } else if ( 0 != strcmp( $this->mLastSection,
 980                                           $newSection ) ) {
 981                                                 $text .= $this->closeParagraph();
 982                                                 if ( 0 != strcmp( "p", $newSection ) ) {
 983                                                         $text .= "<" . $newSection . ">";
 984                                                 }
 985                                         }
 986                                         $this->mLastSection = $newSection;
 987                                 }
 988                                 if ( $inBlockElem &&
 989                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
 990                                         $inBlockElem = false;
 991                                 }
 992                         }
 993                         $text .= $t;
 994                 }
 995                 while ( $npl ) {
 996                         $text .= $this->closeList( $pref2{$npl-1} );
 997                         --$npl;
 998                 }
 999                 if ( "" != $this->mLastSection ) {
1000                         if ( "p" != $this->mLastSection ) {
1001                                 $text .= "</" . $this->mLastSection . ">";
1002                         }
1003                         $this->mLastSection = "";
1004                 }
1005                 wfProfileOut( $fname );
1006                 return $text;
1007         }
1008
1009         /* private */ function replaceVariables( $text )
1010         {
1011                 global $wgLang, $wgCurOut;
1012                 $fname = "OutputPage::replaceVariables";
1013                 wfProfileIn( $fname );
1014
1015                 $magic = array();
1016
1017                 # Basic variables
1018                 # See Language.php for the definition of each magic word
1019                 # As with sigs, this uses the server's local time -- ensure
1020                 # this is appropriate for your audience!
1021
1022                 $magic[MAG_CURRENTMONTH] = date( "m" );
1023                 $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
1024                 $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
1025                 $magic[MAG_CURRENTDAY] = date("j");
1026                 $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
1027                 $magic[MAG_CURRENTYEAR] = date( "Y" );
1028                 $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
1029
1030                 $this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
1031
1032                 $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
1033                 if ( $mw->match( $text ) ) {
1034                         $v = wfNumberOfArticles();
1035                         $text = $mw->replace( $v, $text );
1036                         if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
1037                 }
1038
1039                 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
1040                 # The callbacks are at the bottom of this file
1041                 $wgCurOut = $this;
1042                 $mw =& MagicWord::get( MAG_MSG );
1043                 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
1044                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1045
1046                 $mw =& MagicWord::get( MAG_MSGNW );
1047                 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
1048                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1049
1050                 wfProfileOut( $fname );
1051                 return $text;
1052         }
1053
1054         # Cleans up HTML, removes dangerous tags and attributes
1055         /* private */ function removeHTMLtags( $text )
1056         {
1057                 $fname = "OutputPage::removeHTMLtags";
1058                 wfProfileIn( $fname );
1059                 $htmlpairs = array( # Tags that must be closed
1060                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1061                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1062                         "strike", "strong", "tt", "var", "div", "center",
1063                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1064                         "ruby", "rt" , "rb" , "rp"
1065                 );
1066                 $htmlsingle = array(
1067                         "br", "p", "hr", "li", "dt", "dd"
1068                 );
1069                 $htmlnest = array( # Tags that can be nested--??
1070                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1071                         "dl", "font", "big", "small", "sub", "sup"
1072                 );
1073                 $tabletags = array( # Can only appear inside table
1074                         "td", "th", "tr"
1075                 );
1076
1077                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1078                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1079
1080                 $htmlattrs = $this->getHTMLattrs () ;
1081
1082                 # Remove HTML comments
1083                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1084
1085                 $bits = explode( "<", $text );
1086                 $text = array_shift( $bits );
1087                 $tagstack = array(); $tablestack = array();
1088
1089                 foreach ( $bits as $x ) {
1090                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1091                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1092                           $x, $regs );
1093                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1094                         error_reporting( $prev );
1095
1096                         $badtag = 0 ;
1097                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1098                                 # Check our stack
1099                                 if ( $slash ) {
1100                                         # Closing a tag...
1101                                         if ( ! in_array( $t, $htmlsingle ) &&
1102                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1103                                                 array_push( $tagstack, $ot );
1104                                                 $badtag = 1;
1105                                         } else {
1106                                                 if ( $t == "table" ) {
1107                                                         $tagstack = array_pop( $tablestack );
1108                                                 }
1109                                                 $newparams = "";
1110                                         }
1111                                 } else {
1112                                         # Keep track for later
1113                                         if ( in_array( $t, $tabletags ) &&
1114                                           ! in_array( "table", $tagstack ) ) {
1115                                                 $badtag = 1;
1116                                         } else if ( in_array( $t, $tagstack ) &&
1117                                           ! in_array ( $t , $htmlnest ) ) {
1118                                                 $badtag = 1 ;
1119                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1120                                                 if ( $t == "table" ) {
1121                                                         array_push( $tablestack, $tagstack );
1122                                                         $tagstack = array();
1123                                                 }
1124                                                 array_push( $tagstack, $t );
1125                                         }
1126                                         # Strip non-approved attributes from the tag
1127                                         $newparams = $this->fixTagAttributes($params);
1128
1129                                 }
1130                                 if ( ! $badtag ) {
1131                                         $rest = str_replace( ">", "&gt;", $rest );
1132                                         $text .= "<$slash$t $newparams$brace$rest";
1133                                         continue;
1134                                 }
1135                         }
1136                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1137                 }
1138                 # Close off any remaining tags
1139                 while ( $t = array_pop( $tagstack ) ) {
1140                         $text .= "</$t>\n";
1141                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1142                 }
1143                 wfProfileOut( $fname );
1144                 return $text;
1145         }
1146
1147 /*
1148  *
1149  * This function accomplishes several tasks:
1150  * 1) Auto-number headings if that option is enabled
1151  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1152  * 3) Add a Table of contents on the top for users who have enabled the option
1153  * 4) Auto-anchor headings
1154  *
1155  * It loops through all headlines, collects the necessary data, then splits up the
1156  * string and re-inserts the newly formatted headlines.
1157  *
1158  * */
1159         /* private */ function formatHeadings( $text )
1160         {
1161                 $nh=$this->mOptions->getNumberHeadings();
1162                 $st=$this->mOptions->getShowToc();
1163                 if(!$this->mTitle->userCanEdit()) {
1164                         $es=0;
1165                         $esr=0;
1166                 } else {
1167                         $es=$this->mOptions->getEditSection();
1168                         $esr=$this->mOptions->getEditSectionOnRightClick();
1169                 }
1170
1171                 # Inhibit editsection links if requested in the page
1172                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1173                 if ($esw->matchAndRemove( $text )) {
1174                         $es=0;
1175                 }
1176                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1177                 # do not add TOC
1178                 $mw =& MagicWord::get( MAG_NOTOC );
1179                 if ($mw->matchAndRemove( $text ))
1180                 {
1181                         $st = 0;
1182                 }
1183
1184                 # never add the TOC to the Main Page. This is an entry page that should not
1185                 # be more than 1-2 screens large anyway
1186                 if($this->mTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1187
1188                 # We need this to perform operations on the HTML
1189                 $sk =& $this->mOptions->getSkin();
1190
1191                 # Get all headlines for numbering them and adding funky stuff like [edit]
1192                 # links
1193                 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1194
1195                 # headline counter
1196                 $c=0;
1197
1198                 # Ugh .. the TOC should have neat indentation levels which can be
1199                 # passed to the skin functions. These are determined here
1200                 foreach($matches[3] as $headline) {
1201                         if($level) { $prevlevel=$level;}
1202                         $level=$matches[1][$c];
1203                         if(($nh||$st) && $prevlevel && $level>$prevlevel) {
1204
1205                                 $h[$level]=0; // reset when we enter a new level
1206                                 $toc.=$sk->tocIndent($level-$prevlevel);
1207                                 $toclevel+=$level-$prevlevel;
1208
1209                         }
1210                         if(($nh||$st) && $level<$prevlevel) {
1211                                 $h[$level+1]=0; // reset when we step back a level
1212                                 $toc.=$sk->tocUnindent($prevlevel-$level);
1213                                 $toclevel-=$prevlevel-$level;
1214
1215                         }
1216                         $h[$level]++; // count number of headlines for each level
1217
1218                         if($nh||$st) {
1219                                 for($i=1;$i<=$level;$i++) {
1220                                         if($h[$i]) {
1221                                                 if($dot) {$numbering.=".";}
1222                                                 $numbering.=$h[$i];
1223                                                 $dot=1;
1224                                         }
1225                                 }
1226                         }
1227
1228                         // The canonized header is a version of the header text safe to use for links
1229                         // Avoid insertion of weird stuff like <math> by expanding the relevant sections
1230                         $canonized_headline=Parser::unstrip( $headline, $this->mStripState );
1231                         $canonized_headline=preg_replace("/<.*?>/","",$canonized_headline); // strip out HTML
1232                         $tocline = trim( $canonized_headline );
1233                         $canonized_headline=str_replace('"',"",$canonized_headline);
1234                         $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1235                         $refer[$c]=$canonized_headline;
1236                         $refers[$canonized_headline]++;  // count how many in assoc. array so we can track dupes in anchors
1237                         $refcount[$c]=$refers[$canonized_headline];
1238
1239             // Prepend the number to the heading text
1240
1241                         if($nh||$st) {
1242                                 $tocline=$numbering ." ". $tocline;
1243
1244                                 // Don't number the heading if it is the only one (looks silly)
1245                                 if($nh && count($matches[3]) > 1) {
1246                                         $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1247                                 }
1248                         }
1249
1250                         // Create the anchor for linking from the TOC to the section
1251
1252                         $anchor=$canonized_headline;
1253                         if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1254                         if($st) {
1255                                 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
1256                         }
1257                         if($es) {
1258                                 $head[$c].=$sk->editSectionLink($c+1);
1259                         }
1260
1261                         // Put it all together
1262
1263                         $head[$c].="<h".$level.$matches[2][$c]
1264                          ."<a name=\"".$anchor."\">"
1265                          .$headline
1266                          ."</a>"
1267                          ."</h".$level.">";
1268
1269                         // Add the edit section link
1270
1271                         if($esr) {
1272                                 $head[$c]=$sk->editSectionScript($c+1,$head[$c]);
1273                         }
1274
1275                         $numbering="";
1276                         $c++;
1277                         $dot=0;
1278                 }
1279
1280                 if($st) {
1281                         $toclines=$c;
1282                         $toc.=$sk->tocUnindent($toclevel);
1283                         $toc=$sk->tocTable($toc);
1284                 }
1285
1286                 // split up and insert constructed headlines
1287
1288                 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1289                 $i=0;
1290
1291                 foreach($blocks as $block) {
1292                         if(($es) && $c>0 && $i==0) {
1293                             # This is the [edit] link that appears for the top block of text when
1294                                 # section editing is enabled
1295                                 $full.=$sk->editSectionLink(0);
1296                         }
1297                         $full.=$block;
1298                         if($st && $toclines>3 && !$i) {
1299                                 # Let's add a top anchor just in case we want to link to the top of the page
1300                                 $full="<a name=\"top\"></a>".$full.$toc;
1301                         }
1302
1303                         $full.=$head[$i];
1304                         $i++;
1305                 }
1306
1307                 return $full;
1308         }
1309
# Convert the text following an "ISBN" magic word into a link to the
# Booksources special page.
#
# Tokens whose type is the empty string are consumed and discarded first.
# If the next token is then a text token it is consumed too, and its text is
# split into leading spaces, the longest following run of characters from
# 0-9, A-Z and "-", and the remainder. When the run contains at least one
# character other than "-", an "ISBN run" link is returned followed by the
# remainder (the leading spaces are replaced by the single space inside the
# link text). Otherwise the token text is returned unlinked behind an
# "ISBN " prefix. If the next token is not a text token it is left in the
# tokenizer and "ISBN " alone is returned.
#
# $tokenizer is expected to provide previewToken() and nextToken(), each
# yielding an array with "type" and "text" keys.
#
/* private */ function doMagicISBN( &$tokenizer )
{
        # Skip empty-typed tokens. The is_array() guard stops the scan if the
        # tokenizer hands back a non-array value -- presumably its end-of-input
        # marker (TODO confirm against Tokenizer) -- instead of spinning on it.
        $token = $tokenizer->previewToken();
        while ( is_array( $token ) && $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }

        if ( !is_array( $token ) || $token["type"] != "text" ) {
                return "ISBN ";
        }

        $token = $tokenizer->nextToken();
        $rest = (string)$token["text"];
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        # strspn() measures each prefix without indexing into the string, so
        # an exhausted string simply yields a zero-length run.
        $blankLen = strspn( $rest, " " );
        $blank = str_repeat( " ", $blankLen );
        $rest = (string)substr( $rest, $blankLen );

        $isbnLen = strspn( $rest, $valid );
        $isbn = (string)substr( $rest, 0, $isbnLen );
        $rest = (string)substr( $rest, $isbnLen );

        # Hyphens are only separators; drop them for the query string.
        $num = str_replace( "-", "", $isbn );

        if ( "" === $num ) {
                # Nothing usable as a number: hand the text back unlinked,
                # including any hyphens that were scanned so no input is lost.
                return "ISBN $blank$isbn$rest";
        }

        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
        return "<a href=\"" .
                $titleObj->getUrl( "isbn={$num}", false, true ) .
                "\" class=\"internal\">ISBN $isbn</a>" .
                $rest;
}
1355
# RFC magic-link hook. No transformation is implemented: the input text is
# returned exactly as it was received.
/* private */ function magicRFC( $text ) {
        return $text;
}
1360
# Transform wikitext before it is saved.
#
# Stores $options and $title on the parser, optionally resets the parser
# state, then runs the text through strip(), pstPass2() and unstrip() using
# a strip state that is local to this call.
#
#   $text        wikitext to transform
#   $title       title of the page being saved (read by pstPass2())
#   $user        user performing the save (passed on to pstPass2())
#   $options     parser options to install on the parser
#   $clearState  when true (the default), call clearState() first
#
# Returns the transformed wikitext.
#
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOptions = $options;
        $this->mTitle = $title;
        if ( $clearState ) {
                # This has to be a method call: a bare "$this->clearState"
                # only reads a non-existent property and resets nothing.
                $this->clearState();
        }

        # The strip state is kept local here rather than in $this->mStripState.
        $stripState = false;
        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $text;
}
1375
# Second pass of the pre-save transform. Applies, in order:
#
#   1. Signatures: "~~~~" becomes a link to the user's page followed by a
#      timestamp, "~~~" becomes the link alone. The link label is the
#      user's "nickname" option, or the user name when that is empty.
#   2. Context links: [[page (context)|]], [[namespace:page|]],
#      [[ns:page (cont)|]] and [[|page]] are expanded to full piped links.
#      For [[|page]] the context is taken from a trailing "(...)" in the
#      current title, if there is one.
#   3. {{SUBST:xxx}} variables, via the MAG_SUBST magic word and the
#      wfReplaceSubstVar callback.
#   4. Trailing whitespace is trimmed, then the MAG_END magic word is
#      matched and removed.
#
#   $text  wikitext to transform
#   $user  user whose name and nickname are used for signatures
#
# Returns the transformed wikitext.
#
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone;

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }

        /* Note: this is an ugly timezone hack for the European wikis */
        if ( isset( $wgLocaltimezone ) ) {
                $oldtz = getenv( "TZ" );
                putenv( "TZ=$wgLocaltimezone" );
        }
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if ( isset( $wgLocaltimezone ) ) {
                # NOTE(review): when TZ was not set beforehand getenv() gave
                # false and this writes "TZ=" rather than unsetting it -- confirm
                # whether that matters for the deployment.
                putenv( "TZ=$oldtz" );
        }

        # The patterns are literal, so plain string replacement is enough.
        # It also keeps "\1" / "$1"-style sequences in a user-chosen name or
        # nickname from being interpreted as regex backreferences, which is
        # what a preg_replace() replacement string would do.
        $sig = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~", "$sig $d", $text );
        $text = str_replace( "~~~", $sig, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # Context of the current page: the parenthesised suffix of its title.
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }

        # The most specific pattern is applied first.
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                # $context matched $tc above, which admits neither "\" nor "$",
                # so it is safe inside the replacement string.
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # {{SUBST:xxx}} variables
        #
        $mw =& MagicWord::get( MAG_SUBST );
        $text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1439
1440
1441 }
1442
# Value holder for what a parse produces: the output text, the language
# links, the category links and the "contains old magic" flag. Each field
# has a getter and a setter; the setters hand the work to wfSetVar() and
# return whatever it returns.
class ParserOutput
{
        var $mText;
        var $mLanguageLinks;
        var $mCategoryLinks;
        var $mContainsOldMagic;

        # Constructor. Every argument is optional; the defaults are empty
        # text, no language links, no category links and no old magic.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCategoryLinks = $categoryLinks;
                $this->mLanguageLinks = $languageLinks;
                $this->mText = $text;
        }

        # Getters
        function getText()
        {
                return $this->mText;
        }

        function getLanguageLinks()
        {
                return $this->mLanguageLinks;
        }

        function getCategoryLinks()
        {
                return $this->mCategoryLinks;
        }

        function containsOldMagic()
        {
                return $this->mContainsOldMagic;
        }

        # Setters
        function setText( $text )
        {
                return wfSetVar( $this->mText, $text );
        }

        function setLanguageLinks( $ll )
        {
                return wfSetVar( $this->mLanguageLinks, $ll );
        }

        function setCategoryLinks( $cl )
        {
                return wfSetVar( $this->mCategoryLinks, $cl );
        }

        function setContainsOldMagic( $com )
        {
                return wfSetVar( $this->mContainsOldMagic, $com );
        }
}
1465
# Per-parse settings. The values are copied from site-wide globals and from
# the preferences of a user by initialiseFromUser(); afterwards they are read
# and changed only through the accessors below.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mPrintable;                 # Generate printable output
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Getters: each returns the stored value unchanged.
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getPrintable() { return $this->mPrintable; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Setters: each delegates to wfSetVar() (wfSetRef() for the skin) and
        # returns that helper's result.
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Build a new ParserOptions initialised from $user.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() already declares its parameter by
                # reference, so no "&" is needed (or allowed, from PHP 5.4 on)
                # at the call site.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill every option from the site globals and from $userInput.
        #
        # The five $wg* globals are copied as they are. The skin, date format,
        # edit-section, right-click edit, heading-numbering and TOC settings
        # come from the user's getSkin()/getOption(). Printable output always
        # starts out false. If $userInput is empty, a fresh User object is
        # used in its place.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mPrintable = false;
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }
}
1541
1542 # Regex callbacks, used in OutputPage::replaceVariables
1543
# Regex callback that expands a message variable.
# $matches[1] is the message key. The message text has its dangerous HTML
# removed first: replaceVariables runs after removeHTMLtags, and message
# text can come from any user. Internal links in the result are replaced
# while the link cache is suspended; afterwards the message's own
# MediaWiki-namespace title is added to the link cache.
function wfReplaceMsgVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        $msgKey = $matches[1];
        $sanitized = $wgCurOut->removeHTMLtags( wfMsg( $msgKey ) );

        $wgLinkCache->suspend();
        $expanded = $wgCurOut->replaceInternalLinks( $sanitized );
        $wgLinkCache->resume();

        $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgKey );
        $wgLinkCache->addLinkObj( $msgTitle );

        return $expanded;
}
1556
# Regex callback that expands a message variable as literal text.
# Effective <nowiki></nowiki>: not real <nowiki>, because this is called
# after nowiki sections are processed, so the message text is passed through
# wfEscapeWikiText() instead. $matches[1] is the message key; its
# MediaWiki-namespace title is added to the link cache.
function wfReplaceMsgnwVar( $matches ) {
        # Only the link cache is needed here; $wgCurOut was declared but unused.
        global $wgLinkCache;
        $text = wfEscapeWikiText( wfMsg( $matches[1] ) );
        $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
        return $text;
}
1565
1566
1567
1568 ?>