includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Converts wikitext to HTML.
   8 #
   9 # Globals used:
  10 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
  11 #
  12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  13 #
  14 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  15 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  16 #               $wgLocaltimezone
  17 #
  18 #      * only within ParserOptions
  19
  20 class Parser
  21 {
  22         # Cleared with clearState():
  23         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
  24
  25         # Temporary:
  26         var $mOptions, $mTitle;
  27
  28         function Parser()
  29         {
  30                 $this->clearState();
  31         }
  32
  33         function clearState()
  34         {
  35                 $this->mOutput = new ParserOutput;
  36                 $this->mAutonumber = 0;
  37                 $this->mLastSection = "";
  38                 $this->mDTopen = false;
  39                 $this->mStripState = false;
  40         }
  41
  42         # First pass--just handle <nowiki> sections, pass the rest off
  43         # to doWikiPass2() which does all the real work.
  44         #
  45         # Returns a ParserOutput
  46         #
  47         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  48         {
  49                 $fname = "Parser::parse";
  50                 wfProfileIn( $fname );
  51
  52                 if ( $clearState ) {
  53                         $this->clearState();
  54                 }
  55
  56                 $this->mOptions = $options;
  57                 $this->mTitle =& $title;
  58
  59                 $stripState = NULL;
  60                 $text = $this->strip( $text, $this->mStripState, true );
  61                 $text = $this->doWikiPass2( $text, $linestart );
  62                 $text = $this->unstrip( $text, $this->mStripState );
  63
  64                 $this->mOutput->setText( $text );
  65                 wfProfileOut( $fname );
  66                 return $this->mOutput;
  67         }
  68
  69         /* static */ function getRandomString()
  70         {
  71                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
  72         }
  73
  74         # Strips <nowiki>, <pre> and <math>
  75         # Returns the text, and fills an array with data needed in unstrip()
  76         #
  77         function strip( $text, &$state, $render = true )
  78         {
  79                 $state = array(
  80                         'nwlist' => array(),
  81                         'nwsecs' => 0,
  82                         'nwunq' => Parser::getRandomString(),
  83                         'mathlist' => array(),
  84                         'mathsecs' => 0,
  85                         'mathunq' => Parser::getRandomString(),
  86                         'prelist' => array(),
  87                         'presecs' => 0,
  88                         'preunq' => Parser::getRandomString()
  89                 );
  90
  91                 $stripped = "";
  92                 $stripped2 = "";
  93                 $stripped3 = "";
  94
  95                 # Replace any instances of the placeholders
  96                 $text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
  97                 $text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
  98                 $text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );
  99
 100                 while ( "" != $text ) {
 101                         $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
 102                         $stripped .= $p[0];
 103                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 104                                 $text = "";
 105                         } else {
 106                                 $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
 107                                 ++$state['nwsecs'];
 108
 109                                 if ( $render ) {
 110                                         $state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
 111                                 } else {
 112                                         $state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
 113                                 }
 114
 115                                 $stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
 116                                 $text = $q[1];
 117                         }
 118                 }
 119
 120                 if( $this->mOptions->getUseTeX() ) {
 121                         while ( "" != $stripped ) {
 122                                 $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
 123                                 $stripped2 .= $p[0];
 124                                 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 125                                         $stripped = "";
 126                                 } else {
 127                                         $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
 128                                         ++$state['mathsecs'];
 129
 130                                         if ( $render ) {
 131                                                 $state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
 132                                         } else {
 133                                                 $state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
 134                                         }
 135
 136                                         $stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
 137                                         $stripped = $q[1];
 138                                 }
 139                         }
 140                 } else {
 141                         $stripped2 = $stripped;
 142                 }
 143
 144                 while ( "" != $stripped2 ) {
 145                         $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
 146                         $stripped3 .= $p[0];
 147                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 148                                 $stripped2 = "";
 149                         } else {
 150                                 $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
 151                                 ++$state['presecs'];
 152
 153                                 if ( $render ) {
 154                                         $state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
 155                                 } else {
 156                                         $state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
 157                                 }
 158
 159                                 $stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
 160                                 $stripped2 = $q[1];
 161                         }
 162                 }
 163                 return $stripped3;
 164         }
 165
 166         function unstrip( $text, &$state )
 167         {
 168                 for ( $i = 1; $i <= $state['presecs']; ++$i ) {
 169                         $text = str_replace( $state['preunq'] . sprintf("%08X", $i), $state['prelist'][$i], $text );
 170                 }
 171
 172                 for ( $i = 1; $i <= $state['mathsecs']; ++$i ) {
 173                         $text = str_replace( $state['mathunq'] . sprintf("%08X", $i), $state['mathlist'][$i], $text );
 174                 }
 175
 176                 for ( $i = 1; $i <= $state['nwsecs']; ++$i ) {
 177                         $text = str_replace( $state['nwunq'] . sprintf("%08X", $i), $state['nwlist'][$i], $text );
 178                 }
 179                 return $text;
 180         }
 181
 182         function categoryMagic ()
 183         {
 184                 global $wgLang , $wgUser ;
 185                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 186                 $id = $this->mTitle->getArticleID() ;
 187                 $cat = ucfirst ( wfMsg ( "category" ) ) ;
 188                 $ti = $this->mTitle->getText() ;
 189                 $ti = explode ( ":" , $ti , 2 ) ;
 190                 if ( $cat != $ti[0] ) return "" ;
 191                 $r = "<br break=all>\n" ;
 192
 193                 $articles = array() ;
 194                 $parents = array () ;
 195                 $children = array() ;
 196
 197
 198 #               $sk =& $this->mGetSkin();
 199                 $sk =& $wgUser->getSkin() ;
 200
 201                 $doesexist = false ;
 202                 if ( $doesexist ) {
 203                         $sql = "SELECT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 204                 } else {
 205                         $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 206                 }
 207
 208                 $res = wfQuery ( $sql, DB_READ ) ;
 209                 while ( $x = wfFetchObject ( $res ) )
 210                 {
 211                 #  $t = new Title ;
 212                 #  $t->newFromDBkey ( $x->l_from ) ;
 213                 #  $t = $t->getText() ;
 214                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 215                         if ( $t != "" ) $t .= ":" ;
 216                         $t .= $x->cur_title ;
 217
 218                         $y = explode ( ":" , $t , 2 ) ;
 219                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 220                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 221                         } else {
 222                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 223                         }
 224                 }
 225                 wfFreeResult ( $res ) ;
 226
 227                 # Children
 228                 if ( count ( $children ) > 0 )
 229                 {
 230                         asort ( $children ) ;
 231                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 232                         $r .= implode ( ", " , $children ) ;
 233                 }
 234
 235                 # Articles
 236                 if ( count ( $articles ) > 0 )
 237                 {
 238                         asort ( $articles ) ;
 239                         $h =  wfMsg( "category_header", $ti[1] );
 240                         $r .= "<h2>{$h}</h2>\n" ;
 241                         $r .= implode ( ", " , $articles ) ;
 242                 }
 243
 244
 245                 return $r ;
 246         }
 247
 248         function getHTMLattrs ()
 249         {
 250                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 251                                 "title", "align", "lang", "dir", "width", "height",
 252                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 253                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 254                                 /* FONT */ "type", "start", "value", "compact",
 255                                 /* For various lists, mostly deprecated but safe */
 256                                 "summary", "width", "border", "frame", "rules",
 257                                 "cellspacing", "cellpadding", "valign", "char",
 258                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 259                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 260                                 "id", "class", "name", "style" /* For CSS */
 261                                 );
 262                 return $htmlattrs ;
 263         }
 264
 265         function fixTagAttributes ( $t )
 266         {
 267                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 268                 $htmlattrs = $this->getHTMLattrs() ;
 269
 270                 # Strip non-approved attributes from the tag
 271                 $t = preg_replace(
 272                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 273                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 274                         $t);
 275                 # Strip javascript "expression" from stylesheets. Brute force approach:
 276                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 277
 278                 if( preg_match(
 279                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 280                         wfMungeToUtf8( $t ) ) )
 281                 {
 282                         $t="";
 283                 }
 284
 285                 return trim ( $t ) ;
 286         }
 287
 288         function doTableStuff ( $t )
 289         {
 290                 $t = explode ( "\n" , $t ) ;
 291                 $td = array () ; # Is currently a td tag open?
 292                         $ltd = array () ; # Was it TD or TH?
 293                         $tr = array () ; # Is currently a tr tag open?
 294                         $ltr = array () ; # tr attributes
 295                         foreach ( $t AS $k => $x )
 296                         {
 297                                 $x = rtrim ( $x ) ;
 298                                 $fc = substr ( $x , 0 , 1 ) ;
 299                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 300                                 {
 301                                         $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 302                                         array_push ( $td , false ) ;
 303                                         array_push ( $ltd , "" ) ;
 304                                         array_push ( $tr , false ) ;
 305                                         array_push ( $ltr , "" ) ;
 306                                 }
 307                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 308                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 309                                 {
 310                                         $z = "</table>\n" ;
 311                                         $l = array_pop ( $ltd ) ;
 312                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 313                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 314                                         array_pop ( $ltr ) ;
 315                                         $t[$k] = $z ;
 316                                 }
 317                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 318                                                 {
 319                                                 $z = trim ( substr ( $x , 2 ) ) ;
 320                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 321                                                 }*/
 322                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 323                                 {
 324                                         $x = substr ( $x , 1 ) ;
 325                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 326                                         $z = "" ;
 327                                         $l = array_pop ( $ltd ) ;
 328                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 329                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 330                                         array_pop ( $ltr ) ;
 331                                         $t[$k] = $z ;
 332                                         array_push ( $tr , false ) ;
 333                                         array_push ( $td , false ) ;
 334                                         array_push ( $ltd , "" ) ;
 335                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 336                                 }
 337                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 338                                 {
 339                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 340                                         {
 341                                                 $fc = "+" ;
 342                                                 $x = substr ( $x , 1 ) ;
 343                                         }
 344                                         $after = substr ( $x , 1 ) ;
 345                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 346                                         $after = explode ( "||" , $after ) ;
 347                                         $t[$k] = "" ;
 348                                         foreach ( $after AS $theline )
 349                                         {
 350                                                 $z = "" ;
 351                                                 if ( $fc != "+" )
 352                                                 {
 353                                                         $tra = array_pop ( $ltr ) ;
 354                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 355                                                         array_push ( $tr , true ) ;
 356                                                         array_push ( $ltr , "" ) ;
 357                                                 }
 358
 359                                                 $l = array_pop ( $ltd ) ;
 360                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 361                                                 if ( $fc == "|" ) $l = "TD" ;
 362                                                 else if ( $fc == "!" ) $l = "TH" ;
 363                                                 else if ( $fc == "+" ) $l = "CAPTION" ;
 364                                                 else $l = "" ;
 365                                                 array_push ( $ltd , $l ) ;
 366                                                 $y = explode ( "|" , $theline , 2 ) ;
 367                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 368                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 369                                                 $t[$k] .= $y ;
 370                                                 array_push ( $td , true ) ;
 371                                         }
 372                                 }
 373                         }
 374
 375                 # Closing open td, tr && table
 376                 while ( count ( $td ) > 0 )
 377                 {
 378                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 379                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 380                         $t[] = "</table>" ;
 381                 }
 382
 383                 $t = implode ( "\n" , $t ) ;
 384                 #               $t = $this->removeHTMLtags( $t );
 385                 return $t ;
 386         }
 387
 388         # Well, OK, it's actually about 14 passes.  But since all the
 389         # hard lifting is done inside PHP's regex code, it probably
 390         # wouldn't speed things up much to add a real parser.
 391         #
 392         function doWikiPass2( $text, $linestart )
 393         {
 394                 $fname = "OutputPage::doWikiPass2";
 395                 wfProfileIn( $fname );
 396
 397                 $text = $this->removeHTMLtags( $text );
 398                 $text = $this->replaceVariables( $text );
 399
 400                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 401                 $text = str_replace ( "<HR>", "<hr>", $text );
 402
 403                 $text = $this->doHeadings( $text );
 404                 $text = $this->doBlockLevels( $text, $linestart );
 405
 406                 if($this->mOptions->getUseDynamicDates()) {
 407                         global $wgDateFormatter;
 408                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 409                 }
 410
 411                 $text = $this->replaceExternalLinks( $text );
 412                 $text = $this->replaceInternalLinks ( $text );
 413                 $text = $this->doTableStuff ( $text ) ;
 414
 415                 $text = $this->formatHeadings( $text );
 416
 417                 $sk =& $this->mOptions->getSkin();
 418                 $text = $sk->transformContent( $text );
 419                 $text .= $this->categoryMagic () ;
 420
 421                 wfProfileOut( $fname );
 422                 return $text;
 423         }
 424
 425
 426         /* private */ function doHeadings( $text )
 427         {
 428                 for ( $i = 6; $i >= 1; --$i ) {
 429                         $h = substr( "======", 0, $i );
 430                         $text = preg_replace( "/^{$h}([^=]+){$h}(\\s|$)/m",
 431                           "<h{$i}>\\1</h{$i}>\\2", $text );
 432                 }
 433                 return $text;
 434         }
 435
 436         # Note: we have to do external links before the internal ones,
 437         # and otherwise take great care in the order of things here, so
 438         # that we don't end up interpreting some URLs twice.
 439
 440         /* private */ function replaceExternalLinks( $text )
 441         {
 442                 $fname = "OutputPage::replaceExternalLinks";
 443                 wfProfileIn( $fname );
 444                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 445                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 446                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 447                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 448                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 449                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 450                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 451                 wfProfileOut( $fname );
 452                 return $text;
 453         }
 454
 455         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 456         {
 457                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 458                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 459
 460                 # this is  the list of separators that should be ignored if they
 461                 # are the last character of an URL but that should be included
 462                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 463                 # in this case, the last comma should not become part of the URL,
 464                 # but in "www.foo.com/123,2342,32.htm" it should.
 465                 $sep = ",;\.:";
 466                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 467                 $images = "gif|png|jpg|jpeg";
 468
 469                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 470                 # they are interpreted as part of the string (used to tell PHP
 471                 # that the content of the string should be inserted there).
 472                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 473                   "((?i){$images})([^{$uc}]|$)/";
 474
 475                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 476                 $sk =& $this->mOptions->getSkin();
 477
 478                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 479                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 480                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 481                 }
 482                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 483                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 484                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 485                   "</a>\\5", $s );
 486                 $s = str_replace( $unique, $protocol, $s );
 487
 488                 $a = explode( "[{$protocol}:", " " . $s );
 489                 $s = array_shift( $a );
 490                 $s = substr( $s, 1 );
 491
 492                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 493                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 494
 495                 foreach ( $a as $line ) {
 496                         if ( preg_match( $e1, $line, $m ) ) {
 497                                 $link = "{$protocol}:{$m[1]}";
 498                                 $trail = $m[2];
 499                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 500                                 else { $text = wfEscapeHTML( $link ); }
 501                         } else if ( preg_match( $e2, $line, $m ) ) {
 502                                 $link = "{$protocol}:{$m[1]}";
 503                                 $text = $m[2];
 504                                 $trail = $m[3];
 505                         } else {
 506                                 $s .= "[{$protocol}:" . $line;
 507                                 continue;
 508                         }
 509                         if ( $this->mOptions->getPrintable() ) $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
 510                         else $paren = "";
 511                         $la = $sk->getExternalLinkAttributes( $link, $text );
 512                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 513
 514                 }
 515                 return $s;
 516         }
 517
 518         /* private */ function handle3Quotes( &$state, $token )
 519         {
 520                 if ( $state["strong"] ) {
 521                         if ( $state["em"] && $state["em"] > $state["strong"] )
 522                         {
 523                                 # ''' lala ''lala '''
 524                                 $s = "</em></strong><em>";
 525                         } else {
 526                                 $s = "</strong>";
 527                         }
 528                         $state["strong"] = FALSE;
 529                 } else {
 530                         $s = "<strong>";
 531                         $state["strong"] = $token["pos"];
 532                 }
 533                 return $s;
 534         }
 535
 536         /* private */ function handle2Quotes( &$state, $token )
 537         {
 538                 if ( $state["em"] ) {
 539                         if ( $state["strong"] && $state["strong"] > $state["em"] )
 540                         {
 541                                 # ''lala'''lala'' ....'''
 542                                 $s = "</strong></em><strong>";
 543                         } else {
 544                                 $s = "</em>";
 545                         }
 546                         $state["em"] = FALSE;
 547                 } else {
 548                         $s = "<em>";
 549                         $state["em"] = $token["pos"];
 550                 }
 551                 return $s;
 552         }
 553
 554         /* private */ function handle5Quotes( &$state, $token )
 555         {
 556                 if ( $state["em"] && $state["strong"] ) {
 557                         if ( $state["em"] < $state["strong"] ) {
 558                                 $s .= "</strong></em>";
 559                         } else {
 560                                 $s .= "</em></strong>";
 561                         }
 562                         $state["strong"] = $state["em"] = FALSE;
 563                 } elseif ( $state["em"] ) {
 564                         $s .= "</em><strong>";
 565                         $state["em"] = FALSE;
 566                         $state["strong"] = $token["pos"];
 567                 } elseif ( $state["strong"] ) {
 568                         $s .= "</strong><em>";
 569                         $state["strong"] = FALSE;
 570                         $state["em"] = $token["pos"];
 571                 } else { # not $em and not $strong
 572                         $s .= "<strong><em>";
 573                         $state["strong"] = $state["em"] = $token["pos"];
 574                 }
 575                 return $s;
 576         }
 577
 578         /* private */ function replaceInternalLinks( $str )
 579         {
 580                 global $wgLang; # for language specific parser hook
 581
 582                 $tokenizer=Tokenizer::newFromString( $str );
 583                 $tokenStack = array();
 584
 585                 $s="";
 586                 $state["em"]      = FALSE;
 587                 $state["strong"]  = FALSE;
 588                 $tagIsOpen = FALSE;
 589
 590                 # The tokenizer splits the text into tokens and returns them one by one.
 591                 # Every call to the tokenizer returns a new token.
 592                 while ( $token = $tokenizer->nextToken() )
 593                 {
 594                         switch ( $token["type"] )
 595                         {
 596                                 case "text":
 597                                         # simple text with no further markup
 598                                         $txt = $token["text"];
 599                                         break;
 600                                 case "[[":
 601                                         # link opening tag.
 602                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 603                                         $tagIsOpen = TRUE;
 604                                         array_push( $tokenStack, $token );
 605                                         $txt="";
 606                                         break;
 607                                 case "]]":
 608                                         # link close tag.
 609                                         # get text from stack, glue it together, and call the code to handle a
 610                                         # link
 611                                         if ( count( $tokenStack ) == 0 )
 612                                         {
 613                                                 # stack empty. Found a ]] without an opening [[
 614                                                 $txt = "]]";
 615                                         } else {
 616                                                 $linkText = "";
 617                                                 $lastToken = array_pop( $tokenStack );
 618                                                 while ( $lastToken["type"] != "[[" )
 619                                                 {
 620                                                         if( !empty( $lastToken["text"] ) ) {
 621                                                                 $linkText = $lastToken["text"] . $linkText;
 622                                                         }
 623                                                         $lastToken = array_pop( $tokenStack );
 624                                                 }
 625                                                 $txt = $linkText ."]]";
 626                                                 if( isset( $lastToken["text"] ) ) {
 627                                                         $prefix = $lastToken["text"];
 628                                                 } else {
 629                                                         $prefix = "";
 630                                                 }
 631                                                 $nextToken = $tokenizer->previewToken();
 632                                                 if ( $nextToken["type"] == "text" )
 633                                                 {
 634                                                         # Preview just looks at it. Now we have to fetch it.
 635                                                         $nextToken = $tokenizer->nextToken();
 636                                                         $txt .= $nextToken["text"];
 637                                                 }
 638                                                 $txt = $this->handleInternalLink( $txt, $prefix );
 639                                         }
 640                                         $tagIsOpen = (count( $tokenStack ) != 0);
 641                                         break;
 642                                 case "----":
 643                                         $txt = "\n<hr>\n";
 644                                         break;
 645                                 case "'''":
 646                                         # This and the three next ones handle quotes
 647                                         $txt = $this->handle3Quotes( $state, $token );
 648                                         break;
 649                                 case "''":
 650                                         $txt = $this->handle2Quotes( $state, $token );
 651                                         break;
 652                                 case "'''''":
 653                                         $txt = $this->handle5Quotes( $state, $token );
 654                                         break;
 655                                 case "":
 656                                         # empty token
 657                                         $txt="";
 658                                         break;
 659                                 case "RFC ":
 660                                         if ( $tagIsOpen ) {
 661                                                 $txt = "RFC ";
 662                                         } else {
 663                                                 $txt = $this->doMagicRFC( $tokenizer );
 664                                         }
 665                                         break;
 666                                 case "ISBN ":
 667                                         if ( $tagIsOpen ) {
 668                                                 $txt = "ISBN ";
 669                                         } else {
 670                                                 $txt = $this->doMagicISBN( $tokenizer );
 671                                         }
 672                                         break;
 673                                 default:
 674                                         # Call language specific Hook.
 675                                         $txt = $wgLang->processToken( $token, $tokenStack );
 676                                         if ( NULL == $txt ) {
 677                                                 # An unkown token. Highlight.
 678                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 679                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 680                                         }
 681                                         break;
 682                         }
 683                         # If we're parsing the interior of a link, don't append the interior to $s,
 684                         # but push it to the stack so it can be processed when a ]] token is found.
 685                         if ( $tagIsOpen  && $txt != "" ) {
 686                                 $token["type"] = "text";
 687                                 $token["text"] = $txt;
 688                                 array_push( $tokenStack, $token );
 689                         } else {
 690                                 $s .= $txt;
 691                         }
 692                 } #end while
 693                 if ( count( $tokenStack ) != 0 )
 694                 {
 695                         # still objects on stack. opened [[ tag without closing ]] tag.
 696                         $txt = "";
 697                         while ( $lastToken = array_pop( $tokenStack ) )
 698                         {
 699                                 if ( $lastToken["type"] == "text" )
 700                                 {
 701                                         $txt = $lastToken["text"] . $txt;
 702                                 } else {
 703                                         $txt = $lastToken["type"] . $txt;
 704                                 }
 705                         }
 706                         $s .= $txt;
 707                 }
 708                 return $s;
 709         }
 710
 # Render one internal link whose opening "[[" was already consumed by the
 # caller.
 #
 # $line   - everything after the "[[": the target, an optional
 #           "|alternate text", the closing "]]" and any text following it
 # $prefix - text carried by the opening "[[" token; it is emitted in front
 #           of the link or handed to the skin
 #
 # Returns the HTML for the link. When $line does not have link form, or the
 # target cannot be turned into a Title, the input is handed back unchanged
 # as $prefix . "[[" . $line.
 #
 # Side effects visible here: appends to $this->mOutput->mLanguageLinks and
 # to $this->mCategoryLinks, and registers image and media targets with
 # $wgLinkCache.
 /* private */ function handleInternalLink( $line, $prefix )
 {
         global $wgLang, $wgLinkCache;
         # $wgInterwikiMagic used to be read below without being imported, so
         # the interlanguage branch could never be taken.
         # NOTE(review): the file header says this setting belongs to
         # ParserOptions; switch to an options accessor if one is available.
         global $wgNamespacesWithSubpages, $wgInterwikiMagic;
         static $fname = "OutputPage::replaceInternalLinks" ;
         wfProfileIn( $fname );

         wfProfileIn( "$fname-setup" );
         static $tc = FALSE;
         if ( !$tc ) { $tc = Title::legalChars() . "#"; }
         $sk =& $this->mOptions->getSkin();

         # Match a link having the form [[namespace:link|alternate]]trail
         static $e1 = FALSE;
         if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }

         # Special and Media are pseudo-namespaces; no pages actually exist in them
         static $image = FALSE;
         static $special = FALSE;
         static $media = FALSE;
         static $category = FALSE;
         if ( !$image ) { $image = Namespace::getImage(); }
         if ( !$special ) { $special = Namespace::getSpecial(); }
         if ( !$media ) { $media = Namespace::getMedia(); }
         if ( !$category ) { $category = wfMsg ( "category" ) ; }

         $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

         wfProfileOut( "$fname-setup" );

         if ( !preg_match( $e1, $line, $m ) ) {
                 # Invalid form; output directly
                 wfProfileOut( $fname );
                 return $prefix . "[[" . $line;
         }
         # page with normal text or alt
         $text = $m[2];
         $trail = $m[3];

         /* Valid link forms:
         Foobar -- normal
         :Foobar -- override special treatment of prefix (images, language links)
         /Foobar -- convert to CurrentPage/Foobar
         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
         */
         $c = substr( $m[1], 0, 1 );
         $noforce = ( $c != ":" );
         if( $c == "/" ) { # subpage
                 if( substr( $m[1], -1, 1 ) == "/" ) {
                         # / at end means we don't want the slash to be shown
                         $m[1] = substr( $m[1], 1, strlen( $m[1] ) - 2 );
                         $noslash = $m[1];
                 } else {
                         $noslash = substr( $m[1], 1 );
                 }
                 # !empty() gives the same yes/no answer as the plain array
                 # read, without a notice for namespaces that have no entry
                 if( !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) {
                         # subpages allowed here
                         $link = $this->mTitle->getPrefixedText() . "/" . trim( $noslash );
                         if( "" == $text ) {
                                 $text = $m[1];
                         } # this might be changed for ugliness reasons
                 } else {
                         $link = $noslash; # no subpage allowed, use standard link
                 }
         } elseif( $noforce ) { # no subpage
                 $link = $m[1];
         } else {
                 # leading ":" only forces plain-link treatment; drop it
                 $link = substr( $m[1], 1 );
         }
         if( "" == $text ) {
                 $text = $link;
         }

         $nt = Title::newFromText( $link );
         if( !$nt ) {
                 # Not a usable title; output directly
                 wfProfileOut( $fname );
                 return $prefix . "[[" . $line;
         }
         $ns = $nt->getNamespace();
         $iw = $nt->getInterWiki();

         # The branches are tested in the original order; the first match wins.
         if( $noforce && $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
                 # Interlanguage link: recorded on the output, nothing shown inline
                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
                 $s = $prefix . $trail;
         } elseif( $noforce && $ns == $image ) {
                 $s = $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
                 $wgLinkCache->addImageLinkObj( $nt );
         } elseif( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
             ( strpos( $link, "#" ) === FALSE ) ) {
                 # Link to the current page without a fragment: bold text, no link.
                 # strpos() must be tested with ===, a "#" at offset 0 is a hit.
                 $s = $prefix . "<strong>" . $text . "</strong>" . $trail;
         } elseif ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
                 # NOTE(review): $category holds a message string while $ns comes
                 # from getNamespace(); confirm this comparison is intended.
                 # NOTE(review): the link is stored on the parser itself, not on
                 # $this->mOutput like the language links above -- confirm.
                 $t = explode ( ":" , $nt->getText() ) ;
                 array_shift ( $t ) ;
                 $t = implode ( ":" , $t ) ;
                 $t = $wgLang->ucFirst ( $t ) ;
                 $nnt = Title::newFromText ( $category.":".$t ) ;
                 $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
                 $this->mCategoryLinks[] = $t ;
                 $s = $prefix . $trail ;
         } elseif( $ns == $media ) {
                 $s = $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
                 $wgLinkCache->addImageLinkObj( $nt );
         } elseif( $ns == $special ) {
                 $s = $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
         } else {
                 $s = $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
         }

         # Single exit for everything past title creation keeps the
         # wfProfileIn()/wfProfileOut() calls balanced.
         wfProfileOut( $fname );
         return $s;
 }
 836
 837         # Some functions here used by doBlockLevels()
 838         #
 # Ends the section recorded in $this->mLastSection and forgets it.
 #
 # Returns the closing tag for that section followed by a newline. Nothing
 # but the newline is returned when no section is recorded or when the
 # section is "p" (paragraphs get no closing tag here).
 /* private */ function closeParagraph()
 {
         $section = $this->mLastSection;
         $this->mLastSection = "";

         if ( 0 == strcmp( "p", $section ) || 0 == strcmp( "", $section ) ) {
                 return "\n";
         }
         return "</" . $section . ">" . "\n";
 }
 # getCommon() returns the length of the longest common prefix of both
 # arguments, i.e. the number of leading bytes in which $st1 and $st2
 # agree. The result is 0 when either argument is empty.
 #
 /* private */ function getCommon( $st1, $st2 )
 {
         # No need to look past the end of the shorter string
         $limit = min( strlen( $st1 ), strlen( $st2 ) );

         # Square brackets are used for the byte offsets: the curly-brace
         # form ($st1{$i}) is no longer accepted by PHP 8.
         for ( $i = 0; $i < $limit; ++$i ) {
                 if ( $st1[$i] !== $st2[$i] ) { break; }
         }
         return $i;
 }
 863         # These next three functions open, continue, and close the list
 864         # element appropriate to the prefix character passed into them.
 865         #
 # Closes the current paragraph and opens a new list with its first item.
 #
 # $char - list prefix character: "*" (ul), "#" (ol), ":" (dl/dd) or
 #         ";" (dl/dt, which also sets $this->mDTopen)
 #
 # Returns the paragraph-closing markup followed by the opening tags. For
 # any other character only an error comment is returned; the
 # paragraph-closing markup is dropped in that case.
 /* private */ function openList( $char )
 {
         $closed = $this->closeParagraph();

         switch ( $char ) {
                 case "*":
                         return $closed . "<ul><li>";
                 case "#":
                         return $closed . "<ol><li>";
                 case ":":
                         return $closed . "<dl><dd>";
                 case ";":
                         $this->mDTopen = true;
                         return $closed . "<dl><dt>";
         }
         return "<!-- ERR 1 -->";
 }
 881
 # Ends the current list item and starts the next one in the same list.
 #
 # $char - list prefix character of the new item
 #
 # For "*" and "#" the markup is always "</li><li>". For ":" and ";" the
 # closing tag depends on $this->mDTopen (</dt> if a term is open, </dd>
 # otherwise) and the flag is updated to reflect the item being opened.
 # Any other character yields an error comment.
 /* private */ function nextItem( $char )
 {
         switch ( $char ) {
                 case "*":
                 case "#":
                         return "</li><li>";
                 case ":":
                 case ";":
                         $closeTag = $this->mDTopen ? "</dt>" : "</dd>";
                         $isTerm = ( ";" == $char );
                         $this->mDTopen = $isTerm;
                         return $closeTag . ( $isTerm ? "<dt>" : "<dd>" );
         }
         return "<!-- ERR 2 -->";
 }
 898
 # Ends the last item of a list and the list itself.
 #
 # $char - "*" (ul), "#" (ol) or ":" (dl). For ":" the item closed is a
 #         </dt> when $this->mDTopen is set (the flag is then cleared) and
 #         a </dd> otherwise.
 #
 # Returns the closing tags followed by a newline, or an error comment
 # (without newline) for any other character, ";" included.
 /* private */function closeList( $char )
 {
         switch ( $char ) {
                 case "*":
                         return "</li></ul>" . "\n";
                 case "#":
                         return "</li></ol>" . "\n";
                 case ":":
                         if ( !$this->mDTopen ) {
                                 return "</dd></dl>" . "\n";
                         }
                         $this->mDTopen = false;
                         return "</dt></dl>" . "\n";
         }
         return "<!-- ERR 3 -->";
 }
 914
 # Convert line-oriented wikitext into block-level HTML.
 #
 # The text is handled one line at a time. A leading run of "*", "#", ":"
 # or ";" opens, continues or closes nested lists. Lines without such a
 # prefix are paragraph text, or preformatted text when they start with a
 # space. Paragraph handling is switched off from a line containing an
 # opening <table>, <blockquote> or <h1>..<h6> tag until a line containing
 # one of the matching closing tags.
 #
 # $text      - the text to convert
 # $linestart - false if $text starts in the middle of a line; the first
 #              line is then copied through untouched
 #
 # Returns the converted text. $this->mDTopen and $this->mLastSection are
 # used as working state; mLastSection is reset to "" before returning.
 /* private */ function doBlockLevels( $text, $linestart )
 {
         $fname = "OutputPage::doBlockLevels";
         wfProfileIn( $fname );

         $a = explode( "\n", $text );
         $text = $lastPref = "";
         $this->mDTopen = $inBlockElem = false;
         # Defined up front so the list-closing loop after the foreach is well
         # defined even when there is no line left to iterate over
         $npl = 0;
         $pref2 = "";

         if ( ! $linestart ) { $text .= array_shift( $a ); }
         foreach ( $a as $t ) {
                 if ( "" != $text ) { $text .= "\n"; }

                 $oLine = $t;
                 $opl = strlen( $lastPref );
                 $npl = strspn( $t, "*#:;" );
                 $pref = substr( $t, 0, $npl );
                 # ";" and ":" items live in the same <dl>, so prefixes are
                 # remembered and compared with ";" folded into ":"
                 $pref2 = str_replace( ";", ":", $pref );
                 $t = substr( $t, $npl );

                 if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
                         # Same list nesting as the previous line: next item
                         $text .= $this->nextItem( substr( $pref, -1 ) );

                         if ( ";" == substr( $pref, -1 ) ) {
                                 # "; term : definition" on a single line
                                 $cpos = strpos( $t, ":" );
                                 if ( false !== $cpos ) {
                                         $term = substr( $t, 0, $cpos );
                                         $text .= $term . $this->nextItem( ":" );
                                         $t = substr( $t, $cpos + 1 );
                                 }
                         }
                 } else if ( 0 != $npl || 0 != $opl ) {
                         # Nesting changed: close what is no longer shared,
                         # then open whatever is new
                         $cpl = $this->getCommon( $pref, $lastPref );

                         while ( $cpl < $opl ) {
                                 $text .= $this->closeList( $lastPref[$opl-1] );
                                 --$opl;
                         }
                         if ( $npl <= $cpl && $cpl > 0 ) {
                                 $text .= $this->nextItem( $pref[$cpl-1] );
                         }
                         while ( $npl > $cpl ) {
                                 $char = substr( $pref, $cpl, 1 );
                                 $text .= $this->openList( $char );

                                 if ( ";" == $char ) {
                                         $cpos = strpos( $t, ":" );
                                         if ( false !== $cpos ) {
                                                 $term = substr( $t, 0, $cpos );
                                                 $text .= $term . $this->nextItem( ":" );
                                                 $t = substr( $t, $cpos + 1 );
                                         }
                                 }
                                 ++$cpl;
                         }
                         $lastPref = $pref2;
                 }
                 if ( 0 == $npl ) { # No prefix--go to paragraph mode
                         if ( preg_match(
                           "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
                                 $text .= $this->closeParagraph();
                                 $inBlockElem = true;
                         }
                         if ( ! $inBlockElem ) {
                                 # An empty line has no first character to look
                                 # at; it counts as paragraph text
                                 if ( "" != $t && " " == $t[0] ) {
                                         $newSection = "pre";
                                         # $t = wfEscapeHTML( $t );
                                 }
                                 else { $newSection = "p"; }

                                 if ( 0 == strcmp( "", trim( $oLine ) ) ) {
                                         $text .= $this->closeParagraph();
                                         $text .= "<" . $newSection . ">";
                                 } else if ( 0 != strcmp( $this->mLastSection,
                                   $newSection ) ) {
                                         $text .= $this->closeParagraph();
                                         if ( 0 != strcmp( "p", $newSection ) ) {
                                                 $text .= "<" . $newSection . ">";
                                         }
                                 }
                                 $this->mLastSection = $newSection;
                         }
                         if ( $inBlockElem &&
                           preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
                                 $inBlockElem = false;
                         }
                 }
                 $text .= $t;
         }
         # Close the lists still open after the last line
         while ( $npl ) {
                 $text .= $this->closeList( $pref2[$npl-1] );
                 --$npl;
         }
         if ( "" != $this->mLastSection ) {
                 if ( "p" != $this->mLastSection ) {
                         $text .= "</" . $this->mLastSection . ">";
                 }
                 $this->mLastSection = "";
         }
         wfProfileOut( $fname );
         return $text;
 }
1020
# Substitutes the magic-word variables in $text and returns the result.
#
# Three groups are handled in this order: the date and time variables, the
# article counter, and the two callback-driven MSG / MSGNW words.
# $this->mOutput->mContainsOldMagic is increased by what
# MagicWord::replaceMultiple() returns and once more if the article-count
# word reports a modification. $wgCurOut is pointed at this parser before
# the callbacks run.
/* private */ function replaceVariables( $text )
{
        global $wgLang, $wgCurOut;
        $fname = "OutputPage::replaceVariables";
        wfProfileIn( $fname );

        # Basic variables
        # See Language.php for the definition of each magic word
        # As with sigs, this uses the server's local time -- ensure
        # this is appropriate for your audience!
        $magic = array(
                MAG_CURRENTMONTH        => date( "m" ),
                MAG_CURRENTMONTHNAME    => $wgLang->getMonthName( date("n") ),
                MAG_CURRENTMONTHNAMEGEN => $wgLang->getMonthNameGen( date("n") ),
                MAG_CURRENTDAY          => date("j"),
                MAG_CURRENTDAYNAME      => $wgLang->getWeekdayName( date("w")+1 ),
                MAG_CURRENTYEAR         => date( "Y" ),
                MAG_CURRENTTIME         => $wgLang->time( wfTimestampNow(), false )
        );
        $this->mOutput->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);

        # Number of articles: only fetched when the word is present
        $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
        if ( $mw->match( $text ) ) {
                $text = $mw->replace( wfNumberOfArticles(), $text );
                if( $mw->getWasModified() ) {
                        $this->mOutput->mContainsOldMagic++;
                }
        }

        # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
        # The callbacks are at the bottom of this file
        $wgCurOut = $this;
        $callbackWords = array(
                array( MAG_MSG,   "wfReplaceMsgVar" ),
                array( MAG_MSGNW, "wfReplaceMsgnwVar" )
        );
        foreach ( $callbackWords as $pair ) {
                $mw =& MagicWord::get( $pair[0] );
                $text = $mw->substituteCallback( $text, $pair[1] );
                # NOTE(review): this counter is written on the parser itself,
                # unlike mContainsOldMagic above which lives on $this->mOutput
                # -- confirm which object is meant.
                if( $mw->getWasModified() ) {
                        $this->mContainsNewMagic++;
                }
        }

        wfProfileOut( $fname );
        return $text;
}
1065
# Cleans up HTML, removes dangerous tags and attributes.
#
# HTML comments are removed. Tags from the lists below are kept, with their
# attributes passed through fixTagAttributes(); every other "<" is escaped
# to "&lt;". A listed tag is escaped too when it is badly nested: a closing
# tag that does not match the innermost open tag, a table cell or row
# outside a table, or a repeated tag that may not nest. A ">" in the text
# after a tag is escaped to "&gt;". Tags still open at the end of the text
# are closed.
#
# $text - the text to clean
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
        $fname = "OutputPage::removeHTMLtags";
        wfProfileIn( $fname );
        $htmlpairs = array( # Tags that must be closed
                "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
                "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
                "strike", "strong", "tt", "var", "div", "center",
                "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
                "ruby", "rt" , "rb" , "rp"
        );
        $htmlsingle = array(
                "br", "p", "hr", "li", "dt", "dd"
        );
        $htmlnest = array( # Tags that can be nested--??
                "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
                "dl", "font", "big", "small", "sub", "sup"
        );
        $tabletags = array( # Can only appear inside table
                "td", "th", "tr"
        );

        $htmlsingle = array_merge( $tabletags, $htmlsingle );
        $htmlelements = array_merge( $htmlsingle, $htmlpairs );

        # NOTE(review): the result is not used in this function; the call is
        # kept in case getHTMLattrs() has side effects -- confirm and drop.
        $htmlattrs = $this->getHTMLattrs () ;

        # Remove HTML comments
        $text = preg_replace( "/<!--.*-->/sU", "", $text );

        # Every element of $bits is the text that followed one "<"
        $bits = explode( "<", $text );
        $text = array_shift( $bits );
        $tagstack = array(); $tablestack = array();

        foreach ( $bits as $x ) {
                # A fragment that does not even look like a tag is escaped right
                # away, instead of unpacking an empty match with errors silenced
                if ( !preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
                  $x, $regs ) ) {
                        $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                        continue;
                }
                list( , $slash, $t, $params, $brace, $rest ) = $regs;

                $badtag = 0 ;
                $t = strtolower( $t );
                if ( in_array( $t, $htmlelements ) ) {
                        # Check our stack
                        if ( $slash ) {
                                # Closing a tag...
                                if ( in_array( $t, $htmlsingle ) ) {
                                        # Not tracked on the stack; always allowed
                                        $newparams = "";
                                } else {
                                        $ot = array_pop( $tagstack );
                                        if ( $ot !== $t ) {
                                                # Mismatch: put back what was popped.
                                                # Nothing was popped if the stack was
                                                # empty, so there is nothing to restore.
                                                if ( $ot !== NULL ) {
                                                        array_push( $tagstack, $ot );
                                                }
                                                $badtag = 1;
                                        } else {
                                                if ( $t == "table" ) {
                                                        # Back to the stack of the enclosing context
                                                        $tagstack = array_pop( $tablestack );
                                                }
                                                $newparams = "";
                                        }
                                }
                        } else {
                                # Keep track for later
                                if ( in_array( $t, $tabletags ) &&
                                  ! in_array( "table", $tagstack ) ) {
                                        $badtag = 1;
                                } else if ( in_array( $t, $tagstack ) &&
                                  ! in_array ( $t , $htmlnest ) ) {
                                        $badtag = 1 ;
                                } else if ( ! in_array( $t, $htmlsingle ) ) {
                                        if ( $t == "table" ) {
                                                # A table starts a fresh nesting context
                                                array_push( $tablestack, $tagstack );
                                                $tagstack = array();
                                        }
                                        array_push( $tagstack, $t );
                                }
                                # Strip non-approved attributes from the tag
                                $newparams = $this->fixTagAttributes($params);

                        }
                        if ( ! $badtag ) {
                                $rest = str_replace( ">", "&gt;", $rest );
                                $text .= "<$slash$t $newparams$brace$rest";
                                continue;
                        }
                }
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
        }
        # Close off any remaining tags
        while ( $t = array_pop( $tagstack ) ) {
                $text .= "</$t>\n";
                if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
        }
        wfProfileOut( $fname );
        return $text;
}
1158
1159 /*
1160  *
1161  * This function accomplishes several tasks:
1162  * 1) Auto-number headings if that option is enabled
1163  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1164  * 3) Add a Table of contents on the top for users who have enabled the option
1165  * 4) Auto-anchor headings
1166  *
1167  * It loops through all headlines, collects the necessary data, then splits up the
1168  * string and re-inserts the newly formatted headlines.
1169  *
1170  * */
/* private */ function formatHeadings( $text )
{
    # Rebuilds every <h1>..<h6> element found in $text.
    #
    # Each heading is wrapped in a named anchor; depending on the parser
    # options it is also prefixed with a hierarchical number (1, 1.1, ...),
    # preceded by a section-edit link and/or wrapped by the skin's
    # right-click edit script. When the TOC is enabled and more than three
    # headings exist, the table of contents is appended after the first
    # block of text.
    #
    # $text    HTML in which headings are already <hN> elements
    # returns  the text with the rebuilt headings (and TOC) spliced in
    #
    # The magic words for "no edit section" and "no TOC" are removed from
    # the text as a side effect of testing for them.

    $nh = $this->mOptions->getNumberHeadings();
    $st = $this->mOptions->getShowToc();
    if ( !$this->mTitle->userCanEdit() ) {
        # No edit links at all when the title is not editable by the user
        $es = 0;
        $esr = 0;
    } else {
        $es = $this->mOptions->getEditSection();
        $esr = $this->mOptions->getEditSectionOnRightClick();
    }

    # Inhibit editsection links if requested in the page
    $esw =& MagicWord::get( MAG_NOEDITSECTION );
    if ( $esw->matchAndRemove( $text ) ) {
        $es = 0;
    }

    # Do not add a TOC if the page asks for none
    $mw =& MagicWord::get( MAG_NOTOC );
    if ( $mw->matchAndRemove( $text ) ) {
        $st = 0;
    }

    # Never add the TOC to the Main Page. This is an entry page that should
    # not be more than 1-2 screens large anyway
    if ( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) {
        $st = 0;
    }

    # We need this to perform operations on the HTML
    $sk =& $this->mOptions->getSkin();

    # Collect all headlines: [1] = level digit, [2] = rest of the opening
    # tag including ">", [3] = heading content
    preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);

    # Everything the loop reads is initialised here so that no undefined
    # variable or array offset is ever touched.
    $c = 0;              # headline counter
    $toclevel = 0;       # current TOC indentation depth
    $toc = "";
    $full = "";
    $head = array();     # rebuilt heading HTML, indexed by headline number
    $level = 0;          # level of the current heading (0 = none seen yet)
    $prevlevel = 0;      # level of the previous heading
    $h = array();        # per-level heading counters
    $refers = array();   # anchor text => number of times seen so far
    $toclines = 0;
    $headlineCount = count( $matches[3] );

    foreach ( $matches[3] as $headline ) {
        if ( $level ) {
            $prevlevel = $level;
        }
        $level = $matches[1][$c];

        if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
            # reset when we enter a new level
            $h[$level] = 0;
            $toc .= $sk->tocIndent( $level - $prevlevel );
            $toclevel += $level - $prevlevel;
        }
        if ( ( $nh || $st ) && $level < $prevlevel ) {
            # reset when we step back a level
            $h[$level + 1] = 0;
            $toc .= $sk->tocUnindent( $prevlevel - $level );
            $toclevel -= $prevlevel - $level;
        }

        # count number of headlines for each level
        if ( !isset( $h[$level] ) ) {
            $h[$level] = 0;
        }
        $h[$level]++;

        # Dotted section number built from the non-zero counters of all
        # levels up to the current one
        $numbering = "";
        if ( $nh || $st ) {
            $dot = 0;
            for ( $lvl = 1; $lvl <= $level; $lvl++ ) {
                if ( !empty( $h[$lvl] ) ) {
                    if ( $dot ) {
                        $numbering .= ".";
                    }
                    $numbering .= $h[$lvl];
                    $dot = 1;
                }
            }
        }

        # The canonized header is a version of the header text safe to use
        # for links. Stripped sections are expanded first so placeholders
        # do not end up in the anchor.
        $canonized_headline = Parser::unstrip( $headline, $this->mStripState );
        $canonized_headline = preg_replace("/<.*?>/","",$canonized_headline); # strip out HTML
        $tocline = trim( $canonized_headline );
        $canonized_headline = str_replace( '"', "", $canonized_headline );
        $canonized_headline = str_replace( " ", "_", trim( $canonized_headline ) );

        # Count identical anchors so duplicates can be told apart
        if ( !isset( $refers[$canonized_headline] ) ) {
            $refers[$canonized_headline] = 0;
        }
        $refers[$canonized_headline]++;
        $refcount = $refers[$canonized_headline];

        # Prepend the number to the heading text
        if ( $nh || $st ) {
            $tocline = $numbering . " " . $tocline;

            # Don't number the heading if it is the only one (looks silly)
            if ( $nh && $headlineCount > 1 ) {
                # $headline and $tocline differ if the line contains a link
                $headline = $numbering . " " . $headline;
            }
        }

        # Create the anchor for linking from the TOC to the section;
        # second and later duplicates get a "_N" suffix
        $anchor = $canonized_headline;
        if ( $refcount > 1 ) {
            $anchor .= "_" . $refcount;
        }
        if ( $st ) {
            $toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
        }

        # Put it all together: optional edit link, then the heading itself
        $head[$c] = "";
        if ( $es ) {
            $head[$c] .= $sk->editSectionLink( $c + 1 );
        }
        $head[$c] .= "<h" . $level . $matches[2][$c]
            . "<a name=\"" . $anchor . "\">"
            . $headline
            . "</a>"
            . "</h" . $level . ">";

        # Let the skin wrap the heading for right-click section editing
        if ( $esr ) {
            $head[$c] = $sk->editSectionScript( $c + 1, $head[$c] );
        }

        $c++;
    }

    if ( $st ) {
        $toclines = $c;
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    # Split the text at the original headlines and re-insert the
    # constructed ones between the pieces
    $blocks = preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
    $i = 0;

    foreach ( $blocks as $block ) {
        if ( ( $es ) && $c > 0 && $i == 0 ) {
            # This is the [edit] link that appears for the top block of
            # text when section editing is enabled
            $full .= $sk->editSectionLink( 0 );
        }
        $full .= $block;
        if ( $st && $toclines > 3 && !$i ) {
            # Add a top anchor in case we want to link to the top of the
            # page, and place the TOC after the first block
            $full = "<a name=\"top\"></a>" . $full . $toc;
        }

        if ( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }

    return $full;
}
1326
/* private */ function doMagicISBN( &$tokenizer )
{
    # Produces the replacement text for an "ISBN" magic token.
    #
    # Tokens with an empty type are skipped. If the next token is a text
    # token it is consumed: leading blanks are skipped, then the longest run
    # of digits, hyphens and capital letters is taken as the ISBN and turned
    # into a link to Special:Booksources. Whatever follows the run is
    # appended unchanged.
    #
    # $tokenizer  object providing previewToken() and nextToken(), each
    #             returning an array with "type" (and "text" for text tokens)
    # returns     HTML/text to emit in place of the magic token

    $token = $tokenizer->previewToken();
    while ( $token["type"] == "" ) {
        $tokenizer->nextToken();
        $token = $tokenizer->previewToken();
    }

    if ( $token["type"] != "text" ) {
        # Nothing to link; leave the text token stream untouched
        return "ISBN ";
    }

    $token = $tokenizer->nextToken();
    $x = (string) $token["text"];

    # strspn() measures the prefixes without ever indexing past the end of
    # the string, which the old character-by-character scan did.
    $blankLen = strspn( $x, " " );
    $blank = str_repeat( " ", $blankLen );
    $x = (string) substr( $x, $blankLen );

    $isbnLen = strspn( $x, "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ" );
    $isbn = (string) substr( $x, 0, $isbnLen );
    $x = (string) substr( $x, $isbnLen );

    # The query value is the ISBN without hyphens
    $num = str_replace( "-", "", $isbn );

    if ( $num === "" ) {
        # No usable number. $isbn can only hold hyphens here; keep them so
        # that input such as "ISBN - foo" is not silently altered.
        return "ISBN $blank$isbn$x";
    }

    $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
    $text = "<a href=\"" .
        $titleObj->escapeLocalUrl( "isbn={$num}" ) .
        "\" class=\"internal\">ISBN $isbn</a>";
    $text .= $x;
    return $text;
}
/* private */ function doMagicRFC( &$tokenizer )
{
    # Produces the replacement text for an "RFC" magic token.
    #
    # Tokens with an empty type are skipped. If the next token is a text
    # token it is consumed: leading blanks are skipped and the following run
    # of decimal digits is taken as the RFC number. The link target is the
    # "rfcurl" message with "$1" replaced by that number. Whatever follows
    # the digits is appended unchanged.
    #
    # $tokenizer  object providing previewToken() and nextToken(), each
    #             returning an array with "type" (and "text" for text tokens)
    # returns     HTML/text to emit in place of the magic token

    $token = $tokenizer->previewToken();
    while ( $token["type"] == "" ) {
        $tokenizer->nextToken();
        $token = $tokenizer->previewToken();
    }

    if ( $token["type"] != "text" ) {
        return "RFC ";
    }

    $token = $tokenizer->nextToken();
    $x = (string) $token["text"];

    # strspn() measures the prefixes without indexing past the end of the
    # string when the token consists only of blanks and/or digits.
    $blankLen = strspn( $x, " " );
    $blank = str_repeat( " ", $blankLen );
    $x = (string) substr( $x, $blankLen );

    $rfcLen = strspn( $x, "0123456789" );
    $rfc = (string) substr( $x, 0, $rfcLen );
    $x = (string) substr( $x, $rfcLen );

    if ( $rfc === "" ) {
        # No number follows: give the consumed text back unchanged
        return "RFC $blank$x";
    }

    $url = wfMsg( "rfcurl" );
    $url = str_replace( '$1', $rfc, $url );
    $sk =& $this->mOptions->getSkin();
    $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
    return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1415
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
    # Applies the pre-save pass (pstPass2) to $text.
    #
    # The text is run through strip() with a state local to this call,
    # transformed, and then passed through unstrip() with that same state.
    #
    # $text        text to transform
    # $title       stored in mTitle for the duration of the call
    # $user        handed on to pstPass2()
    # $options     stored in mOptions
    # $clearState  when true, clearState() is called before the work starts
    # returns      the transformed text

    $this->mTitle = $title;
    $this->mOptions = $options;

    if ( $clearState ) {
        $this->clearState();
    }

    # Local strip state; the third argument to strip() is false here
    $localState = false;
    $stripped = $this->strip( $text, $localState, false );
    $transformed = $this->pstPass2( $stripped, $user );

    return $this->unstrip( $transformed, $localState );
}
1430
/* private */ function pstPass2( $text, &$user )
{
    # Second pre-save pass over $text:
    #   1. expands ~~~ / ~~~~ into a signature link (plus timestamp)
    #   2. fills in context links such as [[|name]] and [[name (context)|]]
    #   3. runs the SUBST magic word callback
    #   4. trims trailing whitespace and removes the END magic word
    #
    # $text    text to transform
    # $user    object providing getName() and getOption( "nickname" )
    # returns  the transformed text

    global $wgLang, $wgLocaltimezone;

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( "nickname" );
    if ( "" == $k ) {
        # No nickname set: the link text falls back to the user name
        $k = $n;
    }

    if ( isset( $wgLocaltimezone ) ) {
        $oldtz = getenv( "TZ" );
        putenv( "TZ=$wgLocaltimezone" );
    }
    /* Note: this is an ugly timezone hack for the European wikis */
    $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
        " (" . date( "T" ) . ")";
    # NOTE(review): if TZ was not set before, getenv() gave false and this
    # leaves TZ set to an empty string rather than unset - confirm intended.
    if ( isset( $wgLocaltimezone ) ) putenv( "TZ=$oldtz" );

    # NOTE(review): "Namespace" is a reserved word from PHP 5.3 on; this
    # call relies on a class declared elsewhere under that name.
    $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";

    # The markers are literal, so plain string replacement is used. With
    # preg_replace() a "$0" or "\1" inside the user name or nickname would
    # be interpreted as a backreference and corrupt the signature.
    $text = str_replace( "~~~~", $userLink . " " . $d, $text );
    $text = str_replace( "~~~", $userLink, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
    $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
    $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                            # [[ns:page (cont)|]]

    # The context is the parenthesised suffix of the current title, if any
    $context = "";
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }

    # Most specific pattern first
    $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
    $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
    $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

    if ( "" == $context ) {
        $text = preg_replace( $p2, "[[\\1]]", $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    # {{SUBST:xxx}} variables
    #
    $mw =& MagicWord::get( MAG_SUBST );
    $text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
1494
1495
1496 }
1497
class ParserOutput
{
    # Plain container for the four values a parse produces. Parser creates
    # one of these in clearState().
    var $mText;
    var $mLanguageLinks;
    var $mCategoryLinks;
    var $mContainsOldMagic;

    # Constructor: every argument is optional and is stored unchanged.
    function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
        $containsOldMagic = false )
    {
        $this->mContainsOldMagic = $containsOldMagic;
        $this->mCategoryLinks = $categoryLinks;
        $this->mLanguageLinks = $languageLinks;
        $this->mText = $text;
    }

    # Accessors: return the stored value as is.
    function getText()
    {
        return $this->mText;
    }

    function getLanguageLinks()
    {
        return $this->mLanguageLinks;
    }

    function getCategoryLinks()
    {
        return $this->mCategoryLinks;
    }

    function containsOldMagic()
    {
        return $this->mContainsOldMagic;
    }

    # Mutators: delegate to wfSetVar() and hand back whatever it returns.
    function setText( $text )
    {
        return wfSetVar( $this->mText, $text );
    }

    function setLanguageLinks( $ll )
    {
        return wfSetVar( $this->mLanguageLinks, $ll );
    }

    function setCategoryLinks( $cl )
    {
        return wfSetVar( $this->mCategoryLinks, $cl );
    }

    function setContainsOldMagic( $com )
    {
        return wfSetVar( $this->mContainsOldMagic, $com );
    }
}
1520
class ParserOptions
{
    # Bundle of settings consulted by Parser while it runs. The values are
    # normally filled in by initialiseFromUser(); each one also has a plain
    # getter and a setter.

    # All variables are private
    var $mUseTeX;                    # Use texvc to expand <math> tags
    var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
    var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
    var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
    var $mAllowExternalImages;       # Allow external images inline
    var $mSkin;                      # Reference to the preferred skin
    var $mDateFormat;                # Date format index
    var $mEditSection;               # Create "edit section" links
    var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
    var $mPrintable;                 # Generate printable output
    var $mNumberHeadings;            # Automatically number headings
    var $mShowToc;                   # Show table of contents

    # Getters: return the stored value unchanged
    function getUseTeX() { return $this->mUseTeX; }
    function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
    function getUseDynamicDates() { return $this->mUseDynamicDates; }
    function getInterwikiMagic() { return $this->mInterwikiMagic; }
    function getAllowExternalImages() { return $this->mAllowExternalImages; }
    function getSkin() { return $this->mSkin; }
    function getDateFormat() { return $this->mDateFormat; }
    function getEditSection() { return $this->mEditSection; }
    function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
    function getPrintable() { return $this->mPrintable; }
    function getNumberHeadings() { return $this->mNumberHeadings; }
    function getShowToc() { return $this->mShowToc; }

    # Setters: delegate to wfSetVar() (wfSetRef() for the skin) and return
    # whatever that helper returns
    function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
    function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
    function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
    function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
    function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
    function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
    function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
    function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
    function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
    function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
    function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
    function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

    # Creates a ParserOptions object initialised from $user.
    # returns  the new ParserOptions
    /* static */ function newFromUser( &$user )
    {
        $popts = new ParserOptions;
        # initialiseFromUser() already declares its parameter by reference;
        # repeating "&" at the call site is a fatal error since PHP 5.4.
        $popts->initialiseFromUser( $user );
        return $popts;
    }

    # Fills every option: site-wide switches come from the $wg* globals,
    # per-user preferences from $userInput. A fresh User object is used
    # when $userInput is empty. Printable output always starts off.
    function initialiseFromUser( &$userInput )
    {
        global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

        if ( !$userInput ) {
            $user = new User;
        } else {
            $user =& $userInput;
        }

        # Site configuration
        $this->mUseTeX = $wgUseTeX;
        $this->mUseCategoryMagic = $wgUseCategoryMagic;
        $this->mUseDynamicDates = $wgUseDynamicDates;
        $this->mInterwikiMagic = $wgInterwikiMagic;
        $this->mAllowExternalImages = $wgAllowExternalImages;

        # User preferences
        $this->mSkin =& $user->getSkin();
        $this->mDateFormat = $user->getOption( "date" );
        $this->mEditSection = $user->getOption( "editsection" );
        $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
        $this->mPrintable = false;
        $this->mNumberHeadings = $user->getOption( "numberheadings" );
        $this->mShowToc = $user->getOption( "showtoc" );
    }
}
1596
1597 # Regex callbacks, used in OutputPage::replaceVariables
1598
# Callback: expands a message reference to its sanitised text.
# $matches[1] is the message key. Only the dangerous markup is removed;
# this is needed because replaceVariables is called after removeHTMLtags,
# and message text can come from any user. The link cache is suspended
# while internal links inside the message are replaced, and the message's
# own page in the MediaWiki namespace is added to it afterwards.
function wfReplaceMsgVar( $matches ) {
    global $wgCurOut, $wgLinkCache;

    $msgKey = $matches[1];
    $rendered = $wgCurOut->removeHTMLtags( wfMsg( $msgKey ) );

    $wgLinkCache->suspend();
    $rendered = $wgCurOut->replaceInternalLinks( $rendered );
    $wgLinkCache->resume();

    $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgKey );
    $wgLinkCache->addLinkObj( $msgTitle );

    return $rendered;
}
1611
# Callback: expands a message reference to its text with wiki markup
# escaped, i.e. an effective <nowiki></nowiki>. It is not a real <nowiki>
# because this is called after nowiki sections are processed.
# $matches[1] is the message key; the message's page in the MediaWiki
# namespace is added to the link cache.
function wfReplaceMsgnwVar( $matches ) {
    global $wgCurOut, $wgLinkCache;

    $msgKey = $matches[1];
    $escaped = wfEscapeWikiText( wfMsg( $msgKey ) );

    $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgKey );
    $wgLinkCache->addLinkObj( $msgTitle );

    return $escaped;
}
1620
1621
1622
1623 ?>