includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Converts wikitext to HTML.
   8 #
   9 # Globals used:
  10 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
  11 #
  12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  13 #
  14 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  15 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  16 #               $wgLocaltimezone
  17 #
  18 #      * only within ParserOptions
  19
  20 class Parser
  21 {
  22         # Cleared with clearState():
  23         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
  24
  25         # Temporary:
  26         var $mOptions, $mTitle;
  27
  # PHP 4 style constructor (a method named after the class).
  # A new parser begins with freshly reset per-parse state.
  function Parser()
  {
      $this->clearState();
  }
  32
  # Puts every member listed under "Cleared with clearState()" back to
  # its starting value: a new ParserOutput, autonumber counter at zero,
  # empty last-section marker, DT flag off and no strip state.
  function clearState()
  {
      $this->mStripState = false;
      $this->mDTopen = false;
      $this->mLastSection = "";
      $this->mAutonumber = 0;
      $this->mOutput = new ParserOutput();
  }
  41
  42         # First pass--just handle <nowiki> sections, pass the rest off
  43         # to doWikiPass2() which does all the real work.
  44         #
  45         # Returns a ParserOutput
  46         #
  # Runs the whole conversion: strip() replaces protected sections with
  # placeholders, doWikiPass2() transforms the remaining text, and
  # unstrip() puts the protected sections back.
  #
  # $text       - wikitext to convert
  # $title      - title object; kept by reference in $this->mTitle
  # $options    - options object; kept in $this->mOptions
  # $linestart  - handed unchanged to doWikiPass2()
  # $clearState - when true, clearState() is called before parsing
  #
  # Returns $this->mOutput after its text has been set.
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
      $fname = "Parser::parse";
      wfProfileIn( $fname );

      if ( $clearState ) {
          $this->clearState();
      }

      $this->mOptions = $options;
      $this->mTitle =& $title;

      # strip() (re)initialises $this->mStripState through its reference
      # parameter; unstrip() reads the same member afterwards.
      $text = $this->strip( $text, $this->mStripState, true );
      $text = $this->doWikiPass2( $text, $linestart );
      $text = $this->unstrip( $text, $this->mStripState );

      $this->mOutput->setText( $text );
      wfProfileOut( $fname );
      return $this->mOutput;
  }
  68
  # Returns a random lowercase hexadecimal string made of two
  # mt_rand() draws (each in 0..0x7fffffff) joined together.
  /* static */ function getRandomString()
  {
      $first = dechex( mt_rand( 0, 0x7fffffff ) );
      $second = dechex( mt_rand( 0, 0x7fffffff ) );
      return $first . $second;
  }
  73
  74         # Strips <nowiki>, <pre> and <math>
  75         # Returns the text, and fills an array with data needed in unstrip()
  76         #
  # Strips <nowiki>, <pre> and (only when $this->mOptions->getUseTeX()
  # is true) <math> sections out of the text, leaving a placeholder of
  # the form <random prefix><8 hex digit counter> in their place.
  #
  # $text   - wikitext
  # $state  - overwritten with the array unstrip() needs: for each kind
  #           a list of replacement strings ('...list'), a counter
  #           ('...secs') and the random placeholder prefix ('...unq')
  # $render - true: store the rendered form of each section;
  #           false: store the section re-wrapped in its original tag
  #
  # Returns the text with placeholders inserted.
  function strip( $text, &$state, $render = true )
  {
      $state = array(
          'nwlist' => array(),
          'nwsecs' => 0,
          'nwunq' => Parser::getRandomString(),
          'mathlist' => array(),
          'mathsecs' => 0,
          'mathunq' => Parser::getRandomString(),
          'prelist' => array(),
          'presecs' => 0,
          'preunq' => Parser::getRandomString()
      );

      $stripped = "";
      $stripped2 = "";
      $stripped3 = "";

      # Replace any instances of the placeholders already present in the
      # input, so they cannot be mistaken for real ones in unstrip().
      $text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
      $text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
      $text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );

      # Pass 1: <nowiki>
      while ( "" != $text ) {
          $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
          $stripped .= $p[0];
          if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
              $text = "";
          } else {
              $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
              ++$state['nwsecs'];

              if ( $render ) {
                  $state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
              } else {
                  $state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
              }

              $stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
              # With no closing tag $q has a single element: the section
              # runs to the end of the text and nothing is left over.
              $text = isset( $q[1] ) ? $q[1] : "";
          }
      }

      # Pass 2: <math>, only when TeX is enabled
      if( $this->mOptions->getUseTeX() ) {
          while ( "" != $stripped ) {
              $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
              $stripped2 .= $p[0];
              if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                  $stripped = "";
              } else {
                  $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
                  ++$state['mathsecs'];

                  if ( $render ) {
                      $state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
                  } else {
                      $state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
                  }

                  $stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
                  $stripped = isset( $q[1] ) ? $q[1] : "";
              }
          }
      } else {
          $stripped2 = $stripped;
      }

      # Pass 3: <pre>
      while ( "" != $stripped2 ) {
          $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
          $stripped3 .= $p[0];
          if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
              $stripped2 = "";
          } else {
              $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
              ++$state['presecs'];

              if ( $render ) {
                  $state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
              } else {
                  $state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
              }

              $stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
              $stripped2 = isset( $q[1] ) ? $q[1] : "";
          }
      }
      return $stripped3;
  }
 165
 # Puts back the sections that strip() replaced with placeholders.
 #
 # $text  - text containing placeholders
 # $state - the array filled in by strip()
 #
 # The kinds are restored in the order pre, math, nowiki; each
 # placeholder is the kind's prefix followed by an 8-digit uppercase
 # hexadecimal counter starting at 1.
 function unstrip( $text, &$state )
 {
     $kinds = array( 'pre', 'math', 'nw' );
     foreach ( $kinds as $kind ) {
         $n = 1;
         while ( $n <= $state[$kind . 'secs'] ) {
             $placeholder = $state[$kind . 'unq'] . sprintf("%08X", $n);
             $text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
             ++$n;
         }
     }
     return $text;
 }
 181
 # Builds the HTML listing appended to a category page: a
 # "subcategories" section and an articles section, each a
 # comma-separated list of links.
 #
 # Returns "" when category magic is disabled in the options or when
 # the part of the title text before the first ":" is not the localised
 # "category" word; otherwise returns the HTML fragment.
 function categoryMagic ()
 {
     global $wgLang ;
     # Always return a string so callers can append the result safely.
     if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
     # The id is interpolated into SQL below, so force it to an integer.
     $id = intval ( $this->mTitle->getArticleID() ) ;
     $cat = ucfirst ( wfMsg ( "category" ) ) ;
     $ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
     if ( $cat != $ti[0] ) return "" ;
     # A title with no ":" has no second part.
     $catname = isset ( $ti[1] ) ? $ti[1] : "" ;
     # "clear" is the BR attribute that ends floats; "break" is not an
     # HTML attribute.
     $r = "<br clear=all>\n" ;

     $articles = array() ;
     $children = array() ;

     # Take the skin from the parser options, as the other methods of
     # this class do; the file header asks to keep $wgUser out of here.
     $sk =& $this->mOptions->getSkin() ;

     # NOTE(review): $doesexist is hard-wired to false, so the "links"
     # query is never used at present -- confirm the intended condition.
     $doesexist = false ;
     if ( $doesexist ) {
         $sql = "SELECT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
     } else {
         $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
     }

     $res = wfQuery ( $sql, DB_READ ) ;
     while ( $x = wfFetchObject ( $res ) )
     {
         # Rebuild the prefixed title: "<namespace text>:<title>"
         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
         if ( $t != "" ) $t .= ":" ;
         $t .= $x->cur_title ;

         # Pages whose prefix is the category word are subcategories,
         # linked with the prefix left off; everything else is an article.
         $y = explode ( ":" , $t , 2 ) ;
         if ( count ( $y ) == 2 && $y[0] == $cat ) {
             array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
         } else {
             array_push ( $articles , $sk->makeLink ( $t ) ) ;
         }
     }
     wfFreeResult ( $res ) ;

     # Children
     if ( count ( $children ) > 0 )
     {
         asort ( $children ) ;
         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
         $r .= implode ( ", " , $children ) ;
     }

     # Articles
     if ( count ( $articles ) > 0 )
     {
         asort ( $articles ) ;
         $h =  wfMsg( "category_header", $catname );
         $r .= "<h2>{$h}</h2>\n" ;
         $r .= implode ( ", " , $articles ) ;
     }

     return $r ;
 }
 247
 # Returns the whitelist of attribute names (lowercase) that may stay
 # on an HTML tag -- no scripting attributes, etc.
 function getHTMLattrs ()
 {
     return array(
         # General
         "title", "align", "lang", "dir", "width", "height", "bgcolor",
         # BR
         "clear",
         # HR
         "noshade",
         # BLOCKQUOTE, Q
         "cite",
         # FONT
         "size", "face", "color",
         # For various lists, mostly deprecated but safe
         "type", "start", "value", "compact",
         # Tables
         "summary", "width", "border", "frame", "rules",
         "cellspacing", "cellpadding", "valign", "char",
         "charoff", "colgroup", "col", "span", "abbr", "axis",
         "headers", "scope", "rowspan", "colspan",
         # For CSS
         "id", "class", "name", "style"
     ) ;
 }
 264
 # Filters the attribute part of an HTML tag.
 #
 # $t - the raw attribute text (everything after the tag name)
 #
 # Every "name" or "name=value" item whose lowercased name is not in
 # getHTMLattrs() is removed; kept items are rewritten as "name=value"
 # with no whitespace around "=". Text between items is left as is.
 # If the result has a style attribute containing "expression", a
 # "tp://"-like scheme or "url(", all attributes are dropped.
 #
 # Returns the filtered, trimmed attribute text ("" for blank input).
 function fixTagAttributes ( $t )
 {
     if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
     $htmlattrs = $this->getHTMLattrs() ;

     # Strip non-approved attributes from the tag.
     # The matches are walked by hand instead of using preg_replace()
     # with the /e modifier: /e evaluated the attribute value inside a
     # double-quoted PHP string, so a value such as {${...}} could run
     # code, and its implicit addslashes() also mangled single quotes.
     $found = array () ;
     preg_match_all(
         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
         $t , $found , PREG_SET_ORDER | PREG_OFFSET_CAPTURE ) ;
     $kept = "" ;
     $pos = 0 ;
     foreach ( $found AS $m )
     {
         # With PREG_OFFSET_CAPTURE each group is array( text, offset ).
         $start = $m[0][1] ;
         $kept .= substr ( $t , $pos , $start - $pos ) ;
         $pos = $start + strlen ( $m[0][0] ) ;

         $name = $m[1][0] ;
         if ( in_array ( strtolower ( $name ) , $htmlattrs ) )
         {
             $kept .= $name ;
             # Group 3 is missing or empty when the item has no value.
             if ( isset ( $m[3] ) && $m[3][0] !== "" ) $kept .= "=" . $m[3][0] ;
         }
     }
     $kept .= substr ( $t , $pos ) ;
     $t = $kept ;

     # Strip javascript "expression" from stylesheets. Brute force approach:
     # If anything offensive is found, all attributes of the HTML tag are dropped
     if( preg_match(
         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
         wfMungeToUtf8( $t ) ) )
     {
         $t="";
     }

     return trim ( $t ) ;
 }
 287
 # Converts wiki table markup to HTML, one input line at a time.
 #
 #   {| attrs      opens a table
 #   |}            closes the current table
 #   |- attrs      starts a new row (any number of dashes is accepted)
 #   |+ text       caption
 #   | and !       data and header cells; "||" (or "!!" on a header
 #                 line) separates several cells on one line, and a
 #                 single "|" inside a cell separates its attributes
 #                 from its content
 #
 # Tables may nest: the four arrays below are stacks with one entry per
 # open table. Lines outside any table are left untouched. Anything
 # still open at the end of the text is closed.
 #
 # $t - the text; returns the converted text.
 function doTableStuff ( $t )
 {
     $t = explode ( "\n" , $t ) ;
     $td = array () ; # Is currently a td tag open?
     $ltd = array () ; # Was it TD or TH?
     $tr = array () ; # Is currently a tr tag open?
     $ltr = array () ; # tr attributes
     foreach ( $t AS $k => $x )
     {
         $x = rtrim ( $x ) ;
         $fc = substr ( $x , 0 , 1 ) ;
         if ( "{|" == substr ( $x , 0 , 2 ) )
         {
             # Everything after the two-character marker is attribute
             # text; fixTagAttributes() trims it, so a separating space
             # is optional ("{|border=1" keeps its first letter).
             $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $tr , false ) ;
             array_push ( $ltr , "" ) ;
         }
         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
         else if ( "|}" == substr ( $x , 0 , 2 ) )
         {
             $z = "</table>\n" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
         }
         else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
         {
             $x = substr ( $x , 1 ) ;
             while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
             $z = "" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
             # The <tr> itself is written by the first cell of the row;
             # only its attributes are remembered here.
             array_push ( $tr , false ) ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
         }
         else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
         {
             if ( "|+" == substr ( $x , 0 , 2 ) )
             {
                 $fc = "+" ;
                 $x = substr ( $x , 1 ) ;
             }
             $after = substr ( $x , 1 ) ;
             if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
             $after = explode ( "||" , $after ) ;
             $t[$k] = "" ;
             foreach ( $after AS $theline )
             {
                 $z = "" ;
                 if ( $fc != "+" )
                 {
                     # Open the row if this is its first cell
                     $tra = array_pop ( $ltr ) ;
                     if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                     array_push ( $tr , true ) ;
                     array_push ( $ltr , "" ) ;
                 }

                 # Close the previous cell, if any, then open the new one
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                 if ( $fc == "|" ) $l = "TD" ;
                 else if ( $fc == "!" ) $l = "TH" ;
                 else if ( $fc == "+" ) $l = "CAPTION" ;
                 else $l = "" ;
                 array_push ( $ltd , $l ) ;
                 $y = explode ( "|" , $theline , 2 ) ;
                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                 $t[$k] .= $y ;
                 array_push ( $td , true ) ;
             }
         }
     }

     # Closing open td, tr && table
     while ( count ( $td ) > 0 )
     {
         # Close the cell with the tag that opened it (TD, TH or CAPTION)
         $l = array_pop ( $ltd ) ;
         if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
         $t[] = "</table>" ;
     }

     $t = implode ( "\n" , $t ) ;
     return $t ;
 }
 387
 388         # Well, OK, it's actually about 14 passes.  But since all the
 389         # hard lifting is done inside PHP's regex code, it probably
 390         # wouldn't speed things up much to add a real parser.
 391         #
 # Applies the wikitext-to-HTML transformations in a fixed order; the
 # order matters (see the note above replaceExternalLinks()).
 #
 # $text      - wikitext with protected sections already stripped
 # $linestart - handed unchanged to doBlockLevels()
 #
 # Returns the converted text with the categoryMagic() output appended.
 function doWikiPass2( $text, $linestart )
 {
     # Profiling label names this class, like "Parser::parse" does.
     $fname = "Parser::doWikiPass2";
     wfProfileIn( $fname );

     $text = $this->removeHTMLtags( $text );
     $text = $this->replaceVariables( $text );

     $text = str_replace ( "<HR>", "<hr>", $text );

     $text = $this->doHeadings( $text );
     $text = $this->doBlockLevels( $text, $linestart );

     if($this->mOptions->getUseDynamicDates()) {
         global $wgDateFormatter;
         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
     }

     # External links must be done before internal ones
     $text = $this->replaceExternalLinks( $text );
     $text = $this->replaceInternalLinks ( $text );
     $text = $this->doTableStuff ( $text ) ;

     $text = $this->formatHeadings( $text );

     $sk =& $this->mOptions->getSkin();
     $text = $sk->transformContent( $text );
     $text .= $this->categoryMagic () ;

     wfProfileOut( $fname );
     return $text;
 }
 424
 425
 # Turns "== Heading ==" style lines into <hN> elements.
 # Levels are tried from 6 down to 1 so the longest run of "=" wins.
 # A heading must start its line, contain no "=" in its text and be
 # followed by whitespace or the end of the line.
 /* private */ function doHeadings( $text )
 {
     $level = 6;
     while ( $level >= 1 ) {
         $marks = str_repeat( "=", $level );
         $pattern = "/^{$marks}([^=]+){$marks}(\\s|$)/m";
         $replacement = "<h{$level}>\\1</h{$level}>\\2";
         $text = preg_replace( $pattern, $replacement, $text );
         --$level;
     }
     return $text;
 }
 435
 436         # Note: we have to do external links before the internal ones,
 437         # and otherwise take great care in the order of things here, so
 438         # that we don't end up interpreting some URLs twice.
 439
 # Converts external links for every supported protocol, one protocol
 # after another. The second element of each entry is the $autonumber
 # flag handed to subReplaceExternalLinks(); only http and https use it.
 /* private */ function replaceExternalLinks( $text )
 {
     # Profiling label names this class, like "Parser::parse" does.
     $fname = "Parser::replaceExternalLinks";
     wfProfileIn( $fname );

     $protocols = array(
         array( "http", true ),
         array( "https", true ),
         array( "ftp", false ),
         array( "irc", false ),
         array( "gopher", false ),
         array( "news", false ),
         array( "mailto", false )
     );
     foreach ( $protocols as $entry ) {
         $text = $this->subReplaceExternalLinks( $text, $entry[0], $entry[1] );
     }

     wfProfileOut( $fname );
     return $text;
 }
 454
 # Converts the external links of one protocol into HTML.
 #
 # $s          - text to process
 # $protocol   - protocol name without the colon, e.g. "http"
 # $autonumber - when true, bracketed links without a label are shown
 #               as "[n]" using $this->mAutonumber, and (if the options
 #               allow external images) bare image URLs become images
 #
 # Works in two stages: bare URLs are handled with regular expressions,
 # then the text is split on "[protocol:" to handle bracketed links.
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
     # Stands in for the protocol inside generated HTML until the bare
     # URL stage is finished; it is swapped back with str_replace() below.
     $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
     $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

     # Separators that are ignored when they are the last character of a
     # URL but kept when they occur inside it: in "go to www.foo.com,
     # where .." the comma is not part of the URL, but in
     # "www.foo.com/123,2342,32.htm" the commas are.
     $sep = ",;\.:";
     $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
     $images = "gif|png|jpg|jpeg";

     # PLEASE NOTE: The curly braces { } are not part of the regex; they
     # tell PHP to insert the variable's content into the string.
     $imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
       "((?i){$images})([^{$uc}]|$)/";

     $barePattern = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
     $sk =& $this->mOptions->getSkin();

     if ( $autonumber && $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
         $imageHtml = $sk->makeImage( "{$unique}:\\3" .
           "/\\4.\\5", "\\4.\\5" );
         $s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
     }

     $linkAttribs = $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
       "{$unique}:\\3" ) );
     $linkLabel = wfEscapeHTML( "{$unique}:\\3" );
     $s = preg_replace( $barePattern, "\\1" . "<a href=\"{$unique}:\\3\"" .
       $linkAttribs . ">" . $linkLabel .
       "</a>\\5", $s );
     $s = str_replace( $unique, $protocol, $s );

     # Bracketed links. The leading space keeps the first piece
     # non-empty; it is cut off again with substr().
     $pieces = explode( "[{$protocol}:", " " . $s );
     $s = substr( array_shift( $pieces ), 1 );

     $bracketPlain = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
     $bracketLabelled = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

     foreach ( $pieces as $piece ) {
         if ( preg_match( $bracketPlain, $piece, $m ) ) {
             # [url]
             $link = "{$protocol}:{$m[1]}";
             $trail = $m[2];
             $label = $autonumber
                 ? "[" . ++$this->mAutonumber . "]"
                 : wfEscapeHTML( $link );
         } elseif ( preg_match( $bracketLabelled, $piece, $m ) ) {
             # [url label]
             $link = "{$protocol}:{$m[1]}";
             $label = $m[2];
             $trail = $m[3];
         } else {
             # Not a link after all: put the text back unchanged
             $s .= "[{$protocol}:" . $piece;
             continue;
         }

         # The printable version spells the URL out after the link
         $paren = "";
         if ( $this->mOptions->getPrintable() ) {
             $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
         }
         $attribs = $sk->getExternalLinkAttributes( $link, $label );
         $s .= "<a href='{$link}'{$attribs}>{$label}</a>{$paren}{$trail}";
     }
     return $s;
 }
 517
 # Handles a ''' (bold) token.
 #
 # $state - array with "em" and "strong" entries; each is FALSE when
 #          the element is closed, otherwise the "pos" of the token
 #          that opened it. Updated in place.
 # $token - the current token; only $token["pos"] is read
 #
 # Returns the HTML to emit for the token.
 # NOTE(review): a "pos" of 0 would be stored as a falsy value and read
 # back as "closed" -- confirm that token positions never are 0.
 /* private */ function handle3Quotes( &$state, $token )
 {
     if ( !$state["strong"] ) {
         # Nothing open yet: this token opens <strong>
         $state["strong"] = $token["pos"];
         return "<strong>";
     }

     # ''' lala ''lala ''' : <em> was opened inside <strong>, so it is
     # closed first and reopened after </strong>.
     $emInside = ( $state["em"] && $state["em"] > $state["strong"] );
     $state["strong"] = FALSE;
     if ( $emInside ) {
         return "</em></strong><em>";
     }
     return "</strong>";
 }
 535
 # Handles a '' (italic) token.
 #
 # $state - array with "em" and "strong" entries; each is FALSE when
 #          the element is closed, otherwise the "pos" of the token
 #          that opened it. Updated in place.
 # $token - the current token; only $token["pos"] is read
 #
 # Returns the HTML to emit for the token.
 # NOTE(review): a "pos" of 0 would be stored as a falsy value and read
 # back as "closed" -- confirm that token positions never are 0.
 /* private */ function handle2Quotes( &$state, $token )
 {
     if ( !$state["em"] ) {
         # Nothing open yet: this token opens <em>
         $state["em"] = $token["pos"];
         return "<em>";
     }

     # ''lala'''lala'' ....''' : <strong> was opened inside <em>, so it
     # is closed first and reopened after </em>.
     $strongInside = ( $state["strong"] && $state["strong"] > $state["em"] );
     $state["em"] = FALSE;
     if ( $strongInside ) {
         return "</strong></em><strong>";
     }
     return "</em>";
 }
 553
 # Handles a ''''' (bold + italic) token.
 #
 # $state - array with "em" and "strong" entries; each is FALSE when
 #          the element is closed, otherwise the "pos" of the token
 #          that opened it. Updated in place.
 # $token - the current token; only $token["pos"] is read
 #
 # Returns the HTML to emit: closes whatever is open and opens whatever
 # is not, so the token toggles both elements at once.
 /* private */ function handle5Quotes( &$state, $token )
 {
     # $s is assigned, not appended to: it has no earlier value here.
     if ( $state["em"] && $state["strong"] ) {
         # Both open: close the one that was opened later first
         if ( $state["em"] < $state["strong"] ) {
             $s = "</strong></em>";
         } else {
             $s = "</em></strong>";
         }
         $state["strong"] = $state["em"] = FALSE;
     } elseif ( $state["em"] ) {
         $s = "</em><strong>";
         $state["em"] = FALSE;
         $state["strong"] = $token["pos"];
     } elseif ( $state["strong"] ) {
         $s = "</strong><em>";
         $state["strong"] = FALSE;
         $state["em"] = $token["pos"];
     } else { # not $em and not $strong
         $s = "<strong><em>";
         $state["strong"] = $state["em"] = $token["pos"];
     }
     return $s;
 }
 577
 578         /* private */ function replaceInternalLinks( $str )
 579         {
 580                 global $wgLang; # for language specific parser hook
 581
 582                 $tokenizer=Tokenizer::newFromString( $str );
 583                 $tokenStack = array();
 584
 585                 $s="";
 586                 $state["em"]      = FALSE;
 587                 $state["strong"]  = FALSE;
 588                 $tagIsOpen = FALSE;
 589
 590                 # The tokenizer splits the text into tokens and returns them one by one.
 591                 # Every call to the tokenizer returns a new token.
 592                 while ( $token = $tokenizer->nextToken() )
 593                 {
 594                         switch ( $token["type"] )
 595                         {
 596                                 case "text":
 597                                         # simple text with no further markup
 598                                         $txt = $token["text"];
 599                                         break;
 600                                 case "[[":
 601                                         # link opening tag.
 602                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 603                                         $tagIsOpen = TRUE;
 604                                         array_push( $tokenStack, $token );
 605                                         $txt="";
 606                                         break;
 607                                 case "]]":
 608                                         # link close tag.
 609                                         # get text from stack, glue it together, and call the code to handle a
 610                                         # link
 611                                         if ( count( $tokenStack ) == 0 )
 612                                         {
 613                                                 # stack empty. Found a ]] without an opening [[
 614                                                 $txt = "]]";
 615                                         } else {
 616                                                 $linkText = "";
 617                                                 $lastToken = array_pop( $tokenStack );
 618                                                 while ( $lastToken["type"] != "[[" )
 619                                                 {
 620                                                         if( !empty( $lastToken["text"] ) ) {
 621                                                                 $linkText = $lastToken["text"] . $linkText;
 622                                                         }
 623                                                         $lastToken = array_pop( $tokenStack );
 624                                                 }
 625                                                 $txt = $linkText ."]]";
 626                                                 if( isset( $lastToken["text"] ) ) {
 627                                                         $prefix = $lastToken["text"];
 628                                                 } else {
 629                                                         $prefix = "";
 630                                                 }
 631                                                 $nextToken = $tokenizer->previewToken();
 632                                                 if ( $nextToken["type"] == "text" )
 633                                                 {
 634                                                         # Preview just looks at it. Now we have to fetch it.
 635                                                         $nextToken = $tokenizer->nextToken();
 636                                                         $txt .= $nextToken["text"];
 637                                                 }
 638                                                 $txt = $this->handleInternalLink( $txt, $prefix );
 639                                         }
 640                                         $tagIsOpen = (count( $tokenStack ) != 0);
 641                                         break;
 642                                 case "----":
 643                                         $txt = "\n<hr>\n";
 644                                         break;
 645                                 case "'''":
 646                                         # This and the three next ones handle quotes
 647                                         $txt = $this->handle3Quotes( $state, $token );
 648                                         break;
 649                                 case "''":
 650                                         $txt = $this->handle2Quotes( $state, $token );
 651                                         break;
 652                                 case "'''''":
 653                                         $txt = $this->handle5Quotes( $state, $token );
 654                                         break;
 655                                 case "":
 656                                         # empty token
 657                                         $txt="";
 658                                         break;
 659                                 case "RFC ":
 660                                         if ( $tagIsOpen ) {
 661                                                 $txt = "RFC ";
 662                                         } else {
 663                                                 $txt = $this->doMagicRFC( $tokenizer );
 664                                         }
 665                                         break;
 666                                 case "ISBN ":
 667                                         if ( $tagIsOpen ) {
 668                                                 $txt = "ISBN ";
 669                                         } else {
 670                                                 $txt = $this->doMagicISBN( $tokenizer );
 671                                         }
 672                                         break;
 673                                 default:
 674                                         # Call language specific Hook.
 675                                         $txt = $wgLang->processToken( $token, $tokenStack );
 676                                         if ( NULL == $txt ) {
 677                                                 # An unkown token. Highlight.
 678                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 679                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 680                                         }
 681                                         break;
 682                         }
 683                         # If we're parsing the interior of a link, don't append the interior to $s,
 684                         # but push it to the stack so it can be processed when a ]] token is found.
 685                         if ( $tagIsOpen  && $txt != "" ) {
 686                                 $token["type"] = "text";
 687                                 $token["text"] = $txt;
 688                                 array_push( $tokenStack, $token );
 689                         } else {
 690                                 $s .= $txt;
 691                         }
 692                 } #end while
 693                 if ( count( $tokenStack ) != 0 )
 694                 {
 695                         # still objects on stack. opened [[ tag without closing ]] tag.
 696                         $txt = "";
 697                         while ( $lastToken = array_pop( $tokenStack ) )
 698                         {
 699                                 if ( $lastToken["type"] == "text" )
 700                                 {
 701                                         $txt = $lastToken["text"] . $txt;
 702                                 } else {
 703                                         $txt = $lastToken["type"] . $txt;
 704                                 }
 705                         }
 706                         $s .= $txt;
 707                 }
 708                 return $s;
 709         }
 710
 711         /* private */ function handleInternalLink( $line, $prefix )
 712         {
 713                 global $wgLang, $wgLinkCache;
 714                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 715                 static $fname = "OutputPage::replaceInternalLinks" ;
 716                 wfProfileIn( $fname );
 717
 718                 wfProfileIn( "$fname-setup" );
 719                 static $tc = FALSE;
 720                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 721                 $sk =& $this->mOptions->getSkin();
 722
 723                 # Match a link having the form [[namespace:link|alternate]]trail
 724                 static $e1 = FALSE;
 725                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 726                 # Match the end of a line for a word that's not followed by whitespace,
 727                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 728                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 729                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 730                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 731
 732
 733                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 734                 static $image = FALSE;
 735                 static $special = FALSE;
 736                 static $media = FALSE;
 737                 static $category = FALSE;
 738                 if ( !$image ) { $image = Namespace::getImage(); }
 739                 if ( !$special ) { $special = Namespace::getSpecial(); }
 740                 if ( !$media ) { $media = Namespace::getMedia(); }
 741                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 742
 743                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 744
 745                 wfProfileOut( "$fname-setup" );
 746                 $s = "";
 747
 748                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 749                         $text = $m[2];
 750                         $trail = $m[3];
 751                 } else { # Invalid form; output directly
 752                         $s .= $prefix . "[[" . $line ;
 753                         return $s;
 754                 }
 755
 756                 /* Valid link forms:
 757                 Foobar -- normal
 758                 :Foobar -- override special treatment of prefix (images, language links)
 759                 /Foobar -- convert to CurrentPage/Foobar
 760                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 761                 */
 762                 $c = substr($m[1],0,1);
 763                 $noforce = ($c != ":");
 764                 if( $c == "/" ) { # subpage
 765                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 766                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 767                                 $noslash=$m[1];
 768                         } else {
 769                                 $noslash=substr($m[1],1);
 770                         }
 771                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 772                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 773                                 if( "" == $text ) {
 774                                         $text= $m[1];
 775                                 } # this might be changed for ugliness reasons
 776                         } else {
 777                                 $link = $noslash; # no subpage allowed, use standard link
 778                         }
 779                 } elseif( $noforce ) { # no subpage
 780                         $link = $m[1];
 781                 } else {
 782                         $link = substr( $m[1], 1 );
 783                 }
 784                 if( "" == $text )
 785                         $text = $link;
 786
 787                 $nt = Title::newFromText( $link );
 788                 if( !$nt ) {
 789                         $s .= $prefix . "[[" . $line;
 790                         return $s;
 791                 }
 792                 $ns = $nt->getNamespace();
 793                 $iw = $nt->getInterWiki();
 794                 if( $noforce ) {
 795                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 796                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 797                                 $s .= $prefix . $trail;
 798                                 return $s;
 799                         }
 800                         if( $ns == $image ) {
 801                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 802                                 $wgLinkCache->addImageLinkObj( $nt );
 803                                 return $s;
 804                         }
 805                 }
 806                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 807                     ( strpos( $link, "#" ) == FALSE ) ) {
 808                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 809                         return $s;
 810                 }
 811                 if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
 812                         $t = explode ( ":" , $nt->getText() ) ;
 813                         array_shift ( $t ) ;
 814                         $t = implode ( ":" , $t ) ;
 815                         $t = $wgLang->ucFirst ( $t ) ;
 816 #                       $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
 817                         $nnt = Title::newFromText ( $category.":".$t ) ;
 818                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 819                         $this->mCategoryLinks[] = $t ;
 820                         $s .= $prefix . $trail ;
 821                         return $s ;
 822                 }
 823                 if( $ns == $media ) {
 824                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 825                         $wgLinkCache->addImageLinkObj( $nt );
 826                         return $s;
 827                 } elseif( $ns == $special ) {
 828                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 829                         return $s;
 830                 }
 831                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 832
 833                 wfProfileOut( $fname );
 834                 return $s;
 835         }
 836
 837         # Some functions here used by doBlockLevels()
 838         #
 839         /* private */ function closeParagraph()
 840         {
 841                 $result = "";
 842                 if ( 0 != strcmp( "p", $this->mLastSection ) &&
 843                   0 != strcmp( "", $this->mLastSection ) ) {
 844                         $result = "</" . $this->mLastSection  . ">";
 845                 }
 846                 $this->mLastSection = "";
 847                 return $result."\n";
 848         }
 849         # getCommon() returns the length of the longest common substring
 850         # of both arguments, starting at the beginning of both.
 851         #
 852         /* private */ function getCommon( $st1, $st2 )
 853         {
 854                 $fl = strlen( $st1 );
 855                 $shorter = strlen( $st2 );
 856                 if ( $fl < $shorter ) { $shorter = $fl; }
 857
 858                 for ( $i = 0; $i < $shorter; ++$i ) {
 859                         if ( $st1{$i} != $st2{$i} ) { break; }
 860                 }
 861                 return $i;
 862         }
 863         # These next three functions open, continue, and close the list
 864         # element appropriate to the prefix character passed into them.
 865         #
 866         /* private */ function openList( $char )
 867     {
 868                 $result = $this->closeParagraph();
 869
 870                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 871                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 872                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 873                 else if ( ";" == $char ) {
 874                         $result .= "<dl><dt>";
 875                         $this->mDTopen = true;
 876                 }
 877                 else { $result = "<!-- ERR 1 -->"; }
 878
 879                 return $result;
 880         }
 881
 882         /* private */ function nextItem( $char )
 883         {
 884                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 885                 else if ( ":" == $char || ";" == $char ) {
 886                         $close = "</dd>";
 887                         if ( $this->mDTopen ) { $close = "</dt>"; }
 888                         if ( ";" == $char ) {
 889                                 $this->mDTopen = true;
 890                                 return $close . "<dt>";
 891                         } else {
 892                                 $this->mDTopen = false;
 893                                 return $close . "<dd>";
 894                         }
 895                 }
 896                 return "<!-- ERR 2 -->";
 897         }
 898
 899         /* private */function closeList( $char )
 900         {
 901                 if ( "*" == $char ) { $text = "</li></ul>"; }
 902                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 903                 else if ( ":" == $char ) {
 904                         if ( $this->mDTopen ) {
 905                                 $this->mDTopen = false;
 906                                 $text = "</dt></dl>";
 907                         } else {
 908                                 $text = "</dd></dl>";
 909                         }
 910                 }
 911                 else {  return "<!-- ERR 3 -->"; }
 912                 return $text."\n";
 913         }
 914
 915         /* private */ function doBlockLevels( $text, $linestart )
 916         {
 917                 $fname = "OutputPage::doBlockLevels";
 918                 wfProfileIn( $fname );
 919                 # Parsing through the text line by line.  The main thing
 920                 # happening here is handling of block-level elements p, pre,
 921                 # and making lists from lines starting with * # : etc.
 922                 #
 923                 $a = explode( "\n", $text );
 924                 $text = $lastPref = "";
 925                 $this->mDTopen = $inBlockElem = false;
 926
 927                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 928                 foreach ( $a as $t ) {
 929                         if ( "" != $text ) { $text .= "\n"; }
 930
 931                         $oLine = $t;
 932                         $opl = strlen( $lastPref );
 933                         $npl = strspn( $t, "*#:;" );
 934                         $pref = substr( $t, 0, $npl );
 935                         $pref2 = str_replace( ";", ":", $pref );
 936                         $t = substr( $t, $npl );
 937
 938                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 939                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 940
 941                                 if ( ";" == substr( $pref, -1 ) ) {
 942                                         $cpos = strpos( $t, ":" );
 943                                         if ( ! ( false === $cpos ) ) {
 944                                                 $term = substr( $t, 0, $cpos );
 945                                                 $text .= $term . $this->nextItem( ":" );
 946                                                 $t = substr( $t, $cpos + 1 );
 947                                         }
 948                                 }
 949                         } else if (0 != $npl || 0 != $opl) {
 950                                 $cpl = $this->getCommon( $pref, $lastPref );
 951
 952                                 while ( $cpl < $opl ) {
 953                                         $text .= $this->closeList( $lastPref{$opl-1} );
 954                                         --$opl;
 955                                 }
 956                                 if ( $npl <= $cpl && $cpl > 0 ) {
 957                                         $text .= $this->nextItem( $pref{$cpl-1} );
 958                                 }
 959                                 while ( $npl > $cpl ) {
 960                                         $char = substr( $pref, $cpl, 1 );
 961                                         $text .= $this->openList( $char );
 962
 963                                         if ( ";" == $char ) {
 964                                                 $cpos = strpos( $t, ":" );
 965                                                 if ( ! ( false === $cpos ) ) {
 966                                                         $term = substr( $t, 0, $cpos );
 967                                                         $text .= $term . $this->nextItem( ":" );
 968                                                         $t = substr( $t, $cpos + 1 );
 969                                                 }
 970                                         }
 971                                         ++$cpl;
 972                                 }
 973                                 $lastPref = $pref2;
 974                         }
 975                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
 976                                 if ( preg_match(
 977                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
 978                                         $text .= $this->closeParagraph();
 979                                         $inBlockElem = true;
 980                                 }
 981                                 if ( ! $inBlockElem ) {
 982                                         if ( " " == $t{0} ) {
 983                                                 $newSection = "pre";
 984                                                 # $t = wfEscapeHTML( $t );
 985                                         }
 986                                         else { $newSection = "p"; }
 987
 988                                         if ( 0 == strcmp( "", trim( $oLine ) ) ) {
 989                                                 $text .= $this->closeParagraph();
 990                                                 $text .= "<" . $newSection . ">";
 991                                         } else if ( 0 != strcmp( $this->mLastSection,
 992                                           $newSection ) ) {
 993                                                 $text .= $this->closeParagraph();
 994                                                 if ( 0 != strcmp( "p", $newSection ) ) {
 995                                                         $text .= "<" . $newSection . ">";
 996                                                 }
 997                                         }
 998                                         $this->mLastSection = $newSection;
 999                                 }
1000                                 if ( $inBlockElem &&
1001                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
1002                                         $inBlockElem = false;
1003                                 }
1004                         }
1005                         $text .= $t;
1006                 }
1007                 while ( $npl ) {
1008                         $text .= $this->closeList( $pref2{$npl-1} );
1009                         --$npl;
1010                 }
1011                 if ( "" != $this->mLastSection ) {
1012                         if ( "p" != $this->mLastSection ) {
1013                                 $text .= "</" . $this->mLastSection . ">";
1014                         }
1015                         $this->mLastSection = "";
1016                 }
1017                 wfProfileOut( $fname );
1018                 return $text;
1019         }
1020
1021         /* private */ function replaceVariables( $text )
1022         {
1023                 global $wgLang, $wgCurOut;
1024                 $fname = "OutputPage::replaceVariables";
1025                 wfProfileIn( $fname );
1026
1027                 $magic = array();
1028
1029                 # Basic variables
1030                 # See Language.php for the definition of each magic word
1031                 # As with sigs, this uses the server's local time -- ensure
1032                 # this is appropriate for your audience!
1033
1034                 $magic[MAG_CURRENTMONTH] = date( "m" );
1035                 $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
1036                 $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
1037                 $magic[MAG_CURRENTDAY] = date("j");
1038                 $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
1039                 $magic[MAG_CURRENTYEAR] = date( "Y" );
1040                 $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
1041
1042                 $this->mOutput->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
1043
1044                 $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
1045                 if ( $mw->match( $text ) ) {
1046                         $v = wfNumberOfArticles();
1047                         $text = $mw->replace( $v, $text );
1048                         if( $mw->getWasModified() ) { $this->mOutput->mContainsOldMagic++; }
1049                 }
1050
1051                 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
1052                 # The callbacks are at the bottom of this file
1053                 $wgCurOut = $this;
1054                 $mw =& MagicWord::get( MAG_MSG );
1055                 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
1056                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1057
1058                 $mw =& MagicWord::get( MAG_MSGNW );
1059                 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
1060                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1061
1062                 wfProfileOut( $fname );
1063                 return $text;
1064         }
1065
1066         # Cleans up HTML, removes dangerous tags and attributes
        /* private */ function removeHTMLtags( $text )
        {
                # Walks through $text tag by tag.  Tags from the whitelists below
                # that are properly nested are kept (with their attributes passed
                # through fixTagAttributes()); every other "<" is turned into
                # "&lt;".  HTML comments are removed, and tags still open at the
                # end are closed.  Returns the cleaned text.
                $fname = "OutputPage::removeHTMLtags";
                wfProfileIn( $fname );
                $htmlpairs = array( # Tags that must be closed
                        "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
                        "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
                        "strike", "strong", "tt", "var", "div", "center",
                        "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
                        "ruby", "rt" , "rb" , "rp"
                );
                # Tags that are never pushed on the open-tag stack
                $htmlsingle = array(
                        "br", "p", "hr", "li", "dt", "dd"
                );
                $htmlnest = array( # Tags that can be nested--??
                        "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
                        "dl", "font", "big", "small", "sub", "sup"
                );
                $tabletags = array( # Can only appear inside table
                        "td", "th", "tr"
                );

                # Table cell/row tags are treated like single tags (not stacked)
                $htmlsingle = array_merge( $tabletags, $htmlsingle );
                $htmlelements = array_merge( $htmlsingle, $htmlpairs );

                # NOTE(review): $htmlattrs is not referenced again in this function
                $htmlattrs = $this->getHTMLattrs () ;

                # Remove HTML comments
                $text = preg_replace( "/<!--.*-->/sU", "", $text );

                # Everything before the first "<" is kept as is; each remaining
                # piece starts right after a "<".
                $bits = explode( "<", $text );
                $text = array_shift( $bits );
                # $tagstack holds the currently open paired tags; $tablestack
                # saves the outer $tagstack while inside a table.
                $tagstack = array(); $tablestack = array();

                foreach ( $bits as $x ) {
                        # Notices and warnings are silenced here because $regs is
                        # empty when the piece does not look like a tag, and list()
                        # would complain about the missing offsets.
                        $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
                        preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
                          $x, $regs );
                        list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
                        error_reporting( $prev );

                        $badtag = 0 ;
                        if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
                                # Check our stack
                                if ( $slash ) {
                                        # Closing a tag...
                                        # A paired tag may only close the tag on top of
                                        # the stack; otherwise the popped entry is put
                                        # back and the closing tag is rejected.
                                        if ( ! in_array( $t, $htmlsingle ) &&
                                          ( $ot = array_pop( $tagstack ) ) != $t ) {
                                                array_push( $tagstack, $ot );
                                                $badtag = 1;
                                        } else {
                                                if ( $t == "table" ) {
                                                        # Back to the stack saved when the table was opened
                                                        $tagstack = array_pop( $tablestack );
                                                }
                                                $newparams = "";
                                        }
                                } else {
                                        # Keep track for later
                                        if ( in_array( $t, $tabletags ) &&
                                          ! in_array( "table", $tagstack ) ) {
                                                # Table row/cell tag outside of a table
                                                $badtag = 1;
                                        } else if ( in_array( $t, $tagstack ) &&
                                          ! in_array ( $t , $htmlnest ) ) {
                                                # Tag is already open and may not be nested
                                                $badtag = 1 ;
                                        } else if ( ! in_array( $t, $htmlsingle ) ) {
                                                if ( $t == "table" ) {
                                                        # A table starts with a fresh stack; the
                                                        # outer one is saved until the table closes
                                                        array_push( $tablestack, $tagstack );
                                                        $tagstack = array();
                                                }
                                                array_push( $tagstack, $t );
                                        }
                                        # Strip non-approved attributes from the tag
                                        $newparams = $this->fixTagAttributes($params);

                                }
                                if ( ! $badtag ) {
                                        # Accepted: re-emit the tag, escaping any stray
                                        # ">" in the text that follows it
                                        $rest = str_replace( ">", "&gt;", $rest );
                                        $text .= "<$slash$t $newparams$brace$rest";
                                        continue;
                                }
                        }
                        # Not an accepted tag: show the piece as literal text
                        $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                }
                # Close off any remaining tags
                while ( $t = array_pop( $tagstack ) ) {
                        $text .= "</$t>\n";
                        if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
                }
                wfProfileOut( $fname );
                return $text;
        }
1158
1159 /*
1160  *
1161  * This function accomplishes several tasks:
1162  * 1) Auto-number headings if that option is enabled
1163  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1164  * 3) Add a Table of contents on the top for users who have enabled the option
1165  * 4) Auto-anchor headings
1166  *
1167  * It loops through all headlines, collects the necessary data, then splits up the
1168  * string and re-inserts the newly formatted headlines.
1169  *
1170  * */
1171         /* private */ function formatHeadings( $text )
1172         {
1173                 $nh=$this->mOptions->getNumberHeadings();
1174                 $st=$this->mOptions->getShowToc();
1175                 if(!$this->mTitle->userCanEdit()) {
1176                         $es=0;
1177                         $esr=0;
1178                 } else {
1179                         $es=$this->mOptions->getEditSection();
1180                         $esr=$this->mOptions->getEditSectionOnRightClick();
1181                 }
1182
1183                 # Inhibit editsection links if requested in the page
1184                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1185                 if ($esw->matchAndRemove( $text )) {
1186                         $es=0;
1187                 }
1188                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1189                 # do not add TOC
1190                 $mw =& MagicWord::get( MAG_NOTOC );
1191                 if ($mw->matchAndRemove( $text ))
1192                 {
1193                         $st = 0;
1194                 }
1195
1196                 # never add the TOC to the Main Page. This is an entry page that should not
1197                 # be more than 1-2 screens large anyway
1198                 if($this->mTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1199
1200                 # We need this to perform operations on the HTML
1201                 $sk =& $this->mOptions->getSkin();
1202
1203                 # Get all headlines for numbering them and adding funky stuff like [edit]
1204                 # links
1205                 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1206
1207                 # headline counter
1208                 $c=0;
1209
1210                 # Ugh .. the TOC should have neat indentation levels which can be
1211                 # passed to the skin functions. These are determined here
1212                 $toclevel = 0;
1213                 $toc = "";
1214                 $full = "";
1215                 $head = array();
1216                 foreach($matches[3] as $headline) {
1217                         if($level) { $prevlevel=$level;}
1218                         $level=$matches[1][$c];
1219                         if(($nh||$st) && $prevlevel && $level>$prevlevel) {
1220
1221                                 $h[$level]=0; // reset when we enter a new level
1222                                 $toc.=$sk->tocIndent($level-$prevlevel);
1223                                 $toclevel+=$level-$prevlevel;
1224
1225                         }
1226                         if(($nh||$st) && $level<$prevlevel) {
1227                                 $h[$level+1]=0; // reset when we step back a level
1228                                 $toc.=$sk->tocUnindent($prevlevel-$level);
1229                                 $toclevel-=$prevlevel-$level;
1230
1231                         }
1232                         $h[$level]++; // count number of headlines for each level
1233
1234                         if($nh||$st) {
1235                                 for($i=1;$i<=$level;$i++) {
1236                                         if($h[$i]) {
1237                                                 if($dot) {$numbering.=".";}
1238                                                 $numbering.=$h[$i];
1239                                                 $dot=1;
1240                                         }
1241                                 }
1242                         }
1243
1244                         // The canonized header is a version of the header text safe to use for links
1245                         // Avoid insertion of weird stuff like <math> by expanding the relevant sections
1246                         $canonized_headline=Parser::unstrip( $headline, $this->mStripState );
1247                         $canonized_headline=preg_replace("/<.*?>/","",$canonized_headline); // strip out HTML
1248                         $tocline = trim( $canonized_headline );
1249                         $canonized_headline=str_replace('"',"",$canonized_headline);
1250                         $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1251                         $refer[$c]=$canonized_headline;
1252                         $refers[$canonized_headline]++;  // count how many in assoc. array so we can track dupes in anchors
1253                         $refcount[$c]=$refers[$canonized_headline];
1254
1255             // Prepend the number to the heading text
1256
1257                         if($nh||$st) {
1258                                 $tocline=$numbering ." ". $tocline;
1259
1260                                 // Don't number the heading if it is the only one (looks silly)
1261                                 if($nh && count($matches[3]) > 1) {
1262                                         $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1263                                 }
1264                         }
1265
1266                         // Create the anchor for linking from the TOC to the section
1267                         $anchor=$canonized_headline;
1268                         if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1269                         if($st) {
1270                                 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
1271                         }
1272                         if($es) {
1273                                 $head[$c].=$sk->editSectionLink($c+1);
1274                         }
1275
1276                         // Put it all together
1277
1278                         $head[$c].="<h".$level.$matches[2][$c]
1279                          ."<a name=\"".$anchor."\">"
1280                          .$headline
1281                          ."</a>"
1282                          ."</h".$level.">";
1283
1284                         // Add the edit section link
1285
1286                         if($esr) {
1287                                 $head[$c]=$sk->editSectionScript($c+1,$head[$c]);
1288                         }
1289
1290                         $numbering="";
1291                         $c++;
1292                         $dot=0;
1293                 }
1294
1295                 if($st) {
1296                         $toclines=$c;
1297                         $toc.=$sk->tocUnindent($toclevel);
1298                         $toc=$sk->tocTable($toc);
1299                 }
1300
1301                 // split up and insert constructed headlines
1302
1303                 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1304                 $i=0;
1305
1306                 foreach($blocks as $block) {
1307                         if(($es) && $c>0 && $i==0) {
1308                             # This is the [edit] link that appears for the top block of text when
1309                                 # section editing is enabled
1310                                 $full.=$sk->editSectionLink(0);
1311                         }
1312                         $full.=$block;
1313                         if($st && $toclines>3 && !$i) {
1314                                 # Let's add a top anchor just in case we want to link to the top of the page
1315                                 $full="<a name=\"top\"></a>".$full.$toc;
1316                         }
1317
1318                         if( !empty( $head[$i] ) ) {
1319                                 $full .= $head[$i];
1320                         }
1321                         $i++;
1322                 }
1323
1324                 return $full;
1325         }
1326
1327         /* private */ function doMagicISBN( &$tokenizer )
1328         {
1329                 global $wgLang;
1330
1331                 # Check whether next token is a text token
1332                 # If yes, fetch it and convert the text into a
1333                 # Special::BookSources link
1334                 $token = $tokenizer->previewToken();
1335                 while ( $token["type"] == "" )
1336                 {
1337                         $tokenizer->nextToken();
1338                         $token = $tokenizer->previewToken();
1339                 }
1340                 if ( $token["type"] == "text" )
1341                 {
1342                         $token = $tokenizer->nextToken();
1343                         $x = $token["text"];
1344                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1345
1346                         $isbn = $blank = "" ;
1347                         while ( " " == $x{0} ) {
1348                                 $blank .= " ";
1349                                 $x = substr( $x, 1 );
1350                         }
1351                         while ( strstr( $valid, $x{0} ) != false ) {
1352                                 $isbn .= $x{0};
1353                                 $x = substr( $x, 1 );
1354                         }
1355                         $num = str_replace( "-", "", $isbn );
1356                         $num = str_replace( " ", "", $num );
1357
1358                         if ( "" == $num ) {
1359                                 $text = "ISBN $blank$x";
1360                         } else {
1361                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1362                                 $text = "<a href=\"" .
1363                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1364                                         "\" class=\"internal\">ISBN $isbn</a>";
1365                                 $text .= $x;
1366                         }
1367                 } else {
1368                         $text = "ISBN ";
1369                 }
1370                 return $text;
1371         }
1372         /* private */ function doMagicRFC( &$tokenizer )
1373         {
1374                 global $wgLang;
1375
1376                 # Check whether next token is a text token
1377                 # If yes, fetch it and convert the text into a
1378                 # link to an RFC source
1379                 $token = $tokenizer->previewToken();
1380                 while ( $token["type"] == "" )
1381                 {
1382                         $tokenizer->nextToken();
1383                         $token = $tokenizer->previewToken();
1384                 }
1385                 if ( $token["type"] == "text" )
1386                 {
1387                         $token = $tokenizer->nextToken();
1388                         $x = $token["text"];
1389                         $valid = "0123456789";
1390
1391                         $rfc = $blank = "" ;
1392                         while ( " " == $x{0} ) {
1393                                 $blank .= " ";
1394                                 $x = substr( $x, 1 );
1395                         }
1396                         while ( strstr( $valid, $x{0} ) != false ) {
1397                                 $rfc .= $x{0};
1398                                 $x = substr( $x, 1 );
1399                         }
1400
1401                         if ( "" == $rfc ) {
1402                                 $text .= "RFC $blank$x";
1403                         } else {
1404                                 $url = wfmsg( "rfcurl" );
1405                                 $url = str_replace( "$1", $rfc, $url);
1406                                 $sk =& $this->mOptions->getSkin();
1407                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1408                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1409                         }
1410                 } else {
1411                         $text = "RFC ";
1412                 }
1413                 return $text;
1414         }
1415
1416         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1417         {
1418                 $this->mOptions = $options;
1419                 $this->mTitle = $title;
1420                 if ( $clearState ) {
1421                         $this->clearState();
1422                 }
1423
1424                 $stripState = false;
1425                 $text = str_replace("\r\n", "\n", $text);
1426                 $text = $this->strip( $text, $stripState, false );
1427                 $text = $this->pstPass2( $text, $user );
1428                 $text = $this->unstrip( $text, $stripState );
1429                 return $text;
1430         }
1431
1432         /* private */ function pstPass2( $text, &$user )
1433         {
1434                 global $wgLang, $wgLocaltimezone;
1435
1436                 # Signatures
1437                 #
1438                 $n = $user->getName();
1439                 $k = $user->getOption( "nickname" );
1440                 if ( "" == $k ) { $k = $n; }
1441                 if(isset($wgLocaltimezone)) {
1442                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1443                 }
1444                 /* Note: this is an ugly timezone hack for the European wikis */
1445                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1446                   " (" . date( "T" ) . ")";
1447                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1448
1449                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1450                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1451                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1452                   Namespace::getUser() ) . ":$n|$k]]", $text );
1453
1454                 # Context links: [[|name]] and [[name (context)|]]
1455                 #
1456                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1457                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1458                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1459                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1460
1461                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1462                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1463                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1464                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1465                                                                                                                 # [[ns:page (cont)|]]
1466                 $context = "";
1467                 $t = $this->mTitle->getText();
1468                 if ( preg_match( $conpat, $t, $m ) ) {
1469                         $context = $m[2];
1470                 }
1471                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1472                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1473                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1474
1475                 if ( "" == $context ) {
1476                         $text = preg_replace( $p2, "[[\\1]]", $text );
1477                 } else {
1478                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1479                 }
1480
1481                 # {{SUBST:xxx}} variables
1482                 #
1483                 $mw =& MagicWord::get( MAG_SUBST );
1484                 $text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );
1485
1486                 # Trim trailing whitespace
1487                 # MAG_END (__END__) tag allows for trailing
1488                 # whitespace to be deliberately included
1489                 $text = rtrim( $text );
1490                 $mw =& MagicWord::get( MAG_END );
1491                 $mw->matchAndRemove( $text );
1492
1493                 return $text;
1494         }
1495
1496
1497 }
1498
# Value holder for the result of a parse: the rendered text, the language
# and category links collected on the way, and a flag telling whether old
# magic was encountered.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor. Declared before the class-named method below so that PHP
        # versions recognising __construct() use it as the constructor.
        #
        # $text:             rendered text
        # $languageLinks:    array of language links
        # $categoryLinks:    array of category links
        # $containsOldMagic: flag value returned by containsOldMagic()
        function __construct( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Class-named constructor, kept for PHP versions that do not know
        # __construct(); it simply delegates to it.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->__construct( $text, $languageLinks, $categoryLinks, $containsOldMagic );
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each one returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
}
1521
# Bundle of settings consulted by the Parser. The values are filled in by
# initialiseFromUser() from site-wide globals and from a user's preferences,
# and can be read or overridden through the get*/set* methods.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mPrintable;                 # Generate printable output
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getPrintable() { return $this->mPrintable; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators; each one returns whatever wfSetVar() / wfSetRef() returns
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Creates a ParserOptions object and initialises it from $user
        # (see initialiseFromUser()).
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # No "&" at the call site: initialiseFromUser() already declares its
                # parameter by reference, and call-time pass-by-reference is
                # rejected by newer PHP versions.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fills in all options.
        #
        # Site-wide settings are copied from the $wgUseTeX, $wgUseCategoryMagic,
        # $wgUseDynamicDates, $wgInterwikiMagic and $wgAllowExternalImages
        # globals. The skin and the per-user settings come from $userInput; when
        # $userInput is empty a fresh User object is used instead. Printable
        # output is always switched off here.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mPrintable = false;
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
1597
1598 # Regex callbacks, used in OutputPage::replaceVariables
1599
# Regex callback: expands a message variable into sanitised HTML.
# $matches[1] is the message key. The message text has its dangerous HTML
# removed (necessary because replaceVariables is called after removeHTMLtags,
# and message text can come from any user), then its internal links are
# replaced while the link cache is suspended. Finally the MediaWiki-namespace
# title of the message itself is added to the link cache.
function wfReplaceMsgVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        $key = $matches[1];
        $message = wfMsg( $key );
        $html = $wgCurOut->removeHTMLtags( $message );

        $wgLinkCache->suspend();
        $html = $wgCurOut->replaceInternalLinks( $html );
        $wgLinkCache->resume();

        $messageTitle = Title::makeTitle( NS_MEDIAWIKI, $key );
        $wgLinkCache->addLinkObj( $messageTitle );

        return $html;
}
1612
# Regex callback: expands a message variable as escaped wikitext.
# Effective <nowiki></nowiki>
# Not real <nowiki> because this is called after nowiki sections are processed
# $matches[1] is the message key; the MediaWiki-namespace title of the
# message is added to the link cache. Returns the escaped message text.
function wfReplaceMsgnwVar( $matches ) {
        global $wgLinkCache;
        $text = wfEscapeWikiText( wfMsg( $matches[1] ) );
        $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
        return $text;
}
1621
1622
1623
1624 ?>