includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Converts wikitext to HTML.
   8 #
   9 # Globals used:
  10 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
  11 #
  12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  13 #
  14 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  15 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  16 #               $wgLocaltimezone
  17 #
  18 #      * only within ParserOptions
  19
  20 class Parser
  21 {
  22         # Cleared with clearState():
  23         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
  24
  25         # Temporary:
  26         var $mOptions, $mTitle;
  27
  # Constructor (PHP 4 style: the method carries the class name).
  # A new parser starts out in the same state that clearState() produces.
  function Parser()
  {
          $this->clearState();
  }
  32
  # Puts all per-parse members back to their initial values:
  # a new ParserOutput, the autonumber counter at 0, an empty
  # mLastSection, and the mDTopen and mStripState members set to false.
  function clearState()
  {
          $this->mStripState = false;
          $this->mDTopen = false;
          $this->mLastSection = "";
          $this->mAutonumber = 0;
          $this->mOutput = new ParserOutput;
  }
  41
  42         # First pass--just handle <nowiki> sections, pass the rest off
  43         # to doWikiPass2() which does all the real work.
  44         #
  45         # Returns a ParserOutput
  46         #
  # Entry point: converts wikitext to HTML.
  #
  #   $text        wikitext to convert
  #   $title       title of the page being parsed; kept by reference in $this->mTitle
  #   $options     options object, kept in $this->mOptions and queried by the passes
  #   $linestart   handed through unchanged to doWikiPass2()
  #   $clearState  when true, clearState() is called before parsing
  #
  # Returns $this->mOutput (a ParserOutput) after its text has been set.
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
          $fname = "Parser::parse";
          wfProfileIn( $fname );

          if ( $clearState ) {
                  $this->clearState();
          }

          $this->mOptions = $options;
          $this->mTitle =& $title;

          # strip() replaces protected sections with placeholders and records
          # them in $this->mStripState; unstrip() puts them back afterwards.
          $text = $this->strip( $text, $this->mStripState, true );
          $text = $this->doWikiPass2( $text, $linestart );
          $text = $this->unstrip( $text, $this->mStripState );

          $this->mOutput->setText( $text );
          wfProfileOut( $fname );
          return $this->mOutput;
  }
  68
  # Returns the lowercase hex form of two mt_rand() values in the
  # range 0..0x7fffffff, concatenated (no zero padding).
  /* static */ function getRandomString()
  {
          $first = mt_rand( 0, 0x7fffffff );
          $second = mt_rand( 0, 0x7fffffff );
          return dechex( $first ) . dechex( $second );
  }
  73
  74         # Strips <nowiki>, <pre> and <math>
  75         # Returns the text, and fills an array with data needed in unstrip()
  76         #
  # Replaces every <nowiki>, <math> (only when the options report TeX as
  # enabled) and <pre> section of $text with a placeholder and records the
  # section contents in $state so that unstrip() can put them back.
  #
  #   $text    text to process
  #   $state   (out) overwritten with a fresh array holding, per section
  #            kind (nw / math / pre): '<kind>list' (stored replacements,
  #            indexed from 1), '<kind>secs' (section count) and
  #            '<kind>unq' (random placeholder prefix)
  #   $render  true: store the rendered form of each section;
  #            false: store the section wrapped in its original tag
  #
  # Returns the text with placeholders in place of the sections.
  function strip( $text, &$state, $render = true )
  {
          $state = array(
                  'nwlist' => array(),
                  'nwsecs' => 0,
                  'nwunq' => Parser::getRandomString(),
                  'mathlist' => array(),
                  'mathsecs' => 0,
                  'mathunq' => Parser::getRandomString(),
                  'prelist' => array(),
                  'presecs' => 0,
                  'preunq' => Parser::getRandomString()
          );

          # Replace any instances of the placeholders
          foreach ( array( 'nwunq', 'mathunq', 'preunq' ) as $key ) {
                  $text = str_replace( $state[$key], wfHtmlEscapeFirst( $state[$key] ), $text );
          }

          # Order matters: nowiki first, then math, then pre.
          $text = $this->stripTagSections( $text, $state, 'nowiki', 'nw', $render );
          if( $this->mOptions->getUseTeX() ) {
                  $text = $this->stripTagSections( $text, $state, 'math', 'math', $render );
          }
          return $this->stripTagSections( $text, $state, 'pre', 'pre', $render );
  }

  # Helper for strip(): replaces all <$tag>...</$tag> sections of $text with
  # placeholders built from $state[$prefix . 'unq'] and an 8-digit hex
  # counter, storing the replacement text in $state[$prefix . 'list'].
  /* private */ function stripTagSections( $text, &$state, $tag, $prefix, $render )
  {
          $openTag = "/<\\s*{$tag}\\s*>/i";
          $closeTag = "/<\\/\\s*{$tag}\\s*>/i";
          $listKey = $prefix . 'list';
          $secsKey = $prefix . 'secs';
          $unqKey = $prefix . 'unq';

          $out = "";
          while ( "" != $text ) {
                  $p = preg_split( $openTag, $text, 2 );
                  $out .= $p[0];
                  if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                          # No (further) opening tag, or nothing follows it.
                          break;
                  }

                  $q = preg_split( $closeTag, $p[1], 2 );
                  $content = $q[0];
                  ++$state[$secsKey];

                  if ( !$render ) {
                          $replacement = "<{$tag}>{$content}</{$tag}>";
                  } else if ( 'math' == $tag ) {
                          $replacement = renderMath( $content );
                  } else if ( 'pre' == $tag ) {
                          $replacement = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>\n";
                  } else {
                          $replacement = wfEscapeHTMLTagsOnly( $content );
                  }
                  $state[$listKey][$state[$secsKey]] = $replacement;

                  $out .= $state[$unqKey] . sprintf( "%08X", $state[$secsKey] );

                  # Without a closing tag preg_split() returns a single element:
                  # the section then runs to the end of the text and nothing remains.
                  $text = isset( $q[1] ) ? $q[1] : "";
          }
          return $out;
  }
 165
 # Reverses strip(): every placeholder (unique prefix followed by the
 # section number as 8 uppercase hex digits) is replaced by the text
 # recorded for it in $state. Sections are restored in the order
 # pre, math, nowiki. Returns the resulting text.
 function unstrip( $text, &$state )
 {
         foreach ( array( 'pre', 'math', 'nw' ) as $kind ) {
                 $total = $state[$kind . 'secs'];
                 for ( $n = 1; $n <= $total; ++$n ) {
                         $placeholder = $state[$kind . 'unq'] . sprintf( "%08X", $n );
                         $text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
                 }
         }
         return $text;
 }
 181
 # Builds the HTML block appended to category pages: a list of
 # subcategories followed by a list of member articles.
 #
 # Returns "" when category magic is switched off in the options or when
 # the title text does not start with the localized "category" prefix;
 # otherwise returns the generated HTML.
 function categoryMagic ()
 {
         global $wgLang ;
         if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
         # Cast to int because the value is interpolated into SQL below.
         $id = intval ( $this->mTitle->getArticleID() ) ;
         $cat = ucfirst ( wfMsg ( "category" ) ) ;
         $ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
         if ( $cat != $ti[0] ) return "" ;
         # Part of the title after the prefix; empty when there is no colon.
         $catname = isset ( $ti[1] ) ? $ti[1] : "" ;
         $r = "<br clear=all>\n" ;

         $articles = array() ;
         $children = array() ;

         # Take the skin from the parser options, like the other passes do,
         # instead of reaching for the $wgUser global.
         $sk =& $this->mOptions->getSkin() ;

         # Hard-coded to false, so the brokenlinks query is the one that runs.
         $doesexist = false ;
         if ( $doesexist ) {
                 $sql = "SELECT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
         } else {
                 $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
         }

         $res = wfQuery ( $sql, DB_READ ) ;
         while ( $x = wfFetchObject ( $res ) )
         {
                 # Rebuild the prefixed title: "Namespace:Title" or just "Title".
                 $t = $wgLang->getNsText ( $x->cur_namespace ) ;
                 if ( $t != "" ) $t .= ":" ;
                 $t .= $x->cur_title ;

                 # Pages whose prefix is the category prefix are subcategories.
                 $y = explode ( ":" , $t , 2 ) ;
                 if ( count ( $y ) == 2 && $y[0] == $cat ) {
                         array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
                 } else {
                         array_push ( $articles , $sk->makeLink ( $t ) ) ;
                 }
         }
         wfFreeResult ( $res ) ;

         # Children
         if ( count ( $children ) > 0 )
         {
                 asort ( $children ) ;
                 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
                 $r .= implode ( ", " , $children ) ;
         }

         # Articles
         if ( count ( $articles ) > 0 )
         {
                 asort ( $articles ) ;
                 $h =  wfMsg( "category_header", $catname );
                 $r .= "<h2>{$h}</h2>\n" ;
                 $r .= implode ( ", " , $articles ) ;
         }

         return $r ;
 }
 247
 # Returns the list of HTML attribute names that may be kept on tags
 # (no scripting attributes). "width" appears twice in the list, once
 # among the general attributes and once among the table attributes.
 function getHTMLattrs ()
 {
         $general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" ) ;
         $perTag = array(
                 "clear",                   # BR
                 "noshade",                 # HR
                 "cite",                    # BLOCKQUOTE, Q
                 "size", "face", "color"    # FONT
         ) ;
         # For various lists, mostly deprecated but safe
         $lists = array( "type", "start", "value", "compact" ) ;
         $tables = array(
                 "summary", "width", "border", "frame", "rules",
                 "cellspacing", "cellpadding", "valign", "char",
                 "charoff", "colgroup", "col", "span", "abbr", "axis",
                 "headers", "scope", "rowspan", "colspan"
         ) ;
         # For CSS
         $css = array( "id", "class", "name", "style" ) ;

         return array_merge ( $general , $perTag , $lists , $tables , $css ) ;
 }
 264
 # Filters the attribute part of an HTML tag.
 #
 #   $t  attribute text, e.g. 'border="1" onclick="..."'
 #
 # Attributes whose name is not in getHTMLattrs() are removed. If the
 # remaining text contains a style attribute with "expression", a
 # "tp://"/"tps://" URL or "url(", ALL attributes are dropped.
 # Returns the trimmed, filtered attribute text ("" for blank input).
 function fixTagAttributes ( $t )
 {
         if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

         # Strip non-approved attributes from the tag.
         # A callback is used instead of the /e modifier so that attribute
         # text supplied by the user is never evaluated as PHP code.
         $t = preg_replace_callback(
                 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
                 array( $this, 'filterTagAttribute' ),
                 $t );

         # Strip javascript "expression" from stylesheets. Brute force approach:
         # If anything offensive is found, all attributes of the HTML tag are dropped
         if( preg_match(
                 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
                 wfMungeToUtf8( $t ) ) )
         {
                 $t="";
         }

         return trim ( $t ) ;
 }

 # preg_replace_callback() handler for fixTagAttributes().
 # $m[1] is the attribute name, $m[3] (when present) its value including
 # any surrounding double quotes. Returns the attribute unchanged when
 # its name is approved, and "" otherwise.
 /* private */ function filterTagAttribute ( $m )
 {
         if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() ) ) return "" ;
         if ( isset ( $m[3] ) && $m[3] !== "" ) return $m[1] . "=" . $m[3] ;
         return $m[1] ;
 }
 287
 # Converts wiki table markup to HTML, working line by line:
 #
 #   {| attrs      opens a table
 #   |}            closes the innermost table
 #   |- attrs      starts a new row (any number of dashes)
 #   |+ text       caption
 #   | a || b      data cells (an optional "attrs |" prefix per cell)
 #   ! a !! b      header cells
 #
 # Lines outside any table are left untouched. Tables still open at the
 # end of the text are closed. Returns the converted text.
 function doTableStuff ( $t )
 {
         $t = explode ( "\n" , $t ) ;

         # Four parallel stacks with one entry per currently open table.
         $td = array () ; # Is currently a td tag open?
         $ltd = array () ; # Was it TD or TH?
         $tr = array () ; # Is currently a tr tag open?
         $ltr = array () ; # tr attributes

         foreach ( $t AS $k => $x )
         {
                 $x = rtrim ( $x ) ;
                 $fc = substr ( $x , 0 , 1 ) ;
                 $fc2 = substr ( $x , 0 , 2 ) ;

                 if ( "{|" == $fc2 )
                 {
                         # Attributes start right after the two-character marker;
                         # fixTagAttributes() trims any leading blank.
                         $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $tr , false ) ;
                         array_push ( $ltr , "" ) ;
                 }
                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
                 else if ( "|}" == $fc2 )
                 {
                         $z = "</table>\n" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                 }
                 else if ( "|-" == $fc2 ) # Allows for |---------------
                 {
                         $x = substr ( $x , 1 ) ;
                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
                         $z = "" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                         # The <tr> itself is emitted with the first cell of the row;
                         # only its attributes are remembered here.
                         array_push ( $tr , false ) ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
                 }
                 else if ( "|" == $fc || "!" == $fc || "|+" == $fc2 ) # Cell, header or caption
                 {
                         if ( "|+" == $fc2 )
                         {
                                 $fc = "+" ;
                                 $x = substr ( $x , 1 ) ;
                         }
                         $after = substr ( $x , 1 ) ;
                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
                         $after = explode ( "||" , $after ) ;
                         $t[$k] = "" ;
                         foreach ( $after AS $theline )
                         {
                                 $z = "" ;
                                 if ( $fc != "+" )
                                 {
                                         # Open the row if this is its first cell.
                                         $tra = array_pop ( $ltr ) ;
                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                                         array_push ( $tr , true ) ;
                                         array_push ( $ltr , "" ) ;
                                 }

                                 # Close the previous cell, if any.
                                 $l = array_pop ( $ltd ) ;
                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                                 if ( $fc == "|" ) $l = "TD" ;
                                 else if ( $fc == "!" ) $l = "TH" ;
                                 else if ( $fc == "+" ) $l = "CAPTION" ;
                                 else $l = "" ;
                                 array_push ( $ltd , $l ) ;

                                 # "attrs | content" or just "content"
                                 $y = explode ( "|" , $theline , 2 ) ;
                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                                 $t[$k] .= $y ;
                                 array_push ( $td , true ) ;
                         }
                 }
         }

         # Closing open td, tr && table
         while ( count ( $td ) > 0 )
         {
                 # Close the cell with the tag that opened it (TD, TH or CAPTION).
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
                 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
                 array_pop ( $ltr ) ;
                 $t[] = "</table>" ;
         }

         $t = implode ( "\n" , $t ) ;
         return $t ;
 }
 387
 388         # Well, OK, it's actually about 14 passes.  But since all the
 389         # hard lifting is done inside PHP's regex code, it probably
 390         # wouldn't speed things up much to add a real parser.
 391         #
 # Runs the conversion passes over $text in a fixed order: HTML tag
 # clean-up, variables, headings, block levels, optional date
 # reformatting, external links, internal links, tables, heading
 # formatting, the skin's content transformation, and finally the
 # category listing is appended.
 #
 #   $text       text to convert
 #   $linestart  handed through to doBlockLevels()
 #
 # Returns the converted text.
 function doWikiPass2( $text, $linestart )
 {
         $fname = "Parser::doWikiPass2";
         wfProfileIn( $fname );

         $text = $this->removeHTMLtags( $text );
         $text = $this->replaceVariables( $text );

         # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
         $text = str_replace ( "<HR>", "<hr>", $text );

         $text = $this->doHeadings( $text );
         $text = $this->doBlockLevels( $text, $linestart );

         if($this->mOptions->getUseDynamicDates()) {
                 global $wgDateFormatter;
                 $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
         }

         # External links have to be handled before internal ones.
         $text = $this->replaceExternalLinks( $text );
         $text = $this->replaceInternalLinks ( $text );
         $text = $this->doTableStuff ( $text ) ;

         $text = $this->formatHeadings( $text );

         $sk =& $this->mOptions->getSkin();
         $text = $sk->transformContent( $text );
         $text .= $this->categoryMagic () ;

         wfProfileOut( $fname );
         return $text;
 }
 424
 425
 # Turns lines of the form "==Title==" into <hN>Title</hN>, where N is
 # the number of "=" signs on each side (1 to 6). The longest markers
 # are handled first so that "===x===" is not taken for a level 1 or 2
 # heading.
 /* private */ function doHeadings( $text )
 {
         $level = 6;
         while ( $level >= 1 ) {
                 $marks = str_repeat( "=", $level );
                 $pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
                 $replacement = "<h{$level}>\\1</h{$level}>\\2";
                 $text = preg_replace( $pattern, $replacement, $text );
                 --$level;
         }
         return $text;
 }
 435
 436         # Note: we have to do external links before the internal ones,
 437         # and otherwise take great care in the order of things here, so
 438         # that we don't end up interpreting some URLs twice.
 439
 # Converts external links for every supported protocol by calling
 # subReplaceExternalLinks() once per protocol, in the order listed.
 # Returns the converted text.
 /* private */ function replaceExternalLinks( $text )
 {
         $fname = "Parser::replaceExternalLinks";
         wfProfileIn( $fname );

         # protocol => value of the $autonumber argument for that protocol
         $protocols = array(
                 "http"   => true,
                 "https"  => true,
                 "ftp"    => false,
                 "irc"    => false,
                 "gopher" => false,
                 "news"   => false,
                 "mailto" => false
         );
         foreach ( $protocols as $protocol => $autonumber ) {
                 $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
         }

         wfProfileOut( $fname );
         return $text;
 }
 454
 455         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 456         {
                     # Converts the links of ONE protocol in $s to HTML and returns
                     # the result. Two forms are handled, in this order:
                     #   1. bare URLs ("http://example.com/"), via preg_replace()
                     #   2. bracketed links ("[http://example.com/ label]"), via the
                     #      explode()/foreach part further down
                     #
                     #   $s           text to convert
                     #   $protocol    scheme name without the colon, e.g. "http"
                     #   $autonumber  when true, bracketed links without a label are
                     #                shown as "[n]" using $this->mAutonumber, and bare
                     #                image URLs may be turned into images (see below)

                     # Stand-in for the protocol name inside the generated HTML; it is
                     # swapped back to $protocol by the str_replace() call below.
                     # NOTE(review): presumably this keeps URLs produced by the first
                     # replacement from being matched again by the second - confirm.
 457                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
                     # Character-class body: characters accepted inside a URL.
 458                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 459
 460                 # this is  the list of separators that should be ignored if they
 461                 # are the last character of an URL but that should be included
 462                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 463                 # in this case, the last comma should not become part of the URL,
 464                 # but in "www.foo.com/123,2342,32.htm" it should.
 465                 $sep = ",;\.:";
                     # Character-class body used for the file-name part in $e1.
 466                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
                     # File extensions (matched case-insensitively) treated as images.
 467                 $images = "gif|png|jpg|jpeg";
 468
 469                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 470                 # they are interpreted as part of the string (used to tell PHP
 471                 # that the content of the string should be inserted there).
                     # $e1: bare image URL - "protocol:", a path, "/", a file name, "."
                     # and an image extension - not directly preceded by "[".
 472                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 473                   "((?i){$images})([^{$uc}]|$)/";
 474
                     # $e2: any bare URL not directly preceded by "[". A separator
                     # character only counts as part of the URL when a URL character
                     # follows it.
 475                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 476                 $sk =& $this->mOptions->getSkin();
 477
 478                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 479                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 480                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 481                 }
                     # Wrap the remaining bare URLs in <a> tags; the URL itself is used
                     # as the link text.
 482                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 483                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 484                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 485                   "</a>\\5", $s );
 486                 $s = str_replace( $unique, $protocol, $s );
 487
                     # Bracketed links: split at every "[protocol:". The first piece is
                     # the text before the first bracketed link; a space is prepended
                     # before splitting and removed again with substr().
 488                 $a = explode( "[{$protocol}:", " " . $s );
 489                 $s = array_shift( $a );
 490                 $s = substr( $s, 1 );
 491
                     # From here on $e1/$e2 are reused for the pieces that follow
                     # "[protocol:":  $e1 = "url]rest",  $e2 = "url label]rest".
 492                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 493                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 494
 495                 foreach ( $a as $line ) {
 496                         if ( preg_match( $e1, $line, $m ) ) {
                                     # No label given: use a running number or the URL.
 497                                 $link = "{$protocol}:{$m[1]}";
 498                                 $trail = $m[2];
 499                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 500                                 else { $text = wfEscapeHTML( $link ); }
 501                         } else if ( preg_match( $e2, $line, $m ) ) {
                                     # Label given: it is used as the link text as-is.
 502                                 $link = "{$protocol}:{$m[1]}";
 503                                 $text = $m[2];
 504                                 $trail = $m[3];
 505                         } else {
                                     # Not a well-formed bracketed link: put the text back
                                     # exactly as it was.
 506                                 $s .= "[{$protocol}:" . $line;
 507                                 continue;
 508                         }
                             # In printable mode the URL is also shown after the link,
                             # in italics and parentheses.
 509                         if ( $this->mOptions->getPrintable() ) $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
 510                         else $paren = "";
 511                         $la = $sk->getExternalLinkAttributes( $link, $text );
 512                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 513
 514                 }
 515                 return $s;
 516         }
 517
 # Handles a ''' (bold) token.
 #
 #   $state  (in/out) array with keys "em" and "strong"; each holds the
 #           "pos" of the token that opened the tag, or FALSE when closed
 #   $token  current token; only $token["pos"] is read
 #
 # Returns the HTML to emit: <strong> when bold was closed, otherwise the
 # closing tag(s). If <em> was opened after <strong>, the <em> is closed
 # first and reopened afterwards to keep the tags properly nested.
 /* private */ function handle3Quotes( &$state, $token )
 {
         if ( !$state["strong"] ) {
                 $state["strong"] = $token["pos"];
                 return "<strong>";
         }

         # ''' lala ''lala '''
         $emOpenedInside = $state["em"] && $state["em"] > $state["strong"];
         $state["strong"] = FALSE;
         return $emOpenedInside ? "</em></strong><em>" : "</strong>";
 }
 535
 # Handles a '' (italic) token.
 #
 #   $state  (in/out) array with keys "em" and "strong"; each holds the
 #           "pos" of the token that opened the tag, or FALSE when closed
 #   $token  current token; only $token["pos"] is read
 #
 # Returns the HTML to emit: <em> when italic was closed, otherwise the
 # closing tag(s). If <strong> was opened after <em>, the <strong> is
 # closed first and reopened afterwards to keep the tags properly nested.
 /* private */ function handle2Quotes( &$state, $token )
 {
         if ( !$state["em"] ) {
                 $state["em"] = $token["pos"];
                 return "<em>";
         }

         # ''lala'''lala'' ....'''
         $strongOpenedInside = $state["strong"] && $state["strong"] > $state["em"];
         $state["em"] = FALSE;
         return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
 }
 553
 # Handles a ''''' (bold + italic) token.
 #
 #   $state  (in/out) array with keys "em" and "strong"; each holds the
 #           "pos" of the token that opened the tag, or FALSE when closed
 #   $token  current token; only $token["pos"] is read
 #
 # Returns the HTML to emit:
 #   both open     -> close both, innermost (latest opened) first
 #   only em open  -> close em, open strong
 #   only strong   -> close strong, open em
 #   neither open  -> open strong, then em
 /* private */ function handle5Quotes( &$state, $token )
 {
         # $s is assigned (not appended to) in every branch; there is no
         # earlier value to append to.
         if ( $state["em"] && $state["strong"] ) {
                 if ( $state["em"] < $state["strong"] ) {
                         $s = "</strong></em>";
                 } else {
                         $s = "</em></strong>";
                 }
                 $state["strong"] = $state["em"] = FALSE;
         } elseif ( $state["em"] ) {
                 $s = "</em><strong>";
                 $state["em"] = FALSE;
                 $state["strong"] = $token["pos"];
         } elseif ( $state["strong"] ) {
                 $s = "</strong><em>";
                 $state["strong"] = FALSE;
                 $state["em"] = $token["pos"];
         } else { # not $em and not $strong
                 $s = "<strong><em>";
                 $state["strong"] = $state["em"] = $token["pos"];
         }
         return $s;
 }
 577
 578         /* private */ function replaceInternalLinks( $str )
 579         {
 580                 global $wgLang; # for language specific parser hook
 581
 582                 $tokenizer=Tokenizer::newFromString( $str );
 583                 $tokenStack = array();
 584
 585                 $s="";
 586                 $state["em"]      = FALSE;
 587                 $state["strong"]  = FALSE;
 588                 $tagIsOpen = FALSE;
 589
 590                 # The tokenizer splits the text into tokens and returns them one by one.
 591                 # Every call to the tokenizer returns a new token.
 592                 while ( $token = $tokenizer->nextToken() )
 593                 {
 594                         switch ( $token["type"] )
 595                         {
 596                                 case "text":
 597                                         # simple text with no further markup
 598                                         $txt = $token["text"];
 599                                         break;
 600                                 case "[[[":
 601                                         # remember the tag opened with 3 [
 602                                         $threeopen = true;
 603                                 case "[[":
 604                                         # link opening tag.
 605                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 606                                         $tagIsOpen = TRUE;
 607                                         array_push( $tokenStack, $token );
 608                                         $txt="";
 609                                         break;
 610
 611                                 case "]]]":
 612                                 case "]]":
 613                                         # link close tag.
 614                                         # get text from stack, glue it together, and call the code to handle a
 615                                         # link
 616
 617                                         if ( count( $tokenStack ) == 0 )
 618                                         {
 619                                                 # stack empty. Found a ]] without an opening [[
 620                                                 $txt = "]]";
 621                                         } else {
 622                                                 $linkText = "";
 623                                                 $lastToken = array_pop( $tokenStack );
 624                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 625                                                 {
 626                                                         if( !empty( $lastToken["text"] ) ) {
 627                                                                 $linkText = $lastToken["text"] . $linkText;
 628                                                         }
 629                                                         $lastToken = array_pop( $tokenStack );
 630                                                 }
 631
 632                                                 $txt = $linkText ."]]";
 633
 634                                                 if( isset( $lastToken["text"] ) ) {
 635                                                         $prefix = $lastToken["text"];
 636                                                 } else {
 637                                                         $prefix = "";
 638                                                 }
 639                                                 $nextToken = $tokenizer->previewToken();
 640                                                 if ( $nextToken["type"] == "text" )
 641                                                 {
 642                                                         # Preview just looks at it. Now we have to fetch it.
 643                                                         $nextToken = $tokenizer->nextToken();
 644                                                         $txt .= $nextToken["text"];
 645                                                 }
 646                                                 $txt = $this->handleInternalLink( $txt, $prefix );
 647
 648                                                 # did the tag start with 3 [ ?
 649                                                 if($threeopen) {
 650                                                         # show the first as text
 651                                                         $txt = "[".$txt;
 652                                                         $threeopen=false;
 653                                                 }
 654
 655                                         }
 656                                         $tagIsOpen = (count( $tokenStack ) != 0);
 657                                         break;
 658                                 case "----":
 659                                         $txt = "\n<hr>\n";
 660                                         break;
 661                                 case "'''":
 662                                         # This and the three next ones handle quotes
 663                                         $txt = $this->handle3Quotes( $state, $token );
 664                                         break;
 665                                 case "''":
 666                                         $txt = $this->handle2Quotes( $state, $token );
 667                                         break;
 668                                 case "'''''":
 669                                         $txt = $this->handle5Quotes( $state, $token );
 670                                         break;
 671                                 case "":
 672                                         # empty token
 673                                         $txt="";
 674                                         break;
 675                                 case "RFC ":
 676                                         if ( $tagIsOpen ) {
 677                                                 $txt = "RFC ";
 678                                         } else {
 679                                                 $txt = $this->doMagicRFC( $tokenizer );
 680                                         }
 681                                         break;
 682                                 case "ISBN ":
 683                                         if ( $tagIsOpen ) {
 684                                                 $txt = "ISBN ";
 685                                         } else {
 686                                                 $txt = $this->doMagicISBN( $tokenizer );
 687                                         }
 688                                         break;
 689                                 default:
 690                                         # Call language specific Hook.
 691                                         $txt = $wgLang->processToken( $token, $tokenStack );
 692                                         if ( NULL == $txt ) {
 693                                                 # An unkown token. Highlight.
 694                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 695                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 696                                         }
 697                                         break;
 698                         }
 699                         # If we're parsing the interior of a link, don't append the interior to $s,
 700                         # but push it to the stack so it can be processed when a ]] token is found.
 701                         if ( $tagIsOpen  && $txt != "" ) {
 702                                 $token["type"] = "text";
 703                                 $token["text"] = $txt;
 704                                 array_push( $tokenStack, $token );
 705                         } else {
 706                                 $s .= $txt;
 707                         }
 708                 } #end while
 709                 if ( count( $tokenStack ) != 0 )
 710                 {
 711                         # still objects on stack. opened [[ tag without closing ]] tag.
 712                         $txt = "";
 713                         while ( $lastToken = array_pop( $tokenStack ) )
 714                         {
 715                                 if ( $lastToken["type"] == "text" )
 716                                 {
 717                                         $txt = $lastToken["text"] . $txt;
 718                                 } else {
 719                                         $txt = $lastToken["type"] . $txt;
 720                                 }
 721                         }
 722                         $s .= $txt;
 723                 }
 724                 return $s;
 725         }
 726
 727         /* private */ function handleInternalLink( $line, $prefix )
 728         {
 729                 global $wgLang, $wgLinkCache;
 730                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 731                 static $fname = "OutputPage::replaceInternalLinks" ;
 732                 wfProfileIn( $fname );
 733
 734                 wfProfileIn( "$fname-setup" );
 735                 static $tc = FALSE;
 736                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 737                 $sk =& $this->mOptions->getSkin();
 738
 739                 # Match a link having the form [[namespace:link|alternate]]trail
 740                 static $e1 = FALSE;
 741                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 742                 # Match the end of a line for a word that's not followed by whitespace,
 743                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 744                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 745                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 746                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 747
 748
 749                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 750                 static $image = FALSE;
 751                 static $special = FALSE;
 752                 static $media = FALSE;
 753                 static $category = FALSE;
 754                 if ( !$image ) { $image = Namespace::getImage(); }
 755                 if ( !$special ) { $special = Namespace::getSpecial(); }
 756                 if ( !$media ) { $media = Namespace::getMedia(); }
 757                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 758
 759                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 760
 761                 wfProfileOut( "$fname-setup" );
 762                 $s = "";
 763
 764                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 765                         $text = $m[2];
 766                         $trail = $m[3];
 767                 } else { # Invalid form; output directly
 768                         $s .= $prefix . "[[" . $line ;
 769                         return $s;
 770                 }
 771
 772                 /* Valid link forms:
 773                 Foobar -- normal
 774                 :Foobar -- override special treatment of prefix (images, language links)
 775                 /Foobar -- convert to CurrentPage/Foobar
 776                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 777                 */
 778                 $c = substr($m[1],0,1);
 779                 $noforce = ($c != ":");
 780                 if( $c == "/" ) { # subpage
 781                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 782                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 783                                 $noslash=$m[1];
 784                         } else {
 785                                 $noslash=substr($m[1],1);
 786                         }
 787                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 788                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 789                                 if( "" == $text ) {
 790                                         $text= $m[1];
 791                                 } # this might be changed for ugliness reasons
 792                         } else {
 793                                 $link = $noslash; # no subpage allowed, use standard link
 794                         }
 795                 } elseif( $noforce ) { # no subpage
 796                         $link = $m[1];
 797                 } else {
 798                         $link = substr( $m[1], 1 );
 799                 }
 800                 if( "" == $text )
 801                         $text = $link;
 802
 803                 $nt = Title::newFromText( $link );
 804                 if( !$nt ) {
 805                         $s .= $prefix . "[[" . $line;
 806                         return $s;
 807                 }
 808                 $ns = $nt->getNamespace();
 809                 $iw = $nt->getInterWiki();
 810                 if( $noforce ) {
 811                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 812                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 813                                 $s .= $prefix . $trail;
 814                                 return $s;
 815                         }
 816                         if( $ns == $image ) {
 817                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 818                                 $wgLinkCache->addImageLinkObj( $nt );
 819                                 return $s;
 820                         }
 821                 }
 822                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 823                     ( strpos( $link, "#" ) == FALSE ) ) {
 824                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 825                         return $s;
 826                 }
 827                 if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
 828                         $t = explode ( ":" , $nt->getText() ) ;
 829                         array_shift ( $t ) ;
 830                         $t = implode ( ":" , $t ) ;
 831                         $t = $wgLang->ucFirst ( $t ) ;
 832 #                       $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
 833                         $nnt = Title::newFromText ( $category.":".$t ) ;
 834                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 835                         $this->mCategoryLinks[] = $t ;
 836                         $s .= $prefix . $trail ;
 837                         return $s ;
 838                 }
 839                 if( $ns == $media ) {
 840                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 841                         $wgLinkCache->addImageLinkObj( $nt );
 842                         return $s;
 843                 } elseif( $ns == $special ) {
 844                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 845                         return $s;
 846                 }
 847                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 848
 849                 wfProfileOut( $fname );
 850                 return $s;
 851         }
 852
 853         # Some functions here used by doBlockLevels()
 854         #
 855         /* private */ function closeParagraph()
 856         {
 857                 $result = "";
 858                 if ( 0 != strcmp( "p", $this->mLastSection ) &&
 859                   0 != strcmp( "", $this->mLastSection ) ) {
 860                         $result = "</" . $this->mLastSection  . ">";
 861                 }
 862                 $this->mLastSection = "";
 863                 return $result."\n";
 864         }
 865         # getCommon() returns the length of the longest common substring
 866         # of both arguments, starting at the beginning of both.
 867         #
 868         /* private */ function getCommon( $st1, $st2 )
 869         {
 870                 $fl = strlen( $st1 );
 871                 $shorter = strlen( $st2 );
 872                 if ( $fl < $shorter ) { $shorter = $fl; }
 873
 874                 for ( $i = 0; $i < $shorter; ++$i ) {
 875                         if ( $st1{$i} != $st2{$i} ) { break; }
 876                 }
 877                 return $i;
 878         }
 879         # These next three functions open, continue, and close the list
 880         # element appropriate to the prefix character passed into them.
 881         #
 882         /* private */ function openList( $char )
 883     {
 884                 $result = $this->closeParagraph();
 885
 886                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 887                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 888                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 889                 else if ( ";" == $char ) {
 890                         $result .= "<dl><dt>";
 891                         $this->mDTopen = true;
 892                 }
 893                 else { $result = "<!-- ERR 1 -->"; }
 894
 895                 return $result;
 896         }
 897
 898         /* private */ function nextItem( $char )
 899         {
 900                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 901                 else if ( ":" == $char || ";" == $char ) {
 902                         $close = "</dd>";
 903                         if ( $this->mDTopen ) { $close = "</dt>"; }
 904                         if ( ";" == $char ) {
 905                                 $this->mDTopen = true;
 906                                 return $close . "<dt>";
 907                         } else {
 908                                 $this->mDTopen = false;
 909                                 return $close . "<dd>";
 910                         }
 911                 }
 912                 return "<!-- ERR 2 -->";
 913         }
 914
 915         /* private */function closeList( $char )
 916         {
 917                 if ( "*" == $char ) { $text = "</li></ul>"; }
 918                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 919                 else if ( ":" == $char ) {
 920                         if ( $this->mDTopen ) {
 921                                 $this->mDTopen = false;
 922                                 $text = "</dt></dl>";
 923                         } else {
 924                                 $text = "</dd></dl>";
 925                         }
 926                 }
 927                 else {  return "<!-- ERR 3 -->"; }
 928                 return $text."\n";
 929         }
 930
 931         /* private */ function doBlockLevels( $text, $linestart )
 932         {
 933                 $fname = "OutputPage::doBlockLevels";
 934                 wfProfileIn( $fname );
 935                 # Parsing through the text line by line.  The main thing
 936                 # happening here is handling of block-level elements p, pre,
 937                 # and making lists from lines starting with * # : etc.
 938                 #
 939                 $a = explode( "\n", $text );
 940                 $text = $lastPref = "";
 941                 $this->mDTopen = $inBlockElem = false;
 942
 943                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 944                 foreach ( $a as $t ) {
 945                         if ( "" != $text ) { $text .= "\n"; }
 946
 947                         $oLine = $t;
 948                         $opl = strlen( $lastPref );
 949                         $npl = strspn( $t, "*#:;" );
 950                         $pref = substr( $t, 0, $npl );
 951                         $pref2 = str_replace( ";", ":", $pref );
 952                         $t = substr( $t, $npl );
 953
 954                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 955                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 956
 957                                 if ( ";" == substr( $pref, -1 ) ) {
 958                                         $cpos = strpos( $t, ":" );
 959                                         if ( ! ( false === $cpos ) ) {
 960                                                 $term = substr( $t, 0, $cpos );
 961                                                 $text .= $term . $this->nextItem( ":" );
 962                                                 $t = substr( $t, $cpos + 1 );
 963                                         }
 964                                 }
 965                         } else if (0 != $npl || 0 != $opl) {
 966                                 $cpl = $this->getCommon( $pref, $lastPref );
 967
 968                                 while ( $cpl < $opl ) {
 969                                         $text .= $this->closeList( $lastPref{$opl-1} );
 970                                         --$opl;
 971                                 }
 972                                 if ( $npl <= $cpl && $cpl > 0 ) {
 973                                         $text .= $this->nextItem( $pref{$cpl-1} );
 974                                 }
 975                                 while ( $npl > $cpl ) {
 976                                         $char = substr( $pref, $cpl, 1 );
 977                                         $text .= $this->openList( $char );
 978
 979                                         if ( ";" == $char ) {
 980                                                 $cpos = strpos( $t, ":" );
 981                                                 if ( ! ( false === $cpos ) ) {
 982                                                         $term = substr( $t, 0, $cpos );
 983                                                         $text .= $term . $this->nextItem( ":" );
 984                                                         $t = substr( $t, $cpos + 1 );
 985                                                 }
 986                                         }
 987                                         ++$cpl;
 988                                 }
 989                                 $lastPref = $pref2;
 990                         }
 991                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
 992                                 if ( preg_match(
 993                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
 994                                         $text .= $this->closeParagraph();
 995                                         $inBlockElem = true;
 996                                 }
 997                                 if ( ! $inBlockElem ) {
 998                                         if ( " " == $t{0} ) {
 999                                                 $newSection = "pre";
1000                                                 # $t = wfEscapeHTML( $t );
1001                                         }
1002                                         else { $newSection = "p"; }
1003
1004                                         if ( 0 == strcmp( "", trim( $oLine ) ) ) {
1005                                                 $text .= $this->closeParagraph();
1006                                                 $text .= "<" . $newSection . ">";
1007                                         } else if ( 0 != strcmp( $this->mLastSection,
1008                                           $newSection ) ) {
1009                                                 $text .= $this->closeParagraph();
1010                                                 if ( 0 != strcmp( "p", $newSection ) ) {
1011                                                         $text .= "<" . $newSection . ">";
1012                                                 }
1013                                         }
1014                                         $this->mLastSection = $newSection;
1015                                 }
1016                                 if ( $inBlockElem &&
1017                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
1018                                         $inBlockElem = false;
1019                                 }
1020                         }
1021                         $text .= $t;
1022                 }
1023                 while ( $npl ) {
1024                         $text .= $this->closeList( $pref2{$npl-1} );
1025                         --$npl;
1026                 }
1027                 if ( "" != $this->mLastSection ) {
1028                         if ( "p" != $this->mLastSection ) {
1029                                 $text .= "</" . $this->mLastSection . ">";
1030                         }
1031                         $this->mLastSection = "";
1032                 }
1033                 wfProfileOut( $fname );
1034                 return $text;
1035         }
1036
1037         /* private */ function replaceVariables( $text )
1038         {
1039                 global $wgLang, $wgCurOut;
1040                 $fname = "OutputPage::replaceVariables";
1041                 wfProfileIn( $fname );
1042
1043                 $magic = array();
1044
1045                 # Basic variables
1046                 # See Language.php for the definition of each magic word
1047                 # As with sigs, this uses the server's local time -- ensure
1048                 # this is appropriate for your audience!
1049
1050                 $magic[MAG_CURRENTMONTH] = date( "m" );
1051                 $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
1052                 $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
1053                 $magic[MAG_CURRENTDAY] = date("j");
1054                 $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
1055                 $magic[MAG_CURRENTYEAR] = date( "Y" );
1056                 $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
1057
1058                 $this->mOutput->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
1059
1060                 $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
1061                 if ( $mw->match( $text ) ) {
1062                         $v = wfNumberOfArticles();
1063                         $text = $mw->replace( $v, $text );
1064                         if( $mw->getWasModified() ) { $this->mOutput->mContainsOldMagic++; }
1065                 }
1066
1067                 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
1068                 # The callbacks are at the bottom of this file
1069                 $wgCurOut = $this;
1070                 $mw =& MagicWord::get( MAG_MSG );
1071                 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
1072                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1073
1074                 $mw =& MagicWord::get( MAG_MSGNW );
1075                 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
1076                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1077
1078                 wfProfileOut( $fname );
1079                 return $text;
1080         }
1081
1082         # Cleans up HTML, removes dangerous tags and attributes
1083         /* private */ function removeHTMLtags( $text )
1084         {
1085                 $fname = "OutputPage::removeHTMLtags";
1086                 wfProfileIn( $fname );
1087                 $htmlpairs = array( # Tags that must be closed
1088                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1089                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1090                         "strike", "strong", "tt", "var", "div", "center",
1091                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1092                         "ruby", "rt" , "rb" , "rp"
1093                 );
1094                 $htmlsingle = array(
1095                         "br", "p", "hr", "li", "dt", "dd"
1096                 );
1097                 $htmlnest = array( # Tags that can be nested--??
1098                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1099                         "dl", "font", "big", "small", "sub", "sup"
1100                 );
1101                 $tabletags = array( # Can only appear inside table
1102                         "td", "th", "tr"
1103                 );
1104
1105                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1106                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1107
1108                 $htmlattrs = $this->getHTMLattrs () ;
1109
1110                 # Remove HTML comments
1111                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1112
1113                 $bits = explode( "<", $text );
1114                 $text = array_shift( $bits );
1115                 $tagstack = array(); $tablestack = array();
1116
1117                 foreach ( $bits as $x ) {
1118                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1119                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1120                           $x, $regs );
1121                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1122                         error_reporting( $prev );
1123
1124                         $badtag = 0 ;
1125                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1126                                 # Check our stack
1127                                 if ( $slash ) {
1128                                         # Closing a tag...
1129                                         if ( ! in_array( $t, $htmlsingle ) &&
1130                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1131                                                 array_push( $tagstack, $ot );
1132                                                 $badtag = 1;
1133                                         } else {
1134                                                 if ( $t == "table" ) {
1135                                                         $tagstack = array_pop( $tablestack );
1136                                                 }
1137                                                 $newparams = "";
1138                                         }
1139                                 } else {
1140                                         # Keep track for later
1141                                         if ( in_array( $t, $tabletags ) &&
1142                                           ! in_array( "table", $tagstack ) ) {
1143                                                 $badtag = 1;
1144                                         } else if ( in_array( $t, $tagstack ) &&
1145                                           ! in_array ( $t , $htmlnest ) ) {
1146                                                 $badtag = 1 ;
1147                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1148                                                 if ( $t == "table" ) {
1149                                                         array_push( $tablestack, $tagstack );
1150                                                         $tagstack = array();
1151                                                 }
1152                                                 array_push( $tagstack, $t );
1153                                         }
1154                                         # Strip non-approved attributes from the tag
1155                                         $newparams = $this->fixTagAttributes($params);
1156
1157                                 }
1158                                 if ( ! $badtag ) {
1159                                         $rest = str_replace( ">", "&gt;", $rest );
1160                                         $text .= "<$slash$t $newparams$brace$rest";
1161                                         continue;
1162                                 }
1163                         }
1164                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1165                 }
1166                 # Close off any remaining tags
1167                 while ( $t = array_pop( $tagstack ) ) {
1168                         $text .= "</$t>\n";
1169                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1170                 }
1171                 wfProfileOut( $fname );
1172                 return $text;
1173         }
1174
/*
 *
 * Rewrites the headings (<h1>-<h6>) found in already rendered HTML.
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections if that option is enabled and
 *    $this->mTitle->userCanEdit() allows it
 * 3) Add a table of contents after the first block of text if that option is
 *    enabled and the page has more than three headings
 * 4) Auto-anchor headings
 *
 * The MAG_NOEDITSECTION and MAG_NOTOC magic words are removed from the text;
 * when present they switch off the [edit] links and the table of contents
 * respectively. The table of contents is also never added to the page whose
 * prefixed title equals the "mainpage" message.
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text   : HTML to process
 * returns : the HTML with the reformatted headings (and the TOC, if any)
 *
 * */
        /* private */ function formatHeadings( $text )
        {
                $doNumberHeadings = $this->mOptions->getNumberHeadings();
                $doShowToc = $this->mOptions->getShowToc();
                if( !$this->mTitle->userCanEdit() ) {
                        $showEditLink = 0;
                        $rightClickEdit = 0;
                } else {
                        $showEditLink = $this->mOptions->getEditSection();
                        $rightClickEdit = $this->mOptions->getEditSectionOnRightClick();
                }

                # Inhibit editsection links if requested in the page
                $esw =& MagicWord::get( MAG_NOEDITSECTION );
                if( $esw->matchAndRemove( $text ) ) {
                        $showEditLink = 0;
                }
                # If the MAG_NOTOC magic word occurs in the HTML, do not add the TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if( $mw->matchAndRemove( $text ) ) {
                        $doShowToc = 0;
                }

                # never add the TOC to the Main Page. This is an entry page that should not
                # be more than 1-2 screens large anyway
                if( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) {
                        $doShowToc = 0;
                }

                # We need this to perform operations on the HTML
                $sk =& $this->mOptions->getSkin();

                # A single pattern is used both to collect the headlines and, further
                # down, to split the text around them, so the two can never disagree.
                # Groups: 1 = level digit, 2 = rest of the opening tag, 3 = headline HTML
                $headingPattern = "/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i";
                preg_match_all( $headingPattern, $text, $matches );

                # Numbering and TOC indentation are only tracked when one of them is wanted
                $trackLevels = ( $doNumberHeadings || $doShowToc );
                $multipleHeadings = ( count( $matches[3] ) > 1 );

                # headline counter
                $c = 0;

                # Ugh .. the TOC should have neat indentation levels which can be
                # passed to the skin functions. These are determined here
                $toclevel = 0;
                $toclines = 0;
                $toc = "";
                $full = "";
                $head = array();

                # Every piece of per-loop state is initialised explicitly so that the
                # first headline does not read undefined variables or array offsets.
                $level = 0;
                $prevlevel = 0;
                $numbering = "";
                $dot = 0;
                $sublevelCount = array();  # number of headlines seen so far, per level
                $refers = array();         # canonized headline => number of occurrences

                foreach( $matches[3] as $headline ) {
                        if( $level ) { $prevlevel = $level; }
                        $level = $matches[1][$c];

                        if( $trackLevels ) {
                                if( $prevlevel && $level > $prevlevel ) {
                                        # reset when we enter a new level
                                        $sublevelCount[$level] = 0;
                                        $toc .= $sk->tocIndent( $level - $prevlevel );
                                        $toclevel += $level - $prevlevel;
                                } elseif( $level < $prevlevel ) {
                                        # reset when we step back a level
                                        $sublevelCount[$level+1] = 0;
                                        $toc .= $sk->tocUnindent( $prevlevel - $level );
                                        $toclevel -= $prevlevel - $level;
                                }
                        }

                        # count number of headlines for each level
                        if( !isset( $sublevelCount[$level] ) ) {
                                $sublevelCount[$level] = 0;
                        }
                        $sublevelCount[$level]++;

                        # Build the dotted number from the non-zero counters of all
                        # levels up to the current one
                        if( $trackLevels ) {
                                for( $i = 1; $i <= $level; $i++ ) {
                                        if( !empty( $sublevelCount[$i] ) ) {
                                                if( $dot ) { $numbering .= "."; }
                                                $numbering .= $sublevelCount[$i];
                                                $dot = 1;
                                        }
                                }
                        }

                        // The canonized header is a version of the header text safe to use for links
                        // Avoid insertion of weird stuff like <math> by expanding the relevant sections
                        $canonized_headline = Parser::unstrip( $headline, $this->mStripState );
                        $canonized_headline = preg_replace( "/<.*?>/", "", $canonized_headline ); // strip out HTML
                        $tocline = trim( $canonized_headline );
                        $canonized_headline = str_replace( '"', "", $canonized_headline );
                        $canonized_headline = str_replace( " ", "_", trim( $canonized_headline ) );

                        // count how many times this text occurred so far so we can
                        // tell duplicate anchors apart
                        if( !isset( $refers[$canonized_headline] ) ) {
                                $refers[$canonized_headline] = 0;
                        }
                        $refers[$canonized_headline]++;
                        $refcount = $refers[$canonized_headline];

                        // Prepend the number to the heading text
                        if( $trackLevels ) {
                                $tocline = $numbering . " " . $tocline;

                                // Don't number the heading if it is the only one (looks silly)
                                if( $doNumberHeadings && $multipleHeadings ) {
                                        $headline = $numbering . " " . $headline; // the two are different if the line contains a link
                                }
                        }

                        // Create the anchor for linking from the TOC to the section
                        $anchor = $canonized_headline;
                        if( $refcount > 1 ) { $anchor .= "_" . $refcount; }
                        if( $doShowToc ) {
                                $toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
                        }

                        $head[$c] = "";
                        if( $showEditLink ) {
                                $head[$c] .= $sk->editSectionLink( $c+1 );
                        }

                        // the headline might have a link
                        if( preg_match( "/(.*)<a(.*)/", $headline, $headlinematches ) )
                        {
                                // if so give an anchor name to the already existent link
                                $headline = $headlinematches[1]
                                            ."<a name=\"".$anchor."\" ".$headlinematches[2];
                        } else {
                                // else create an anchor link for the headline
                                $headline = "<a name=\"".$anchor."\">"
                                            .$headline
                                            ."</a>";
                        }

                        // give headline the correct <h#> tag
                        $head[$c] .= "<h".$level.$matches[2][$c] .$headline."</h".$level.">";

                        // Wrap the finished headline in the right-click edit script
                        if( $rightClickEdit ) {
                                $head[$c] = $sk->editSectionScript( $c+1, $head[$c] );
                        }

                        $numbering = "";
                        $dot = 0;
                        $c++;
                }

                if( $doShowToc ) {
                        $toclines = $c;
                        $toc .= $sk->tocUnindent( $toclevel );
                        $toc = $sk->tocTable( $toc );
                }

                // split up and insert constructed headlines
                // (no PREG_SPLIT_DELIM_CAPTURE, so the groups in the pattern do not
                // show up in the result)
                $blocks = preg_split( $headingPattern, $text );
                $i = 0;

                foreach( $blocks as $block ) {
                        if( $showEditLink && $c > 0 && $i == 0 ) {
                                # This is the [edit] link that appears for the top block of text when
                                # section editing is enabled
                                $full .= $sk->editSectionLink( 0 );
                        }
                        $full .= $block;
                        if( $doShowToc && $toclines > 3 && !$i ) {
                                # Let's add a top anchor just in case we want to link to the top of the page
                                $full = "<a name=\"top\"></a>" . $full . $toc;
                        }

                        if( !empty( $head[$i] ) ) {
                                $full .= $head[$i];
                        }
                        $i++;
                }

                return $full;
        }
1351
1352         /* private */ function doMagicISBN( &$tokenizer )
1353         {
1354                 global $wgLang;
1355
1356                 # Check whether next token is a text token
1357                 # If yes, fetch it and convert the text into a
1358                 # Special::BookSources link
1359                 $token = $tokenizer->previewToken();
1360                 while ( $token["type"] == "" )
1361                 {
1362                         $tokenizer->nextToken();
1363                         $token = $tokenizer->previewToken();
1364                 }
1365                 if ( $token["type"] == "text" )
1366                 {
1367                         $token = $tokenizer->nextToken();
1368                         $x = $token["text"];
1369                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1370
1371                         $isbn = $blank = "" ;
1372                         while ( " " == $x{0} ) {
1373                                 $blank .= " ";
1374                                 $x = substr( $x, 1 );
1375                         }
1376                         while ( strstr( $valid, $x{0} ) != false ) {
1377                                 $isbn .= $x{0};
1378                                 $x = substr( $x, 1 );
1379                         }
1380                         $num = str_replace( "-", "", $isbn );
1381                         $num = str_replace( " ", "", $num );
1382
1383                         if ( "" == $num ) {
1384                                 $text = "ISBN $blank$x";
1385                         } else {
1386                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1387                                 $text = "<a href=\"" .
1388                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1389                                         "\" class=\"internal\">ISBN $isbn</a>";
1390                                 $text .= $x;
1391                         }
1392                 } else {
1393                         $text = "ISBN ";
1394                 }
1395                 return $text;
1396         }
1397         /* private */ function doMagicRFC( &$tokenizer )
1398         {
1399                 global $wgLang;
1400
1401                 # Check whether next token is a text token
1402                 # If yes, fetch it and convert the text into a
1403                 # link to an RFC source
1404                 $token = $tokenizer->previewToken();
1405                 while ( $token["type"] == "" )
1406                 {
1407                         $tokenizer->nextToken();
1408                         $token = $tokenizer->previewToken();
1409                 }
1410                 if ( $token["type"] == "text" )
1411                 {
1412                         $token = $tokenizer->nextToken();
1413                         $x = $token["text"];
1414                         $valid = "0123456789";
1415
1416                         $rfc = $blank = "" ;
1417                         while ( " " == $x{0} ) {
1418                                 $blank .= " ";
1419                                 $x = substr( $x, 1 );
1420                         }
1421                         while ( strstr( $valid, $x{0} ) != false ) {
1422                                 $rfc .= $x{0};
1423                                 $x = substr( $x, 1 );
1424                         }
1425
1426                         if ( "" == $rfc ) {
1427                                 $text .= "RFC $blank$x";
1428                         } else {
1429                                 $url = wfmsg( "rfcurl" );
1430                                 $url = str_replace( "$1", $rfc, $url);
1431                                 $sk =& $this->mOptions->getSkin();
1432                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1433                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1434                         }
1435                 } else {
1436                         $text = "RFC ";
1437                 }
1438                 return $text;
1439         }
1440
1441         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1442         {
1443                 $this->mOptions = $options;
1444                 $this->mTitle = $title;
1445                 if ( $clearState ) {
1446                         $this->clearState();
1447                 }
1448
1449                 $stripState = false;
1450                 $text = str_replace("\r\n", "\n", $text);
1451                 $text = $this->strip( $text, $stripState, false );
1452                 $text = $this->pstPass2( $text, $user );
1453                 $text = $this->unstrip( $text, $stripState );
1454                 return $text;
1455         }
1456
1457         /* private */ function pstPass2( $text, &$user )
1458         {
1459                 global $wgLang, $wgLocaltimezone;
1460
1461                 # Signatures
1462                 #
1463                 $n = $user->getName();
1464                 $k = $user->getOption( "nickname" );
1465                 if ( "" == $k ) { $k = $n; }
1466                 if(isset($wgLocaltimezone)) {
1467                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1468                 }
1469                 /* Note: this is an ugly timezone hack for the European wikis */
1470                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1471                   " (" . date( "T" ) . ")";
1472                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1473
1474                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1475                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1476                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1477                   Namespace::getUser() ) . ":$n|$k]]", $text );
1478
1479                 # Context links: [[|name]] and [[name (context)|]]
1480                 #
1481                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1482                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1483                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1484                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1485
1486                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1487                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1488                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1489                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1490                                                                                                                 # [[ns:page (cont)|]]
1491                 $context = "";
1492                 $t = $this->mTitle->getText();
1493                 if ( preg_match( $conpat, $t, $m ) ) {
1494                         $context = $m[2];
1495                 }
1496                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1497                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1498                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1499
1500                 if ( "" == $context ) {
1501                         $text = preg_replace( $p2, "[[\\1]]", $text );
1502                 } else {
1503                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1504                 }
1505
1506                 # {{SUBST:xxx}} variables
1507                 #
1508                 $mw =& MagicWord::get( MAG_SUBST );
1509                 $text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );
1510
1511                 # Trim trailing whitespace
1512                 # MAG_END (__END__) tag allows for trailing
1513                 # whitespace to be deliberately included
1514                 $text = rtrim( $text );
1515                 $mw =& MagicWord::get( MAG_END );
1516                 $mw->matchAndRemove( $text );
1517
1518                 return $text;
1519         }
1520
1521
1522 }
1523
class ParserOutput
{
        # Text, language links, category links and the "contains old magic"
        # flag carried by this output object
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor; every argument is optional and is stored as given
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCategoryLinks = $categoryLinks;
                $this->mLanguageLinks = $languageLinks;
                $this->mText = $text;
        }

        # Accessors

        function getText()
        {
                return $this->mText;
        }

        function getLanguageLinks()
        {
                return $this->mLanguageLinks;
        }

        function getCategoryLinks()
        {
                return $this->mCategoryLinks;
        }

        function containsOldMagic()
        {
                return $this->mContainsOldMagic;
        }

        # Mutators; each one stores the new value through wfSetVar() and passes
        # on what wfSetVar() returns (presumably the previous value; confirm
        # against wfSetVar)

        function setText( $text )
        {
                $result = wfSetVar( $this->mText, $text );
                return $result;
        }

        function setLanguageLinks( $ll )
        {
                $result = wfSetVar( $this->mLanguageLinks, $ll );
                return $result;
        }

        function setCategoryLinks( $cl )
        {
                $result = wfSetVar( $this->mCategoryLinks, $cl );
                return $result;
        }

        function setContainsOldMagic( $com )
        {
                $result = wfSetVar( $this->mContainsOldMagic, $com );
                return $result;
        }
}
1546
# Bundle of settings a Parser works with. The values are copied from the site
# configuration globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mPrintable;                 # Generate printable output
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getPrintable() { return $this->mPrintable; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators; each returns whatever wfSetVar() / wfSetRef() returns
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Creates a ParserOptions object initialised from $user.
        # Returns the new object.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() declares its parameter by reference, so no
                # "&" is needed (or allowed by later PHP versions) at the call site
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fills this object from the site configuration globals and from the
        # preferences of $userInput. A new User object is used instead when
        # $userInput evaluates to false. mPrintable is always reset to false.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                } else {
                        $user =& $userInput;
                }

                # Site wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per user settings
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mPrintable = false;
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
1622
1623 # Regex callbacks, used in OutputPage::replaceVariables
1624
# Expands the message named by $matches[1] for inclusion in a page.
#
# Just get rid of the dangerous stuff
# Necessary because replaceVariables is called after removeHTMLtags,
# and message text can come from any user
#
# Internal links in the message are replaced while the link cache is
# suspended; afterwards the message's own page in the MediaWiki namespace is
# added to the link cache.
function wfReplaceMsgVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        $msgKey = $matches[1];
        $cleaned = $wgCurOut->removeHTMLtags( wfMsg( $msgKey ) );

        $wgLinkCache->suspend();
        $expanded = $wgCurOut->replaceInternalLinks( $cleaned );
        $wgLinkCache->resume();

        $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgKey );
        $wgLinkCache->addLinkObj( $msgTitle );

        return $expanded;
}
1637
# Effective <nowiki></nowiki>
# Not real <nowiki> because this is called after nowiki sections are processed
#
# Returns the message named by $matches[1] passed through wfEscapeWikiText()
# and adds the message's page in the MediaWiki namespace to the link cache.
function wfReplaceMsgnwVar( $matches ) {
        global $wgLinkCache;

        $text = wfEscapeWikiText( wfMsg( $matches[1] ) );
        $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
        return $text;
}
1646
1647
1648
1649 ?>