includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 # Globals used:
   6 #    objects:   $wgUser, $wgTitle, $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut, $wgArticle
   7 #
   8 #    query:     $wpPreview
   9 #
  10 #    settings:  $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic,
  11 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgUseLinkPrefixCombination
  12
  13 class Parser
  14 {
  15         var $mOutput, $mAutonumber, $mLastSection, $mDTopen;
  16
  17         function Parser()
  18         {
  19                 $this->clearState();
  20         }
  21
  22         function clearState()
  23         {
  24                 $this->mOutput = new ParserOutput;
  25                 $this->mAutonumber = 0;
  26                 $this->mLastSection = "";
  27                 $this->mDTopen = false;
  28         }
  29
  30         # First pass--just handle <nowiki> sections, pass the rest off
  31         # to doWikiPass2() which does all the real work.
  32         #
  33         # Returns a ParserOutput
  34         #
  35         function parse( $text, $linestart = true, $clearState = true )
  36         {
  37                 global $wgUseTeX;
  38                 $fname = "Parser::parse";
  39                 wfProfileIn( $fname );
  40                 $unique  = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
  41                 $unique2 = "4LIQ9nXtiYFPCSfitVwDw7EYwQlL4GeeQ7qSO";
  42                 $unique3 = "fPaA8gDfdLBqzj68Yjg9Hil3qEF8JGO0uszIp";
  43                 $nwlist = array();
  44                 $nwsecs = 0;
  45                 $mathlist = array();
  46                 $mathsecs = 0;
  47                 $prelist = array ();
  48                 $presecs = 0;
  49                 $stripped = "";
  50                 $stripped2 = "";
  51                 $stripped3 = "";
  52
  53                 if ( $clearState ) {
  54                         $this->clearState();
  55                 }
  56
  57                 # Replace any instances of the placeholders
  58                 $text = str_replace( $unique, wfHtmlEscapeFirst( $unique ), $text );
  59                 $text = str_replace( $unique2, wfHtmlEscapeFirst( $unique2 ), $text );
  60                 $text = str_replace( $unique3, wfHtmlEscapeFirst( $unique3 ), $text );
  61
  62                 while ( "" != $text ) {
  63                         $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
  64                         $stripped .= $p[0];
  65                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $text = ""; }
  66                         else {
  67                                 $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
  68                                 ++$nwsecs;
  69                                 $nwlist[$nwsecs] = wfEscapeHTMLTagsOnly($q[0]);
  70                                 $stripped .= $unique . $nwsecs . "s";
  71                                 $text = $q[1];
  72                         }
  73                 }
  74
  75                 if( $wgUseTeX ) {
  76                         while ( "" != $stripped ) {
  77                                 $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
  78                                 $stripped2 .= $p[0];
  79                                 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $stripped = ""; }
  80                                 else {
  81                                         $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
  82                                         ++$mathsecs;
  83                                         $mathlist[$mathsecs] = renderMath($q[0]);
  84                                         $stripped2 .= $unique2 . $mathsecs . "s";
  85                                         $stripped = $q[1];
  86                                 }
  87                         }
  88                 } else {
  89                         $stripped2 = $stripped;
  90                 }
  91
  92                 while ( "" != $stripped2 ) {
  93                         $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
  94                         $stripped3 .= $p[0];
  95                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $stripped2 = ""; }
  96                         else {
  97                                 $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
  98                                 ++$presecs;
  99                                 $prelist[$presecs] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
 100                                 $stripped3 .= $unique3 . $presecs . "s";
 101                                 $stripped2 = $q[1];
 102                         }
 103                 }
 104
 105                 $text = $this->doWikiPass2( $stripped3, $linestart );
 106
 107                 $specialChars = array("\\", "$");
 108                 $escapedChars = array("\\\\", "\\$");
 109
 110                 # Go backwards so that {$unique1}1 doesn't overwrite {$unique1}10
 111                 for ( $i = $presecs; $i >= 1; --$i ) {
 112                         $text = preg_replace( "/{$unique3}{$i}s/", str_replace( $specialChars,
 113                                 $escapedChars, $prelist[$i] ), $text );
 114                 }
 115
 116                 for ( $i = $mathsecs; $i >= 1; --$i ) {
 117                         $text = preg_replace( "/{$unique2}{$i}s/", str_replace( $specialChars,
 118                                 $escapedChars, $mathlist[$i] ), $text );
 119                 }
 120
 121                 for ( $i = $nwsecs; $i >= 1; --$i ) {
 122                         $text = preg_replace( "/{$unique}{$i}s/", str_replace( $specialChars,
 123                                 $escapedChars, $nwlist[$i] ), $text );
 124                 }
 125
 126                 $this->mOutput->setText( $text );
 127                 wfProfileOut( $fname );
 128                 return $this->mOutput;
 129         }
 130
 131         function categoryMagic ()
 132         {
 133                 global $wgTitle , $wgUseCategoryMagic, $wgLang ;
 134                 if ( !isset ( $wgUseCategoryMagic ) || !$wgUseCategoryMagic ) return ;
 135                 $id = $wgTitle->getArticleID() ;
 136                 $cat = ucfirst ( wfMsg ( "category" ) ) ;
 137                 $ti = $wgTitle->getText() ;
 138                 $ti = explode ( ":" , $ti , 2 ) ;
 139                 if ( $cat != $ti[0] ) return "" ;
 140                 $r = "<br break=all>\n" ;
 141
 142                 $articles = array() ;
 143                 $parents = array () ;
 144                 $children = array() ;
 145
 146
 147                 global $wgUser ;
 148                 $sk = $wgUser->getSkin() ;
 149
 150                 $doesexist = false ;
 151                 if ( $doesexist ) {
 152                         $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
 153                 } else {
 154                         $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 155                 }
 156
 157                 $res = wfQuery ( $sql, DB_READ ) ;
 158                 while ( $x = wfFetchObject ( $res ) )
 159                 {
 160                 #  $t = new Title ;
 161                 #  $t->newFromDBkey ( $x->l_from ) ;
 162                 #  $t = $t->getText() ;
 163                         if ( $doesexist ) {
 164                                 $t = $x->l_from ;
 165                         } else {
 166                                 $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 167                                 if ( $t != "" ) $t .= ":" ;
 168                                 $t .= $x->cur_title ;
 169                         }
 170
 171                         $y = explode ( ":" , $t , 2 ) ;
 172                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 173                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 174                         } else {
 175                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 176                         }
 177                 }
 178                 wfFreeResult ( $res ) ;
 179
 180                 # Children
 181                 if ( count ( $children ) > 0 )
 182                 {
 183                         asort ( $children ) ;
 184                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 185                         $r .= implode ( ", " , $children ) ;
 186                 }
 187
 188                 # Articles
 189                 if ( count ( $articles ) > 0 )
 190                 {
 191                         asort ( $articles ) ;
 192                         $h =  wfMsg( "category_header", $ti[1] );
 193                         $r .= "<h2>{$h}</h2>\n" ;
 194                         $r .= implode ( ", " , $articles ) ;
 195                 }
 196
 197
 198                 return $r ;
 199         }
 200
 201 function getHTMLattrs ()
 202 {
 203                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 204                         "title", "align", "lang", "dir", "width", "height",
 205                         "bgcolor", "clear", /* BR */ "noshade", /* HR */
 206                         "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 207                         /* FONT */ "type", "start", "value", "compact",
 208                         /* For various lists, mostly deprecated but safe */
 209                         "summary", "width", "border", "frame", "rules",
 210                         "cellspacing", "cellpadding", "valign", "char",
 211                         "charoff", "colgroup", "col", "span", "abbr", "axis",
 212                         "headers", "scope", "rowspan", "colspan", /* Tables */
 213                         "id", "class", "name", "style" /* For CSS */
 214                 );
 215 return $htmlattrs ;
 216 }
 217
 218 function fixTagAttributes ( $t )
 219 {
 220         if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 221         $htmlattrs = $this->getHTMLattrs() ;
 222
 223         # Strip non-approved attributes from the tag
 224         $t = preg_replace(
 225                 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 226                 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 227                 $t);
 228         # Strip javascript "expression" from stylesheets. Brute force approach:
 229         # If anythin offensive is found, all attributes of the HTML tag are dropped
 230
 231         if( preg_match(
 232                 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 233                 wfMungeToUtf8( $t ) ) )
 234         {
 235                 $t="";
 236         }
 237
 238         return trim ( $t ) ;
 239 }
 240
 241 function doTableStuff ( $t )
 242 {
 243         $t = explode ( "\n" , $t ) ;
 244         $td = array () ; # Is currently a td tag open?
 245                 $ltd = array () ; # Was it TD or TH?
 246                 $tr = array () ; # Is currently a tr tag open?
 247                 $ltr = array () ; # tr attributes
 248                 foreach ( $t AS $k => $x )
 249                 {
 250                         $x = rtrim ( $x ) ;
 251                         $fc = substr ( $x , 0 , 1 ) ;
 252                         if ( "{|" == substr ( $x , 0 , 2 ) )
 253                         {
 254                                 $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 255                                 array_push ( $td , false ) ;
 256                                 array_push ( $ltd , "" ) ;
 257                                 array_push ( $tr , false ) ;
 258                                 array_push ( $ltr , "" ) ;
 259                         }
 260                         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 261                         else if ( "|}" == substr ( $x , 0 , 2 ) )
 262                         {
 263                                 $z = "</table>\n" ;
 264                                 $l = array_pop ( $ltd ) ;
 265                                 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 266                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 267                                 array_pop ( $ltr ) ;
 268                                 $t[$k] = $z ;
 269                         }
 270                         /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 271                                         {
 272                                         $z = trim ( substr ( $x , 2 ) ) ;
 273                                         $t[$k] = "<caption>{$z}</caption>\n" ;
 274                                         }*/
 275                         else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 276                         {
 277                                 $x = substr ( $x , 1 ) ;
 278                                 while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 279                                 $z = "" ;
 280                                 $l = array_pop ( $ltd ) ;
 281                                 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 282                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 283                                 array_pop ( $ltr ) ;
 284                                 $t[$k] = $z ;
 285                                 array_push ( $tr , false ) ;
 286                                 array_push ( $td , false ) ;
 287                                 array_push ( $ltd , "" ) ;
 288                                 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 289                         }
 290                         else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 291                         {
 292                                 if ( "|+" == substr ( $x , 0 , 2 ) )
 293                                 {
 294                                         $fc = "+" ;
 295                                         $x = substr ( $x , 1 ) ;
 296                                 }
 297                                 $after = substr ( $x , 1 ) ;
 298                                 if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 299                                 $after = explode ( "||" , $after ) ;
 300                                 $t[$k] = "" ;
 301                                 foreach ( $after AS $theline )
 302                                 {
 303                                         $z = "" ;
 304                                         if ( $fc != "+" )
 305                                         {
 306                                                 $tra = array_pop ( $ltr ) ;
 307                                                 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 308                                                 array_push ( $tr , true ) ;
 309                                                 array_push ( $ltr , "" ) ;
 310                                         }
 311
 312                                         $l = array_pop ( $ltd ) ;
 313                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 314                                         if ( $fc == "|" ) $l = "TD" ;
 315                                         else if ( $fc == "!" ) $l = "TH" ;
 316                                         else if ( $fc == "+" ) $l = "CAPTION" ;
 317                                         else $l = "" ;
 318                                         array_push ( $ltd , $l ) ;
 319                                         $y = explode ( "|" , $theline , 2 ) ;
 320                                         if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 321                                         else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 322                                         $t[$k] .= $y ;
 323                                         array_push ( $td , true ) ;
 324                                 }
 325                         }
 326                 }
 327
 328         # Closing open td, tr && table
 329         while ( count ( $td ) > 0 )
 330         {
 331                 if ( array_pop ( $td ) ) $t[] = "</td>" ;
 332                 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 333                 $t[] = "</table>" ;
 334         }
 335
 336         $t = implode ( "\n" , $t ) ;
 337         #               $t = $this->removeHTMLtags( $t );
 338         return $t ;
 339 }
 340
 341         # Well, OK, it's actually about 14 passes.  But since all the
 342         # hard lifting is done inside PHP's regex code, it probably
 343         # wouldn't speed things up much to add a real parser.
 344         #
 345         function doWikiPass2( $text, $linestart )
 346         {
 347                 global $wgUser, $wgLang, $wgUseDynamicDates;
 348                 $fname = "OutputPage::doWikiPass2";
 349                 wfProfileIn( $fname );
 350
 351                 $text = $this->removeHTMLtags( $text );
 352                 $text = $this->replaceVariables( $text );
 353
 354                 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 355                 $text = str_replace ( "<HR>", "<hr>", $text );
 356
 357                 $text = $this->doHeadings( $text );
 358                 $text = $this->doBlockLevels( $text, $linestart );
 359
 360                 if($wgUseDynamicDates) {
 361                         global $wgDateFormatter;
 362                         $text = $wgDateFormatter->reformat( $wgUser->getOption("date"), $text );
 363                 }
 364
 365                 $text = $this->replaceExternalLinks( $text );
 366                 $text = $this->replaceInternalLinks ( $text );
 367                 $text = $this->doTableStuff ( $text ) ;
 368
 369                 $text = $this->magicISBN( $text );
 370                 $text = $this->magicRFC( $text );
 371                 $text = $this->formatHeadings( $text );
 372
 373                 $sk = $wgUser->getSkin();
 374                 $text = $sk->transformContent( $text );
 375                 $text .= $this->categoryMagic () ;
 376
 377                 wfProfileOut( $fname );
 378                 return $text;
 379         }
 380
 381
 382         /* private */ function doHeadings( $text )
 383         {
 384                 for ( $i = 6; $i >= 1; --$i ) {
 385                         $h = substr( "======", 0, $i );
 386                         $text = preg_replace( "/^{$h}([^=]+){$h}(\\s|$)/m",
 387                           "<h{$i}>\\1</h{$i}>\\2", $text );
 388                 }
 389                 return $text;
 390         }
 391
 392         # Note: we have to do external links before the internal ones,
 393         # and otherwise take great care in the order of things here, so
 394         # that we don't end up interpreting some URLs twice.
 395
 396         /* private */ function replaceExternalLinks( $text )
 397         {
 398                 $fname = "OutputPage::replaceExternalLinks";
 399                 wfProfileIn( $fname );
 400                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 401                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 402                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 403                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 404                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 405                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 406                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 407                 wfProfileOut( $fname );
 408                 return $text;
 409         }
 410
 411         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 412         {
 413                 global $wgUser, $printable;
 414                 global $wgAllowExternalImages;
 415
 416
 417                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 418                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 419
 420                 # this is  the list of separators that should be ignored if they
 421                 # are the last character of an URL but that should be included
 422                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 423                 # in this case, the last comma should not become part of the URL,
 424                 # but in "www.foo.com/123,2342,32.htm" it should.
 425                 $sep = ",;\.:";
 426                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 427                 $images = "gif|png|jpg|jpeg";
 428
 429                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 430                 # they are interpreted as part of the string (used to tell PHP
 431                 # that the content of the string should be inserted there).
 432                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 433                   "((?i){$images})([^{$uc}]|$)/";
 434
 435                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 436                 $sk = $wgUser->getSkin();
 437
 438                 if ( $autonumber and $wgAllowExternalImages) { # Use img tags only for HTTP urls
 439                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 440                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 441                 }
 442                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 443                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 444                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 445                   "</a>\\5", $s );
 446                 $s = str_replace( $unique, $protocol, $s );
 447
 448                 $a = explode( "[{$protocol}:", " " . $s );
 449                 $s = array_shift( $a );
 450                 $s = substr( $s, 1 );
 451
 452                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 453                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 454
 455                 foreach ( $a as $line ) {
 456                         if ( preg_match( $e1, $line, $m ) ) {
 457                                 $link = "{$protocol}:{$m[1]}";
 458                                 $trail = $m[2];
 459                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 460                                 else { $text = wfEscapeHTML( $link ); }
 461                         } else if ( preg_match( $e2, $line, $m ) ) {
 462                                 $link = "{$protocol}:{$m[1]}";
 463                                 $text = $m[2];
 464                                 $trail = $m[3];
 465                         } else {
 466                                 $s .= "[{$protocol}:" . $line;
 467                                 continue;
 468                         }
 469                         if ( $printable == "yes") $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
 470                         else $paren = "";
 471                         $la = $sk->getExternalLinkAttributes( $link, $text );
 472                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 473
 474                 }
 475                 return $s;
 476         }
 477
 478         /* private */ function handle3Quotes( &$state, $token )
 479         {
 480                 if ( $state["strong"] ) {
 481                         if ( $state["em"] && $state["em"] > $state["strong"] )
 482                         {
 483                                 # ''' lala ''lala '''
 484                                 $s = "</em></strong><em>";
 485                         } else {
 486                                 $s = "</strong>";
 487                         }
 488                         $state["strong"] = FALSE;
 489                 } else {
 490                         $s = "<strong>";
 491                         $state["strong"] = $token["pos"];
 492                 }
 493                 return $s;
 494         }
 495
 496         /* private */ function handle2Quotes( &$state, $token )
 497         {
 498                 if ( $state["em"] ) {
 499                         if ( $state["strong"] && $state["strong"] > $state["em"] )
 500                         {
 501                                 # ''lala'''lala'' ....'''
 502                                 $s = "</strong></em><strong>";
 503                         } else {
 504                                 $s = "</em>";
 505                         }
 506                         $state["em"] = FALSE;
 507                 } else {
 508                         $s = "<em>";
 509                         $state["em"] = $token["pos"];
 510                 }
 511                 return $s;
 512         }
 513
 514         /* private */ function handle5Quotes( &$state, $token )
 515         {
 516                 if ( $state["em"] && $state["strong"] ) {
 517                         if ( $state["em"] < $state["strong"] ) {
 518                                 $s .= "</strong></em>";
 519                         } else {
 520                                 $s .= "</em></strong>";
 521                         }
 522                         $state["strong"] = $state["em"] = FALSE;
 523                 } elseif ( $state["em"] ) {
 524                         $s .= "</em><strong>";
 525                         $state["em"] = FALSE;
 526                         $state["strong"] = $token["pos"];
 527                 } elseif ( $state["strong"] ) {
 528                         $s .= "</strong><em>";
 529                         $state["strong"] = FALSE;
 530                         $state["em"] = $token["pos"];
 531                 } else { # not $em and not $strong
 532                         $s .= "<strong><em>";
 533                         $state["strong"] = $state["em"] = $token["pos"];
 534                 }
 535                 return $s;
 536         }
 537
 538         /* private */ function replaceInternalLinks( $str )
 539         {
 540                 $tokenizer=Tokenizer::newFromString( $str );
 541                 $tokenStack = array();
 542
 543                 $s="";
 544                 $state["em"]      = FALSE;
 545                 $state["strong"]  = FALSE;
 546                 $tagIsOpen = FALSE;
 547
 548                 # The tokenizer splits the text into tokens and returns them one by one.
 549                 # Every call to the tokenizer returns a new token.
 550                 while ( $token = $tokenizer->nextToken() )
 551                 {
 552                         switch ( $token["type"] )
 553                         {
 554                                 case "text":
 555                                         # simple text with no further markup
 556                                         $txt = $token["text"];
 557                                         break;
 558                                 case "[[":
 559                                         # link opening tag.
 560                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 561                                         $tagIsOpen = TRUE;
 562                                         array_push( $tokenStack, $token );
 563                                         $txt="";
 564                                         break;
 565                                 case "]]":
 566                                         # link close tag.
 567                                         # get text from stack, glue it together, and call the code to handle a
 568                                         # link
 569                                         if ( count( $tokenStack ) == 0 )
 570                                         {
 571                                                 # stack empty. Found a ]] without an opening [[
 572                                                 $txt = "]]";
 573                                         } else {
 574                                                 $linkText = "";
 575                                                 $lastToken = array_pop( $tokenStack );
 576                                                 while ( $lastToken["type"] != "[[" )
 577                                                 {
 578                                                         $linkText = $lastToken["text"] . $linkText;
 579                                                         $lastToken = array_pop( $tokenStack );
 580                                                 }
 581                                                 $txt = $linkText ."]]";
 582                                                 $nextToken = $tokenizer->previewToken();
 583                                                 if ( $nextToken["type"] == "text" )
 584                                                 {
 585                                                         # Preview just looks at it. Now we have to fetch it.
 586                                                         $nextToken = $tokenizer->nextToken();
 587                                                         $txt .= $nextToken["text"];
 588                                                 }
 589                                                 $txt = $this->handleInternalLink( $txt );
 590                                                 #$txt = "<font color=\"#00FF00\"><b>&lt;" . $txt . "&gt;</b></font>";
 591                                         }
 592                                         $tagIsOpen = (count( $tokenStack ) != 0);
 593                                         break;
 594                                 case "'''":
 595                                         # This and the three next ones handle quotes
 596                                         $txt = $this->handle3Quotes( $state, $token );
 597                                         break;
 598                                 case "''":
 599                                         $txt = $this->handle2Quotes( $state, $token );
 600                                         break;
 601                                 case "'''''":
 602                                         $txt = $this->handle5Quotes( $state, $token );
 603                                         break;
 604                                 case "":
 605                                         # empty token
 606                                         $txt="";
 607                                         break;
 608                                 default:
 609                                         # An unkown token. Highlight.
 610                                         $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 611                                         $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 612                                         break;
 613                         }
 614                         # If we're parsing the interior of a link, don't append the interior to $s,
 615                         # but push it to the stack so it can be processed when a ]] token is found.
 616                         if ( $tagIsOpen  && $txt != "" ) {
 617                                 $token["type"] = "text";
 618                                 $token["text"] = $txt;
 619                                 array_push( $tokenStack, $token );
 620                         } else {
 621                                 $s .= $txt;
 622                         }
 623                 } #end while
 624                 if ( count( $tokenStack ) != 0 )
 625                 {
 626                         # still objects on stack. opened [[ tag without closing ]] tag.
 627                         $txt = "";
 628                         while ( $lastToken = array_pop( $tokenStack ) )
 629                         {
 630                                 if ( $lastToken["type"] == "text" )
 631                                 {
 632                                         $txt = $lastToken["text"] . $txt;
 633                                 } else {
 634                                         $txt = $lastToken["type"] . $txt;
 635                                 }
 636                         }
 637                         $s .= $txt;
 638                 }
 639                 return $s;
 640         }
 641
 642         /* private */ function handleInternalLink( $line )
 643         {
 644                 global $wgTitle, $wgUser, $wgLang;
 645                 global $wgLinkCache, $wgInterwikiMagic, $wgUseCategoryMagic;
 646                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 647                 static $fname = "OutputPage::replaceInternalLinks" ;
 648                 wfProfileIn( $fname );
 649
 650                 wfProfileIn( "$fname-setup" );
 651                 static $tc = FALSE;
 652                 static $sk = FALSE;
 653                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 654                 if ( !$sk ) { $sk = $wgUser->getSkin(); }
 655
 656                 # Match a link having the form [[namespace:link|alternate]]trail
 657                 static $e1 = FALSE;
 658                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 659                 # Match the end of a line for a word that's not followed by whitespace,
 660                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 661                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 662                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 663                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 664
 665
 666                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 667                 static $image = FALSE;
 668                 static $special = FALSE;
 669                 static $media = FALSE;
 670                 static $category = FALSE;
 671                 static $nottalk = "";
 672                 if ( !$image ) { $image = Namespace::getImage(); }
 673                 if ( !$special ) { $special = Namespace::getSpecial(); }
 674                 if ( !$media ) { $media = Namespace::getMedia(); }
 675                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 676                 if ( $nottalk=="" ) { $nottalk = !Namespace::isTalk( $wgTitle->getNamespace() ); }
 677
 678
 679                 wfProfileOut( "$fname-setup" );
 680
 681                 $prefix = $new_prefix;
 682                 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $line, $m ) ) {
 683                         $new_prefix = $m[2];
 684                         $line = $m[1];
 685                 } else {
 686                         $new_prefix = "";
 687                 }
 688                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 689                         $text = $m[2];
 690                         $trail = $m[3];
 691                 } else { # Invalid form; output directly
 692                         $s .= $prefix . "[[" . $line ;
 693                         return $s;
 694                 }
 695
 696                 /* Valid link forms:
 697                 Foobar -- normal
 698                 :Foobar -- override special treatment of prefix (images, language links)
 699                 /Foobar -- convert to CurrentPage/Foobar
 700                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 701                 */
 702                 $c = substr($m[1],0,1);
 703                 $noforce = ($c != ":");
 704                 if( $c == "/" ) { # subpage
 705                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 706                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 707                                 $noslash=$m[1];
 708                         } else {
 709                                 $noslash=substr($m[1],1);
 710                         }
 711                         if($wgNamespacesWithSubpages[$wgTitle->getNamespace()]) { # subpages allowed here
 712                                 $link = $wgTitle->getPrefixedText(). "/" . trim($noslash);
 713                                 if( "" == $text ) {
 714                                         $text= $m[1];
 715                                 } # this might be changed for ugliness reasons
 716                         } else {
 717                                 $link = $noslash; # no subpage allowed, use standard link
 718                         }
 719                 } elseif( $noforce ) { # no subpage
 720                         $link = $m[1];
 721                 } else {
 722                         $link = substr( $m[1], 1 );
 723                 }
 724                 if( "" == $text )
 725                         $text = $link;
 726
 727                 $nt = Title::newFromText( $link );
 728                 if( !$nt ) {
 729                         $s .= $prefix . "[[" . $line;
 730                         return $s;
 731                 }
 732                 $ns = $nt->getNamespace();
 733                 $iw = $nt->getInterWiki();
 734                 if( $noforce ) {
 735                         if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 736                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 737                                 $s .= $prefix . $trail;
 738                                 return $s;
 739                         }
 740                         if( $ns == $image ) {
 741                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 742                                 $wgLinkCache->addImageLinkObj( $nt );
 743                                 return $s;
 744                         }
 745                 }
 746                 if( ( $nt->getPrefixedText() == $wgTitle->getPrefixedText() ) &&
 747                     ( strpos( $link, "#" ) == FALSE ) ) {
 748                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 749                         return $s;
 750                 }
 751                 if ( $ns == $category && $wgUseCategoryMagic ) {
 752                         $t = explode ( ":" , $nt->getText() ) ;
 753                         array_shift ( $t ) ;
 754                         $t = implode ( ":" , $t ) ;
 755                         $t = $wgLang->ucFirst ( $t ) ;
 756 #                       $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
 757                         $nnt = Title::newFromText ( $category.":".$t ) ;
 758                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 759                         $this->mCategoryLinks[] = $t ;
 760                         $s .= $prefix . $trail ;
 761                         return $s ;
 762                 }
 763                 if( $ns == $media ) {
 764                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 765                         $wgLinkCache->addImageLinkObj( $nt );
 766                         return $s;
 767                 } elseif( $ns == $special ) {
 768                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 769                         return $s;
 770                 }
 771                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 772
 773                 wfProfileOut( $fname );
 774                 return $s;
 775         }
 776
 777         # Some functions here used by doBlockLevels()
 778         #
 779         /* private */ function closeParagraph()
 780         {
 781                 $result = "";
 782                 if ( 0 != strcmp( "p", $this->mLastSection ) &&
 783                   0 != strcmp( "", $this->mLastSection ) ) {
 784                         $result = "</" . $this->mLastSection  . ">";
 785                 }
 786                 $this->mLastSection = "";
 787                 return $result."\n";
 788         }
 789         # getCommon() returns the length of the longest common substring
 790         # of both arguments, starting at the beginning of both.
 791         #
 792         /* private */ function getCommon( $st1, $st2 )
 793         {
 794                 $fl = strlen( $st1 );
 795                 $shorter = strlen( $st2 );
 796                 if ( $fl < $shorter ) { $shorter = $fl; }
 797
 798                 for ( $i = 0; $i < $shorter; ++$i ) {
 799                         if ( $st1{$i} != $st2{$i} ) { break; }
 800                 }
 801                 return $i;
 802         }
 803         # These next three functions open, continue, and close the list
 804         # element appropriate to the prefix character passed into them.
 805         #
 806         /* private */ function openList( $char )
 807     {
 808                 $result = $this->closeParagraph();
 809
 810                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 811                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 812                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 813                 else if ( ";" == $char ) {
 814                         $result .= "<dl><dt>";
 815                         $this->mDTopen = true;
 816                 }
 817                 else { $result = "<!-- ERR 1 -->"; }
 818
 819                 return $result;
 820         }
 821
 822         /* private */ function nextItem( $char )
 823         {
 824                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 825                 else if ( ":" == $char || ";" == $char ) {
 826                         $close = "</dd>";
 827                         if ( $this->mDTopen ) { $close = "</dt>"; }
 828                         if ( ";" == $char ) {
 829                                 $this->mDTopen = true;
 830                                 return $close . "<dt>";
 831                         } else {
 832                                 $this->mDTopen = false;
 833                                 return $close . "<dd>";
 834                         }
 835                 }
 836                 return "<!-- ERR 2 -->";
 837         }
 838
 839         /* private */function closeList( $char )
 840         {
 841                 if ( "*" == $char ) { $text = "</li></ul>"; }
 842                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 843                 else if ( ":" == $char ) {
 844                         if ( $this->mDTopen ) {
 845                                 $this->mDTopen = false;
 846                                 $text = "</dt></dl>";
 847                         } else {
 848                                 $text = "</dd></dl>";
 849                         }
 850                 }
 851                 else {  return "<!-- ERR 3 -->"; }
 852                 return $text."\n";
 853         }
 854
 855         /* private */ function doBlockLevels( $text, $linestart )
 856         {
 857                 $fname = "OutputPage::doBlockLevels";
 858                 wfProfileIn( $fname );
 859                 # Parsing through the text line by line.  The main thing
 860                 # happening here is handling of block-level elements p, pre,
 861                 # and making lists from lines starting with * # : etc.
 862                 #
 863                 $a = explode( "\n", $text );
 864                 $text = $lastPref = "";
 865                 $this->mDTopen = $inBlockElem = false;
 866
 867                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 868                 foreach ( $a as $t ) {
 869                         if ( "" != $text ) { $text .= "\n"; }
 870
 871                         $oLine = $t;
 872                         $opl = strlen( $lastPref );
 873                         $npl = strspn( $t, "*#:;" );
 874                         $pref = substr( $t, 0, $npl );
 875                         $pref2 = str_replace( ";", ":", $pref );
 876                         $t = substr( $t, $npl );
 877
 878                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 879                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 880
 881                                 if ( ";" == substr( $pref, -1 ) ) {
 882                                         $cpos = strpos( $t, ":" );
 883                                         if ( ! ( false === $cpos ) ) {
 884                                                 $term = substr( $t, 0, $cpos );
 885                                                 $text .= $term . $this->nextItem( ":" );
 886                                                 $t = substr( $t, $cpos + 1 );
 887                                         }
 888                                 }
 889                         } else if (0 != $npl || 0 != $opl) {
 890                                 $cpl = $this->getCommon( $pref, $lastPref );
 891
 892                                 while ( $cpl < $opl ) {
 893                                         $text .= $this->closeList( $lastPref{$opl-1} );
 894                                         --$opl;
 895                                 }
 896                                 if ( $npl <= $cpl && $cpl > 0 ) {
 897                                         $text .= $this->nextItem( $pref{$cpl-1} );
 898                                 }
 899                                 while ( $npl > $cpl ) {
 900                                         $char = substr( $pref, $cpl, 1 );
 901                                         $text .= $this->openList( $char );
 902
 903                                         if ( ";" == $char ) {
 904                                                 $cpos = strpos( $t, ":" );
 905                                                 if ( ! ( false === $cpos ) ) {
 906                                                         $term = substr( $t, 0, $cpos );
 907                                                         $text .= $term . $this->nextItem( ":" );
 908                                                         $t = substr( $t, $cpos + 1 );
 909                                                 }
 910                                         }
 911                                         ++$cpl;
 912                                 }
 913                                 $lastPref = $pref2;
 914                         }
 915                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
 916                                 if ( preg_match(
 917                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
 918                                         $text .= $this->closeParagraph();
 919                                         $inBlockElem = true;
 920                                 }
 921                                 if ( ! $inBlockElem ) {
 922                                         if ( " " == $t{0} ) {
 923                                                 $newSection = "pre";
 924                                                 # $t = wfEscapeHTML( $t );
 925                                         }
 926                                         else { $newSection = "p"; }
 927
 928                                         if ( 0 == strcmp( "", trim( $oLine ) ) ) {
 929                                                 $text .= $this->closeParagraph();
 930                                                 $text .= "<" . $newSection . ">";
 931                                         } else if ( 0 != strcmp( $this->mLastSection,
 932                                           $newSection ) ) {
 933                                                 $text .= $this->closeParagraph();
 934                                                 if ( 0 != strcmp( "p", $newSection ) ) {
 935                                                         $text .= "<" . $newSection . ">";
 936                                                 }
 937                                         }
 938                                         $this->mLastSection = $newSection;
 939                                 }
 940                                 if ( $inBlockElem &&
 941                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
 942                                         $inBlockElem = false;
 943                                 }
 944                         }
 945                         $text .= $t;
 946                 }
 947                 while ( $npl ) {
 948                         $text .= $this->closeList( $pref2{$npl-1} );
 949                         --$npl;
 950                 }
 951                 if ( "" != $this->mLastSection ) {
 952                         if ( "p" != $this->mLastSection ) {
 953                                 $text .= "</" . $this->mLastSection . ">";
 954                         }
 955                         $this->mLastSection = "";
 956                 }
 957                 wfProfileOut( $fname );
 958                 return $text;
 959         }
 960
 961         /* private */ function replaceVariables( $text )
 962         {
 963                 global $wgLang, $wgCurOut;
 964                 $fname = "OutputPage::replaceVariables";
 965                 wfProfileIn( $fname );
 966
 967                 $magic = array();
 968
 969                 # Basic variables
 970                 # See Language.php for the definition of each magic word
 971                 # As with sigs, this uses the server's local time -- ensure
 972                 # this is appropriate for your audience!
 973
 974                 $magic[MAG_CURRENTMONTH] = date( "m" );
 975                 $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
 976                 $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
 977                 $magic[MAG_CURRENTDAY] = date("j");
 978                 $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
 979                 $magic[MAG_CURRENTYEAR] = date( "Y" );
 980                 $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
 981
 982                 $this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
 983
 984                 $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
 985                 if ( $mw->match( $text ) ) {
 986                         $v = wfNumberOfArticles();
 987                         $text = $mw->replace( $v, $text );
 988                         if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
 989                 }
 990
 991                 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
 992                 # The callbacks are at the bottom of this file
 993                 $wgCurOut = $this;
 994                 $mw =& MagicWord::get( MAG_MSG );
 995                 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
 996                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
 997
 998                 $mw =& MagicWord::get( MAG_MSGNW );
 999                 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
1000                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1001
1002                 wfProfileOut( $fname );
1003                 return $text;
1004         }
1005
1006         # Cleans up HTML, removes dangerous tags and attributes
1007         /* private */ function removeHTMLtags( $text )
1008         {
1009                 $fname = "OutputPage::removeHTMLtags";
1010                 wfProfileIn( $fname );
1011                 $htmlpairs = array( # Tags that must be closed
1012                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1013                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1014                         "strike", "strong", "tt", "var", "div", "center",
1015                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1016                         "ruby", "rt" , "rb" , "rp"
1017                 );
1018                 $htmlsingle = array(
1019                         "br", "p", "hr", "li", "dt", "dd"
1020                 );
1021                 $htmlnest = array( # Tags that can be nested--??
1022                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1023                         "dl", "font", "big", "small", "sub", "sup"
1024                 );
1025                 $tabletags = array( # Can only appear inside table
1026                         "td", "th", "tr"
1027                 );
1028
1029                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1030                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1031
1032                 $htmlattrs = $this->getHTMLattrs () ;
1033
1034                 # Remove HTML comments
1035                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1036
1037                 $bits = explode( "<", $text );
1038                 $text = array_shift( $bits );
1039                 $tagstack = array(); $tablestack = array();
1040
1041                 foreach ( $bits as $x ) {
1042                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1043                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1044                           $x, $regs );
1045                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1046                         error_reporting( $prev );
1047
1048                         $badtag = 0 ;
1049                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1050                                 # Check our stack
1051                                 if ( $slash ) {
1052                                         # Closing a tag...
1053                                         if ( ! in_array( $t, $htmlsingle ) &&
1054                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1055                                                 array_push( $tagstack, $ot );
1056                                                 $badtag = 1;
1057                                         } else {
1058                                                 if ( $t == "table" ) {
1059                                                         $tagstack = array_pop( $tablestack );
1060                                                 }
1061                                                 $newparams = "";
1062                                         }
1063                                 } else {
1064                                         # Keep track for later
1065                                         if ( in_array( $t, $tabletags ) &&
1066                                           ! in_array( "table", $tagstack ) ) {
1067                                                 $badtag = 1;
1068                                         } else if ( in_array( $t, $tagstack ) &&
1069                                           ! in_array ( $t , $htmlnest ) ) {
1070                                                 $badtag = 1 ;
1071                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1072                                                 if ( $t == "table" ) {
1073                                                         array_push( $tablestack, $tagstack );
1074                                                         $tagstack = array();
1075                                                 }
1076                                                 array_push( $tagstack, $t );
1077                                         }
1078                                         # Strip non-approved attributes from the tag
1079                                         $newparams = $this->fixTagAttributes($params);
1080
1081                                 }
1082                                 if ( ! $badtag ) {
1083                                         $rest = str_replace( ">", "&gt;", $rest );
1084                                         $text .= "<$slash$t $newparams$brace$rest";
1085                                         continue;
1086                                 }
1087                         }
1088                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1089                 }
1090                 # Close off any remaining tags
1091                 while ( $t = array_pop( $tagstack ) ) {
1092                         $text .= "</$t>\n";
1093                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1094                 }
1095                 wfProfileOut( $fname );
1096                 return $text;
1097         }
1098
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Takes the HTML text of the page and returns it with the rebuilt headlines
 * (and, where applicable, the table of contents) inserted.
 *
 * */
        /* private */ function formatHeadings( $text )
        {
                global $wgUser,$wgArticle,$wgTitle,$wpPreview;
                $nh=$wgUser->getOption( "numberheadings" );
                $st=$wgUser->getOption( "showtoc" );
                if(!$wgTitle->userCanEdit()) {
                        $es=0;
                        $esr=0;
                } else {
                        $es=$wgUser->getID() && $wgUser->getOption( "editsection" );
                        $esr=$wgUser->getID() && $wgUser->getOption( "editsectiononrightclick" );
                }

                # Inhibit editsection links if requested in the page
                $esw =& MagicWord::get( MAG_NOEDITSECTION );
                if ($esw->matchAndRemove( $text )) {
                        $es=0;
                }
                # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
                # do not add TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if ($mw->matchAndRemove( $text ))
                {
                        $st = 0;
                }

                # never add the TOC to the Main Page. This is an entry page that should not
                # be more than 1-2 screens large anyway
                if($wgTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}

                # We need this to perform operations on the HTML
                $sk=$wgUser->getSkin();

                # Get all headlines for numbering them and adding funky stuff like [edit]
                # links
                preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);

                # headline counter
                $c=0;

                # Working state. Everything that is appended to, incremented or
                # compared below gets an explicit start value here.
                $level=0;            // level of the current headline
                $prevlevel=0;        // level of the previous headline (0 = none yet)
                $toc="";             // accumulated TOC markup
                $toclevel=0;         // current TOC indentation depth
                $toclines=0;         // number of TOC entries
                $h=array();          // headline counters per level
                $numbering="";       // dotted number of the current headline
                $dot=0;              // whether the next number part needs a leading dot
                $refer=array();      // canonized headline per index
                $refers=array();     // occurrences per canonized headline
                $refcount=array();   // occurrence number per index
                $head=array();       // rebuilt headline markup per index
                $full="";            // reassembled page text

                # Ugh .. the TOC should have neat indentation levels which can be
                # passed to the skin functions. These are determined here
                foreach($matches[3] as $headline) {
                        if($level) { $prevlevel=$level;}
                        $level=$matches[1][$c];
                        if(($nh||$st) && $prevlevel && $level>$prevlevel) {

                                $h[$level]=0; // reset when we enter a new level
                                $toc.=$sk->tocIndent($level-$prevlevel);
                                $toclevel+=$level-$prevlevel;

                        }
                        if(($nh||$st) && $level<$prevlevel) {
                                $h[$level+1]=0; // reset when we step back a level
                                $toc.=$sk->tocUnindent($prevlevel-$level);
                                $toclevel-=$prevlevel-$level;

                        }
                        if(!isset($h[$level])) { $h[$level]=0; }
                        $h[$level]++; // count number of headlines for each level

                        if($nh||$st) {
                                for($i=1;$i<=$level;$i++) {
                                        // levels without any headline so far are left out
                                        if(!empty($h[$i])) {
                                                if($dot) {$numbering.=".";}
                                                $numbering.=$h[$i];
                                                $dot=1;
                                        }
                                }
                        }

                        // The canonized header is a version of the header text safe to use for links

                        $canonized_headline=preg_replace("/<.*?>/","",$headline); // strip out HTML
                        $tocline = trim( $canonized_headline );
                        $canonized_headline=str_replace('"',"",$canonized_headline);
                        $canonized_headline=str_replace(" ","_",trim($canonized_headline));
                        $refer[$c]=$canonized_headline;
                        if(!isset($refers[$canonized_headline])) { $refers[$canonized_headline]=0; }
                        $refers[$canonized_headline]++;  // count how many in assoc. array so we can track dupes in anchors
                        $refcount[$c]=$refers[$canonized_headline];

                        // Prepend the number to the heading text

                        if($nh||$st) {
                                $tocline=$numbering ." ". $tocline;

                                // Don't number the heading if it is the only one (looks silly)
                                if($nh && count($matches[3]) > 1) {
                                        $headline=$numbering . " " . $headline; // the two are different if the line contains a link
                                }
                        }

                        // Create the anchor for linking from the TOC to the section

                        $anchor=$canonized_headline;
                        if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
                        if($st) {
                                $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
                        }

                        $head[$c]="";
                        if($es && !isset($wpPreview)) {
                                $head[$c].=$sk->editSectionLink($c+1);
                        }

                        // Put it all together

                        $head[$c].="<h".$level.$matches[2][$c]
                         ."<a name=\"".$anchor."\">"
                         .$headline
                         ."</a>"
                         ."</h".$level.">";

                        // Add the edit section link

                        if($esr && !isset($wpPreview)) {
                                $head[$c]=$sk->editSectionScript($c+1,$head[$c]);
                        }

                        $numbering="";
                        $c++;
                        $dot=0;
                }

                if($st) {
                        $toclines=$c;
                        $toc.=$sk->tocUnindent($toclevel);
                        $toc=$sk->tocTable($toc);
                }

                // split up and insert constructed headlines

                $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
                $i=0;

                foreach($blocks as $block) {
                        if(($es) && !isset($wpPreview) && $c>0 && $i==0) {
                                # This is the [edit] link that appears for the top block of text when
                                # section editing is enabled
                                $full.=$sk->editSectionLink(0);
                        }
                        $full.=$block;
                        if($st && $toclines>3 && !$i) {
                                # Let's add a top anchor just in case we want to link to the top of the page
                                $full="<a name=\"top\"></a>".$full.$toc;
                        }

                        // There is one more text block than there are headlines,
                        // so the last block has no headline following it.
                        if(isset($head[$i])) {
                                $full.=$head[$i];
                        }
                        $i++;
                }

                return $full;
        }
1261
# Turn every "ISBN <number>" occurrence in $text into a link to the
# Booksources special page.
#
# The number is the longest run of digits, hyphens and upper-case letters
# that follows the marker, after optional spaces. Hyphens are stripped for
# the isbn= query value but kept in the link label; the optional spaces are
# not reproduced inside the link. A marker that is not followed by at least
# one digit or letter is copied through unchanged.
#
# $text   - text to scan
# returns - the text with recognised ISBN references replaced by <a> elements
/* private */ function magicISBN( $text )
{
        global $wgLang;

        # explode() stands in for the removed POSIX-regex split(); the marker
        # has no regex metacharacters, so the resulting pieces are the same.
        # A space is prepended before splitting and stripped again from the
        # first piece below.
        $pieces = explode( "ISBN ", " $text" );
        if ( count( $pieces ) < 2 ) {
                return $text;
        }
        $text = (string)substr( array_shift( $pieces ), 1 );
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        foreach ( $pieces as $piece ) {
                # Measure the runs with strspn() rather than peeling characters
                # off one at a time: this is well defined on an empty piece
                # (text ending in "ISBN " or directly after the number), where
                # a per-character loop would read past the end of the string.
                $blankLen = (int)strspn( $piece, " " );
                $isbnLen = (int)strspn( $piece, $valid, $blankLen );

                $isbn = (string)substr( $piece, $blankLen, $isbnLen );
                $rest = (string)substr( $piece, $blankLen + $isbnLen );

                # Value passed to Booksources: the label without its hyphens
                $num = str_replace( "-", "", $isbn );

                if ( "" === $num ) {
                        # Nothing usable after the marker (or only hyphens):
                        # put the piece back exactly as it was so no input
                        # characters are lost.
                        $text .= "ISBN " . $piece;
                } else {
                        $text .= "<a href=\"" . wfLocalUrlE( $wgLang->specialPage(
                          "Booksources"), "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn</a>";
                        $text .= $rest;
                }
        }
        return $text;
}
1294
# Counterpart of magicISBN() for RFC references. No rewriting is done
# here: the text is handed back exactly as it was received.
#
# $text   - text to scan
# returns - $text, unmodified
/* private */ function magicRFC( $text )
{
        # Intentionally a pass-through
        return $text;
}
1299
1300
1301 }
1302
# Value holder for the result of a parse: the rendered text plus the
# language links, category links and "contains old magic" flag that were
# collected alongside it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor. Every argument is optional, so "new ParserOutput"
        # yields an empty result object.
        #
        # $text             - rendered text
        # $languageLinks    - array of language links
        # $categoryLinks    - array of category links
        # $containsOldMagic - value for the "contains old magic" flag
        #
        # Declared as __construct because a method that merely shares the
        # class name is no longer invoked by "new" on current PHP versions;
        # with only the same-name method, all four fields would stay unset.
        function __construct( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Same-name constructor kept for interpreters that do not know
        # __construct and for code that calls it explicitly; it simply
        # forwards to __construct().
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->__construct( $text, $languageLinks, $categoryLinks, $containsOldMagic );
        }

        # Accessors: return the stored values as they are.
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators: each one passes the field and the new value to wfSetVar()
        # and returns whatever wfSetVar() returns (presumably the previous
        # value -- confirm against wfSetVar's definition).
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
}
1325
1326 # Regex callbacks, used in OutputPage::replaceVariables
1327
# Regex callback: $matches[1] is used as the message key.
#
# The message is fetched with wfMsg() and run through removeHTMLtags(),
# because replaceVariables is called after removeHTMLtags and message text
# can come from any user, so the dangerous markup has to be stripped here.
# Internal links in the message are then replaced while the link cache is
# suspended (presumably so they are not recorded for the current page --
# confirm against LinkCache), and finally the NS_MEDIAWIKI title of the
# message itself is added to the link cache.
#
# Returns the processed message text.
function wfReplaceMsgVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        $msgKey = $matches[1];
        $rendered = $wgCurOut->removeHTMLtags( wfMsg( $msgKey ) );

        # Link replacement runs with the cache switched off...
        $wgLinkCache->suspend();
        $rendered = $wgCurOut->replaceInternalLinks( $rendered );
        $wgLinkCache->resume();

        # ...and only the message page itself is registered afterwards.
        $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgKey );
        $wgLinkCache->addLinkObj( $msgTitle );

        return $rendered;
}
1340
# Regex callback giving the effect of <nowiki></nowiki> for a message.
# It cannot be a real <nowiki>, because this runs after nowiki sections
# have already been processed; instead the message text fetched with
# wfMsg( $matches[1] ) is passed through wfEscapeWikiText().
# The NS_MEDIAWIKI title of the message is added to the link cache.
#
# Returns the escaped message text.
function wfReplaceMsgnwVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        $msgKey = $matches[1];
        $escaped = wfEscapeWikiText( wfMsg( $msgKey ) );

        $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgKey );
        $wgLinkCache->addLinkObj( $msgTitle );

        return $escaped;
}
1349
1350
1351
1352 ?>