includes/Parser.php

   1 <?php
   2
   3 # Globals used:
   4 #    objects:   $wgUser, $wgTitle, $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut, $wgArticle
   5 #
   6 #    query:     $wpPreview
   7 #
   8 #    settings:  $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic,
   9 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgUseLinkPrefixCombination
  10
  11 class Parser
  12 {
  13         var $mOutput, $mAutonumber, $mLastSection, $mDTopen;
  14
  15         function Parser()
  16         {
  17                 $this->clearState();
  18         }
  19
  20         function clearState()
  21         {
  22                 $this->mOutput = new ParserOutput;
  23                 $this->mAutonumber = 0;
  24                 $this->mLastSection = "";
  25                 $this->mDTopen = false;
  26         }
  27
  28         # First pass--just handle <nowiki>, <math> and <pre> sections, pass the
  29         # rest off to doWikiPass2() which does all the real work.
  30         #
  31         # Returns a ParserOutput
  32         #
  33         function parse( $text, $linestart = true, $clearState = true )
  34         {
  35                 global $wgUseTeX;
  36                 $fname = "Parser::parse";
  37                 wfProfileIn( $fname );
  38                 $unique  = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
  39                 $unique2 = "4LIQ9nXtiYFPCSfitVwDw7EYwQlL4GeeQ7qSO";
  40                 $unique3 = "fPaA8gDfdLBqzj68Yjg9Hil3qEF8JGO0uszIp";
  41                 $nwlist = array();
  42                 $nwsecs = 0;
  43                 $mathlist = array();
  44                 $mathsecs = 0;
  45                 $prelist = array ();
  46                 $presecs = 0;
  47                 $stripped = "";
  48                 $stripped2 = "";
  49                 $stripped3 = "";
  50
  51                 if ( $clearState ) {
  52                         $this->clearState();
  53                 }
  54
  55                 # Replace any instances of the placeholders
  56                 $text = str_replace( $unique, wfHtmlEscapeFirst( $unique ), $text );
  57                 $text = str_replace( $unique2, wfHtmlEscapeFirst( $unique2 ), $text );
  58                 $text = str_replace( $unique3, wfHtmlEscapeFirst( $unique3 ), $text );
  59
  60                 while ( "" != $text ) {
  61                         $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
  62                         $stripped .= $p[0];
  63                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $text = ""; }
  64                         else {
  65                                 $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
  66                                 ++$nwsecs;
  67                                 $nwlist[$nwsecs] = wfEscapeHTMLTagsOnly($q[0]);
  68                                 $stripped .= $unique . $nwsecs . "s";
  69                                 $text = $q[1];
  70                         }
  71                 }
  72
  73                 if( $wgUseTeX ) {
  74                         while ( "" != $stripped ) {
  75                                 $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
  76                                 $stripped2 .= $p[0];
  77                                 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $stripped = ""; }
  78                                 else {
  79                                         $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
  80                                         ++$mathsecs;
  81                                         $mathlist[$mathsecs] = renderMath($q[0]);
  82                                         $stripped2 .= $unique2 . $mathsecs . "s";
  83                                         $stripped = $q[1];
  84                                 }
  85                         }
  86                 } else {
  87                         $stripped2 = $stripped;
  88                 }
  89
  90                 while ( "" != $stripped2 ) {
  91                         $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
  92                         $stripped3 .= $p[0];
  93                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $stripped2 = ""; }
  94                         else {
  95                                 $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
  96                                 ++$presecs;
  97                                 $prelist[$presecs] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
  98                                 $stripped3 .= $unique3 . $presecs . "s";
  99                                 $stripped2 = $q[1];
 100                         }
 101                 }
 102
 103                 $text = $this->doWikiPass2( $stripped3, $linestart );
 104
 105                 $specialChars = array("\\", "$");
 106                 $escapedChars = array("\\\\", "\\$");
 107
 108                 # Go backwards so that {$unique1}1 doesn't overwrite {$unique1}10
 109                 for ( $i = $presecs; $i >= 1; --$i ) {
 110                         $text = preg_replace( "/{$unique3}{$i}s/", str_replace( $specialChars,
 111                                 $escapedChars, $prelist[$i] ), $text );
 112                 }
 113
 114                 for ( $i = $mathsecs; $i >= 1; --$i ) {
 115                         $text = preg_replace( "/{$unique2}{$i}s/", str_replace( $specialChars,
 116                                 $escapedChars, $mathlist[$i] ), $text );
 117                 }
 118
 119                 for ( $i = $nwsecs; $i >= 1; --$i ) {
 120                         $text = preg_replace( "/{$unique}{$i}s/", str_replace( $specialChars,
 121                                 $escapedChars, $nwlist[$i] ), $text );
 122                 }
 123
 124                 $this->mOutput->setText( $text );
 125                 wfProfileOut( $fname );
 126                 return $this->mOutput;
 127         }
 128
 129         function categoryMagic ()
 130         {
 131                 global $wgTitle , $wgUseCategoryMagic, $wgLang ;
 132                 if ( !isset ( $wgUseCategoryMagic ) || !$wgUseCategoryMagic ) return ;
 133                 $id = $wgTitle->getArticleID() ;
 134                 $cat = ucfirst ( wfMsg ( "category" ) ) ;
 135                 $ti = $wgTitle->getText() ;
 136                 $ti = explode ( ":" , $ti , 2 ) ;
 137                 if ( $cat != $ti[0] ) return "" ;
 138                 $r = "<br break=all>\n" ;
 139
 140                 $articles = array() ;
 141                 $parents = array () ;
 142                 $children = array() ;
 143
 144
 145                 global $wgUser ;
 146                 $sk = $wgUser->getSkin() ;
 147
 148                 $doesexist = false ;
 149                 if ( $doesexist ) {
 150                         $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
 151                 } else {
 152                         $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 153                 }
 154
 155                 $res = wfQuery ( $sql, DB_READ ) ;
 156                 while ( $x = wfFetchObject ( $res ) )
 157                 {
 158                 #  $t = new Title ;
 159                 #  $t->newFromDBkey ( $x->l_from ) ;
 160                 #  $t = $t->getText() ;
 161                         if ( $doesexist ) {
 162                                 $t = $x->l_from ;
 163                         } else {
 164                                 $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 165                                 if ( $t != "" ) $t .= ":" ;
 166                                 $t .= $x->cur_title ;
 167                         }
 168
 169                         $y = explode ( ":" , $t , 2 ) ;
 170                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 171                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 172                         } else {
 173                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 174                         }
 175                 }
 176                 wfFreeResult ( $res ) ;
 177
 178                 # Children
 179                 if ( count ( $children ) > 0 )
 180                 {
 181                         asort ( $children ) ;
 182                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 183                         $r .= implode ( ", " , $children ) ;
 184                 }
 185
 186                 # Articles
 187                 if ( count ( $articles ) > 0 )
 188                 {
 189                         asort ( $articles ) ;
 190                         $h =  wfMsg( "category_header", $ti[1] );
 191                         $r .= "<h2>{$h}</h2>\n" ;
 192                         $r .= implode ( ", " , $articles ) ;
 193                 }
 194
 195
 196                 return $r ;
 197         }
 198
 199 function getHTMLattrs ()
 200 {
 201                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 202                         "title", "align", "lang", "dir", "width", "height",
 203                         "bgcolor", "clear", /* BR */ "noshade", /* HR */
 204                         "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 205                         /* FONT */ "type", "start", "value", "compact",
 206                         /* For various lists, mostly deprecated but safe */
 207                         "summary", "width", "border", "frame", "rules",
 208                         "cellspacing", "cellpadding", "valign", "char",
 209                         "charoff", "colgroup", "col", "span", "abbr", "axis",
 210                         "headers", "scope", "rowspan", "colspan", /* Tables */
 211                         "id", "class", "name", "style" /* For CSS */
 212                 );
 213 return $htmlattrs ;
 214 }
 215
 216 function fixTagAttributes ( $t )
 217 {
 218         if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 219         $htmlattrs = $this->getHTMLattrs() ;
 220
 221         # Strip non-approved attributes from the tag
 222         $t = preg_replace(
 223                 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 224                 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 225                 $t);
 226         # Strip javascript "expression" from stylesheets. Brute force approach:
 227         # If anythin offensive is found, all attributes of the HTML tag are dropped
 228
 229         if( preg_match(
 230                 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 231                 wfMungeToUtf8( $t ) ) )
 232         {
 233                 $t="";
 234         }
 235
 236         return trim ( $t ) ;
 237 }
 238
 239 function doTableStuff ( $t )
 240 {
 241         $t = explode ( "\n" , $t ) ;
 242         $td = array () ; # Is currently a td tag open?
 243                 $ltd = array () ; # Was it TD or TH?
 244                 $tr = array () ; # Is currently a tr tag open?
 245                 $ltr = array () ; # tr attributes
 246                 foreach ( $t AS $k => $x )
 247                 {
 248                         $x = rtrim ( $x ) ;
 249                         $fc = substr ( $x , 0 , 1 ) ;
 250                         if ( "{|" == substr ( $x , 0 , 2 ) )
 251                         {
 252                                 $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 253                                 array_push ( $td , false ) ;
 254                                 array_push ( $ltd , "" ) ;
 255                                 array_push ( $tr , false ) ;
 256                                 array_push ( $ltr , "" ) ;
 257                         }
 258                         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 259                         else if ( "|}" == substr ( $x , 0 , 2 ) )
 260                         {
 261                                 $z = "</table>\n" ;
 262                                 $l = array_pop ( $ltd ) ;
 263                                 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 264                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 265                                 array_pop ( $ltr ) ;
 266                                 $t[$k] = $z ;
 267                         }
 268                         /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 269                                         {
 270                                         $z = trim ( substr ( $x , 2 ) ) ;
 271                                         $t[$k] = "<caption>{$z}</caption>\n" ;
 272                                         }*/
 273                         else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 274                         {
 275                                 $x = substr ( $x , 1 ) ;
 276                                 while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 277                                 $z = "" ;
 278                                 $l = array_pop ( $ltd ) ;
 279                                 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 280                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 281                                 array_pop ( $ltr ) ;
 282                                 $t[$k] = $z ;
 283                                 array_push ( $tr , false ) ;
 284                                 array_push ( $td , false ) ;
 285                                 array_push ( $ltd , "" ) ;
 286                                 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 287                         }
 288                         else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 289                         {
 290                                 if ( "|+" == substr ( $x , 0 , 2 ) )
 291                                 {
 292                                         $fc = "+" ;
 293                                         $x = substr ( $x , 1 ) ;
 294                                 }
 295                                 $after = substr ( $x , 1 ) ;
 296                                 if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 297                                 $after = explode ( "||" , $after ) ;
 298                                 $t[$k] = "" ;
 299                                 foreach ( $after AS $theline )
 300                                 {
 301                                         $z = "" ;
 302                                         if ( $fc != "+" )
 303                                         {
 304                                                 $tra = array_pop ( $ltr ) ;
 305                                                 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 306                                                 array_push ( $tr , true ) ;
 307                                                 array_push ( $ltr , "" ) ;
 308                                         }
 309
 310                                         $l = array_pop ( $ltd ) ;
 311                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 312                                         if ( $fc == "|" ) $l = "TD" ;
 313                                         else if ( $fc == "!" ) $l = "TH" ;
 314                                         else if ( $fc == "+" ) $l = "CAPTION" ;
 315                                         else $l = "" ;
 316                                         array_push ( $ltd , $l ) ;
 317                                         $y = explode ( "|" , $theline , 2 ) ;
 318                                         if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 319                                         else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 320                                         $t[$k] .= $y ;
 321                                         array_push ( $td , true ) ;
 322                                 }
 323                         }
 324                 }
 325
 326         # Closing open td, tr && table
 327         while ( count ( $td ) > 0 )
 328         {
 329                 if ( array_pop ( $td ) ) $t[] = "</td>" ;
 330                 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 331                 $t[] = "</table>" ;
 332         }
 333
 334         $t = implode ( "\n" , $t ) ;
 335         #               $t = $this->removeHTMLtags( $t );
 336         return $t ;
 337 }
 338
 339         # Well, OK, it's actually about 14 passes.  But since all the
 340         # hard lifting is done inside PHP's regex code, it probably
 341         # wouldn't speed things up much to add a real parser.
 342         #
 343         function doWikiPass2( $text, $linestart )
 344         {
 345                 global $wgUser, $wgLang, $wgUseDynamicDates;
 346                 $fname = "OutputPage::doWikiPass2";
 347                 wfProfileIn( $fname );
 348
 349                 $text = $this->removeHTMLtags( $text );
 350                 $text = $this->replaceVariables( $text );
 351
 352                 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 353                 $text = str_replace ( "<HR>", "<hr>", $text );
 354
 355                 $text = $this->doAllQuotes( $text );
 356                 $text = $this->doHeadings( $text );
 357                 $text = $this->doBlockLevels( $text, $linestart );
 358
 359                 if($wgUseDynamicDates) {
 360                         global $wgDateFormatter;
 361                         $text = $wgDateFormatter->reformat( $wgUser->getOption("date"), $text );
 362                 }
 363
 364                 $text = $this->replaceExternalLinks( $text );
 365                 $text = $this->replaceInternalLinks ( $text );
 366                 $text = $this->doTableStuff ( $text ) ;
 367
 368                 $text = $this->magicISBN( $text );
 369                 $text = $this->magicRFC( $text );
 370                 $text = $this->formatHeadings( $text );
 371
 372                 $sk = $wgUser->getSkin();
 373                 $text = $sk->transformContent( $text );
 374                 $text .= $this->categoryMagic () ;
 375
 376                 wfProfileOut( $fname );
 377                 return $text;
 378         }
 379
 380         /* private */ function doAllQuotes( $text )
 381         {
 382                 $outtext = "";
 383                 $lines = explode( "\r\n", $text );
 384                 foreach ( $lines as $line ) {
 385                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\r\n";
 386                 }
 387                 return $outtext;
 388         }
 389
 390         /* private */ function doQuotes( $pre, $text, $mode )
 391         {
 392                 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
 393                         $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
 394                         $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
 395                         if ( substr ($m[2], 0, 1) == "'" ) {
 396                                 $m[2] = substr ($m[2], 1);
 397                                 if ($mode == "em") {
 398                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
 399                                 } else if ($mode == "strong") {
 400                                         return $m1_strong . $this->doQuotes ( "", $m[2], "" );
 401                                 } else if (($mode == "emstrong") || ($mode == "both")) {
 402                                         return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
 403                                 } else if ($mode == "strongem") {
 404                                         return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
 405                                 } else {
 406                                         return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
 407                                 }
 408                         } else {
 409                                 if ($mode == "strong") {
 410                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
 411                                 } else if ($mode == "em") {
 412                                         return $m1_em . $this->doQuotes ( "", $m[2], "" );
 413                                 } else if ($mode == "emstrong") {
 414                                         return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
 415                                 } else if (($mode == "strongem") || ($mode == "both")) {
 416                                         return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
 417                                 } else {
 418                                         return $m[1] . $this->doQuotes ( "", $m[2], "em" );
 419                                 }
 420                         }
 421                 } else {
 422                         $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
 423                         $text_em = ($text == "") ? "" : "<em>{$text}</em>";
 424                         if ($mode == "") {
 425                                 return $pre . $text;
 426                         } else if ($mode == "em") {
 427                                 return $pre . $text_em;
 428                         } else if ($mode == "strong") {
 429                                 return $pre . $text_strong;
 430                         } else if ($mode == "strongem") {
 431                                 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
 432                         } else {
 433                                 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
 434                         }
 435                 }
 436         }
 437
 438         /* private */ function doHeadings( $text )
 439         {
 440                 for ( $i = 6; $i >= 1; --$i ) {
 441                         $h = substr( "======", 0, $i );
 442                         $text = preg_replace( "/^{$h}([^=]+){$h}(\\s|$)/m",
 443                           "<h{$i}>\\1</h{$i}>\\2", $text );
 444                 }
 445                 return $text;
 446         }
 447
 448         # Note: we have to do external links before the internal ones,
 449         # and otherwise take great care in the order of things here, so
 450         # that we don't end up interpreting some URLs twice.
 451
 452         /* private */ function replaceExternalLinks( $text )
 453         {
 454                 $fname = "OutputPage::replaceExternalLinks";
 455                 wfProfileIn( $fname );
 456                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 457                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 458                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 459                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 460                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 461                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 462                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 463                 wfProfileOut( $fname );
 464                 return $text;
 465         }
 466
 467         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 468         {
 469                 global $wgUser, $printable;
 470                 global $wgAllowExternalImages;
 471
 472
 473                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 474                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 475
 476                 # this is  the list of separators that should be ignored if they
 477                 # are the last character of an URL but that should be included
 478                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 479                 # in this case, the last comma should not become part of the URL,
 480                 # but in "www.foo.com/123,2342,32.htm" it should.
 481                 $sep = ",;\.:";
 482                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 483                 $images = "gif|png|jpg|jpeg";
 484
 485                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 486                 # they are interpreted as part of the string (used to tell PHP
 487                 # that the content of the string should be inserted there).
 488                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 489                   "((?i){$images})([^{$uc}]|$)/";
 490
 491                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 492                 $sk = $wgUser->getSkin();
 493
 494                 if ( $autonumber and $wgAllowExternalImages) { # Use img tags only for HTTP urls
 495                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 496                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 497                 }
 498                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 499                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 500                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 501                   "</a>\\5", $s );
 502                 $s = str_replace( $unique, $protocol, $s );
 503
 504                 $a = explode( "[{$protocol}:", " " . $s );
 505                 $s = array_shift( $a );
 506                 $s = substr( $s, 1 );
 507
 508                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 509                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 510
 511                 foreach ( $a as $line ) {
 512                         if ( preg_match( $e1, $line, $m ) ) {
 513                                 $link = "{$protocol}:{$m[1]}";
 514                                 $trail = $m[2];
 515                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 516                                 else { $text = wfEscapeHTML( $link ); }
 517                         } else if ( preg_match( $e2, $line, $m ) ) {
 518                                 $link = "{$protocol}:{$m[1]}";
 519                                 $text = $m[2];
 520                                 $trail = $m[3];
 521                         } else {
 522                                 $s .= "[{$protocol}:" . $line;
 523                                 continue;
 524                         }
 525                         if ( $printable == "yes") $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
 526                         else $paren = "";
 527                         $la = $sk->getExternalLinkAttributes( $link, $text );
 528                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 529
 530                 }
 531                 return $s;
 532         }
 533
 534         /* private */ function replaceInternalLinks( $s )
 535         {
 536                 global $wgTitle, $wgUser, $wgLang;
 537                 global $wgLinkCache, $wgInterwikiMagic, $wgUseCategoryMagic;
 538                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 539                 wfProfileIn( $fname = "OutputPage::replaceInternalLinks" );
 540
 541                 wfProfileIn( "$fname-setup" );
 542                 $tc = Title::legalChars() . "#";
 543                 $sk = $wgUser->getSkin();
 544
 545                 $a = explode( "[[", " " . $s );
 546                 $s = array_shift( $a );
 547                 $s = substr( $s, 1 );
 548
 549                 # Match a link having the form [[namespace:link|alternate]]trail
 550                 $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD";
 551                 # Match the end of a line for a word that's not followed by whitespace,
 552                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 553                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 554                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 555                 $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 556
 557
 558                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 559                 $image = Namespace::getImage();
 560                 $special = Namespace::getSpecial();
 561                 $media = Namespace::getMedia();
 562                 $category = wfMsg ( "category" ) ;
 563                 $nottalk = !Namespace::isTalk( $wgTitle->getNamespace() );
 564
 565                 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
 566                         $new_prefix = $m[2];
 567                         $s = $m[1];
 568                 } else {
 569                         $new_prefix="";
 570                 }
 571
 572                 wfProfileOut( "$fname-setup" );
 573
 574                 foreach ( $a as $line ) {
 575                         $prefix = $new_prefix;
 576                         if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $line, $m ) ) {
 577                                 $new_prefix = $m[2];
 578                                 $line = $m[1];
 579                         } else {
 580                                 $new_prefix = "";
 581                         }
 582                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 583                                 $text = $m[2];
 584                                 $trail = $m[3];
 585                         } else { # Invalid form; output directly
 586                                 $s .= $prefix . "[[" . $line ;
 587                                 continue;
 588                         }
 589
 590                         /* Valid link forms:
 591                         Foobar -- normal
 592                         :Foobar -- override special treatment of prefix (images, language links)
 593                         /Foobar -- convert to CurrentPage/Foobar
 594                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 595                         */
 596                         $c = substr($m[1],0,1);
 597                         $noforce = ($c != ":");
 598                         if( $c == "/" ) { # subpage
 599                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 600                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 601                                         $noslash=$m[1];
 602                                 } else {
 603                                         $noslash=substr($m[1],1);
 604                                 }
 605                                 if($wgNamespacesWithSubpages[$wgTitle->getNamespace()]) { # subpages allowed here
 606                                         $link = $wgTitle->getPrefixedText(). "/" . trim($noslash);
 607                                         if( "" == $text ) {
 608                                                 $text= $m[1];
 609                                         } # this might be changed for ugliness reasons
 610                                 } else {
 611                                         $link = $noslash; # no subpage allowed, use standard link
 612                                 }
 613                         } elseif( $noforce ) { # no subpage
 614                                 $link = $m[1];
 615                         } else {
 616                                 $link = substr( $m[1], 1 );
 617                         }
 618                         if( "" == $text )
 619                                 $text = $link;
 620
 621                         $nt = Title::newFromText( $link );
 622                         if( !$nt ) {
 623                                 $s .= $prefix . "[[" . $line;
 624                                 continue;
 625                         }
 626                         $ns = $nt->getNamespace();
 627                         $iw = $nt->getInterWiki();
 628                         if( $noforce ) {
 629                                 if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 630                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 631                                         $s .= $prefix . $trail;
 632                                         continue;
 633                                 }
 634                                 if( $ns == $image ) {
 635                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 636                                         $wgLinkCache->addImageLinkObj( $nt );
 637                                         continue;
 638                                 }
 639                         }
 640                         if( ( $nt->getPrefixedText() == $wgTitle->getPrefixedText() ) &&
 641                             ( strpos( $link, "#" ) == FALSE ) ) {
 642                                 $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 643                                 continue;
 644                         }
 645                         if ( $ns == $category && $wgUseCategoryMagic ) {
 646                           $t = explode ( ":" , $nt->getText() ) ;
 647                                 array_shift ( $t ) ;
 648                                 $t = implode ( ":" , $t ) ;
 649                                 $t = $wgLang->ucFirst ( $t ) ;
 650 #                               $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
 651                                 $nnt = Title::newFromText ( $category.":".$t ) ;
 652                                 $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 653                                 $this->mCategoryLinks[] = $t ;
 654                                 $s .= $prefix . $trail ;
 655                                 continue ;
 656                         }
 657                         if( $ns == $media ) {
 658                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 659                                 $wgLinkCache->addImageLinkObj( $nt );
 660                                 continue;
 661                         } elseif( $ns == $special ) {
 662                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 663                                 continue;
 664                         }
 665                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 666                 }
 667                 wfProfileOut( $fname );
 668                 return $s;
 669         }
 670
 671         # Some functions here used by doBlockLevels()
 672         #
 673         /* private */ function closeParagraph()
 674         {
 675                 $result = "";
 676                 if ( 0 != strcmp( "p", $this->mLastSection ) &&
 677                   0 != strcmp( "", $this->mLastSection ) ) {
 678                         $result = "</" . $this->mLastSection  . ">";
 679                 }
 680                 $this->mLastSection = "";
 681                 return $result."\n";
 682         }
        # getCommon() returns the length of the longest common prefix
        # of both arguments, i.e. how many leading characters they share.
 685         #
 686         /* private */ function getCommon( $st1, $st2 )
 687         {
 688                 $fl = strlen( $st1 );
 689                 $shorter = strlen( $st2 );
 690                 if ( $fl < $shorter ) { $shorter = $fl; }
 691
 692                 for ( $i = 0; $i < $shorter; ++$i ) {
 693                         if ( $st1{$i} != $st2{$i} ) { break; }
 694                 }
 695                 return $i;
 696         }
 697         # These next three functions open, continue, and close the list
 698         # element appropriate to the prefix character passed into them.
 699         #
 700         /* private */ function openList( $char )
 701     {
 702                 $result = $this->closeParagraph();
 703
 704                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 705                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 706                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 707                 else if ( ";" == $char ) {
 708                         $result .= "<dl><dt>";
 709                         $this->mDTopen = true;
 710                 }
 711                 else { $result = "<!-- ERR 1 -->"; }
 712
 713                 return $result;
 714         }
 715
 716         /* private */ function nextItem( $char )
 717         {
 718                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 719                 else if ( ":" == $char || ";" == $char ) {
 720                         $close = "</dd>";
 721                         if ( $this->mDTopen ) { $close = "</dt>"; }
 722                         if ( ";" == $char ) {
 723                                 $this->mDTopen = true;
 724                                 return $close . "<dt>";
 725                         } else {
 726                                 $this->mDTopen = false;
 727                                 return $close . "<dd>";
 728                         }
 729                 }
 730                 return "<!-- ERR 2 -->";
 731         }
 732
 733         /* private */function closeList( $char )
 734         {
 735                 if ( "*" == $char ) { $text = "</li></ul>"; }
 736                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 737                 else if ( ":" == $char ) {
 738                         if ( $this->mDTopen ) {
 739                                 $this->mDTopen = false;
 740                                 $text = "</dt></dl>";
 741                         } else {
 742                                 $text = "</dd></dl>";
 743                         }
 744                 }
 745                 else {  return "<!-- ERR 3 -->"; }
 746                 return $text."\n";
 747         }
 748
 749         /* private */ function doBlockLevels( $text, $linestart )
 750         {
 751                 $fname = "OutputPage::doBlockLevels";
 752                 wfProfileIn( $fname );
 753                 # Parsing through the text line by line.  The main thing
 754                 # happening here is handling of block-level elements p, pre,
 755                 # and making lists from lines starting with * # : etc.
 756                 #
 757                 $a = explode( "\n", $text );
 758                 $text = $lastPref = "";
 759                 $this->mDTopen = $inBlockElem = false;
 760
 761                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 762                 foreach ( $a as $t ) {
 763                         if ( "" != $text ) { $text .= "\n"; }
 764
 765                         $oLine = $t;
 766                         $opl = strlen( $lastPref );
 767                         $npl = strspn( $t, "*#:;" );
 768                         $pref = substr( $t, 0, $npl );
 769                         $pref2 = str_replace( ";", ":", $pref );
 770                         $t = substr( $t, $npl );
 771
 772                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 773                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 774
 775                                 if ( ";" == substr( $pref, -1 ) ) {
 776                                         $cpos = strpos( $t, ":" );
 777                                         if ( ! ( false === $cpos ) ) {
 778                                                 $term = substr( $t, 0, $cpos );
 779                                                 $text .= $term . $this->nextItem( ":" );
 780                                                 $t = substr( $t, $cpos + 1 );
 781                                         }
 782                                 }
 783                         } else if (0 != $npl || 0 != $opl) {
 784                                 $cpl = $this->getCommon( $pref, $lastPref );
 785
 786                                 while ( $cpl < $opl ) {
 787                                         $text .= $this->closeList( $lastPref{$opl-1} );
 788                                         --$opl;
 789                                 }
 790                                 if ( $npl <= $cpl && $cpl > 0 ) {
 791                                         $text .= $this->nextItem( $pref{$cpl-1} );
 792                                 }
 793                                 while ( $npl > $cpl ) {
 794                                         $char = substr( $pref, $cpl, 1 );
 795                                         $text .= $this->openList( $char );
 796
 797                                         if ( ";" == $char ) {
 798                                                 $cpos = strpos( $t, ":" );
 799                                                 if ( ! ( false === $cpos ) ) {
 800                                                         $term = substr( $t, 0, $cpos );
 801                                                         $text .= $term . $this->nextItem( ":" );
 802                                                         $t = substr( $t, $cpos + 1 );
 803                                                 }
 804                                         }
 805                                         ++$cpl;
 806                                 }
 807                                 $lastPref = $pref2;
 808                         }
 809                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
 810                                 if ( preg_match(
 811                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
 812                                         $text .= $this->closeParagraph();
 813                                         $inBlockElem = true;
 814                                 }
 815                                 if ( ! $inBlockElem ) {
 816                                         if ( " " == $t{0} ) {
 817                                                 $newSection = "pre";
 818                                                 # $t = wfEscapeHTML( $t );
 819                                         }
 820                                         else { $newSection = "p"; }
 821
 822                                         if ( 0 == strcmp( "", trim( $oLine ) ) ) {
 823                                                 $text .= $this->closeParagraph();
 824                                                 $text .= "<" . $newSection . ">";
 825                                         } else if ( 0 != strcmp( $this->mLastSection,
 826                                           $newSection ) ) {
 827                                                 $text .= $this->closeParagraph();
 828                                                 if ( 0 != strcmp( "p", $newSection ) ) {
 829                                                         $text .= "<" . $newSection . ">";
 830                                                 }
 831                                         }
 832                                         $this->mLastSection = $newSection;
 833                                 }
 834                                 if ( $inBlockElem &&
 835                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
 836                                         $inBlockElem = false;
 837                                 }
 838                         }
 839                         $text .= $t;
 840                 }
 841                 while ( $npl ) {
 842                         $text .= $this->closeList( $pref2{$npl-1} );
 843                         --$npl;
 844                 }
 845                 if ( "" != $this->mLastSection ) {
 846                         if ( "p" != $this->mLastSection ) {
 847                                 $text .= "</" . $this->mLastSection . ">";
 848                         }
 849                         $this->mLastSection = "";
 850                 }
 851                 wfProfileOut( $fname );
 852                 return $text;
 853         }
 854
 855         /* private */ function replaceVariables( $text )
 856         {
 857                 global $wgLang, $wgCurOut;
 858                 $fname = "OutputPage::replaceVariables";
 859                 wfProfileIn( $fname );
 860
 861                 $magic = array();
 862
 863                 # Basic variables
 864                 # See Language.php for the definition of each magic word
 865                 # As with sigs, this uses the server's local time -- ensure
 866                 # this is appropriate for your audience!
 867
 868                 $magic[MAG_CURRENTMONTH] = date( "m" );
 869                 $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
 870                 $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
 871                 $magic[MAG_CURRENTDAY] = date("j");
 872                 $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
 873                 $magic[MAG_CURRENTYEAR] = date( "Y" );
 874                 $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
 875
 876                 $this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
 877
 878                 $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
 879                 if ( $mw->match( $text ) ) {
 880                         $v = wfNumberOfArticles();
 881                         $text = $mw->replace( $v, $text );
 882                         if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
 883                 }
 884
 885                 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
 886                 # The callbacks are at the bottom of this file
 887                 $wgCurOut = $this;
 888                 $mw =& MagicWord::get( MAG_MSG );
 889                 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
 890                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
 891
 892                 $mw =& MagicWord::get( MAG_MSGNW );
 893                 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
 894                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
 895
 896                 wfProfileOut( $fname );
 897                 return $text;
 898         }
 899
 900         # Cleans up HTML, removes dangerous tags and attributes
 901         /* private */ function removeHTMLtags( $text )
 902         {
 903                 $fname = "OutputPage::removeHTMLtags";
 904                 wfProfileIn( $fname );
 905                 $htmlpairs = array( # Tags that must be closed
 906                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
 907                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
 908                         "strike", "strong", "tt", "var", "div", "center",
 909                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
 910                         "ruby", "rt" , "rb" , "rp"
 911                 );
 912                 $htmlsingle = array(
 913                         "br", "p", "hr", "li", "dt", "dd"
 914                 );
 915                 $htmlnest = array( # Tags that can be nested--??
 916                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
 917                         "dl", "font", "big", "small", "sub", "sup"
 918                 );
 919                 $tabletags = array( # Can only appear inside table
 920                         "td", "th", "tr"
 921                 );
 922
 923                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
 924                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
 925
 926                 $htmlattrs = $this->getHTMLattrs () ;
 927
 928                 # Remove HTML comments
 929                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
 930
 931                 $bits = explode( "<", $text );
 932                 $text = array_shift( $bits );
 933                 $tagstack = array(); $tablestack = array();
 934
 935                 foreach ( $bits as $x ) {
 936                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
 937                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
 938                           $x, $regs );
 939                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
 940                         error_reporting( $prev );
 941
 942                         $badtag = 0 ;
 943                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
 944                                 # Check our stack
 945                                 if ( $slash ) {
 946                                         # Closing a tag...
 947                                         if ( ! in_array( $t, $htmlsingle ) &&
 948                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
 949                                                 array_push( $tagstack, $ot );
 950                                                 $badtag = 1;
 951                                         } else {
 952                                                 if ( $t == "table" ) {
 953                                                         $tagstack = array_pop( $tablestack );
 954                                                 }
 955                                                 $newparams = "";
 956                                         }
 957                                 } else {
 958                                         # Keep track for later
 959                                         if ( in_array( $t, $tabletags ) &&
 960                                           ! in_array( "table", $tagstack ) ) {
 961                                                 $badtag = 1;
 962                                         } else if ( in_array( $t, $tagstack ) &&
 963                                           ! in_array ( $t , $htmlnest ) ) {
 964                                                 $badtag = 1 ;
 965                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
 966                                                 if ( $t == "table" ) {
 967                                                         array_push( $tablestack, $tagstack );
 968                                                         $tagstack = array();
 969                                                 }
 970                                                 array_push( $tagstack, $t );
 971                                         }
 972                                         # Strip non-approved attributes from the tag
 973                                         $newparams = $this->fixTagAttributes($params);
 974
 975                                 }
 976                                 if ( ! $badtag ) {
 977                                         $rest = str_replace( ">", "&gt;", $rest );
 978                                         $text .= "<$slash$t $newparams$brace$rest";
 979                                         continue;
 980                                 }
 981                         }
 982                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
 983                 }
 984                 # Close off any remaining tags
 985                 while ( $t = array_pop( $tagstack ) ) {
 986                         $text .= "</$t>\n";
 987                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
 988                 }
 989                 wfProfileOut( $fname );
 990                 return $text;
 991         }
 992
 993 /*
 994  *
 995  * This function accomplishes several tasks:
 996  * 1) Auto-number headings if that option is enabled
 997  * 2) Add an [edit] link to sections for logged in users who have enabled the option
 998  * 3) Add a Table of contents on the top for users who have enabled the option
 999  * 4) Auto-anchor headings
1000  *
1001  * It loops through all headlines, collects the necessary data, then splits up the
1002  * string and re-inserts the newly formatted headlines.
1003  *
1004  * */
1005         /* private */ function formatHeadings( $text )
1006         {
1007                 global $wgUser,$wgArticle,$wgTitle,$wpPreview;
1008                 $nh=$wgUser->getOption( "numberheadings" );
1009                 $st=$wgUser->getOption( "showtoc" );
1010                 if(!$wgTitle->userCanEdit()) {
1011                         $es=0;
1012                         $esr=0;
1013                 } else {
1014                         $es=$wgUser->getID() && $wgUser->getOption( "editsection" );
1015                         $esr=$wgUser->getID() && $wgUser->getOption( "editsectiononrightclick" );
1016                 }
1017
1018                 # Inhibit editsection links if requested in the page
1019                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1020                 if ($esw->matchAndRemove( $text )) {
1021                         $es=0;
1022                 }
1023                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1024                 # do not add TOC
1025                 $mw =& MagicWord::get( MAG_NOTOC );
1026                 if ($mw->matchAndRemove( $text ))
1027                 {
1028                         $st = 0;
1029                 }
1030
1031                 # never add the TOC to the Main Page. This is an entry page that should not
1032                 # be more than 1-2 screens large anyway
1033                 if($wgTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1034
1035                 # We need this to perform operations on the HTML
1036                 $sk=$wgUser->getSkin();
1037
1038                 # Get all headlines for numbering them and adding funky stuff like [edit]
1039                 # links
1040                 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1041
1042                 # headline counter
1043                 $c=0;
1044
1045                 # Ugh .. the TOC should have neat indentation levels which can be
1046                 # passed to the skin functions. These are determined here
1047                 foreach($matches[3] as $headline) {
1048                         if($level) { $prevlevel=$level;}
1049                         $level=$matches[1][$c];
1050                         if(($nh||$st) && $prevlevel && $level>$prevlevel) {
1051
1052                                 $h[$level]=0; // reset when we enter a new level
1053                                 $toc.=$sk->tocIndent($level-$prevlevel);
1054                                 $toclevel+=$level-$prevlevel;
1055
1056                         }
1057                         if(($nh||$st) && $level<$prevlevel) {
1058                                 $h[$level+1]=0; // reset when we step back a level
1059                                 $toc.=$sk->tocUnindent($prevlevel-$level);
1060                                 $toclevel-=$prevlevel-$level;
1061
1062                         }
1063                         $h[$level]++; // count number of headlines for each level
1064
1065                         if($nh||$st) {
1066                                 for($i=1;$i<=$level;$i++) {
1067                                         if($h[$i]) {
1068                                                 if($dot) {$numbering.=".";}
1069                                                 $numbering.=$h[$i];
1070                                                 $dot=1;
1071                                         }
1072                                 }
1073                         }
1074
1075                         // The canonized header is a version of the header text safe to use for links
1076
1077                         $canonized_headline=preg_replace("/<.*?>/","",$headline); // strip out HTML
1078                         $tocline = trim( $canonized_headline );
1079                         $canonized_headline=str_replace('"',"",$canonized_headline);
1080                         $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1081                         $refer[$c]=$canonized_headline;
1082                         $refers[$canonized_headline]++;  // count how many in assoc. array so we can track dupes in anchors
1083                         $refcount[$c]=$refers[$canonized_headline];
1084
1085             // Prepend the number to the heading text
1086
1087                         if($nh||$st) {
1088                                 $tocline=$numbering ." ". $tocline;
1089
1090                                 // Don't number the heading if it is the only one (looks silly)
1091                                 if($nh && count($matches[3]) > 1) {
1092                                         $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1093                                 }
1094                         }
1095
1096                         // Create the anchor for linking from the TOC to the section
1097
1098                         $anchor=$canonized_headline;
1099                         if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1100                         if($st) {
1101                                 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
1102                         }
1103                         if($es && !isset($wpPreview)) {
1104                                 $head[$c].=$sk->editSectionLink($c+1);
1105                         }
1106
1107                         // Put it all together
1108
1109                         $head[$c].="<h".$level.$matches[2][$c]
1110                          ."<a name=\"".$anchor."\">"
1111                          .$headline
1112                          ."</a>"
1113                          ."</h".$level.">";
1114
1115                         // Add the edit section link
1116
1117                         if($esr && !isset($wpPreview)) {
1118                                 $head[$c]=$sk->editSectionScript($c+1,$head[$c]);
1119                         }
1120
1121                         $numbering="";
1122                         $c++;
1123                         $dot=0;
1124                 }
1125
1126                 if($st) {
1127                         $toclines=$c;
1128                         $toc.=$sk->tocUnindent($toclevel);
1129                         $toc=$sk->tocTable($toc);
1130                 }
1131
1132                 // split up and insert constructed headlines
1133
1134                 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1135                 $i=0;
1136
1137                 foreach($blocks as $block) {
1138                         if(($es) && !isset($wpPreview) && $c>0 && $i==0) {
1139                             # This is the [edit] link that appears for the top block of text when
1140                                 # section editing is enabled
1141                                 $full.=$sk->editSectionLink(0);
1142                         }
1143                         $full.=$block;
1144                         if($st && $toclines>3 && !$i) {
1145                                 # Let's add a top anchor just in case we want to link to the top of the page
1146                                 $full="<a name=\"top\"></a>".$full.$toc;
1147                         }
1148
1149                         $full.=$head[$i];
1150                         $i++;
1151                 }
1152
1153                 return $full;
1154         }
1155
1156         /* private */ function magicISBN( $text )
1157         {
1158                 global $wgLang;
1159
1160                 $a = split( "ISBN ", " $text" );
1161                 if ( count ( $a ) < 2 ) return $text;
1162                 $text = substr( array_shift( $a ), 1);
1163                 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1164
1165                 foreach ( $a as $x ) {
1166                         $isbn = $blank = "" ;
1167                         while ( " " == $x{0} ) {
1168                                 $blank .= " ";
1169                                 $x = substr( $x, 1 );
1170                         }
1171                         while ( strstr( $valid, $x{0} ) != false ) {
1172                                 $isbn .= $x{0};
1173                                 $x = substr( $x, 1 );
1174                         }
1175                         $num = str_replace( "-", "", $isbn );
1176                         $num = str_replace( " ", "", $num );
1177
1178                         if ( "" == $num ) {
1179                                 $text .= "ISBN $blank$x";
1180                         } else {
1181                                 $text .= "<a href=\"" . wfLocalUrlE( $wgLang->specialPage(
1182                                   "Booksources"), "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn</a>";
1183                                 $text .= $x;
1184                         }
1185                 }
1186                 return $text;
1187         }
1188
1189         /* private */ function magicRFC( $text )
1190         {
1191                 return $text;
1192         }
1193
1194
1195 }
1196
# Holds the result of a parse: the rendered text, the language and category
# links collected along the way, and the "contains old magic" marker.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Stores the four values as given; every argument is optional.
        #
        # Defined under this name because PHP 8 no longer treats a method
        # named after its class as the constructor.
        function __construct( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Class-named constructor, kept so that PHP 4 still initialises the
        # object and explicit calls to ParserOutput() keep working.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->__construct( $text, $languageLinks, $categoryLinks, $containsOldMagic );
        }

        # Accessors: return the stored value unchanged
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators: each passes the member and the new value to wfSetVar()
        # and returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
}
1219
# Regex callbacks, used in Parser::replaceVariables
1221
1222 # Just get rid of the dangerous stuff
1223 # Necessary because replaceVariables is called after removeHTMLtags,
1224 # and message text can come from any user
function wfReplaceMsgVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        # $matches[1] is the message name captured by the magic word
        $msgName = $matches[1];

        # Sanitise the message text before anything else touches it
        $clean = $wgCurOut->removeHTMLtags( wfMsg( $msgName ) );

        # Internal links are expanded while the link cache is suspended
        $wgLinkCache->suspend();
        $expanded = $wgCurOut->replaceInternalLinks( $clean );
        $wgLinkCache->resume();

        # Register the MediaWiki-namespace page behind the message itself
        $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $msgName ) );
        return $expanded;
}
1234
1235 # Effective <nowiki></nowiki>
1236 # Not real <nowiki> because this is called after nowiki sections are processed
function wfReplaceMsgnwVar( $matches ) {
        # Only the link cache is needed; the unused $wgCurOut import was dropped.
        global $wgLinkCache;

        # $matches[1] is the message name; its text is escaped with
        # wfEscapeWikiText() rather than parsed.
        $text = wfEscapeWikiText( wfMsg( $matches[1] ) );

        # Register the MediaWiki-namespace page behind the message
        $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
        return $text;
}
1243
1244
1245
1246 ?>