includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   # Optional extensions: load a renderer only when the site configuration
   # switches it on. empty() treats a setting that was never defined as "off"
   # instead of raising an undefined-index notice.
   if ( !empty( $GLOBALS['wgUseWikiHiero'] ) ) {
       require_once( 'extensions/wikihiero/wikihiero.php' );
   }
   if ( !empty( $GLOBALS['wgUseTimeline'] ) ) {
       require_once( 'extensions/timeline/Timeline.php' );
   }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  # Presumably the per-page inclusion limit described in the header comment
  # (its use is outside this part of the file -- confirm).
  define( 'MAX_INCLUDE_REPEAT', 5 );

  # Values accepted by $mOutputType
  define( 'OT_HTML', 1 );
  define( 'OT_WIKI', 2 );
  define( 'OT_MSG', 3 );

  # Pseudo tag name understood by extractTags(): when it is passed as $tag,
  # HTML comments (<!-- ... -->) are extracted instead of an <XML>-style tag.
  # It should not be anything we may want to use in wikisyntax.
  define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

  # Prefix of the markers used for escaping; needed by at least two functions
  define( 'UNIQ_PREFIX', 'NaodW29' );
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  # Constructor (PHP 4 style: a method named after the class).
  # Brings the new object into the state produced by clearState().
  function Parser() {
      $this->clearState();
  }
  69
  # Re-initialise every member that holds per-parse state.
  function clearState() {
      # Fresh output object and empty bookkeeping arrays
      $this->mOutput       = new ParserOutput;
      $this->mStripState   = array();
      $this->mIncludeCount = array();
      $this->mArgStack     = array();

      # Scalars
      $this->mAutonumber  = 0;
      $this->mLastSection = "";
      $this->mVariables   = false;

      # Flags
      $this->mDTopen   = false;
      $this->mInPre    = false;
      $this->mInNowiki = false;

      # NOTE(review): $this->categoryMagicDone, which internalParse() sets,
      # is not reset here -- confirm whether that is intended.
  }
  83
  84         # First pass--just handle <nowiki> sections, pass the rest off
  85         # to internalParse() which does all the real work.
  86         #
  87         # Returns a ParserOutput
  88         #
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
      # Convert wiki markup to HTML.
      #
      # $text       - wiki markup to convert
      # $title      - title object, kept by reference in $this->mTitle
      # $options    - parser options, kept in $this->mOptions
      # $linestart  - handed on to internalParse() and doBlockLevels()
      # $clearState - when true, clearState() is called before parsing
      #
      # Returns $this->mOutput after the generated text was stored in it
      # with setText().
      global $wgUseTidy;
      $fname = "Parser::parse";
      wfProfileIn( $fname );

      if ( $clearState ) {
          $this->clearState();
      }

      $this->mOptions = $options;
      $this->mTitle =& $title;
      $this->mOutputType = OT_HTML;

      $text = $this->strip( $text, $this->mStripState );
      $text = $this->internalParse( $text, $linestart );
      $text = $this->unstrip( $text, $this->mStripState );

      # Clean up special characters, only run once, next-to-last before doBlockLevels
      if ( !$wgUseTidy ) {
          $fixtags = array(
              # french spaces, last one Guillemet-left
              # only if there is something before the space
              "/(.) (\\?|:|!|\\302\\273)/i" => "\\1&nbsp;\\2",
              # french spaces, Guillemet-right
              "/(\\302\\253) /i" => "\\1&nbsp;",
              "/<hr *>/i" => '<hr />',
              "/<br *>/i" => '<br />',
              "/<center *>/i" => '<div class="center">',
              "/<\\/center *>/i" => '</div>',
              # Clean up spare ampersands; note that we probably ought to be
              # more careful about named entities. The hex class is A-F/a-f
              # only, so something like &#xZZ; is no longer taken for a valid
              # character reference.
              '/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
          );
      } else {
          # tidy is run below, so only the replacements that are still
          # listed here are applied by hand
          $fixtags = array(
              # french spaces, last one Guillemet-left
              "/ (\\?|:|!|\\302\\273)/i" => "&nbsp;\\1",
              # french spaces, Guillemet-right
              "/(\\302\\253) /i" => "\\1&nbsp;",
              "/<center *>/i" => '<div class="center">',
              "/<\\/center *>/i" => '</div>'
          );
      }
      # The patterns are applied one after another, in array order
      $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

      # only once and last
      $text = $this->doBlockLevels( $text, $linestart );
      if ( $wgUseTidy ) {
          $text = $this->tidy( $text );
      }
      $this->mOutput->setText( $text );
      wfProfileOut( $fname );
      return $this->mOutput;
  }
 144
 # Returns two mt_rand() draws from 0..0x7fffffff, each converted to
 # lowercase hexadecimal, concatenated into one string.
 /* static */ function getRandomString() {
     $first = dechex( mt_rand( 0, 0x7fffffff ) );
     $second = dechex( mt_rand( 0, 0x7fffffff ) );
     return $first . $second;
 }
 149
 150         # Replaces all occurrences of <$tag>content</$tag> in the text
 151         # with a random marker and returns the new text. the output parameter
 152         # $content will be an associative array filled with data on the form
 153         # $unique_marker => content.
 154
 155         # If $content is already set, the additional entries will be appended
 156
 157         # If $tag is set to STRIP_COMMENTS, the function will extract
 158         # <!-- HTML comments -->
 159
 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
     # $tag         - tag name, or STRIP_COMMENTS to extract HTML comments
     # $text        - text to scan
     # $content     - in/out: receives marker => extracted content
     # $uniq_prefix - string put in front of every generated marker
     # Returns $text with every extracted section replaced by its marker.
     $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
     if ( !$content ) {
         $content = array( );
     }

     # The delimiters do not change while scanning, so build them once
     if ( $tag == STRIP_COMMENTS ) {
         $openPattern = "/<!--/i";
         $closePattern = "/-->/i";
     } else {
         $openPattern = "/<\\s*$tag\\s*>/i";
         $closePattern = "/<\\/\\s*$tag\\s*>/i";
     }

     $n = 1;
     $stripped = "";

     while ( "" != $text ) {
         $p = preg_split( $openPattern, $text, 2 );
         $stripped .= $p[0];
         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
             # No further opening delimiter, or nothing follows it
             $text = "";
         } else {
             $q = preg_split( $closePattern, $p[1], 2 );
             $marker = $rnd . sprintf("%08X", $n++);
             $content[$marker] = $q[0];
             $stripped .= $marker;
             # Without a closing delimiter the rest of the text was captured
             # as content and nothing is left to scan; $q[1] does not exist
             # in that case and must not be read.
             $text = isset( $q[1] ) ? $q[1] : "";
         }
     }
     return $stripped;
 }
 191
 # Strips and renders <nowiki>, <hiero>, <timeline>, <math> and <pre>.
 # When the output type is OT_HTML, performs the necessary rendering operations on plugins.
 # Returns the text, and fills an array with the data needed in unstrip().
 # If $state is already a valid strip state, the new entries are added to it.
 196
 197         # When $stripcomments is set, HTML comments <!-- like this -->
 198         # will be stripped in addition to other tags. This is important
 199         # for section editing, where these comments cause confusion when
 200         # counting the sections in the wikisource
 function strip( $text, &$state, $stripcomments = false )
 {
     # $text          - text to process
     # $state         - in/out strip state: one array of marker => replacement
     #                  per tag type
     # $stripcomments - also extract HTML comments
     # Returns $text with the extracted sections replaced by markers.
     $render = ($this->mOutputType == OT_HTML);
     $uniq_prefix = UNIQ_PREFIX;
     # NOTE: occurrences of the prefix that are already present in $text are
     # not escaped (that step was disabled in the original code).

     # <nowiki>: kept verbatim when rendering, re-wrapped otherwise
     $nowiki_content = array();
     $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
     if ( !$render ) {
         foreach( $nowiki_content as $marker => $content ){
             $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
         }
     }

     # <hiero>
     $hiero_content = array();
     $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
     foreach( $hiero_content as $marker => $content ){
         if( !$render ) {
             $hiero_content[$marker] = "<hiero>$content</hiero>";
         } else if( !empty( $GLOBALS['wgUseWikiHiero'] ) ) {
             $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
         } else {
             $hiero_content[$marker] = "&lt;hiero&gt;$content&lt;/hiero&gt;";
         }
     }

     # <timeline>
     $timeline_content = array();
     $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
     foreach( $timeline_content as $marker => $content ){
         if( !$render ) {
             $timeline_content[$marker] = "<timeline>$content</timeline>";
         } else if( !empty( $GLOBALS['wgUseTimeline'] ) ) {
             $timeline_content[$marker] = renderTimeline( $content );
         } else {
             $timeline_content[$marker] = "&lt;timeline&gt;$content&lt;/timeline&gt;";
         }
     }

     # <math>
     $math_content = array();
     $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
     foreach( $math_content as $marker => $content ){
         if( !$render ) {
             $math_content[$marker] = "<math>$content</math>";
         } else if( $this->mOptions->getUseTeX() ) {
             $math_content[$marker] = renderMath( $content );
         } else {
             $math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
         }
     }

     # <pre>
     $pre_content = array();
     $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
     foreach( $pre_content as $marker => $content ){
         if( $render ) {
             $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
         } else {
             $pre_content[$marker] = "<pre>$content</pre>";
         }
     }

     # HTML comments, only on request
     $comment_content = array();
     if( $stripcomments ) {
         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
         foreach( $comment_content as $marker => $content ){
             $comment_content[$marker] = "<!--$content-->";
         }
     }

     # The order of the keys matters: unstrip() walks them backwards
     $extracted = array(
         'nowiki' => $nowiki_content,
         'hiero' => $hiero_content,
         'timeline' => $timeline_content,
         'math' => $math_content,
         'pre' => $pre_content,
         'comment' => $comment_content
     );

     # Merge state with the pre-existing state, if there is one
     if ( $state ) {
         foreach( $extracted as $type => $items ){
             if( isset( $state[$type] ) ) {
                 $state[$type] = $state[$type] + $items;
             } else {
                 # A state created by insertStripItem() may lack some keys;
                 # "NULL + array" would be a fatal error.
                 $state[$type] = $items;
             }
         }
     } else {
         $state = $extracted;
     }
     return $text;
 }
 300
 # Put the stored replacements back into $text.
 #
 # $text  - text containing strip markers
 # $state - strip state: array of (marker => replacement) arrays
 # Returns the text with every known marker replaced.
 function unstrip( $text, &$state )
 {
     # Nothing was ever stripped
     if ( !is_array( $state ) ) {
         return $text;
     }

     # Must expand in reverse order, otherwise nested tags will be corrupted.
     # Iterating over reversed copies leaves the internal array pointers of
     # the caller's $state untouched.
     foreach ( array_reverse( $state, true ) as $contentDict ) {
         if ( !is_array( $contentDict ) ) {
             continue;
         }
         foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
             $text = str_replace( $marker, $content, $text );
         }
     }

     return $text;
 }
 313
 314         # Add an item to the strip state
 315         # Returns the unique tag which must be inserted into the stripped text
 316         # The tag will be replaced with the original text in unstrip()
 317
 function insertStripItem( $text, &$state )
 {
     # $text  - text to store
     # $state - in/out strip state; created here when it is still empty
     # Returns the marker under which $text was stored in $state['item'].
     $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
     if ( !$state ) {
         # Same buckets, in the same order, as the state built by strip(),
         # so that strip() can merge into a state that was created here.
         $state = array(
           'nowiki' => array(),
           'hiero' => array(),
           'timeline' => array(),
           'math' => array(),
           'pre' => array(),
           'comment' => array()
         );
     }
     $state['item'][$rnd] = $text;
     return $rnd;
 }
 332
 333         # This method generates the list of subcategories and pages for a category
 function categoryMagic ()
 {
     # Returns the HTML of the listing, or "" when category magic is
     # switched off in the parser options or $this->mTitle is not in the
     # category namespace.
     global $wgLang ;
     if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

     $cns = Namespace::getCategory() ;
     if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

     $r = "<br style=\"clear:both;\"/>\n";

     # The skin comes from the parser options, as in internalParse();
     # the parser is meant to stay away from $wgUser.
     $sk =& $this->mOptions->getSkin() ;

     $articles = array() ;
     $children = array() ;
     $data = array () ;

     # FIXME: add limits
     $t = wfStrencode( $this->mTitle->getDBKey() );
     $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
     $res = wfQuery ( $sql, DB_READ ) ;
     while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;

     # For all pages that link to this category
     foreach ( $data AS $x )
     {
         # Prefixed title: "<namespace text>:<title>", no prefix when the text is empty
         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
         if ( $t != "" ) $t .= ":" ;
         $t .= $x->cur_title ;

         if ( $x->cur_namespace == $cns ) {
             $children[] = $sk->makeLink ( $t ) ; # Subcategory
         } else {
             $articles[] = $sk->makeLink ( $t ) ; # Page in this category
         }
     }
     wfFreeResult ( $res ) ;

     # Showing subcategories
     if ( count ( $children ) > 0 ) {
         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
         $r .= implode ( ", " , $children ) ;
     }

     # Showing pages in this category
     if ( count ( $articles ) > 0 ) {
         $ti = $this->mTitle->getText() ;
         $h =  wfMsg( "category_header", $ti );
         $r .= "<h2>{$h}</h2>\n" ;
         $r .= implode ( ", " , $articles ) ;
     }

     return $r ;
 }
 390
 # Returns the list of attribute names (lowercase strings) that are
 # accepted on HTML tags. Every name is listed once.
 function getHTMLattrs ()
 {
     $htmlattrs = array( # Allowed attributes--no scripting, etc.
         "title", "align", "lang", "dir", "width", "height",
         "bgcolor", "clear", /* BR */ "noshade", /* HR */
         "cite", /* BLOCKQUOTE, Q */ "size", "face", "color", /* FONT */
         "type", "start", "value", "compact",
         /* For various lists, mostly deprecated but safe */
         "summary", "border", "frame", "rules",
         "cellspacing", "cellpadding", "valign", "char",
         "charoff", "colgroup", "col", "span", "abbr", "axis",
         "headers", "scope", "rowspan", "colspan", /* Tables */
         "id", "class", "name", "style" /* For CSS */
     );
     return $htmlattrs ;
 }
 407
 # Reduce the attribute part of a tag to the attributes allowed by
 # getHTMLattrs().
 #
 # $t - attribute text of a tag
 # Returns the trimmed, filtered attribute text; "" when the input is blank
 # or when the style check below rejects it.
 function fixTagAttributes ( $t )
 {
     if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

     # Strip non-approved attributes from the tag. A callback is used rather
     # than the /e (eval) modifier: with /e the matched attribute text was
     # interpolated into PHP code, so user input could be evaluated, and
     # single quotes came back with a backslash in front of them.
     $t = preg_replace_callback(
         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
         array( $this, 'fixTagAttributesCallback' ),
         $t );

     # Strip javascript "expression" from stylesheets. Brute force approach:
     # If anything offensive is found, all attributes of the HTML tag are dropped
     if( preg_match(
         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
         wfMungeToUtf8( $t ) ) )
     {
         $t="";
     }

     return trim ( $t ) ;
 }

 # Callback of fixTagAttributes(). $m holds one match: $m[1] is the
 # attribute name, $m[3] (when present) its value including any quotes.
 # Returns 'name=value' or 'name' for an allowed attribute, '' otherwise.
 /* private */ function fixTagAttributesCallback ( $m )
 {
     if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
         return '' ;
     }
     $value = isset( $m[3] ) ? $m[3] : '' ;
     return ( $value !== '' ) ? $m[1] . '=' . $value : $m[1] ;
 }
 430
 431         /* interface with html tidy, used if $wgUseTidy = true */
 function tidy ( $text ) {
     # Wraps $text in a minimal XHTML document, writes it to the stdin of
     # the external program $wgTidyBin and returns what that program writes
     # to stdout. When nothing comes back for a non-empty $text, the input
     # is returned with an HTML comment appended.
     global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
     global $wgInputEncoding, $wgOutputEncoding;
     $fname = "Parser::tidy";
     wfProfileIn( $fname );

     # Build the options in a local variable. Appending to the global
     # $wgTidyOpts would add another encoding flag on every call.
     $opts = $wgTidyOpts;
     $sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );
     switch( strtoupper( $wgOutputEncoding ) ) {
         case 'ISO-8859-1':
             $opts .= $sameEncoding ? ' -latin1' : ' -raw';
             break;
         case 'UTF-8':
             $opts .= $sameEncoding ? ' -utf8' : ' -raw';
             break;
         default:
             $opts .= ' -raw';
     }

     $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
         ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
         '<head><title>test</title></head><body>'.$text.'</body></html>';
     $descriptorspec = array(
         0 => array("pipe", "r"),            # stdin of the child
         1 => array("pipe", "w"),            # stdout of the child
         2 => array("file", "/dev/null", "a") # stderr is discarded
     );

     $cleansource = '';
     $process = proc_open( "$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes );
     if ( is_resource( $process ) ) {
         fwrite( $pipes[0], $wrappedtext );
         fclose( $pipes[0] );
         while ( !feof( $pipes[1] ) ) {
             $cleansource .= fgets( $pipes[1], 1024 );
         }
         fclose( $pipes[1] );
         proc_close( $process );
     }

     wfProfileOut( $fname );

     if( $cleansource == '' && $text != '') {
         wfDebug( "Tidy error detected!\n" );
         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
     } else {
         return $cleansource;
     }
 }
 478
 # Convert table markup to HTML, line by line:
 #   {|  opens a table       |}  closes it        |-  starts a new row
 #   |+  caption             |   data cell(s)     !   header cell(s)
 # Lines outside of a table are left alone. Tables that are still open at
 # the end of the text are closed.
 #
 # $t - text to convert
 # Returns the converted text.
 function doTableStuff ( $t )
 {
     $t = explode ( "\n" , $t ) ;
     # One stack entry per currently open (possibly nested) table
     $td = array () ; # Is currently a td tag open?
     $ltd = array () ; # Was it TD or TH?
     $tr = array () ; # Is currently a tr tag open?
     $ltr = array () ; # tr attributes
     foreach ( $t AS $k => $x )
     {
         $x = trim ( $x ) ;
         $fc = substr ( $x , 0 , 1 ) ;
         if ( "{|" == substr ( $x , 0 , 2 ) )
         {
             # Everything after the two-character marker is attribute text
             # (fixTagAttributes() trims it), so "{|border=1" keeps its "b".
             $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $tr , false ) ;
             array_push ( $ltr , "" ) ;
         }
         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
         else if ( "|}" == substr ( $x , 0 , 2 ) )
         {
             $z = "</table>\n" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
         }
         else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
         {
             $x = substr ( $x , 1 ) ;
             while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
             $z = "" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
             # The <tr> itself is written together with the first cell of the row
             array_push ( $tr , false ) ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
         }
         else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
         {
             if ( "|+" == substr ( $x , 0 , 2 ) )
             {
                 $fc = "+" ;
                 $x = substr ( $x , 1 ) ;
             }
             $after = substr ( $x , 1 ) ;
             if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
             $after = explode ( "||" , $after ) ;
             $t[$k] = "" ;
             foreach ( $after AS $theline )
             {
                 $z = "" ;
                 if ( $fc != "+" )
                 {
                     # Open the row if that has not happened yet
                     $tra = array_pop ( $ltr ) ;
                     if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                     array_push ( $tr , true ) ;
                     array_push ( $ltr , "" ) ;
                 }

                 # Close the previous cell, then open the new one
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                 if ( $fc == "|" ) $l = "td" ;
                 else if ( $fc == "!" ) $l = "th" ;
                 else if ( $fc == "+" ) $l = "caption" ;
                 else $l = "" ;
                 array_push ( $ltd , $l ) ;
                 # "attributes|content" or just "content"
                 $y = explode ( "|" , $theline , 2 ) ;
                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                 $t[$k] .= $y ;
                 array_push ( $td , true ) ;
             }
         }
     }

     # Closing open td, tr && table
     while ( count ( $td ) > 0 )
     {
         # Close the cell with the tag that opened it (td, th or caption)
         $l = array_pop ( $ltd ) ;
         if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
         array_pop ( $ltr ) ;
         $t[] = "</table>" ;
     }

     $t = implode ( "\n" , $t ) ;
     return $t ;
 }
 578
 579         # Parses the text and adds the result to the strip state
 580         # Returns the strip tag
 function stripParse( $text, $newline, $args ) {
     # $newline is used twice: cast to bool it is the $linestart argument of
     # internalParse(), and as a string it is put in front of the marker.
     $stripped = $this->strip( $text, $this->mStripState );
     $atLineStart = (bool)$newline;
     $parsed = $this->internalParse( $stripped, $atLineStart, $args, false );
     $marker = $this->insertStripItem( $parsed, $this->mStripState );
     return $newline . $marker;
 }
 587
 # Runs the conversion steps on $text in a fixed order and returns the result.
 #
 # $text      - text whose <nowiki> etc. sections were already stripped
 # $linestart - not used inside this function
 # $args      - handed on to replaceVariables()
 # $isMain    - handed on to formatHeadings()
 function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
     $fname = "Parser::internalParse";
     wfProfileIn( $fname );

     $text = $this->removeHTMLtags( $text );
     $text = $this->replaceVariables( $text, $args );

     # Four or more dashes at the start of a line become a horizontal rule
     $text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );

     $text = $this->doHeadings( $text );

     if ( $this->mOptions->getUseDynamicDates() ) {
         global $wgDateFormatter;
         $dateFormat = $this->mOptions->getDateFormat();
         $text = $wgDateFormatter->reformat( $dateFormat, $text );
     }

     $text = $this->doAllQuotes( $text );
     $text = $this->replaceExternalLinks( $text );
     # NOTE(review): internal links are replaced twice in a row; kept as
     # found -- confirm whether the second pass is needed.
     $text = $this->replaceInternalLinks ( $text );
     $text = $this->replaceInternalLinks ( $text );
     $text = $this->doTableStuff ( $text ) ;
     $text = $this->magicISBN( $text );
     $text = $this->magicRFC( $text );
     $text = $this->formatHeadings( $text, $isMain );

     $sk =& $this->mOptions->getSkin();
     $text = $sk->transformContent( $text );

     # The category listing is appended by the first call only
     if ( !isset ( $this->categoryMagicDone ) ) {
         $text .= $this->categoryMagic () ;
         $this->categoryMagicDone = true ;
     }

     wfProfileOut( $fname );
     return $text;
 }
 623
 624
 625         /* private */ function doHeadings( $text )
 626         {
 627                 for ( $i = 6; $i >= 1; --$i ) {
 628                         $h = substr( "======", 0, $i );
 629                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 630                           "<h{$i}>\\1</h{$i}>\\2", $text );
 631                 }
 632                 return $text;
 633         }
 634
 635         /* private */ function doAllQuotes( $text )
 636         {
 637                 $outtext = "";
 638                 $lines = explode( "\n", $text );
 639                 foreach ( $lines as $line ) {
 640                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\n";
 641                 }
 642                 return substr($outtext, 0,-1);
 643         }
 644
 645         /* private */ function doQuotes( $pre, $text, $mode )
 646         {
 647                 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
 648                         $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
 649                         $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
 650                         if ( substr ($m[2], 0, 1) == "'" ) {
 651                                 $m[2] = substr ($m[2], 1);
 652                                 if ($mode == "em") {
 653                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
 654                                 } else if ($mode == "strong") {
 655                                         return $m1_strong . $this->doQuotes ( "", $m[2], "" );
 656                                 } else if (($mode == "emstrong") || ($mode == "both")) {
 657                                         return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
 658                                 } else if ($mode == "strongem") {
 659                                         return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
 660                                 } else {
 661                                         return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
 662                                 }
 663                         } else {
 664                                 if ($mode == "strong") {
 665                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
 666                                 } else if ($mode == "em") {
 667                                         return $m1_em . $this->doQuotes ( "", $m[2], "" );
 668                                 } else if ($mode == "emstrong") {
 669                                         return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
 670                                 } else if (($mode == "strongem") || ($mode == "both")) {
 671                                         return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
 672                                 } else {
 673                                         return $m[1] . $this->doQuotes ( "", $m[2], "em" );
 674                                 }
 675                         }
 676                 } else {
 677                         $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
 678                         $text_em = ($text == "") ? "" : "<em>{$text}</em>";
 679                         if ($mode == "") {
 680                                 return $pre . $text;
 681                         } else if ($mode == "em") {
 682                                 return $pre . $text_em;
 683                         } else if ($mode == "strong") {
 684                                 return $pre . $text_strong;
 685                         } else if ($mode == "strongem") {
 686                                 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
 687                         } else {
 688                                 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
 689                         }
 690                 }
 691         }
 692
 693         # Note: we have to do external links before the internal ones,
 694         # and otherwise take great care in the order of things here, so
 695         # that we don't end up interpreting some URLs twice.
 696
 697         /* private */ function replaceExternalLinks( $text )
 698         {
 699                 $fname = "Parser::replaceExternalLinks";
 700                 wfProfileIn( $fname );
 701                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 702                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 703                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 704                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 705                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 706                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 707                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 708                 wfProfileOut( $fname );
 709                 return $text;
 710         }
 711
 712         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 713         {
 714                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 715                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 716
 717                 # this is  the list of separators that should be ignored if they
 718                 # are the last character of an URL but that should be included
 719                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 720                 # in this case, the last comma should not become part of the URL,
 721                 # but in "www.foo.com/123,2342,32.htm" it should.
 722                 $sep = ",;\.:";
 723                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 724                 $images = "gif|png|jpg|jpeg";
 725
 726                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 727                 # they are interpreted as part of the string (used to tell PHP
 728                 # that the content of the string should be inserted there).
 729                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 730                   "((?i){$images})([^{$uc}]|$)/";
 731
 732                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 733                 $sk =& $this->mOptions->getSkin();
 734
 735                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 736                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 737                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 738                 }
 739                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 740                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 741                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 742                   "</a>\\5", $s );
 743                 $s = str_replace( $unique, $protocol, $s );
 744
 745                 $a = explode( "[{$protocol}:", " " . $s );
 746                 $s = array_shift( $a );
 747                 $s = substr( $s, 1 );
 748
 749                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 750                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 751
 752                 foreach ( $a as $line ) {
 753                         if ( preg_match( $e1, $line, $m ) ) {
 754                                 $link = "{$protocol}:{$m[1]}";
 755                                 $trail = $m[2];
 756                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 757                                 else { $text = wfEscapeHTML( $link ); }
 758                         } else if ( preg_match( $e2, $line, $m ) ) {
 759                                 $link = "{$protocol}:{$m[1]}";
 760                                 $text = $m[2];
 761                                 $trail = $m[3];
 762                         } else {
 763                                 $s .= "[{$protocol}:" . $line;
 764                                 continue;
 765                         }
 766                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 767                                 $paren = "";
 768                         } else {
 769                                 # Expand the URL for printable version
 770                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 771                         }
 772                         $la = $sk->getExternalLinkAttributes( $link, $text );
 773                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 774
 775                 }
 776                 return $s;
 777         }
 778
 779
 780         /* private */ function replaceInternalLinks( $s )
 781         {
 782                 global $wgLang, $wgLinkCache;
 783                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 784                 static $fname = "Parser::replaceInternalLink" ;
 785                 wfProfileIn( $fname );
 786
 787                 wfProfileIn( "$fname-setup" );
 788                 static $tc = FALSE;
 789                 # the % is needed to support urlencoded titles as well
 790                 if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
 791                 $sk =& $this->mOptions->getSkin();
 792
 793                 $a = explode( "[[", " " . $s );
 794                 $s = array_shift( $a );
 795                 $s = substr( $s, 1 );
 796
 797                 # Match a link having the form [[namespace:link|alternate]]trail
 798                 static $e1 = FALSE;
 799                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 800                 # Match the end of a line for a word that's not followed by whitespace,
 801                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 802                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 803                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 804                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 805
 806
 807                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 808                 static $image = FALSE;
 809                 static $special = FALSE;
 810                 static $media = FALSE;
 811                 static $category = FALSE;
 812                 if ( !$image ) { $image = Namespace::getImage(); }
 813                 if ( !$special ) { $special = Namespace::getSpecial(); }
 814                 if ( !$media ) { $media = Namespace::getMedia(); }
 815                 if ( !$category ) { $category = Namespace::getCategory(); }
 816
 817                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 818
 819                 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
 820                         $new_prefix = $m[2];
 821                         $s = $m[1];
 822                 } else {
 823                         $new_prefix="";
 824                 }
 825
 826                 wfProfileOut( "$fname-setup" );
 827
 828                 foreach ( $a as $line ) {
 829                         $prefix = $new_prefix;
 830
 831                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 832                                 $text = $m[2];
 833                                 # fix up urlencoded title texts
 834                                 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
 835                                 $trail = $m[3];
 836                         } else { # Invalid form; output directly
 837                                 $s .= $prefix . "[[" . $line ;
 838                                 wfProfileOut( $fname );
 839                                 continue;
 840                         }
 841
 842                         /* Valid link forms:
 843                         Foobar -- normal
 844                         :Foobar -- override special treatment of prefix (images, language links)
 845                         /Foobar -- convert to CurrentPage/Foobar
 846                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 847                         */
 848                         $c = substr($m[1],0,1);
 849                         $noforce = ($c != ":");
 850                         if( $c == "/" ) { # subpage
 851                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 852                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 853                                         $noslash=$m[1];
 854                                 } else {
 855                                         $noslash=substr($m[1],1);
 856                                 }
 857                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
 858                                         $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 859                                         if( "" == $text ) {
 860                                                 $text= $m[1];
 861                                         } # this might be changed for ugliness reasons
 862                                 } else {
 863                                         $link = $noslash; # no subpage allowed, use standard link
 864                                 }
 865                         } elseif( $noforce ) { # no subpage
 866                                 $link = $m[1];
 867                         } else {
 868                                 $link = substr( $m[1], 1 );
 869                         }
 870                         $wasblank = ( "" == $text );
 871                         if( $wasblank )
 872                         $text = $link;
 873
 874                         $nt = Title::newFromText( $link );
 875                         if( !$nt ) {
 876                                 $s .= $prefix . "[[" . $line;
 877                                 wfProfileOut( $fname );
 878                                 continue;
 879                         }
 880                         $ns = $nt->getNamespace();
 881                         $iw = $nt->getInterWiki();
 882                         if( $noforce ) {
 883                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 884                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 885                                         $tmp = $prefix . $trail ;
 886                                         wfProfileOut( $fname );
 887                                         $s .= (trim($tmp) == '')? '': $tmp;
 888                                         continue;
 889                                 }
 890                                 if ( $ns == $image ) {
 891                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 892                                         $wgLinkCache->addImageLinkObj( $nt );
 893                                         wfProfileOut( $fname );
 894                                         continue;
 895                                 }
 896                                 if ( $ns == $category ) {
 897                                         $t = $nt->getText() ;
 898                                         $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 899
 900                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
 901                                         $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 902                                         $wgLinkCache->resume();
 903
 904                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
 905                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
 906                                         $this->mOutput->mCategoryLinks[] = $t ;
 907                                         $s .= $prefix . $trail ;
 908                                         wfProfileOut( $fname );
 909                                         continue;
 910                                 }
 911                         }
 912                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 913                         ( strpos( $link, "#" ) == FALSE ) ) {
 914                                 # Self-links are handled specially; generally de-link and change to bold.
 915                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 916                                 wfProfileOut( $fname );
 917                                 continue;
 918                         }
 919
 920                         if( $ns == $media ) {
 921                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 922                                 $wgLinkCache->addImageLinkObj( $nt );
 923                                 wfProfileOut( $fname );
 924                                 continue;
 925                         } elseif( $ns == $special ) {
 926                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 927                                 wfProfileOut( $fname );
 928                                 continue;
 929                         }
 930                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 931                 }
 932                 wfProfileOut( $fname );
 933                 return $s;
 934         }
 935
 936         # Some functions here used by doBlockLevels()
 937         #
 938         /* private */ function closeParagraph()
 939         {
 940                 $result = "";
 941                 if ( '' != $this->mLastSection ) {
 942                         $result = "</" . $this->mLastSection  . ">\n";
 943                 }
 944                 $this->mInPre = false;
 945                 $this->mLastSection = "";
 946                 return $result;
 947         }
 948         # getCommon() returns the length of the longest common substring
 949         # of both arguments, starting at the beginning of both.
 950         #
 951         /* private */ function getCommon( $st1, $st2 )
 952         {
 953                 $fl = strlen( $st1 );
 954                 $shorter = strlen( $st2 );
 955                 if ( $fl < $shorter ) { $shorter = $fl; }
 956
 957                 for ( $i = 0; $i < $shorter; ++$i ) {
 958                         if ( $st1{$i} != $st2{$i} ) { break; }
 959                 }
 960                 return $i;
 961         }
 962         # These next three functions open, continue, and close the list
 963         # element appropriate to the prefix character passed into them.
 964         #
 965         /* private */ function openList( $char )
 966     {
 967                 $result = $this->closeParagraph();
 968
 969                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 970                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 971                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 972                 else if ( ";" == $char ) {
 973                         $result .= "<dl><dt>";
 974                         $this->mDTopen = true;
 975                 }
 976                 else { $result = "<!-- ERR 1 -->"; }
 977
 978                 return $result;
 979         }
 980
 981         /* private */ function nextItem( $char )
 982         {
 983                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 984                 else if ( ":" == $char || ";" == $char ) {
 985                         $close = "</dd>";
 986                         if ( $this->mDTopen ) { $close = "</dt>"; }
 987                         if ( ";" == $char ) {
 988                                 $this->mDTopen = true;
 989                                 return $close . "<dt>";
 990                         } else {
 991                                 $this->mDTopen = false;
 992                                 return $close . "<dd>";
 993                         }
 994                 }
 995                 return "<!-- ERR 2 -->";
 996         }
 997
 998         /* private */function closeList( $char )
 999         {
1000                 if ( "*" == $char ) { $text = "</li></ul>"; }
1001                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1002                 else if ( ":" == $char ) {
1003                         if ( $this->mDTopen ) {
1004                                 $this->mDTopen = false;
1005                                 $text = "</dt></dl>";
1006                         } else {
1007                                 $text = "</dd></dl>";
1008                         }
1009                 }
1010                 else {  return "<!-- ERR 3 -->"; }
1011                 return $text."\n";
1012         }
1013
1014         /* private */ function doBlockLevels( $text, $linestart ) {
1015                 $fname = "Parser::doBlockLevels";
1016                 wfProfileIn( $fname );
1017
1018                 # Parsing through the text line by line.  The main thing
1019                 # happening here is handling of block-level elements p, pre,
1020                 # and making lists from lines starting with * # : etc.
1021                 #
1022
1023                 // Strip nowiki's again.
1024                 $text = $this->strip($text,$dblStripState);
1025                 $textLines = explode( "\n", $text );
1026
1027                 $lastPrefix = $output = $lastLine = '';
1028                 $this->mDTopen = $inBlockElem = false;
1029                 $prefixLength = 0;
1030                 $paragraphStack = false;
1031
1032                 if ( !$linestart ) {
1033                         $output .= array_shift( $textLines );
1034                 }
1035                 foreach ( $textLines as $oLine ) {
1036                         $lastPrefixLength = strlen( $lastPrefix );
1037                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1038                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1039                         $nowikiOpenMatch = preg_match("/<span class=\"nowiki\"/", $oLine );
1040                         $nowikiCloseMatch = preg_match("/<\\/span  >/", $oLine );
1041                         if($nowikiOpenMatch) $nowikiFullMatch = preg_match("/^(.*)<span class=\"nowiki\"/", $oLine, $nowikiOpenMatches );
1042                         if (!$this->mInPre) {
1043                                 $this->mInPre = !empty($preOpenMatch);
1044                         }
1045                         if (!$this->mInNowiki) {
1046                                 $this->mInNowiki = !empty($nowikiOpenMatch);
1047                         }
1048                         if (
1049                                 !$this->mInPre && (!$this->mInNowiki ||
1050                                 ($nowikiOpenMatch && strlen($nowikiOpenMatches[1]) > 0) )
1051                         )
1052                         {
1053                                 # Multiple prefixes may abut each other for nested lists.
1054                                 $prefixLength = strspn( $oLine, "*#:;" );
1055                                 $pref = substr( $oLine, 0, $prefixLength );
1056
1057                                 # eh?
1058                                 $pref2 = str_replace( ";", ":", $pref );
1059                                 $t = substr( $oLine, $prefixLength );
1060                         } else {
1061                                 # Don't interpret any other prefixes in preformatted text
1062                                 $prefixLength = 0;
1063                                 $pref = $pref2 = '';
1064                                 $t = $oLine;
1065                         }
1066
1067                         # List generation
1068                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1069                                 # Same as the last item, so no need to deal with nesting or opening stuff
1070                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1071                                 $paragraphStack = false;
1072
1073                                 if ( ";" == substr( $pref, -1 ) ) {
1074                                         # The one nasty exception: definition lists work like this:
1075                                         # ; title : definition text
1076                                         # So we check for : in the remainder text to split up the
1077                                         # title and definition, without b0rking links.
1078                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1079                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1080                                                 $term = $match[1];
1081                                                 $output .= $term . $this->nextItem( ":" );
1082                                                 $t = $match[2];
1083                                         }
1084                                 }
1085                         } elseif( $prefixLength || $lastPrefixLength ) {
1086                                 # Either open or close a level...
1087                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1088                                 $paragraphStack = false;
1089
1090                                 while( $commonPrefixLength < $lastPrefixLength ) {
1091                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1092                                         --$lastPrefixLength;
1093                                 }
1094                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1095                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1096                                 }
1097                                 while ( $prefixLength > $commonPrefixLength ) {
1098                                         $char = substr( $pref, $commonPrefixLength, 1 );
1099                                         $output .= $this->openList( $char );
1100
1101                                         if ( ";" == $char ) {
1102                                                 # FIXME: This is dupe of code above
1103                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1104                                                         $term = $match[1];
1105                                                         $output .= $term . $this->nextItem( ":" );
1106                                                         $t = $match[2];
1107                                                 }
1108                                         }
1109                                         ++$commonPrefixLength;
1110                                 }
1111                                 $lastPrefix = $pref2;
1112                         }
1113                         if( 0 == $prefixLength ) {
1114                                 # No prefix (not in list)--go to paragraph mode
1115                                 $uniq_prefix = UNIQ_PREFIX;
1116                                 // XXX: use a stack for nestable elements like span, table and div
1117                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/td|<\\/th)/i", $t );
1118                                 $closematch = preg_match(
1119                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1120                                         "<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1121                                 if ( $openmatch or $closematch ) {
1122                                         $paragraphStack = false;
1123                                         $output .= $this->closeParagraph();
1124                                         if($preOpenMatch and !$preCloseMatch) {
1125                                                 $this->mInPre = true;
1126                                         }
1127                                         if ( $closematch  ) {
1128                                                 $inBlockElem = false;
1129                                         } else {
1130                                                 $inBlockElem = true;
1131                                         }
1132                                 } else if (
1133                                         !$inBlockElem && !$this->mInPre &&
1134                                         (!$this->mInNowiki || ($nowikiOpenMatch && trim($nowikiOpenMatches[1]) == ''  ) ) )
1135                                         {
1136                                         if ( " " == $t{0} and trim($t) != '' and (!$this->mInNowiki || $nowikiOpenMatch && strlen($nowikiOpenMatches[1]) > 0 ) ) {
1137                                                 // pre
1138                                                 if ($this->mLastSection != 'pre') {
1139                                                         $paragraphStack = false;
1140                                                         $output .= $this->closeParagraph().'<pre>';
1141                                                         $this->mLastSection = 'pre';
1142                                                 }
1143                                         } else {
1144                                                 // paragraph
1145                                                 if ( '' == trim($t) ) {
1146                                                         if ( $paragraphStack ) {
1147                                                                 $output .= $paragraphStack.'<br />';
1148                                                                 $paragraphStack = false;
1149                                                                 $this->mLastSection = 'p';
1150                                                         } else {
1151                                                                 if ($this->mLastSection != 'p' ) {
1152                                                                         $output .= $this->closeParagraph();
1153                                                                         $this->mLastSection = '';
1154                                                                         $paragraphStack = "<p>";
1155                                                                 } else {
1156                                                                         $paragraphStack = '</p><p>';
1157                                                                 }
1158                                                         }
1159                                                 } else {
1160                                                         if ( $paragraphStack ) {
1161                                                                 $output .= $paragraphStack;
1162                                                                 $paragraphStack = false;
1163                                                                 $this->mLastSection = 'p';
1164                                                         } else if ($this->mLastSection != 'p') {
1165                                                                 $output .= $this->closeParagraph().'<p>';
1166                                                                 $this->mLastSection = 'p';
1167                                                         }
1168                                                 }
1169                                         }
1170                                 }
1171                         }
1172                         if($nowikiCloseMatch) $this->mInNowiki = false;
1173                         if ($paragraphStack === false) {
1174                                 $output .= $t."\n";
1175                         }
1176                 }
1177                 while ( $prefixLength ) {
1178                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1179                         --$prefixLength;
1180                 }
1181                 if ( "" != $this->mLastSection ) {
1182                         $output .= "</" . $this->mLastSection . ">";
1183                         $this->mLastSection = "";
1184                 }
1185                 $output = $this->unstrip( $output, $dblStripState );
1186
1187                 wfProfileOut( $fname );
1188                 return $output;
1189         }
1190
1191         # Computes the current value of a single magic variable.
1192         #   $index  one of the MAG_* ids handled in the switch below
1193         # Returns the value for that id, or NULL when the id is not handled here.
1194         function getVariableValue( $index ) {
1195                 global $wgLang, $wgSitename, $wgServer;
1196
1197                 switch ( $index ) {
1198                         # date( "m" ): month number with a leading zero
1199                         case MAG_CURRENTMONTH: return date( "m" );
1200                         # date( "n" ): month number without leading zero, turned into a name by $wgLang
1201                         case MAG_CURRENTMONTHNAME: return $wgLang->getMonthName( date("n") );
1202                         # Same month number, passed to the genitive-form lookup
1203                         case MAG_CURRENTMONTHNAMEGEN: return $wgLang->getMonthNameGen( date("n") );
1204                         # date( "j" ): day of the month without leading zero
1205                         case MAG_CURRENTDAY: return date("j");
1206                         # Text of $this->mTitle
1207                         case MAG_PAGENAME: return $this->mTitle->getText();
1208                         # Namespace name as reported by $wgLang (patch by Dori)
1209                         case MAG_NAMESPACE: return $wgLang->getNsText($this->mTitle->getNamespace());
1210                         # date( "w" ) counts from 0 for Sunday; getWeekdayName() is given 1-7
1211                         case MAG_CURRENTDAYNAME: return $wgLang->getWeekdayName( date("w")+1 );
1212                         # date( "Y" ): four-digit year
1213                         case MAG_CURRENTYEAR: return date( "Y" );
1214                         # Current timestamp formatted by $wgLang->time()
1215                         case MAG_CURRENTTIME: return $wgLang->time( wfTimestampNow(), false );
1216                         # Site-wide values
1217                         case MAG_NUMBEROFARTICLES: return wfNumberOfArticles();
1218                         case MAG_SITENAME: return $wgSitename;
1219                         case MAG_SERVER: return $wgServer;
1220                         # Any other id is not handled by this method
1221                         default: return NULL;
1222                 }
1223         }
1224
1225         # Resets $this->mVariables and passes it, with each id's current value, to MagicWord::addToArray()
1226         function initialiseVariables() {
1227                 global $wgVariableIDs;
1228                 $this->mVariables = array();
1229                 foreach ( $wgVariableIDs as $variableId ) {
1230                         $magicWord =& MagicWord::get( $variableId );
1231                         $magicWord->addToArray( $this->mVariables, $this->getVariableValue( $variableId ) );
1232                 }
1233         }
1234
1235         # Expands double- and triple-brace constructs in $text.
1236         #   $text  wikitext to process
1237         #   $args  arguments for this level; pushed on $this->mArgStack for the duration of the
1238         #          call (presumably name/position => value - TODO confirm with callers)
1239         # Returns the text after the wf*Substitution callbacks have replaced every match.
1240         # Variable and argument substitution only run for OT_HTML output.
1241         /* private */ function replaceVariables( $text, $args = array() )
1242         {
1243                 $fname = "Parser::replaceVariables";
1244                 wfProfileIn( $fname );
1245
1246                 if ( !$this->mVariables ) {
1247                         $this->initialiseVariables();
1248                 }
1249                 $titleChars = Title::legalChars();
1250                 $nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );
1251
1252                 # This function is called recursively. To keep track of arguments we need a stack:
1253                 array_push( $this->mArgStack, $args );
1254
1255                 # Presumably lets the string-named callbacks reach this parser; rebinding needs $GLOBALS
1256                 $GLOBALS['wgCurParser'] =& $this;
1257
1258                 if ( $this->mOutputType == OT_HTML ) {
1259                         # Variable substitution
1260                         $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );
1261                         # Argument substitution
1262                         $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
1263                 }
1264                 # Template substitution
1265                 $regex = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";
1266                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1267
1268                 array_pop( $this->mArgStack );
1269                 wfProfileOut( $fname );
1270                 return $text;
1271         }
1272
1273         # Takes a preg-style $matches array for {{NAME}}: returns the stored value of NAME, or the match unchanged
1274         function variableSubstitution( $matches ) {
1275                 $name = $matches[1];
1276                 if ( !array_key_exists( $name, $this->mVariables ) ) {
1277                         return $matches[0];
1278                 }
1279                 # A variable was expanded, so record that on the output object
1280                 $this->mOutput->mContainsOldMagic = true;
1281                 return $this->mVariables[$name];
1282         }
1283
1284         function braceSubstitution( $matches )
1285         {       # Expands one {{...}} match; returns the replacement, or $matches[0] when nothing applies
1286                 global $wgLinkCache, $wgLang;
1287                 $fname = "Parser::braceSubstitution";   # not referenced again in this function
1288                 $found = false;         # set once some branch below has produced $text
1289                 $nowiki = false;        # set by MSGNW: the result is escaped rather than parsed
1290                 $noparse = false;       # set when $text must skip the recursive parse at the end
1291
1292                 $title = NULL;          # stays NULL unless LOCALURL(E) or the database load builds a Title
1293
1294                 # $newline is an optional newline character before the braces
1295                 # $part1 is the bit before the first |, and must contain only title characters
1296                 # $args is a list of arguments, starting from index 0, not including $part1
1297
1298                 $newline = $matches[1];
1299                 $part1 = $matches[2];
1300                 # If the third subpattern matched anything, it will start with |
1301                 if ( $matches[3] !== "" ) {
1302                         $args = explode( "|", substr( $matches[3], 1 ) );
1303                 } else {
1304                         $args = array();
1305                 }
1306                 $argc = count( $args );
1307
1308                 # {{{}}}: a match that contains triple braces is handed back untouched
1309                 if ( strpos( $matches[0], "{{{" ) !== false ) {
1310                         $text = $matches[0];
1311                         $found = true;
1312                         $noparse = true;
1313                 }
1314
1315                 # SUBST
1316                 if ( !$found ) {
1317                         $mwSubst =& MagicWord::get( MAG_SUBST );
1318                         if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1319                                 if ( $this->mOutputType != OT_WIKI ) {
1320                                         # Invalid SUBST not replaced at PST time
1321                                         # Return without further processing
1322                                         $text = $matches[0];
1323                                         $found = true;
1324                                         $noparse= true;
1325                                 }
1326                         } elseif ( $this->mOutputType == OT_WIKI ) {
1327                                 # SUBST not found in PST pass, do nothing
1328                                 $text = $matches[0];
1329                                 $found = true;
1330                         }
1331                 }
1332
1333                 # MSG, MSGNW and INT
1334                 if ( !$found ) {
1335                         # Check for MSGNW:
1336                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1337                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1338                                 $nowiki = true;
1339                         } else {
1340                                 # Remove obsolete MSG:
1341                                 $mwMsg =& MagicWord::get( MAG_MSG );
1342                                 $mwMsg->matchStartAndRemove( $part1 );
1343                         }
1344
1345                         # Check if it is an internal message
1346                         $mwInt =& MagicWord::get( MAG_INT );
1347                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1348                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1349                                         $text = wfMsgReal( $part1, $args, true );
1350                                         $found = true;
1351                                 }
1352                         }
1353                 }
1354
1355                 # NS
1356                 if ( !$found ) {
1357                         # Check for NS: (namespace expansion)
1358                         $mwNs = MagicWord::get( MAG_NS );       # NOTE(review): plain =, the lookups above use =&
1359                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1360                                 if ( intval( $part1 ) ) {       # non-zero number: use it as the namespace index
1361                                         $text = $wgLang->getNsText( intval( $part1 ) );
1362                                         $found = true;
1363                                 } else {        # anything whose intval() is 0 is tried as a canonical name
1364                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1365                                         if ( !is_null( $index ) ) {
1366                                                 $text = $wgLang->getNsText( $index );
1367                                                 $found = true;
1368                                         }
1369                                 }
1370                         }
1371                 }
1372
1373                 # LOCALURL and LOCALURLE
1374                 if ( !$found ) {
1375                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1376                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1377
1378                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1379                                 $func = 'getLocalURL';
1380                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1381                                 $func = 'escapeLocalURL';
1382                         } else {
1383                                 $func = '';
1384                         }
1385
1386                         if ( $func !== '' ) {
1387                                 $title = Title::newFromText( $part1 );
1388                                 if ( !is_null( $title ) ) {
1389                                         if ( $argc > 0 ) {
1390                                                 $text = $title->$func( $args[0] );      # only the first argument is passed on
1391                                         } else {
1392                                                 $text = $title->$func();
1393                                         }
1394                                         $found = true;
1395                                 }
1396                         }
1397                 }
1398
1399                 # Internal variables
1400                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1401                         $text = $this->mVariables[$part1];
1402                         $found = true;
1403                         $this->mOutput->mContainsOldMagic = true;
1404                 }
1405 /*
1406                 # Arguments input from the caller
1407                 $inputArgs = end( $this->mArgStack );
1408                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1409                         $text = $inputArgs[$part1];
1410                         $found = true;
1411                 }
1412 */
1413                 # Load from database
1414                 if ( !$found ) {
1415                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1416                         if ( !is_null( $title ) && !$title->isExternal() ) {
1417                                 # Check for excessive inclusion
1418                                 $dbk = $title->getPrefixedDBkey();
1419                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1420                                         $article = new Article( $title );
1421                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1422                                         if ( $articleContent !== false ) {
1423                                                 $found = true;
1424                                                 $text = $articleContent;
1425
1426                                         }
1427                                 }
1428
1429                                 # If the title is valid but undisplayable, make a link to it
1430                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1431                                         $text = "[[" . $title->getPrefixedText() . "]]";
1432                                         $found = true;
1433                                 }
1434                         }
1435                 }
1436
1437                 # Recursive parsing, escaping and link table handling
1438                 # Only for HTML output
1439                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1440                         $text = wfEscapeWikiText( $text );
1441                 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1442                         # Clean up argument array: plain arguments are numbered from 1, name=value pairs keyed by trimmed name
1443                         $assocArgs = array();
1444                         $index = 1;
1445                         foreach( $args as $arg ) {
1446                                 $eqpos = strpos( $arg, "=" );
1447                                 if ( $eqpos === false ) {
1448                                         $assocArgs[$index++] = $arg;
1449                                 } else {
1450                                         $name = trim( substr( $arg, 0, $eqpos ) );
1451                                         $value = trim( substr( $arg, $eqpos+1 ) );
1452                                         if ( $value === false ) {       # NOTE(review): trim() returns a string, so this looks unreachable
1453                                                 $value = "";
1454                                         }
1455                                         if ( $name !== false ) {        # NOTE(review): likewise always true after trim()
1456                                                 $assocArgs[$name] = $value;
1457                                         }
1458                                 }
1459                         }
1460
1461                         # Do not enter included links in link table
1462                         if ( !is_null( $title ) ) {     # NOTE(review): also true when $title was set by LOCALURL(E)
1463                                 $wgLinkCache->suspend();
1464                         }
1465
1466                         # Run full parser on the included text
1467                         $text = $this->stripParse( $text, $newline, $assocArgs );
1468
1469                         # Resume the link cache and register the inclusion as a link
1470                         if ( !is_null( $title ) ) {
1471                                 $wgLinkCache->resume();
1472                                 $wgLinkCache->addLinkObj( $title );
1473                         }
1474                 }
1475
1476                 if ( !$found ) {
1477                         return $matches[0];
1478                 } else {
1479                         return $text;
1480                 }
1481         }
1482
1483         # Triple brace replacement for a {{{name}}} template argument.
1484         #   $matches  [0] whole match, [1] optional newline before the braces, [2] argument name
1485         # Looks the trimmed name up in the last entry of $this->mArgStack; a hit is run through
1486         # stripParse() with an empty argument list, a miss returns the match unchanged.
1487         function argSubstitution( $matches ) {
1488                 $leadingNewline = $matches[1];
1489                 $name = trim( $matches[2] );
1490                 $frame = end( $this->mArgStack );
1491
1492                 if ( !array_key_exists( $name, $frame ) ) {
1493                         return $matches[0];
1494                 }
1495                 return $this->stripParse( $frame[$name], $leadingNewline, array() );
1496         }
1497
1498         # Counts one more inclusion of $dbk and reports whether it is still within the limit.
1499         #   $dbk  key into $this->mIncludeCount; callers in this file pass a prefixed DB key
1500         #         or an "int:..." string
1501         # Returns true while the count for $dbk is at most MAX_INCLUDE_REPEAT, false afterwards.
1502         function incrementIncludeCount( $dbk ) {
1503                 if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
1504                         $this->mIncludeCount[$dbk]++;
1505                 } else {
1506                         $this->mIncludeCount[$dbk] = 1;
1507                 }
1508                 return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
1509         }
1510
1511
1512         # Cleans up HTML, removes dangerous tags and attributes; returns $text with non-whitelisted tags escaped
1513         /* private */ function removeHTMLtags( $text )
1514         {
1515                 global $wgUseTidy, $wgUserHtml;
1516                 $fname = "Parser::removeHTMLtags";
1517                 wfProfileIn( $fname );
1518
1519                 if( $wgUserHtml ) {
1520                         $htmlpairs = array( # Tags that must be closed
1521                                 "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1522                                 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1523                                 "strike", "strong", "tt", "var", "div", "center",
1524                                 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1525                                 "ruby", "rt" , "rb" , "rp", "p"
1526                         );
1527                         $htmlsingle = array(
1528                                 "br", "hr", "li", "dt", "dd"
1529                         );
1530                         $htmlnest = array( # Tags that can be nested--??
1531                                 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1532                                 "dl", "font", "big", "small", "sub", "sup"
1533                         );
1534                         $tabletags = array( # Can only appear inside table
1535                                 "td", "th", "tr"
1536                         );
1537                 } else {        # every list is empty, so the loops below escape every tag
1538                         $htmlpairs = array();
1539                         $htmlsingle = array();
1540                         $htmlnest = array();
1541                         $tabletags = array();
1542                 }
1543                 # Table tags are treated as needing no closing tag; $htmlelements is the complete whitelist
1544                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1545                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1546
1547                 $htmlattrs = $this->getHTMLattrs () ;   # not referenced again in this function
1548
1549                 # Remove HTML comments. NOTE(review): the pattern has one group, so "$2" presumably expands to nothing - confirm
1550                 $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );
1551                 # Split at every "<": the first piece is plain text, each later piece begins where a tag would
1552                 $bits = explode( "<", $text );
1553                 $text = array_shift( $bits );
1554                 if(!$wgUseTidy) {
1555                         $tagstack = array(); $tablestack = array();     # open tags; stacks saved while inside a table
1556                         foreach ( $bits as $x ) {
1557                                 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );   # presumably because $regs is empty when $x is not a tag
1558                                 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1559                                 $x, $regs );
1560                                 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1561                                 error_reporting( $prev );
1562
1563                                 $badtag = 0 ;
1564                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1565                                         # Check our stack
1566                                         if ( $slash ) {
1567                                                 # Closing a tag...
1568                                                 if ( ! in_array( $t, $htmlsingle ) &&
1569                                                 ( $ot = @array_pop( $tagstack ) ) != $t ) {     # top of the stack must be the tag being closed
1570                                                         @array_push( $tagstack, $ot );  # mismatch: put the popped entry back
1571                                                         $badtag = 1;
1572                                                 } else {
1573                                                         if ( $t == "table" ) {
1574                                                                 $tagstack = array_pop( $tablestack );   # back to the stack saved when the table opened
1575                                                         }
1576                                                         $newparams = "";
1577                                                 }
1578                                         } else {
1579                                                 # Keep track for later
1580                                                 if ( in_array( $t, $tabletags ) &&
1581                                                 ! in_array( "table", $tagstack ) ) {
1582                                                         $badtag = 1;    # table-only tag with no table on the stack
1583                                                 } else if ( in_array( $t, $tagstack ) &&
1584                                                 ! in_array ( $t , $htmlnest ) ) {
1585                                                         $badtag = 1 ;   # already open and not in the nestable list
1586                                                 } else if ( ! in_array( $t, $htmlsingle ) ) {
1587                                                         if ( $t == "table" ) {
1588                                                                 array_push( $tablestack, $tagstack );   # tags inside the table get a fresh stack
1589                                                                 $tagstack = array();
1590                                                         }
1591                                                         array_push( $tagstack, $t );
1592                                                 }
1593                                                 # Strip non-approved attributes from the tag
1594                                                 $newparams = $this->fixTagAttributes($params);
1595
1596                                         }
1597                                         if ( ! $badtag ) {
1598                                                 $rest = str_replace( ">", "&gt;", $rest );
1599                                                 $text .= "<$slash$t $newparams$brace$rest";
1600                                                 continue;
1601                                         }
1602                                 }
1603                                 $text .= "&lt;" . str_replace( ">", "&gt;", $x);        # rejected or unknown: emit the piece as literal text
1604                         }
1605                         # Close off any remaining tags
1606                         while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
1607                                 $text .= "</$t>\n";
1608                                 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1609                         }
1610                 } else {
1611                         # No stack checks in this branch, only the whitelist and attribute clean-up; this might be possible using tidy itself
1612                         foreach ( $bits as $x ) {
1613                                 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1614                                 $x, $regs );
1615                                 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1616                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1617                                         $newparams = $this->fixTagAttributes($params);
1618                                         $rest = str_replace( ">", "&gt;", $rest );
1619                                         $text .= "<$slash$t $newparams$brace$rest";
1620                                 } else {
1621                                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1622                                 }
1623                         }
1624                 }
1625                 wfProfileOut( $fname );
1626                 return $text;
1627         }
1628
1629
1630 /*
1631  *
1632  * This function accomplishes several tasks:
1633  * 1) Auto-number headings if that option is enabled
1634  * 2) Add an [edit] link to sections for logged-in users who have enabled the option
1635  * 3) Add a table of contents at the top for users who have enabled the option
1636  * 4) Auto-anchor headings
1637  *
1638  * It loops through all headlines, collects the necessary data, then splits up the
1639  * string and re-inserts the newly formatted headlines.
1640  *
1641  */
1642
1643         /* private */ function formatHeadings( $text, $isMain=true )
1644         {
1645                 global $wgInputEncoding;
1646
1647                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1648                 $doShowToc = $this->mOptions->getShowToc();
1649                 if( !$this->mTitle->userCanEdit() ) {
1650                         $showEditLink = 0;
1651                         $rightClickHack = 0;
1652                 } else {
1653                         $showEditLink = $this->mOptions->getEditSection();
1654                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1655                 }
1656
1657                 # Inhibit editsection links if requested in the page
1658                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1659                 if( $esw->matchAndRemove( $text ) ) {
1660                         $showEditLink = 0;
1661                 }
1662                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1663                 # do not add TOC
1664                 $mw =& MagicWord::get( MAG_NOTOC );
1665                 if( $mw->matchAndRemove( $text ) ) {
1666                         $doShowToc = 0;
1667                 }
1668
1669                 # never add the TOC to the Main Page. This is an entry page that should not
1670                 # be more than 1-2 screens large anyway
1671                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1672                         $doShowToc = 0;
1673                 }
1674
1675                 # Get all headlines for numbering them and adding funky stuff like [edit]
1676                 # links - this is for later, but we need the number of headlines right now
1677                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1678
1679                 # if there are fewer than 4 headlines in the article, do not show TOC
1680                 if( $numMatches < 4 ) {
1681                         $doShowToc = 0;
1682                 }
1683
1684                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1685                 # override above conditions and always show TOC
1686                 $mw =& MagicWord::get( MAG_FORCETOC );
1687                 if ($mw->matchAndRemove( $text ) ) {
1688                         $doShowToc = 1;
1689                 }
1690
1691
1692                 # We need this to perform operations on the HTML
1693                 $sk =& $this->mOptions->getSkin();
1694
1695                 # headline counter
1696                 $headlineCount = 0;
1697
1698                 # Ugh .. the TOC should have neat indentation levels which can be
1699                 # passed to the skin functions. These are determined here
1700                 $toclevel = 0;
1701                 $toc = "";
1702                 $full = "";
1703                 $head = array();
1704                 $sublevelCount = array();
1705                 $level = 0;
1706                 $prevlevel = 0;
1707                 foreach( $matches[3] as $headline ) {
1708                         $numbering = "";
1709                         if( $level ) {
1710                                 $prevlevel = $level;
1711                         }
1712                         $level = $matches[1][$headlineCount];
1713                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1714                                 # reset when we enter a new level
1715                                 $sublevelCount[$level] = 0;
1716                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1717                                 $toclevel += $level - $prevlevel;
1718                         }
1719                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1720                                 # reset when we step back a level
1721                                 $sublevelCount[$level+1]=0;
1722                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1723                                 $toclevel -= $prevlevel - $level;
1724                         }
1725                         # count number of headlines for each level
1726                         @$sublevelCount[$level]++;
1727                         if( $doNumberHeadings || $doShowToc ) {
1728                                 $dot = 0;
1729                                 for( $i = 1; $i <= $level; $i++ ) {
1730                                         if( !empty( $sublevelCount[$i] ) ) {
1731                                                 if( $dot ) {
1732                                                         $numbering .= ".";
1733                                                 }
1734                                                 $numbering .= $sublevelCount[$i];
1735                                                 $dot = 1;
1736                                         }
1737                                 }
1738                         }
1739
1740                         # The canonized header is a version of the header text safe to use for links
1741                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1742                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1743
1744                         # strip out HTML
1745                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1746                         $tocline = trim( $canonized_headline );
1747                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
1748                         # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
1749                         $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
1750                         $refer[$headlineCount] = $canonized_headline;
1751
1752                         # count how many in assoc. array so we can track dupes in anchors
1753                         @$refers[$canonized_headline]++;
1754                         $refcount[$headlineCount]=$refers[$canonized_headline];
1755
1756                         # Prepend the number to the heading text
1757
1758                         if( $doNumberHeadings || $doShowToc ) {
1759                                 $tocline = $numbering . " " . $tocline;
1760
1761                                 # Don't number the heading if it is the only one (looks silly)
1762                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1763                                         # the two are different if the line contains a link
1764                                         $headline=$numbering . " " . $headline;
1765                                 }
1766                         }
1767
1768                         # Create the anchor for linking from the TOC to the section
1769                         $anchor = $canonized_headline;
1770                         if($refcount[$headlineCount] > 1 ) {
1771                                 $anchor .= "_" . $refcount[$headlineCount];
1772                         }
1773                         if( $doShowToc ) {
1774                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1775                         }
1776                         if( $showEditLink ) {
1777                                 if ( empty( $head[$headlineCount] ) ) {
1778                                         $head[$headlineCount] = "";
1779                                 }
1780                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1781                         }
1782
1783                         # Add the edit section span
1784                         if( $rightClickHack ) {
1785                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1786                         }
1787
1788                         # give headline the correct <h#> tag
1789                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1790
1791                         $headlineCount++;
1792                 }
1793
1794                 if( $doShowToc ) {
1795                         $toclines = $headlineCount;
1796                         $toc .= $sk->tocUnindent( $toclevel );
1797                         $toc = $sk->tocTable( $toc );
1798                 }
1799
1800                 # split up and insert constructed headlines
1801
1802                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1803                 $i = 0;
1804
1805                 foreach( $blocks as $block ) {
1806                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1807                             # This is the [edit] link that appears for the top block of text when
1808                                 # section editing is enabled
1809
1810                                 # Disabled because it broke block formatting
1811                                 # For example, a bullet point in the top line
1812                                 # $full .= $sk->editSectionLink(0);
1813                         }
1814                         $full .= $block;
1815                         if( $doShowToc && !$i && $isMain) {
1816                         # Top anchor now in skin
1817                                 $full = $full.$toc;
1818                         }
1819
1820                         if( !empty( $head[$i] ) ) {
1821                                 $full .= $head[$i];
1822                         }
1823                         $i++;
1824                 }
1825
1826                 return $full;
1827         }
1828
1829         /* private */ function magicISBN( $text )
1830         {
1831                 global $wgLang;
1832
1833                 $a = split( "ISBN ", " $text" );
1834                 if ( count ( $a ) < 2 ) return $text;
1835                 $text = substr( array_shift( $a ), 1);
1836                 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1837
1838                 foreach ( $a as $x ) {
1839                         $isbn = $blank = "" ;
1840                         while ( " " == $x{0} ) {
1841                                 $blank .= " ";
1842                                 $x = substr( $x, 1 );
1843                         }
1844                         while ( strstr( $valid, $x{0} ) != false ) {
1845                                 $isbn .= $x{0};
1846                                 $x = substr( $x, 1 );
1847                         }
1848                         $num = str_replace( "-", "", $isbn );
1849                         $num = str_replace( " ", "", $num );
1850
1851                         if ( "" == $num ) {
1852                                 $text .= "ISBN $blank$x";
1853                         } else {
1854                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1855                                 $text .= "<a href=\"" .
1856                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1857                                         "\" class=\"internal\">ISBN $isbn</a>";
1858                                 $text .= $x;
1859                         }
1860                 }
1861                 return $text;
1862         }
1863         /* private */ function magicRFC( $text )
1864         {
1865                 global $wgLang;
1866
1867                 $a = split( "RFC ", " $text" );
1868                 if ( count ( $a ) < 2 ) return $text;
1869                 $text = substr( array_shift( $a ), 1);
1870                 $valid = "0123456789";
1871
1872                 foreach ( $a as $x ) {
1873                         $rfc = $blank = "" ;
1874                         while ( " " == $x{0} ) {
1875                                 $blank .= " ";
1876                                 $x = substr( $x, 1 );
1877                         }
1878                         while ( strstr( $valid, $x{0} ) != false ) {
1879                                 $rfc .= $x{0};
1880                                 $x = substr( $x, 1 );
1881                         }
1882
1883                         if ( "" == $rfc ) {
1884                                 $text .= "RFC $blank$x";
1885                         } else {
1886                                 $url = wfmsg( "rfcurl" );
1887                                 $url = str_replace( "$1", $rfc, $url);
1888                                 $sk =& $this->mOptions->getSkin();
1889                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1890                                 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1891                         }
1892                 }
1893                 return $text;
1894         }
1895
1896         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1897         {
1898                 $this->mOptions = $options;
1899                 $this->mTitle =& $title;
1900                 $this->mOutputType = OT_WIKI;
1901
1902                 if ( $clearState ) {
1903                         $this->clearState();
1904                 }
1905
1906                 $stripState = false;
1907                 $pairs = array(
1908                         "\r\n" => "\n",
1909                         );
1910                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
1911                 // now with regexes
1912                 /*
1913                 $pairs = array(
1914                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1915                         "/<br *?>/i" => "<br />",
1916                 );
1917                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1918                 */
1919                 $text = $this->strip( $text, $stripState, false );
1920                 $text = $this->pstPass2( $text, $user );
1921                 $text = $this->unstrip( $text, $stripState );
1922                 return $text;
1923         }
1924
1925         /* private */ function pstPass2( $text, &$user )
1926         {
1927                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1928
1929                 # Variable replacement
1930                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1931                 $text = $this->replaceVariables( $text );
1932
1933                 # Signatures
1934                 #
1935                 $n = $user->getName();
1936                 $k = $user->getOption( "nickname" );
1937                 if ( "" == $k ) { $k = $n; }
1938                 if(isset($wgLocaltimezone)) {
1939                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1940                 }
1941                 /* Note: this is an ugly timezone hack for the European wikis */
1942                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1943                   " (" . date( "T" ) . ")";
1944                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1945
1946                 $text = preg_replace( "/~~~~~/", $d, $text );
1947                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1948                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1949                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1950                   Namespace::getUser() ) . ":$n|$k]]", $text );
1951
1952                 # Context links: [[|name]] and [[name (context)|]]
1953                 #
1954                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1955                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1956                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1957                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1958
1959                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1960                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1961                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1962                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1963                                                                                                                 # [[ns:page (cont)|]]
1964                 $context = "";
1965                 $t = $this->mTitle->getText();
1966                 if ( preg_match( $conpat, $t, $m ) ) {
1967                         $context = $m[2];
1968                 }
1969                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1970                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1971                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1972
1973                 if ( "" == $context ) {
1974                         $text = preg_replace( $p2, "[[\\1]]", $text );
1975                 } else {
1976                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1977                 }
1978
1979                 /*
1980                 $mw =& MagicWord::get( MAG_SUBST );
1981                 $wgCurParser = $this->fork();
1982                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1983                 $this->merge( $wgCurParser );
1984                 */
1985
1986                 # Trim trailing whitespace
1987                 # MAG_END (__END__) tag allows for trailing
1988                 # whitespace to be deliberately included
1989                 $text = rtrim( $text );
1990                 $mw =& MagicWord::get( MAG_END );
1991                 $mw->matchAndRemove( $text );
1992
1993                 return $text;
1994         }
1995
1996         # Set up some variables which are usually set up in parse()
1997         # so that an external function can call some class members with confidence
1998         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1999         {
2000                 $this->mTitle =& $title;
2001                 $this->mOptions = $options;
2002                 $this->mOutputType = $outputType;
2003                 if ( $clearState ) {
2004                         $this->clearState();
2005                 }
2006         }
2007
2008         function transformMsg( $text, $options ) {
2009                 global $wgTitle;
2010                 static $executing = false;
2011
2012                 # Guard against infinite recursion
2013                 if ( $executing ) {
2014                         return $text;
2015                 }
2016                 $executing = true;
2017
2018                 $this->mTitle = $wgTitle;
2019                 $this->mOptions = $options;
2020                 $this->mOutputType = OT_MSG;
2021                 $this->clearState();
2022                 $text = $this->replaceVariables( $text );
2023
2024                 $executing = false;
2025                 return $text;
2026         }
2027 }
2028
# Container for the result of a parse: the output text together with the
# language links, category links and "contains old magic" flag gathered on
# the way, plus a cache timestamp.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # Constructor. Every argument is optional; the cache time starts out as
        # an empty string.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = "";
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each one hands the member and the new value to wfSetVar()
        # and returns whatever that helper returns.
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Fold the links and the old-magic flag of another ParserOutput into this
        # one. The text and the cache time are left alone.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links. Merging in
                # $this->mLanguageLinks here would pollute the category list with
                # language links and lose $other's categories altogether.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2062
# Bundle of settings that steer a parse. The values are copied from site
# globals and from user preferences in initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        # Returns by reference: callers bind the result with =& and the member is
        # itself stored as a reference, so a by-value return would hand out a
        # copy of the skin. A plain "=" assignment at the call site still works.
        function &getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators; each one hands the member and the new value to wfSetVar()
        # (wfSetRef() for the skin) and returns whatever that helper returns.
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Factory: create a ParserOptions object and fill it in from $user.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Populate every option. Site-wide switches are read from the $wg*
        # globals; the skin and the remaining preferences are read from the user.
        #
        # $userInput: user object. When it evaluates to false, a new User object
        #             marked as loaded is used in its place.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2135
2136 # Regex callbacks, used in Parser::replaceVariables
# Plain-function callback that forwards the regex matches to
# braceSubstitution() on the parser held in the global $wgCurParser.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2142
# Plain-function callback that forwards the regex matches to
# argSubstitution() on the parser held in the global $wgCurParser.
function wfArgSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2148
# Plain-function callback that forwards the regex matches to
# variableSubstitution() on the parser held in the global $wgCurParser.
function wfVariableSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2154
2155 ?>