includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         require_once('extensions/wikihiero/wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         require_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
   40 define( "MAX_INCLUDE_REPEAT", 5 ); # presumably the per-page inclusion cap described in the O(N^2) note above; its use is outside this part of the file
   41
   42 # Allowed values for $mOutputType
      # (parse() selects OT_HTML; strip() only renders tag contents when the type is OT_HTML)
   43 define( "OT_HTML", 1 );
   44 define( "OT_WIKI", 2 );
   45 define( "OT_MSG", 3 );
   46
   47 # string parameter for extractTags which will cause it
   48 # to strip HTML comments in addition to regular
   49 # <XML>-style tags. This should not be anything we
   50 # may want to use in wikisyntax
   51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
   52
   53 # prefix for escaping, used in two functions at least
      # (strip() and insertStripItem() both build their placeholder markers from it)
   54 define( "UNIQ_PREFIX", "NaodW29");
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  65         function Parser()
  66         {
  67                 $this->clearState();
  68         }
  69
  70         function clearState()
  71         {
  72                 $this->mOutput = new ParserOutput;
  73                 $this->mAutonumber = 0;
  74                 $this->mLastSection = "";
  75                 $this->mDTopen = false;
  76                 $this->mVariables = false;
  77                 $this->mIncludeCount = array();
  78                 $this->mStripState = array();
  79                 $this->mArgStack = array();
  80                 $this->mInPre = false;
  81                 $this->mInNowiki = false;
  82         }
  83
  84         # First pass--just handle <nowiki> sections, pass the rest off
  85         # to internalParse() which does all the real work.
  86         #
  87         # Returns a ParserOutput
  88         #
  89         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  90         {
  91                 global $wgUseTidy;
  92                 $fname = "Parser::parse";
  93                 wfProfileIn( $fname );
  94
  95                 if ( $clearState ) {
  96                         $this->clearState();
  97                 }
  98
  99                 $this->mOptions = $options;
 100                 $this->mTitle =& $title;
 101                 $this->mOutputType = OT_HTML;
 102
 103                 $stripState = NULL;
 104                 $text = $this->strip( $text, $this->mStripState );
 105                 $text = $this->internalParse( $text, $linestart );
 106                 $text = $this->unstrip( $text, $this->mStripState );
 107                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 108                 if(!$wgUseTidy) {
 109                         $fixtags = array(
 110                                 # french spaces, last one Guillemet-left
 111                                 # only if there is something before the space
 112                                 "/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
 113                                 # french spaces, Guillemet-right
 114                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 115                                 "/<hr *>/i" => '<hr />',
 116                                 "/<br *>/i" => '<br />',
 117                                 "/<center *>/i"=>'<div class="center">',
 118                                 "/<\\/center *>/i" => '</div>',
 119                                 # Clean up spare ampersands; note that we probably ought to be
 120                                 # more careful about named entities.
 121                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 122                         );
 123                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 124                 } else {
 125                         $fixtags = array(
 126                                 # french spaces, last one Guillemet-left
 127                                 "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
 128                                 # french spaces, Guillemet-right
 129                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 130                                 "/<center *>/i"=>'<div class="center">',
 131                                 "/<\\/center *>/i" => '</div>'
 132                         );
 133                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 134                 }
 135                 # only once and last
 136                 $text = $this->doBlockLevels( $text, $linestart );
 137                 if($wgUseTidy) {
 138                         $text = $this->tidy($text);
 139                 }
 140                 $this->mOutput->setText( $text );
 141                 wfProfileOut( $fname );
 142                 return $this->mOutput;
 143         }
 144
 145         /* static */ function getRandomString()
 146         {
 147                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 148         }
 149
  150         # Replaces all occurrences of <$tag>content</$tag> in the text
  151         # with a random marker and returns the new text. The output parameter
  152         # $content will be an associative array filled with data of the form
  153         # $unique_marker => content.
 154
 155         # If $content is already set, the additional entries will be appended
 156
 157         # If $tag is set to STRIP_COMMENTS, the function will extract
 158         # <!-- HTML comments -->
 159
 160         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 161                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 162                 if ( !$content ) {
 163                         $content = array( );
 164                 }
 165                 $n = 1;
 166                 $stripped = "";
 167
 168                 while ( "" != $text ) {
 169                         if($tag==STRIP_COMMENTS) {
 170                                 $p = preg_split( "/<!--/i", $text, 2 );
 171                         } else {
 172                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 173                         }
 174                         $stripped .= $p[0];
 175                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 176                                 $text = "";
 177                         } else {
 178                                 if($tag==STRIP_COMMENTS) {
 179                                         $q = preg_split( "/-->/i", $p[1], 2 );
 180                                 } else {
 181                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 182                                 }
 183                                 $marker = $rnd . sprintf("%08X", $n++);
 184                                 $content[$marker] = $q[0];
 185                                 $stripped .= $marker;
 186                                 $text = $q[1];
 187                         }
 188                 }
 189                 return $stripped;
 190         }
 191
  192         # Strips and renders <nowiki>, <hiero>, <timeline>, <math> and <pre>
 193         # If $render is set, performs necessary rendering operations on plugins
 194         # Returns the text, and fills an array with data needed in unstrip()
 195         # If the $state is already a valid strip state, it adds to the state
 196
 197         # When $stripcomments is set, HTML comments <!-- like this -->
 198         # will be stripped in addition to other tags. This is important
 199         # for section editing, where these comments cause confusion when
 200         # counting the sections in the wikisource
 201         function strip( $text, &$state, $stripcomments = false )
 202         {
 203                 $render = ($this->mOutputType == OT_HTML);
 204                 $nowiki_content = array();
 205                 $hiero_content = array();
 206                 $timeline_content = array();
 207                 $math_content = array();
 208                 $pre_content = array();
 209                 $comment_content = array();
 210
 211                 # Replace any instances of the placeholders
 212                 $uniq_prefix = UNIQ_PREFIX;
 213                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 214
 215                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 216                 foreach( $nowiki_content as $marker => $content ){
 217                         if( $render ){
 218                                 # use span to mark nowiki areas, note the trailing whitespace in span to avoid collisions with other spans
 219                                 $nowiki_content[$marker] = '<span class="nowiki">'.wfEscapeHTMLTagsOnly( $content )."</span  >";
 220                         } else {
 221                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 222                         }
 223                 }
 224
 225                 $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 226                 foreach( $hiero_content as $marker => $content ){
 227                         if( $render && $GLOBALS['wgUseWikiHiero']){
 228                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 229                         } else {
 230                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
 231                         }
 232                 }
 233
 234                 $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
 235                 foreach( $timeline_content as $marker => $content ){
 236                         if( $render && $GLOBALS['wgUseTimeline']){
 237                                 $timeline_content[$marker] = renderTimeline( $content );
 238                         } else {
 239                                 $timeline_content[$marker] = "<timeline>$content</timeline>";
 240                         }
 241                 }
 242
 243                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 244                 foreach( $math_content as $marker => $content ){
 245                         if( $render ) {
 246                                 if( $this->mOptions->getUseTeX() ) {
 247                                         $math_content[$marker] = renderMath( $content );
 248                                 } else {
 249                                         $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
 250                                 }
 251                         } else {
 252                                 $math_content[$marker] = "<math>$content</math>";
 253                         }
 254                 }
 255
 256                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 257                 foreach( $pre_content as $marker => $content ){
 258                         if( $render ){
 259                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 260                         } else {
 261                                 $pre_content[$marker] = "<pre>$content</pre>";
 262                         }
 263                 }
 264                 if($stripcomments) {
 265                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 266                         foreach( $comment_content as $marker => $content ){
 267                                 $comment_content[$marker] = "<!--$content-->";
 268                         }
 269                 }
 270
 271                 # Merge state with the pre-existing state, if there is one
 272                 if ( $state ) {
 273                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 274                         $state['hiero'] = $state['hiero'] + $hiero_content;
 275                         $state['timeline'] = $state['timeline'] + $timeline_content;
 276                         $state['math'] = $state['math'] + $math_content;
 277                         $state['pre'] = $state['pre'] + $pre_content;
 278                         $state['comment'] = $state['comment'] + $comment_content;
 279                 } else {
 280                         $state = array(
 281                           'nowiki' => $nowiki_content,
 282                           'hiero' => $hiero_content,
 283                           'timeline' => $timeline_content,
 284                           'math' => $math_content,
 285                           'pre' => $pre_content,
 286                           'comment' => $comment_content
 287                         );
 288                 }
 289                 return $text;
 290         }
 291
 292         function unstrip( $text, &$state )
 293         {
 294                 # Must expand in reverse order, otherwise nested tags will be corrupted
 295                 $contentDict = end( $state );
 296                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 297                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 298                                 $text = str_replace( key( $contentDict ), $content, $text );
 299                         }
 300                 }
 301
 302                 return $text;
 303         }
 304
 305         # Add an item to the strip state
 306         # Returns the unique tag which must be inserted into the stripped text
 307         # The tag will be replaced with the original text in unstrip()
 308
 309         function insertStripItem( $text, &$state )
 310         {
 311                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 312                 if ( !$state ) {
 313                         $state = array(
 314                           'nowiki' => array(),
 315                           'hiero' => array(),
 316                           'math' => array(),
 317                           'pre' => array()
 318                         );
 319                 }
 320                 $state['item'][$rnd] = $text;
 321                 return $rnd;
 322         }
 323
 324         # This method generates the list of subcategories and pages for a category
 325         function categoryMagic ()
 326         {
 327                 global $wgLang , $wgUser ;
 328                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 329
 330                 $cns = Namespace::getCategory() ;
 331                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 332
 333                 $r = "<br style=\"clear:both;\"/>\n";
 334
 335
 336                 $sk =& $wgUser->getSkin() ;
 337
 338                 $articles = array() ;
 339                 $children = array() ;
 340                 $data = array () ;
 341                 $id = $this->mTitle->getArticleID() ;
 342
 343                 # FIXME: add limits
 344                 $t = wfStrencode( $this->mTitle->getDBKey() );
 345                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 346                 $res = wfQuery ( $sql, DB_READ ) ;
 347                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 348
 349                 # For all pages that link to this category
 350                 foreach ( $data AS $x )
 351                 {
 352                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 353                         if ( $t != "" ) $t .= ":" ;
 354                         $t .= $x->cur_title ;
 355
 356                         if ( $x->cur_namespace == $cns ) {
 357                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 358                         } else {
 359                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 360                         }
 361                 }
 362                 wfFreeResult ( $res ) ;
 363
 364                 # Showing subcategories
 365                 if ( count ( $children ) > 0 ) {
 366                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 367                         $r .= implode ( ", " , $children ) ;
 368                 }
 369
 370                 # Showing pages in this category
 371                 if ( count ( $articles ) > 0 ) {
 372                         $ti = $this->mTitle->getText() ;
 373                         $h =  wfMsg( "category_header", $ti );
 374                         $r .= "<h2>{$h}</h2>\n" ;
 375                         $r .= implode ( ", " , $articles ) ;
 376                 }
 377
 378
 379                 return $r ;
 380         }
 381
 382         function getHTMLattrs ()
 383         {
 384                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 385                                 "title", "align", "lang", "dir", "width", "height",
 386                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 387                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 388                                 /* FONT */ "type", "start", "value", "compact",
 389                                 /* For various lists, mostly deprecated but safe */
 390                                 "summary", "width", "border", "frame", "rules",
 391                                 "cellspacing", "cellpadding", "valign", "char",
 392                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 393                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 394                                 "id", "class", "name", "style" /* For CSS */
 395                                 );
 396                 return $htmlattrs ;
 397         }
 398
 399         function fixTagAttributes ( $t )
 400         {
 401                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 402                 $htmlattrs = $this->getHTMLattrs() ;
 403
 404                 # Strip non-approved attributes from the tag
 405                 $t = preg_replace(
 406                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 407                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 408                         $t);
 409                 # Strip javascript "expression" from stylesheets. Brute force approach:
 410                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 411
 412                 if( preg_match(
 413                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 414                         wfMungeToUtf8( $t ) ) )
 415                 {
 416                         $t="";
 417                 }
 418
 419                 return trim ( $t ) ;
 420         }
 421
 422         /* interface with html tidy, used if $wgUseTidy = true */
 423         function tidy ( $text ) {
 424                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 425                 global $wgInputEncoding, $wgOutputEncoding;
 426                 $fname = "Parser::tidy";
 427                 wfProfileIn( $fname );
 428
 429                 $cleansource = '';
 430                 switch(strtoupper($wgOutputEncoding)) {
 431                         case 'ISO-8859-1':
 432                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 433                                 break;
 434                         case 'UTF-8':
 435                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 436                                 break;
 437                         default:
 438                                 $wgTidyOpts .= ' -raw';
 439                         }
 440
 441                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 442 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 443 '<head><title>test</title></head><body>'.$text.'</body></html>';
 444                 $descriptorspec = array(
 445                         0 => array("pipe", "r"),
 446                         1 => array("pipe", "w"),
 447                         2 => array("file", "/dev/null", "a")
 448                 );
 449                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 450                 if (is_resource($process)) {
 451                         fwrite($pipes[0], $wrappedtext);
 452                         fclose($pipes[0]);
 453                         while (!feof($pipes[1])) {
 454                                 $cleansource .= fgets($pipes[1], 1024);
 455                         }
 456                         fclose($pipes[1]);
 457                         $return_value = proc_close($process);
 458                 }
 459
 460                 wfProfileOut( $fname );
 461
 462                 if( $cleansource == '' && $text != '') {
 463                         wfDebug( "Tidy error detected!\n" );
 464                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 465                 } else {
 466                         return $cleansource;
 467                 }
 468         }
 469
 470         function doTableStuff ( $t )
 471         {
 472                 $t = explode ( "\n" , $t ) ;
 473                 $td = array () ; # Is currently a td tag open?
 474                         $ltd = array () ; # Was it TD or TH?
 475                         $tr = array () ; # Is currently a tr tag open?
 476                         $ltr = array () ; # tr attributes
 477                         foreach ( $t AS $k => $x )
 478                         {
 479                                 $x = trim ( $x ) ;
 480                                 $fc = substr ( $x , 0 , 1 ) ;
 481                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 482                                 {
 483                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 484                                         array_push ( $td , false ) ;
 485                                         array_push ( $ltd , "" ) ;
 486                                         array_push ( $tr , false ) ;
 487                                         array_push ( $ltr , "" ) ;
 488                                 }
 489                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 490                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 491                                 {
 492                                         $z = "</table>\n" ;
 493                                         $l = array_pop ( $ltd ) ;
 494                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 495                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 496                                         array_pop ( $ltr ) ;
 497                                         $t[$k] = $z ;
 498                                 }
 499                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 500                                                 {
 501                                                 $z = trim ( substr ( $x , 2 ) ) ;
 502                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 503                                                 }*/
 504                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 505                                 {
 506                                         $x = substr ( $x , 1 ) ;
 507                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 508                                         $z = "" ;
 509                                         $l = array_pop ( $ltd ) ;
 510                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 511                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 512                                         array_pop ( $ltr ) ;
 513                                         $t[$k] = $z ;
 514                                         array_push ( $tr , false ) ;
 515                                         array_push ( $td , false ) ;
 516                                         array_push ( $ltd , "" ) ;
 517                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 518                                 }
 519                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 520                                 {
 521                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 522                                         {
 523                                                 $fc = "+" ;
 524                                                 $x = substr ( $x , 1 ) ;
 525                                         }
 526                                         $after = substr ( $x , 1 ) ;
 527                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 528                                         $after = explode ( "||" , $after ) ;
 529                                         $t[$k] = "" ;
 530                                         foreach ( $after AS $theline )
 531                                         {
 532                                                 $z = "" ;
 533                                                 if ( $fc != "+" )
 534                                                 {
 535                                                         $tra = array_pop ( $ltr ) ;
 536                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 537                                                         array_push ( $tr , true ) ;
 538                                                         array_push ( $ltr , "" ) ;
 539                                                 }
 540
 541                                                 $l = array_pop ( $ltd ) ;
 542                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 543                                                 if ( $fc == "|" ) $l = "td" ;
 544                                                 else if ( $fc == "!" ) $l = "th" ;
 545                                                 else if ( $fc == "+" ) $l = "caption" ;
 546                                                 else $l = "" ;
 547                                                 array_push ( $ltd , $l ) ;
 548                                                 $y = explode ( "|" , $theline , 2 ) ;
 549                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 550                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 551                                                 $t[$k] .= $y ;
 552                                                 array_push ( $td , true ) ;
 553                                         }
 554                                 }
 555                         }
 556
 557                 # Closing open td, tr && table
 558                 while ( count ( $td ) > 0 )
 559                 {
 560                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 561                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 562                         $t[] = "</table>" ;
 563                 }
 564
 565                 $t = implode ( "\n" , $t ) ;
 566                 #               $t = $this->removeHTMLtags( $t );
 567                 return $t ;
 568         }
 569
 570         # Parses the text and adds the result to the strip state
 571         # Returns the strip tag
 572         function stripParse( $text, $newline, $args )
 573         {
 574                 $text = $this->strip( $text, $this->mStripState );
 575                 $text = $this->internalParse( $text, (bool)$newline, $args, false );
 576                 return $newline.$this->insertStripItem( $text, $this->mStripState );
 577         }
 578
  579         function internalParse( $text, $linestart, $args = array(), $isMain=true )
  580         {
                      # Runs the main sequence of markup passes over $text and returns the result.
                      # Each pass receives the output of the previous one, so the order of the
                      # calls below is significant.
                      #
                      # $text       already-stripped wiki markup
                      # $linestart  accepted for callers, but not referenced in this body
                      # $args       handed to replaceVariables()
                      # $isMain     handed to formatHeadings()
  581                 $fname = "Parser::internalParse";
  582                 wfProfileIn( $fname );
  583
  584                 $text = $this->removeHTMLtags( $text );
  585                 $text = $this->replaceVariables( $text, $args );
  586
                      # A run of four or more hyphens at the start of the text or of a line becomes <hr />
  587                 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );
  588
  589                 $text = $this->doHeadings( $text );
  590                 if($this->mOptions->getUseDynamicDates()) {
  591                         global $wgDateFormatter;
  592                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
  593                 }
  594                 $text = $this->doAllQuotes( $text );
  595                 $text = $this->replaceExternalLinks( $text );
                      # NOTE(review): replaceInternalLinks() is run twice in a row; the reason is not
                      # visible here (possibly links nested inside other links) - confirm before changing
  596                 $text = $this->replaceInternalLinks ( $text );
  597                 $text = $this->replaceInternalLinks ( $text );
  598                 //$text = $this->doTokenizedParser ( $text );
  599                 $text = $this->doTableStuff ( $text ) ;
  600                 $text = $this->magicISBN( $text );
  601                 $text = $this->magicRFC( $text );
  602                 $text = $this->formatHeadings( $text, $isMain );
  603                 $sk =& $this->mOptions->getSkin();
  604                 $text = $sk->transformContent( $text );
  605
                      # Append the category listing only the first time through. The flag is a
                      # property created on the fly and clearState() does not reset it, so
                      # it stays set for the lifetime of this Parser object.
  606                 if ( !isset ( $this->categoryMagicDone ) ) {
  607                         $text .= $this->categoryMagic () ;
  608                         $this->categoryMagicDone = true ;
  609                 }
  610
  611                 wfProfileOut( $fname );
  612                 return $text;
  613         }
 614
 615
 616         /* private */ function doHeadings( $text )
 617         {
 618                 for ( $i = 6; $i >= 1; --$i ) {
 619                         $h = substr( "======", 0, $i );
 620                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 621                           "<h{$i}>\\1</h{$i}>\\2", $text );
 622                 }
 623                 return $text;
 624         }
 625
 626         /* private */ function doAllQuotes( $text )
 627         {
 628                 $outtext = "";
 629                 $lines = explode( "\n", $text );
 630                 foreach ( $lines as $line ) {
 631                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\n";
 632                 }
 633                 return substr($outtext, 0,-1);
 634         }
 635
 636         /* private */ function doQuotes( $pre, $text, $mode )
 637         {
 638                 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
 639                         $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
 640                         $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
 641                         if ( substr ($m[2], 0, 1) == "'" ) {
 642                                 $m[2] = substr ($m[2], 1);
 643                                 if ($mode == "em") {
 644                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
 645                                 } else if ($mode == "strong") {
 646                                         return $m1_strong . $this->doQuotes ( "", $m[2], "" );
 647                                 } else if (($mode == "emstrong") || ($mode == "both")) {
 648                                         return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
 649                                 } else if ($mode == "strongem") {
 650                                         return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
 651                                 } else {
 652                                         return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
 653                                 }
 654                         } else {
 655                                 if ($mode == "strong") {
 656                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
 657                                 } else if ($mode == "em") {
 658                                         return $m1_em . $this->doQuotes ( "", $m[2], "" );
 659                                 } else if ($mode == "emstrong") {
 660                                         return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
 661                                 } else if (($mode == "strongem") || ($mode == "both")) {
 662                                         return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
 663                                 } else {
 664                                         return $m[1] . $this->doQuotes ( "", $m[2], "em" );
 665                                 }
 666                         }
 667                 } else {
 668                         $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
 669                         $text_em = ($text == "") ? "" : "<em>{$text}</em>";
 670                         if ($mode == "") {
 671                                 return $pre . $text;
 672                         } else if ($mode == "em") {
 673                                 return $pre . $text_em;
 674                         } else if ($mode == "strong") {
 675                                 return $pre . $text_strong;
 676                         } else if ($mode == "strongem") {
 677                                 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
 678                         } else {
 679                                 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
 680                         }
 681                 }
 682         }
 683
 684         # Note: we have to do external links before the internal ones,
 685         # and otherwise take great care in the order of things here, so
 686         # that we don't end up interpreting some URLs twice.
 687
 688         /* private */ function replaceExternalLinks( $text )
 689         {
 690                 $fname = "Parser::replaceExternalLinks";
 691                 wfProfileIn( $fname );
 692                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 693                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 694                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 695                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 696                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 697                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 698                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 699                 wfProfileOut( $fname );
 700                 return $text;
 701         }
 702
 703         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 704         {
 705                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 706                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 707
 708                 # this is  the list of separators that should be ignored if they
 709                 # are the last character of an URL but that should be included
 710                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 711                 # in this case, the last comma should not become part of the URL,
 712                 # but in "www.foo.com/123,2342,32.htm" it should.
 713                 $sep = ",;\.:";
 714                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 715                 $images = "gif|png|jpg|jpeg";
 716
 717                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 718                 # they are interpreted as part of the string (used to tell PHP
 719                 # that the content of the string should be inserted there).
 720                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 721                   "((?i){$images})([^{$uc}]|$)/";
 722
 723                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 724                 $sk =& $this->mOptions->getSkin();
 725
 726                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 727                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 728                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 729                 }
 730                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 731                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 732                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 733                   "</a>\\5", $s );
 734                 $s = str_replace( $unique, $protocol, $s );
 735
 736                 $a = explode( "[{$protocol}:", " " . $s );
 737                 $s = array_shift( $a );
 738                 $s = substr( $s, 1 );
 739
 740                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 741                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 742
 743                 foreach ( $a as $line ) {
 744                         if ( preg_match( $e1, $line, $m ) ) {
 745                                 $link = "{$protocol}:{$m[1]}";
 746                                 $trail = $m[2];
 747                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 748                                 else { $text = wfEscapeHTML( $link ); }
 749                         } else if ( preg_match( $e2, $line, $m ) ) {
 750                                 $link = "{$protocol}:{$m[1]}";
 751                                 $text = $m[2];
 752                                 $trail = $m[3];
 753                         } else {
 754                                 $s .= "[{$protocol}:" . $line;
 755                                 continue;
 756                         }
 757                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 758                                 $paren = "";
 759                         } else {
 760                                 # Expand the URL for printable version
 761                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 762                         }
 763                         $la = $sk->getExternalLinkAttributes( $link, $text );
 764                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 765
 766                 }
 767                 return $s;
 768         }
 769
 770
 771         /* private */ function replaceInternalLinks( $s )
 772         {
 773                 global $wgLang, $wgLinkCache;
 774                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 775                 static $fname = "Parser::replaceInternalLink" ;
 776                 wfProfileIn( $fname );
 777
 778                 wfProfileIn( "$fname-setup" );
 779                 static $tc = FALSE;
 780                 # the % is needed to support urlencoded titles as well
 781                 if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
 782                 $sk =& $this->mOptions->getSkin();
 783
 784                 $a = explode( "[[", " " . $s );
 785                 $s = array_shift( $a );
 786                 $s = substr( $s, 1 );
 787
 788                 # Match a link having the form [[namespace:link|alternate]]trail
 789                 static $e1 = FALSE;
 790                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 791                 # Match the end of a line for a word that's not followed by whitespace,
 792                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 793                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 794                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 795                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 796
 797
 798                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 799                 static $image = FALSE;
 800                 static $special = FALSE;
 801                 static $media = FALSE;
 802                 static $category = FALSE;
 803                 if ( !$image ) { $image = Namespace::getImage(); }
 804                 if ( !$special ) { $special = Namespace::getSpecial(); }
 805                 if ( !$media ) { $media = Namespace::getMedia(); }
 806                 if ( !$category ) { $category = Namespace::getCategory(); }
 807
 808                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 809
 810                 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
 811                         $new_prefix = $m[2];
 812                         $s = $m[1];
 813                 } else {
 814                         $new_prefix="";
 815                 }
 816
 817                 wfProfileOut( "$fname-setup" );
 818
 819                 foreach ( $a as $line ) {
 820                         $prefix = $new_prefix;
 821
 822                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 823                                 $text = $m[2];
 824                                 # fix up urlencoded title texts
 825                                 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
 826                                 $trail = $m[3];
 827                         } else { # Invalid form; output directly
 828                                 $s .= $prefix . "[[" . $line ;
 829                                 wfProfileOut( $fname );
 830                                 continue;
 831                         }
 832
 833                         /* Valid link forms:
 834                         Foobar -- normal
 835                         :Foobar -- override special treatment of prefix (images, language links)
 836                         /Foobar -- convert to CurrentPage/Foobar
 837                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 838                         */
 839                         $c = substr($m[1],0,1);
 840                         $noforce = ($c != ":");
 841                         if( $c == "/" ) { # subpage
 842                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 843                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 844                                         $noslash=$m[1];
 845                                 } else {
 846                                         $noslash=substr($m[1],1);
 847                                 }
 848                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
 849                                         $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 850                                         if( "" == $text ) {
 851                                                 $text= $m[1];
 852                                         } # this might be changed for ugliness reasons
 853                                 } else {
 854                                         $link = $noslash; # no subpage allowed, use standard link
 855                                 }
 856                         } elseif( $noforce ) { # no subpage
 857                                 $link = $m[1];
 858                         } else {
 859                                 $link = substr( $m[1], 1 );
 860                         }
 861                         $wasblank = ( "" == $text );
 862                         if( $wasblank )
 863                         $text = $link;
 864
 865                         $nt = Title::newFromText( $link );
 866                         if( !$nt ) {
 867                                 $s .= $prefix . "[[" . $line;
 868                                 wfProfileOut( $fname );
 869                                 continue;
 870                         }
 871                         $ns = $nt->getNamespace();
 872                         $iw = $nt->getInterWiki();
 873                         if( $noforce ) {
 874                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 875                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 876                                         $tmp = $prefix . $trail ;
 877                                         wfProfileOut( $fname );
 878                                         $s .= (trim($tmp) == '')? '': $tmp;
 879                                         continue;
 880                                 }
 881                                 if ( $ns == $image ) {
 882                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 883                                         $wgLinkCache->addImageLinkObj( $nt );
 884                                         wfProfileOut( $fname );
 885                                         continue;
 886                                 }
 887                                 if ( $ns == $category ) {
 888                                         $t = $nt->getText() ;
 889                                         $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 890
 891                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
 892                                         $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 893                                         $wgLinkCache->resume();
 894
 895                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
 896                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
 897                                         $this->mOutput->mCategoryLinks[] = $t ;
 898                                         $s .= $prefix . $trail ;
 899                                         wfProfileOut( $fname );
 900                                         continue;
 901                                 }
 902                         }
 903                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 904                         ( strpos( $link, "#" ) == FALSE ) ) {
 905                                 # Self-links are handled specially; generally de-link and change to bold.
 906                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 907                                 wfProfileOut( $fname );
 908                                 continue;
 909                         }
 910
 911                         if( $ns == $media ) {
 912                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 913                                 $wgLinkCache->addImageLinkObj( $nt );
 914                                 wfProfileOut( $fname );
 915                                 continue;
 916                         } elseif( $ns == $special ) {
 917                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 918                                 wfProfileOut( $fname );
 919                                 continue;
 920                         }
 921                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 922                 }
 923                 wfProfileOut( $fname );
 924                 return $s;
 925         }
 926
 927         # Some functions here used by doBlockLevels()
 928         #
 929         /* private */ function closeParagraph()
 930         {
 931                 $result = "";
 932                 if ( '' != $this->mLastSection ) {
 933                         $result = "</" . $this->mLastSection  . ">\n";
 934                 }
 935                 $this->mInPre = false;
 936                 $this->mLastSection = "";
 937                 return $result;
 938         }
        # getCommon() returns the length of the longest common prefix of both
        # arguments, i.e. the number of leading characters they share.
 941         #
 942         /* private */ function getCommon( $st1, $st2 )
 943         {
 944                 $fl = strlen( $st1 );
 945                 $shorter = strlen( $st2 );
 946                 if ( $fl < $shorter ) { $shorter = $fl; }
 947
 948                 for ( $i = 0; $i < $shorter; ++$i ) {
 949                         if ( $st1{$i} != $st2{$i} ) { break; }
 950                 }
 951                 return $i;
 952         }
 953         # These next three functions open, continue, and close the list
 954         # element appropriate to the prefix character passed into them.
 955         #
 956         /* private */ function openList( $char )
 957     {
 958                 $result = $this->closeParagraph();
 959
 960                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 961                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 962                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 963                 else if ( ";" == $char ) {
 964                         $result .= "<dl><dt>";
 965                         $this->mDTopen = true;
 966                 }
 967                 else { $result = "<!-- ERR 1 -->"; }
 968
 969                 return $result;
 970         }
 971
 972         /* private */ function nextItem( $char )
 973         {
 974                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 975                 else if ( ":" == $char || ";" == $char ) {
 976                         $close = "</dd>";
 977                         if ( $this->mDTopen ) { $close = "</dt>"; }
 978                         if ( ";" == $char ) {
 979                                 $this->mDTopen = true;
 980                                 return $close . "<dt>";
 981                         } else {
 982                                 $this->mDTopen = false;
 983                                 return $close . "<dd>";
 984                         }
 985                 }
 986                 return "<!-- ERR 2 -->";
 987         }
 988
 989         /* private */function closeList( $char )
 990         {
 991                 if ( "*" == $char ) { $text = "</li></ul>"; }
 992                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 993                 else if ( ":" == $char ) {
 994                         if ( $this->mDTopen ) {
 995                                 $this->mDTopen = false;
 996                                 $text = "</dt></dl>";
 997                         } else {
 998                                 $text = "</dd></dl>";
 999                         }
1000                 }
1001                 else {  return "<!-- ERR 3 -->"; }
1002                 return $text."\n";
1003         }
1004
1005         /* private */ function doBlockLevels( $text, $linestart ) {
1006                 $fname = "Parser::doBlockLevels";
1007                 wfProfileIn( $fname );
1008
1009                 # Parsing through the text line by line.  The main thing
1010                 # happening here is handling of block-level elements p, pre,
1011                 # and making lists from lines starting with * # : etc.
1012                 #
1013                 $textLines = explode( "\n", $text );
1014
1015                 $lastPrefix = $output = $lastLine = '';
1016                 $this->mDTopen = $inBlockElem = false;
1017                 $prefixLength = 0;
1018                 $paragraphStack = false;
1019
1020                 if ( !$linestart ) {
1021                         $output .= array_shift( $textLines );
1022                 }
1023                 foreach ( $textLines as $oLine ) {
1024                         $lastPrefixLength = strlen( $lastPrefix );
1025                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1026                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1027                         $nowikiOpenMatch = preg_match("/<span class=\"nowiki\"/", $oLine );
1028                         $nowikiCloseMatch = preg_match("/<\\/span  >/", $oLine );
1029                         if($nowikiOpenMatch) $nowikiFullMatch = preg_match("/^(.*)<span class=\"nowiki\"/", $oLine, $nowikiOpenMatches );
1030                         if (!$this->mInPre) {
1031                                 $this->mInPre = !empty($preOpenMatch);
1032                         }
1033                         if (!$this->mInNowiki) {
1034                                 $this->mInNowiki = !empty($nowikiOpenMatch);
1035                         }
1036                         if (
1037                                 !$this->mInPre && (!$this->mInNowiki ||
1038                                 ($nowikiOpenMatch && strlen($nowikiOpenMatches[1]) > 0) )
1039                         )
1040                         {
1041                                 # Multiple prefixes may abut each other for nested lists.
1042                                 $prefixLength = strspn( $oLine, "*#:;" );
1043                                 $pref = substr( $oLine, 0, $prefixLength );
1044
1045                                 # eh?
1046                                 $pref2 = str_replace( ";", ":", $pref );
1047                                 $t = substr( $oLine, $prefixLength );
1048                         } else {
1049                                 # Don't interpret any other prefixes in preformatted text
1050                                 $prefixLength = 0;
1051                                 $pref = $pref2 = '';
1052                                 $t = $oLine;
1053                         }
1054
1055                         # List generation
1056                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1057                                 # Same as the last item, so no need to deal with nesting or opening stuff
1058                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1059                                 $paragraphStack = false;
1060
1061                                 if ( ";" == substr( $pref, -1 ) ) {
1062                                         # The one nasty exception: definition lists work like this:
1063                                         # ; title : definition text
1064                                         # So we check for : in the remainder text to split up the
1065                                         # title and definition, without b0rking links.
1066                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1067                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1068                                                 $term = $match[1];
1069                                                 $output .= $term . $this->nextItem( ":" );
1070                                                 $t = $match[2];
1071                                         }
1072                                 }
1073                         } elseif( $prefixLength || $lastPrefixLength ) {
1074                                 # Either open or close a level...
1075                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1076                                 $paragraphStack = false;
1077
1078                                 while( $commonPrefixLength < $lastPrefixLength ) {
1079                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1080                                         --$lastPrefixLength;
1081                                 }
1082                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1083                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1084                                 }
1085                                 while ( $prefixLength > $commonPrefixLength ) {
1086                                         $char = substr( $pref, $commonPrefixLength, 1 );
1087                                         $output .= $this->openList( $char );
1088
1089                                         if ( ";" == $char ) {
1090                                                 # FIXME: This is dupe of code above
1091                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1092                                                         $term = $match[1];
1093                                                         $output .= $term . $this->nextItem( ":" );
1094                                                         $t = $match[2];
1095                                                 }
1096                                         }
1097                                         ++$commonPrefixLength;
1098                                 }
1099                                 $lastPrefix = $pref2;
1100                         }
1101                         if( 0 == $prefixLength ) {
1102                                 # No prefix (not in list)--go to paragraph mode
1103                                 $uniq_prefix = UNIQ_PREFIX;
1104                                 // XXX: use a stack for nestable elements like span, table and div
1105                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/td|<\\/th)/i", $t );
1106                                 $closematch = preg_match(
1107                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1108                                         "<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1109                                 if ( $openmatch or $closematch ) {
1110                                         $paragraphStack = false;
1111                                         $output .= $this->closeParagraph();
1112                                         if($preOpenMatch and !$preCloseMatch) {
1113                                                 $this->mInPre = true;
1114                                         }
1115                                         if ( $closematch  ) {
1116                                                 $inBlockElem = false;
1117                                         } else {
1118                                                 $inBlockElem = true;
1119                                         }
1120                                 } else if (
1121                                         !$inBlockElem && !$this->mInPre &&
1122                                         (!$this->mInNowiki || ($nowikiOpenMatch && trim($nowikiOpenMatches[1]) == ''  ) ) )
1123                                         {
1124                                         if ( " " == $t{0} and trim($t) != '' and (!$this->mInNowiki || $nowikiOpenMatch && strlen($nowikiOpenMatches[1]) > 0 ) ) {
1125                                                 // pre
1126                                                 if ($this->mLastSection != 'pre') {
1127                                                         $paragraphStack = false;
1128                                                         $output .= $this->closeParagraph().'<pre>';
1129                                                         $this->mLastSection = 'pre';
1130                                                 }
1131                                         } else {
1132                                                 // paragraph
1133                                                 if ( '' == trim($t) ) {
1134                                                         if ( $paragraphStack ) {
1135                                                                 $output .= $paragraphStack.'<br />';
1136                                                                 $paragraphStack = false;
1137                                                                 $this->mLastSection = 'p';
1138                                                         } else {
1139                                                                 if ($this->mLastSection != 'p' ) {
1140                                                                         $output .= $this->closeParagraph();
1141                                                                         $this->mLastSection = '';
1142                                                                         $paragraphStack = "<p>";
1143                                                                 } else {
1144                                                                         $paragraphStack = '</p><p>';
1145                                                                 }
1146                                                         }
1147                                                 } else {
1148                                                         if ( $paragraphStack ) {
1149                                                                 $output .= $paragraphStack;
1150                                                                 $paragraphStack = false;
1151                                                                 $this->mLastSection = 'p';
1152                                                         } else if ($this->mLastSection != 'p') {
1153                                                                 $output .= $this->closeParagraph().'<p>';
1154                                                                 $this->mLastSection = 'p';
1155                                                         }
1156                                                 }
1157                                         }
1158                                 }
1159                         }
1160                         if($nowikiCloseMatch) $this->mInNowiki = false;
1161                         if ($paragraphStack === false) {
1162                                 $output .= $t."\n";
1163                         }
1164                 }
1165                 while ( $prefixLength ) {
1166                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1167                         --$prefixLength;
1168                 }
1169                 if ( "" != $this->mLastSection ) {
1170                         $output .= "</" . $this->mLastSection . ">";
1171                         $this->mLastSection = "";
1172                 }
1173
1174                 wfProfileOut( $fname );
1175                 return $output;
1176         }
1177
1178         function getVariableValue( $index ) {
1179                 # Maps a magic-variable ID to its current value.
1180                 #
1181                 # $index is one of the MAG_* constants. IDs are compared loosely (==) and
1182                 # tested in a fixed order; date and time values are computed at the moment
1183                 # of the call. Returns the value, or NULL when $index matches none of the
1184                 # variables handled here.
1185                 global $wgLang, $wgSitename, $wgServer;
1186
1187                 # Month and day of month
1188                 if ( $index == MAG_CURRENTMONTH ) { return date( "m" ); }
1189                 if ( $index == MAG_CURRENTMONTHNAME ) { return $wgLang->getMonthName( date("n") ); }
1190                 if ( $index == MAG_CURRENTMONTHNAMEGEN ) { return $wgLang->getMonthNameGen( date("n") ); }
1191                 if ( $index == MAG_CURRENTDAY ) { return date("j"); }
1192
1193                 # Title of the page being parsed; the namespace text comes from $wgLang
1194                 if ( $index == MAG_PAGENAME ) { return $this->mTitle->getText(); }
1195                 if ( $index == MAG_NAMESPACE ) { return $wgLang->getNsText($this->mTitle->getNamespace()); }
1196
1197                 # Weekday, year and time; date("w") counts from 0 (Sunday), so the weekday
1198                 # number is shifted by one before the name lookup
1199                 if ( $index == MAG_CURRENTDAYNAME ) { return $wgLang->getWeekdayName( date("w")+1 ); }
1200                 if ( $index == MAG_CURRENTYEAR ) { return date( "Y" ); }
1201                 if ( $index == MAG_CURRENTTIME ) { return $wgLang->time( wfTimestampNow(), false ); }
1202
1203                 # Site-wide values
1204                 if ( $index == MAG_NUMBEROFARTICLES ) { return wfNumberOfArticles(); }
1205                 if ( $index == MAG_SITENAME ) { return $wgSitename; }
1206                 if ( $index == MAG_SERVER ) { return $wgServer; }
1207
1208                 # Not a known variable
1209                 return NULL;
1210         }
1211
1212         function initialiseVariables() {   # Rebuilds $this->mVariables from the IDs in $wgVariableIDs
1213                 global $wgVariableIDs;
1214                 $this->mVariables = array();   # start from an empty table on every call
1215                 foreach ( $wgVariableIDs as $variableId ) {
1216                         $magicWord =& MagicWord::get( $variableId );
1217                         $currentValue = $this->getVariableValue( $variableId );
1218                         $magicWord->addToArray( $this->mVariables, $currentValue );
1219                 }
1220         }
1221
1222         /* private */ function replaceVariables( $text, $args = array() )
1223         {
1224                 # Expands the {{...}} constructs in $text and returns the resulting text.
1225                 # $args holds the arguments that {{{name}}} references are looked up in
1226                 # (see argSubstitution). Built-in variables and {{{name}}} references are
1227                 # expanded for HTML output only; the template pass runs for every output type.
1228                 global $wgLang, $wgScript, $wgArticlePath;
1229
1230                 $fname = "Parser::replaceVariables";
1231                 wfProfileIn( $fname );
1232
1233                 # Fill the variable table on first use
1234                 if ( !$this->mVariables ) {
1235                         $this->initialiseVariables();
1236                 }
1237
1238                 # Character classes and patterns for the three passes
1239                 $titleChars = Title::legalChars();
1240                 $nonBraceChars = str_replace( array( "{", "}" ), "", $titleChars );
1241                 $variablePattern = "/{{([$nonBraceChars]*?)}}/";
1242                 $argumentPattern = "/(\\n?){{{([$titleChars]*?)}}}/";
1243                 $templatePattern = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";
1244
1245                 # This method is re-entered recursively, so the arguments of each level go
1246                 # on a stack. Rebinding a global by reference needs the GLOBALS array.
1247                 array_push( $this->mArgStack, $args );
1248                 $GLOBALS['wgCurParser'] =& $this;
1249
1250                 if ( $this->mOutputType == OT_HTML ) {
1251                         $text = preg_replace_callback( $variablePattern, "wfVariableSubstitution", $text );
1252                         $text = preg_replace_callback( $argumentPattern, "wfArgSubstitution", $text );
1253                 }
1254                 $text = preg_replace_callback( $templatePattern, "wfBraceSubstitution", $text );
1255                 array_pop( $this->mArgStack );
1256                 wfProfileOut( $fname );
1257                 return $text;
1258         }
1259
1260         function variableSubstitution( $matches ) {
1261                 # Replacement for one {{name}} match: $matches[0] is the whole match and
1262                 # $matches[1] the name. Names without an entry in $this->mVariables are
1263                 # returned exactly as matched.
1264                 $name = $matches[1];
1265                 if ( !array_key_exists( $name, $this->mVariables ) ) { return $matches[0]; }
1266
1267                 $this->mOutput->mContainsOldMagic = true;   # a variable was substituted
1268                 return $this->mVariables[$name];
1269         }
1270
1271         function braceSubstitution( $matches )   # Expands one {{...}} match and returns the replacement text;
1272         {                                        # the match itself ($matches[0]) is returned when nothing applies.
1273                 global $wgLinkCache, $wgLang;
1274                 $fname = "Parser::braceSubstitution";   # NOTE(review): never read in this function
1275                 $found = false;     # becomes true once a branch below has produced $text
1276                 $nowiki = false;    # set by the MSGNW prefix; the result is then escaped, not parsed
1277                 $noparse = false;   # set when the raw match is handed back and must not be re-parsed
1278
1279                 $title = NULL;      # only assigned by the LOCALURL and database branches
1280
1281                 # $newline is an optional newline character before the braces
1282                 # $part1 is the bit before the first |, and must contain only title characters
1283                 # $args is a list of arguments, starting from index 0, not including $part1
1284
1285                 $newline = $matches[1];
1286                 $part1 = $matches[2];
1287                 # If the third subpattern matched anything, it will start with |
1288                 if ( $matches[3] !== "" ) {
1289                         $args = explode( "|", substr( $matches[3], 1 ) );
1290                 } else {
1291                         $args = array();
1292                 }
1293                 $argc = count( $args );
1294
1295                 # {{{}}}: a match that contains a triple brace is handed back untouched
1296                 if ( strpos( $matches[0], "{{{" ) !== false ) {
1297                         $text = $matches[0];
1298                         $found = true;
1299                         $noparse = true;
1300                 }
1301
1302                 # SUBST: the prefix is only acted on for OT_WIKI output; there, matches without it are left alone
1303                 if ( !$found ) {
1304                         $mwSubst =& MagicWord::get( MAG_SUBST );
1305                         if ( $mwSubst->matchStartAndRemove( $part1 ) ) {   # presumably strips the prefix from $part1 on a match
1306                                 if ( $this->mOutputType != OT_WIKI ) {
1307                                         # Invalid SUBST not replaced at PST time
1308                                         # Return without further processing
1309                                         $text = $matches[0];
1310                                         $found = true;
1311                                         $noparse= true;
1312                                 }
1313                         } elseif ( $this->mOutputType == OT_WIKI ) {
1314                                 # SUBST not found in PST pass, do nothing
1315                                 $text = $matches[0];
1316                                 $found = true;
1317                         }
1318                 }
1319
1320                 # MSG, MSGNW and INT
1321                 if ( !$found ) {
1322                         # Check for MSGNW:
1323                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1324                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1325                                 $nowiki = true;
1326                         } else {
1327                                 # Remove obsolete MSG:
1328                                 $mwMsg =& MagicWord::get( MAG_MSG );
1329                                 $mwMsg->matchStartAndRemove( $part1 );
1330                         }
1331
1332                         # Check if it is an internal message
1333                         $mwInt =& MagicWord::get( MAG_INT );
1334                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1335                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {   # counted against the inclusion limit under an "int:" key
1336                                         $text = wfMsgReal( $part1, $args, true );
1337                                         $found = true;
1338                                 }
1339                         }
1340                 }
1341
1342                 # NS
1343                 if ( !$found ) {
1344                         # Check for NS: (namespace expansion)
1345                         $mwNs = MagicWord::get( MAG_NS );   # NOTE(review): plain assignment, unlike the =& used for the magic words above
1346                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1347                                 if ( intval( $part1 ) ) {   # non-zero numeric index; "0" is falsy and ends up in the name lookup below
1348                                         $text = $wgLang->getNsText( intval( $part1 ) );
1349                                         $found = true;
1350                                 } else {
1351                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1352                                         if ( !is_null( $index ) ) {
1353                                                 $text = $wgLang->getNsText( $index );
1354                                                 $found = true;
1355                                         }
1356                                 }
1357                         }
1358                 }
1359
1360                 # LOCALURL and LOCALURLE
1361                 if ( !$found ) {
1362                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1363                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1364
1365                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1366                                 $func = 'getLocalURL';
1367                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1368                                 $func = 'escapeLocalURL';
1369                         } else {
1370                                 $func = '';
1371                         }
1372
1373                         if ( $func !== '' ) {   # $func names the Title method that builds the URL
1374                                 $title = Title::newFromText( $part1 );
1375                                 if ( !is_null( $title ) ) {
1376                                         if ( $argc > 0 ) {
1377                                                 $text = $title->$func( $args[0] );   # only the first argument is passed on
1378                                         } else {
1379                                                 $text = $title->$func();
1380                                         }
1381                                         $found = true;
1382                                 }
1383                         }
1384                 }
1385
1386                 # Internal variables
1387                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1388                         $text = $this->mVariables[$part1];
1389                         $found = true;
1390                         $this->mOutput->mContainsOldMagic = true;
1391                 }
1392 /*
1393                 # Arguments input from the caller
1394                 $inputArgs = end( $this->mArgStack );
1395                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1396                         $text = $inputArgs[$part1];
1397                         $found = true;
1398                 }
1399 */
1400                 # Load from database
1401                 if ( !$found ) {
1402                         $title = Title::newFromText( $part1, NS_TEMPLATE );   # NS_TEMPLATE: presumably the namespace used when $part1 names none
1403                         if ( !is_null( $title ) && !$title->isExternal() ) {
1404                                 # Check for excessive inclusion
1405                                 $dbk = $title->getPrefixedDBkey();
1406                                 if ( $this->incrementIncludeCount( $dbk ) ) {   # false once this page has been included more than MAX_INCLUDE_REPEAT times
1407                                         $article = new Article( $title );
1408                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1409                                         if ( $articleContent !== false ) {   # false is taken to mean that no text is available
1410                                                 $found = true;
1411                                                 $text = $articleContent;
1412
1413                                         }
1414                                 }
1415
1416                                 # If the title is valid but undisplayable (or the inclusion limit was hit), make a link to it
1417                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1418                                         $text = "[[" . $title->getPrefixedText() . "]]";
1419                                         $found = true;
1420                                 }
1421                         }
1422                 }
1423
1424                 # Recursive parsing, escaping and link table handling
1425                 # Only for HTML output
1426                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1427                         $text = wfEscapeWikiText( $text );
1428                 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1429                         # Clean up argument array: positional arguments are keyed 1, 2, ...; name=value arguments by their trimmed name
1430                         $assocArgs = array();
1431                         $index = 1;
1432                         foreach( $args as $arg ) {
1433                                 $eqpos = strpos( $arg, "=" );
1434                                 if ( $eqpos === false ) {
1435                                         $assocArgs[$index++] = $arg;
1436                                 } else {
1437                                         $name = trim( substr( $arg, 0, $eqpos ) );
1438                                         $value = trim( substr( $arg, $eqpos+1 ) );
1439                                         if ( $value === false ) {   # NOTE(review): trim() returns a string, so this looks unreachable
1440                                                 $value = "";
1441                                         }
1442                                         if ( $name !== false ) {   # NOTE(review): for the same reason this looks always true
1443                                                 $assocArgs[$name] = $value;
1444                                         }
1445                                 }
1446                         }
1447
1448                         # Do not enter included links in link table (NOTE(review): $title is also set by the LOCALURL branch)
1449                         if ( !is_null( $title ) ) {
1450                                 $wgLinkCache->suspend();
1451                         }
1452
1453                         # Run full parser on the included text, handing it the cleaned-up arguments
1454                         $text = $this->stripParse( $text, $newline, $assocArgs );
1455
1456                         # Resume the link cache and register the inclusion as a link
1457                         if ( !is_null( $title ) ) {
1458                                 $wgLinkCache->resume();
1459                                 $wgLinkCache->addLinkObj( $title );
1460                         }
1461                 }
1462
1463                 if ( !$found ) {
1464                         return $matches[0];
1465                 } else {
1466                         return $text;
1467                 }
1468         }
1469
1470         # Triple brace replacement -- used for template arguments
1471         function argSubstitution( $matches )
1472         {
1473                 # $matches: [0] whole {{{name}}} match, [1] optional leading newline, [2] argument name.
1474                 # A name missing from the innermost argument set leaves the match untouched.
1475                 $currentArgs = end( $this->mArgStack );
1476                 $argName = trim( $matches[2] );
1477                 if ( !array_key_exists( $argName, $currentArgs ) ) {
1478                         return $matches[0];
1479                 }
1480
1481                 # Known argument: run its value through stripParse with the captured newline
1482                 return $this->stripParse( $currentArgs[$argName], $matches[1], array() );
1483         }
1484
1485         # Returns true if the function is allowed to include this entity
1486         function incrementIncludeCount( $dbk )
1487         {
1488                 # Counts one more inclusion of $dbk and reports whether the total is still within
1489                 # MAX_INCLUDE_REPEAT; the count keeps growing even after the limit has been passed.
1490                 $timesIncluded = 1;
1491                 if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
1492                         $timesIncluded += $this->mIncludeCount[$dbk];
1493                 }
1494                 $this->mIncludeCount[$dbk] = $timesIncluded;
1495                 return $timesIncluded <= MAX_INCLUDE_REPEAT;
1496         }
1497
1498
1499         # Cleans up HTML, removes dangerous tags and attributes
1500         /* private */ function removeHTMLtags( $text )   # Returns $text with whitelisted tags kept and every other "<" / ">" escaped
1501         {
1502                 global $wgUseTidy, $wgUserHtml;
1503                 $fname = "Parser::removeHTMLtags";
1504                 wfProfileIn( $fname );
1505
1506                 if( $wgUserHtml ) {   # tag whitelists; all of them are empty when user HTML is switched off
1507                         $htmlpairs = array( # Tags that must be closed
1508                                 "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1509                                 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1510                                 "strike", "strong", "tt", "var", "div", "center",
1511                                 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1512                                 "ruby", "rt" , "rb" , "rp", "p"
1513                         );
1514                         $htmlsingle = array( # Tags that are never pushed on the open-tag stack
1515                                 "br", "hr", "li", "dt", "dd"
1516                         );
1517                         $htmlnest = array( # Tags that can be nested--??
1518                                 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1519                                 "dl", "font", "big", "small", "sub", "sup"
1520                         );
1521                         $tabletags = array( # Can only appear inside table
1522                                 "td", "th", "tr"
1523                         );
1524                 } else {
1525                         $htmlpairs = array();
1526                         $htmlsingle = array();
1527                         $htmlnest = array();
1528                         $tabletags = array();
1529                 }
1530
1531                 $htmlsingle = array_merge( $tabletags, $htmlsingle );   # table row/cell tags are treated as single tags as well
1532                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );   # every tag name that is allowed at all
1533
1534                 $htmlattrs = $this->getHTMLattrs () ;   # NOTE(review): $htmlattrs is not used again in this function
1535
1536                 # Remove HTML comments. NOTE(review): "$2" names a group the pattern does not have; presumably it expands to nothing
1537                 $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );
1538                 # Split at every "<": the first piece is plain text, each later piece starts at a candidate tag
1539                 $bits = explode( "<", $text );
1540                 $text = array_shift( $bits );
1541                 if(!$wgUseTidy) {   # without Tidy: check nesting here and close leftover tags at the end
1542                         $tagstack = array(); $tablestack = array();
1543                         foreach ( $bits as $x ) {
1544                                 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );   # presumably silences list() when the pattern does not match
1545                                 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1546                                 $x, $regs );   # groups: slash, tag name, attributes, closing bracket, following text
1547                                 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1548                                 error_reporting( $prev );
1549
1550                                 $badtag = 0 ;
1551                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1552                                         # Check our stack
1553                                         if ( $slash ) {
1554                                                 # Closing a tag...
1555                                                 if ( ! in_array( $t, $htmlsingle ) &&
1556                                                 ( $ot = @array_pop( $tagstack ) ) != $t ) {   # top of the stack must be the tag being closed
1557                                                         @array_push( $tagstack, $ot );   # mismatch: put the popped entry back
1558                                                         $badtag = 1;
1559                                                 } else {
1560                                                         if ( $t == "table" ) {
1561                                                                 $tagstack = array_pop( $tablestack );   # back to the stack saved when this table was opened
1562                                                         }
1563                                                         $newparams = "";
1564                                                 }
1565                                         } else {
1566                                                 # Keep track for later
1567                                                 if ( in_array( $t, $tabletags ) &&
1568                                                 ! in_array( "table", $tagstack ) ) {   # table-only tag with no table on the current stack
1569                                                         $badtag = 1;
1570                                                 } else if ( in_array( $t, $tagstack ) &&
1571                                                 ! in_array ( $t , $htmlnest ) ) {   # already open and not allowed to nest
1572                                                         $badtag = 1 ;
1573                                                 } else if ( ! in_array( $t, $htmlsingle ) ) {
1574                                                         if ( $t == "table" ) {
1575                                                                 array_push( $tablestack, $tagstack );   # save the outer stack; the table starts a fresh one
1576                                                                 $tagstack = array();
1577                                                         }
1578                                                         array_push( $tagstack, $t );
1579                                                 }
1580                                                 # Strip non-approved attributes from the tag
1581                                                 $newparams = $this->fixTagAttributes($params);
1582
1583                                         }
1584                                         if ( ! $badtag ) {
1585                                                 $rest = str_replace( ">", "&gt;", $rest );   # a ">" in the trailing text is escaped
1586                                                 $text .= "<$slash$t $newparams$brace$rest";
1587                                                 continue;
1588                                         }
1589                                 }
1590                                 $text .= "&lt;" . str_replace( ">", "&gt;", $x);   # rejected or unknown tag: emit it as literal text
1591                         }
1592                         # Close off any remaining tags
1593                         while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
1594                                 $text .= "</$t>\n";
1595                                 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1596                         }
1597                 } else {
1598                         # With Tidy: only tags and attributes are filtered, nesting is not checked (this might be possible using tidy itself)
1599                         foreach ( $bits as $x ) {
1600                                 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1601                                 $x, $regs );
1602                                 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1603                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1604                                         $newparams = $this->fixTagAttributes($params);
1605                                         $rest = str_replace( ">", "&gt;", $rest );
1606                                         $text .= "<$slash$t $newparams$brace$rest";
1607                                 } else {
1608                                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1609                                 }
1610                         }
1611                 }
1612                 wfProfileOut( $fname );
1613                 return $text;
1614         }
1615
1616
1617 /*
1618  *
1619  * This function accomplishes several tasks:
1620  * 1) Auto-number headings if that option is enabled
1621  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1622  * 3) Add a Table of contents on the top for users who have enabled the option
1623  * 4) Auto-anchor headings
1624  *
1625  * It loops through all headlines, collects the necessary data, then splits up the
1626  * string and re-inserts the newly formatted headlines.
1627  *
1628  */
1629
1630         /* private */ function formatHeadings( $text, $isMain=true )
1631         {
1632                 global $wgInputEncoding;
1633
1634                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1635                 $doShowToc = $this->mOptions->getShowToc();
1636                 if( !$this->mTitle->userCanEdit() ) {
1637                         $showEditLink = 0;
1638                         $rightClickHack = 0;
1639                 } else {
1640                         $showEditLink = $this->mOptions->getEditSection();
1641                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1642                 }
1643
1644                 # Inhibit editsection links if requested in the page
1645                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1646                 if( $esw->matchAndRemove( $text ) ) {
1647                         $showEditLink = 0;
1648                 }
1649                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1650                 # do not add TOC
1651                 $mw =& MagicWord::get( MAG_NOTOC );
1652                 if( $mw->matchAndRemove( $text ) ) {
1653                         $doShowToc = 0;
1654                 }
1655
1656                 # never add the TOC to the Main Page. This is an entry page that should not
1657                 # be more than 1-2 screens large anyway
1658                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1659                         $doShowToc = 0;
1660                 }
1661
1662                 # Get all headlines for numbering them and adding funky stuff like [edit]
1663                 # links - this is for later, but we need the number of headlines right now
1664                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1665
1666                 # if there are fewer than 4 headlines in the article, do not show TOC
1667                 if( $numMatches < 4 ) {
1668                         $doShowToc = 0;
1669                 }
1670
1671                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1672                 # override above conditions and always show TOC
1673                 $mw =& MagicWord::get( MAG_FORCETOC );
1674                 if ($mw->matchAndRemove( $text ) ) {
1675                         $doShowToc = 1;
1676                 }
1677
1678
1679                 # We need this to perform operations on the HTML
1680                 $sk =& $this->mOptions->getSkin();
1681
1682                 # headline counter
1683                 $headlineCount = 0;
1684
1685                 # Ugh .. the TOC should have neat indentation levels which can be
1686                 # passed to the skin functions. These are determined here
1687                 $toclevel = 0;
1688                 $toc = "";
1689                 $full = "";
1690                 $head = array();
1691                 $sublevelCount = array();
1692                 $level = 0;
1693                 $prevlevel = 0;
1694                 foreach( $matches[3] as $headline ) {
1695                         $numbering = "";
1696                         if( $level ) {
1697                                 $prevlevel = $level;
1698                         }
1699                         $level = $matches[1][$headlineCount];
1700                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1701                                 # reset when we enter a new level
1702                                 $sublevelCount[$level] = 0;
1703                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1704                                 $toclevel += $level - $prevlevel;
1705                         }
1706                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1707                                 # reset when we step back a level
1708                                 $sublevelCount[$level+1]=0;
1709                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1710                                 $toclevel -= $prevlevel - $level;
1711                         }
1712                         # count number of headlines for each level
1713                         @$sublevelCount[$level]++;
1714                         if( $doNumberHeadings || $doShowToc ) {
1715                                 $dot = 0;
1716                                 for( $i = 1; $i <= $level; $i++ ) {
1717                                         if( !empty( $sublevelCount[$i] ) ) {
1718                                                 if( $dot ) {
1719                                                         $numbering .= ".";
1720                                                 }
1721                                                 $numbering .= $sublevelCount[$i];
1722                                                 $dot = 1;
1723                                         }
1724                                 }
1725                         }
1726
1727                         # The canonized header is a version of the header text safe to use for links
1728                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1729                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1730
1731                         # strip out HTML
1732                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1733                         $tocline = trim( $canonized_headline );
1734                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
1735                         # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
1736                         $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
1737                         $refer[$headlineCount] = $canonized_headline;
1738
1739                         # count how many in assoc. array so we can track dupes in anchors
1740                         @$refers[$canonized_headline]++;
1741                         $refcount[$headlineCount]=$refers[$canonized_headline];
1742
1743                         # Prepend the number to the heading text
1744
1745                         if( $doNumberHeadings || $doShowToc ) {
1746                                 $tocline = $numbering . " " . $tocline;
1747
1748                                 # Don't number the heading if it is the only one (looks silly)
1749                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1750                                         # the two are different if the line contains a link
1751                                         $headline=$numbering . " " . $headline;
1752                                 }
1753                         }
1754
1755                         # Create the anchor for linking from the TOC to the section
1756                         $anchor = $canonized_headline;
1757                         if($refcount[$headlineCount] > 1 ) {
1758                                 $anchor .= "_" . $refcount[$headlineCount];
1759                         }
1760                         if( $doShowToc ) {
1761                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1762                         }
1763                         if( $showEditLink ) {
1764                                 if ( empty( $head[$headlineCount] ) ) {
1765                                         $head[$headlineCount] = "";
1766                                 }
1767                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1768                         }
1769
1770                         # Add the edit section span
1771                         if( $rightClickHack ) {
1772                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1773                         }
1774
1775                         # give headline the correct <h#> tag
1776                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1777
1778                         $headlineCount++;
1779                 }
1780
1781                 if( $doShowToc ) {
1782                         $toclines = $headlineCount;
1783                         $toc .= $sk->tocUnindent( $toclevel );
1784                         $toc = $sk->tocTable( $toc );
1785                 }
1786
1787                 # split up and insert constructed headlines
1788
1789                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1790                 $i = 0;
1791
1792                 foreach( $blocks as $block ) {
1793                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1794                             # This is the [edit] link that appears for the top block of text when
1795                                 # section editing is enabled
1796
1797                                 # Disabled because it broke block formatting
1798                                 # For example, a bullet point in the top line
1799                                 # $full .= $sk->editSectionLink(0);
1800                         }
1801                         $full .= $block;
1802                         if( $doShowToc && !$i && $isMain) {
1803                         # Top anchor now in skin
1804                                 $full = $full.$toc;
1805                         }
1806
1807                         if( !empty( $head[$i] ) ) {
1808                                 $full .= $head[$i];
1809                         }
1810                         $i++;
1811                 }
1812
1813                 return $full;
1814         }
1815
1816         /* private */ function magicISBN( $text )
1817         {
1818                 global $wgLang;
1819
1820                 $a = split( "ISBN ", " $text" );
1821                 if ( count ( $a ) < 2 ) return $text;
1822                 $text = substr( array_shift( $a ), 1);
1823                 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1824
1825                 foreach ( $a as $x ) {
1826                         $isbn = $blank = "" ;
1827                         while ( " " == $x{0} ) {
1828                                 $blank .= " ";
1829                                 $x = substr( $x, 1 );
1830                         }
1831                         while ( strstr( $valid, $x{0} ) != false ) {
1832                                 $isbn .= $x{0};
1833                                 $x = substr( $x, 1 );
1834                         }
1835                         $num = str_replace( "-", "", $isbn );
1836                         $num = str_replace( " ", "", $num );
1837
1838                         if ( "" == $num ) {
1839                                 $text .= "ISBN $blank$x";
1840                         } else {
1841                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1842                                 $text .= "<a href=\"" .
1843                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1844                                         "\" class=\"internal\">ISBN $isbn</a>";
1845                                 $text .= $x;
1846                         }
1847                 }
1848                 return $text;
1849         }
1850         /* private */ function magicRFC( $text )
1851         {
1852                 global $wgLang;
1853
1854                 $a = split( "RFC ", " $text" );
1855                 if ( count ( $a ) < 2 ) return $text;
1856                 $text = substr( array_shift( $a ), 1);
1857                 $valid = "0123456789";
1858
1859                 foreach ( $a as $x ) {
1860                         $rfc = $blank = "" ;
1861                         while ( " " == $x{0} ) {
1862                                 $blank .= " ";
1863                                 $x = substr( $x, 1 );
1864                         }
1865                         while ( strstr( $valid, $x{0} ) != false ) {
1866                                 $rfc .= $x{0};
1867                                 $x = substr( $x, 1 );
1868                         }
1869
1870                         if ( "" == $rfc ) {
1871                                 $text .= "RFC $blank$x";
1872                         } else {
1873                                 $url = wfmsg( "rfcurl" );
1874                                 $url = str_replace( "$1", $rfc, $url);
1875                                 $sk =& $this->mOptions->getSkin();
1876                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1877                                 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1878                         }
1879                 }
1880                 return $text;
1881         }
1882
1883         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1884         {
1885                 $this->mOptions = $options;
1886                 $this->mTitle =& $title;
1887                 $this->mOutputType = OT_WIKI;
1888
1889                 if ( $clearState ) {
1890                         $this->clearState();
1891                 }
1892
1893                 $stripState = false;
1894                 $pairs = array(
1895                         "\r\n" => "\n",
1896                         );
1897                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
1898                 // now with regexes
1899                 /*
1900                 $pairs = array(
1901                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1902                         "/<br *?>/i" => "<br />",
1903                 );
1904                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1905                 */
1906                 $text = $this->strip( $text, $stripState, false );
1907                 $text = $this->pstPass2( $text, $user );
1908                 $text = $this->unstrip( $text, $stripState );
1909                 return $text;
1910         }
1911
1912         /* private */ function pstPass2( $text, &$user )
1913         {
1914                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1915
1916                 # Variable replacement
1917                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1918                 $text = $this->replaceVariables( $text );
1919
1920                 # Signatures
1921                 #
1922                 $n = $user->getName();
1923                 $k = $user->getOption( "nickname" );
1924                 if ( "" == $k ) { $k = $n; }
1925                 if(isset($wgLocaltimezone)) {
1926                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1927                 }
1928                 /* Note: this is an ugly timezone hack for the European wikis */
1929                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1930                   " (" . date( "T" ) . ")";
1931                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1932
1933                 $text = preg_replace( "/~~~~~/", $d, $text );
1934                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1935                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1936                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1937                   Namespace::getUser() ) . ":$n|$k]]", $text );
1938
1939                 # Context links: [[|name]] and [[name (context)|]]
1940                 #
1941                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1942                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1943                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1944                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1945
1946                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1947                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1948                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1949                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1950                                                                                                                 # [[ns:page (cont)|]]
1951                 $context = "";
1952                 $t = $this->mTitle->getText();
1953                 if ( preg_match( $conpat, $t, $m ) ) {
1954                         $context = $m[2];
1955                 }
1956                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1957                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1958                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1959
1960                 if ( "" == $context ) {
1961                         $text = preg_replace( $p2, "[[\\1]]", $text );
1962                 } else {
1963                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1964                 }
1965
1966                 /*
1967                 $mw =& MagicWord::get( MAG_SUBST );
1968                 $wgCurParser = $this->fork();
1969                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1970                 $this->merge( $wgCurParser );
1971                 */
1972
1973                 # Trim trailing whitespace
1974                 # MAG_END (__END__) tag allows for trailing
1975                 # whitespace to be deliberately included
1976                 $text = rtrim( $text );
1977                 $mw =& MagicWord::get( MAG_END );
1978                 $mw->matchAndRemove( $text );
1979
1980                 return $text;
1981         }
1982
1983         # Set up some variables which are usually set up in parse()
1984         # so that an external function can call some class members with confidence
1985         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1986         {
1987                 $this->mTitle =& $title;
1988                 $this->mOptions = $options;
1989                 $this->mOutputType = $outputType;
1990                 if ( $clearState ) {
1991                         $this->clearState();
1992                 }
1993         }
1994
1995         function transformMsg( $text, $options ) {
1996                 global $wgTitle;
1997                 static $executing = false;
1998
1999                 # Guard against infinite recursion
2000                 if ( $executing ) {
2001                         return $text;
2002                 }
2003                 $executing = true;
2004
2005                 $this->mTitle = $wgTitle;
2006                 $this->mOptions = $options;
2007                 $this->mOutputType = OT_MSG;
2008                 $this->clearState();
2009                 $text = $this->replaceVariables( $text );
2010
2011                 $executing = false;
2012                 return $text;
2013         }
2014 }
2015
# Holds the result of a parse: the output text, the language links and
# category links, and a flag saying whether the text contains "old magic".
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # Constructor. Every argument is optional; the cache time starts out
        # as an empty string.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = "";
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each one hands back whatever wfSetVar() returns.
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Fold another ParserOutput into this one: its language links and
        # category links are appended to ours, and the old-magic flag is set if
        # it is set on either object. The text and cache time are not touched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links must come from $other's category links, not from
                # our own language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2049
# Bundle of settings that control a parse. The values are filled in from the
# site-wide globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
        # Storage -- all of these are private, go through the accessors below
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessor pairs. Setters hand back whatever wfSetVar() / wfSetRef() returns.

        function getUseTeX() { return $this->mUseTeX; }
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

        # The skin is the one option stored through wfSetRef() instead of wfSetVar()
        function getSkin() { return $this->mSkin; }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }

        function getDateFormat() { return $this->mDateFormat; }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

        function getEditSection() { return $this->mEditSection; }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

        function getNumberHeadings() { return $this->mNumberHeadings; }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

        function getShowToc() { return $this->mShowToc; }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Factory: build a ParserOptions object initialised from $user.
        /* static */ function newFromUser( &$user )
        {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fill in every option. Site-wide switches are copied from the $wg*
        # globals; the skin and the per-user preferences are read from
        # $userInput. When $userInput is empty, a fresh User object marked as
        # loaded is used in its place.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # User preferences
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2122
2123 # Regex callbacks, used in Parser::replaceVariables
# Forwards the match array to braceSubstitution() on the parser held in the
# global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        global $wgCurParser;

        $replacement = $wgCurParser->braceSubstitution( $matches );
        return $replacement;
}
2129
# Forwards the match array to argSubstitution() on the parser held in the
# global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
        global $wgCurParser;

        $replacement = $wgCurParser->argSubstitution( $matches );
        return $replacement;
}
2135
# Forwards the match array to variableSubstitution() on the parser held in
# the global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches )
{
        global $wgCurParser;

        $replacement = $wgCurParser->variableSubstitution( $matches );
        return $replacement;
}
2141
2142 ?>