includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   # Optional extensions are pulled in only when their switch is configured and
   # truthy. !empty() keeps the check quiet (no undefined-index notice) when a
   # switch has not been defined at all.
   if( !empty( $GLOBALS['wgUseWikiHiero'] ) ){
       require_once('extensions/wikihiero/wikihiero.php');
   }
   if( !empty( $GLOBALS['wgUseTimeline'] ) ){
       require_once('extensions/timeline/Timeline.php');
   }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  # Maximum number of times any one page may be included (see the O(N^2) note above)
  define( 'MAX_INCLUDE_REPEAT', 5 );

  # Allowed values for $mOutputType
  define( 'OT_HTML', 1 );
  define( 'OT_WIKI', 2 );
  define( 'OT_MSG', 3 );

  # Pseudo tag name for extractTags(): passing it makes that function strip
  # <!-- HTML comments --> instead of a regular <XML>-style tag. It must not be
  # anything we may want to use in wikisyntax.
  define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

  # Common prefix of the unique markers that stand in for stripped text
  define( 'UNIQ_PREFIX', 'NaodW29' );
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  # Constructor (PHP 4 style, named after the class): a new parser starts
  # out with freshly reset state.
  function Parser() {
      $this->clearState();
  }
  69
  # Reset every member that holds per-parse state so the object can be
  # reused for another parse. Called by the constructor and, unless the
  # caller opts out, at the start of parse().
  function clearState()
  {
      $this->mOutput = new ParserOutput;
      $this->mAutonumber = 0;
      $this->mLastSection = "";
      $this->mDTopen = false;
      # mInPre is declared among the members "cleared with clearState()";
      # reset it too so a previous parse cannot leak its state into the next.
      $this->mInPre = false;
      $this->mVariables = false;
      $this->mIncludeCount = array();
      $this->mStripState = array();
      $this->mArgStack = array();
  }
  81
  82         # First pass--just handle <nowiki> sections, pass the rest off
  83         # to internalParse() which does all the real work.
  84         #
  85         # Returns a ParserOutput
  86         #
  # Render wiki markup to HTML.
  #
  #   $text        wiki markup to render
  #   $title       title object for the page; stored by reference in mTitle
  #   $options     options object; stored in mOptions
  #   $linestart   passed through to internalParse() and doBlockLevels()
  #   $clearState  when true (default) the parser state is reset first
  #
  # Returns $this->mOutput with its text set to the generated HTML.
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
      global $wgUseTidy;
      $fname = "Parser::parse";
      wfProfileIn( $fname );

      if ( $clearState ) {
          $this->clearState();
      }

      $this->mOptions = $options;
      $this->mTitle =& $title;
      $this->mOutputType = OT_HTML;

      # Protected sections are swapped for markers, the rest is parsed, then
      # the markers are swapped back.
      $text = $this->strip( $text, $this->mStripState );
      $text = $this->internalParse( $text, $linestart );
      $text = $this->unstrip( $text, $this->mStripState );

      # Clean up special characters, only run once, just before doBlockLevels.
      # The patterns are applied in array order.
      if ( !$wgUseTidy ) {
          $fixtags = array(
              # french spaces, last one Guillemet-left
              # only if there is something before the space
              "/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
              # french spaces, Guillemet-right
              "/(\\302\\253) /i"=>"\\1&nbsp;",
              "/<hr *>/i" => '<hr />',
              "/<br *>/i" => '<br />',
              "/<center *>/i"=>'<div class="center">',
              "/<\\/center *>/i" => '</div>',
              # Clean up spare ampersands; note that we probably ought to be
              # more careful about named entities. Hex references accept only
              # real hex digits, so something like "&#xZZ;" is escaped as well.
              '/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
          );
      } else {
          # With tidy enabled only the typographic and <center> fixes are
          # applied here; the remaining clean-up is left to tidy().
          $fixtags = array(
              # french spaces, last one Guillemet-left
              "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
              # french spaces, Guillemet-right
              "/(\\302\\253) /i"=>"\\1&nbsp;",
              "/<center *>/i"=>'<div class="center">',
              "/<\\/center *>/i" => '</div>'
          );
      }
      $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

      # only once and last
      $text = $this->doBlockLevels( $text, $linestart );
      if ( $wgUseTidy ) {
          $text = $this->tidy($text);
      }
      $this->mOutput->setText( $text );
      wfProfileOut( $fname );
      return $this->mOutput;
  }
 142
 # Returns a random string made of two hexadecimal numbers glued together;
 # each one is drawn from the range 0..0x7fffffff.
 /* static */ function getRandomString()
 {
     $first = mt_rand( 0, 0x7fffffff );
     $second = mt_rand( 0, 0x7fffffff );
     return dechex( $first ) . dechex( $second );
 }
 147
 148         # Replaces all occurrences of <$tag>content</$tag> in the text
 149         # with a random marker and returns the new text. the output parameter
 150         # $content will be an associative array filled with data on the form
 151         # $unique_marker => content.
 152
 153         # If $content is already set, the additional entries will be appended
 154
 155         # If $tag is set to STRIP_COMMENTS, the function will extract
 156         # <!-- HTML comments -->
 157
 #   $tag          tag name to look for (matched case-insensitively), or
 #                 STRIP_COMMENTS to extract HTML comments
 #   $text         text to scan
 #   $content      in/out: map of marker => extracted inner text
 #   $uniq_prefix  string put in front of every generated marker
 #
 # Returns $text with every extracted section replaced by its marker.
 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
     $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
     if ( !$content ) {
         $content = array( );
     }

     # The opening/closing patterns do not change while scanning, so build
     # them once up front.
     if ( $tag == STRIP_COMMENTS ) {
         $openPattern = "/<!--/i";
         $closePattern = "/-->/i";
     } else {
         $openPattern = "/<\\s*$tag\\s*>/i";
         $closePattern = "/<\\/\\s*$tag\\s*>/i";
     }

     $n = 1;
     $stripped = "";

     while ( "" != $text ) {
         # $p[0] is the text before the next opening tag, $p[1] the text after it
         $p = preg_split( $openPattern, $text, 2 );
         $stripped .= $p[0];
         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
             # No (further) opening tag, or nothing follows it: done
             $text = "";
         } else {
             # $q[0] is the tag's inner text, $q[1] whatever follows the closing tag
             $q = preg_split( $closePattern, $p[1], 2 );
             $marker = $rnd . sprintf("%08X", $n++);
             $content[$marker] = $q[0];
             $stripped .= $marker;
             # A section that is never closed swallows the rest of the text,
             # so there is no remainder to continue with.
             $text = isset( $q[1] ) ? $q[1] : "";
         }
     }
     return $stripped;
 }
 189
 190         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 191         # If $render is set, performs necessary rendering operations on plugins
 192         # Returns the text, and fills an array with data needed in unstrip()
 193         # If the $state is already a valid strip state, it adds to the state
 194
 195         # When $stripcomments is set, HTML comments <!-- like this -->
 196         # will be stripped in addition to other tags. This is important
 197         # for section editing, where these comments cause confusion when
 198         # counting the sections in the wikisource
 #   $text           text to process
 #   $state          in/out strip state: map of category => (marker => replacement).
 #                   Missing categories are created; existing ones are extended.
 #   $stripcomments  also strip HTML comments when true
 #
 # Rendering happens only when the output type is OT_HTML; otherwise every
 # section is stored wrapped in its original tag so unstrip() restores the
 # source markup.
 function strip( $text, &$state, $stripcomments = false )
 {
     $render = ($this->mOutputType == OT_HTML);
     $nowiki_content = array();
     $hiero_content = array();
     $timeline_content = array();
     $math_content = array();
     $pre_content = array();
     $comment_content = array();

     # NOTE: occurrences of the marker prefix in the input text itself are
     # not escaped here.
     $uniq_prefix = UNIQ_PREFIX;

     $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
     foreach( $nowiki_content as $marker => $content ){
         if( $render ){
             $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
         } else {
             $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
         }
     }

     $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
     foreach( $hiero_content as $marker => $content ){
         if( $render && $GLOBALS['wgUseWikiHiero']){
             $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
         } else {
             $hiero_content[$marker] = "<hiero>$content</hiero>";
         }
     }

     $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
     foreach( $timeline_content as $marker => $content ){
         if( $render && $GLOBALS['wgUseTimeline']){
             $timeline_content[$marker] = renderTimeline( $content );
         } else {
             $timeline_content[$marker] = "<timeline>$content</timeline>";
         }
     }

     $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
     foreach( $math_content as $marker => $content ){
         if( $render ) {
             if( $this->mOptions->getUseTeX() ) {
                 $math_content[$marker] = renderMath( $content );
             } else {
                 # TeX disabled: show the source with escaped tags. The
                 # second tag is the closing one, hence "/math".
                 $math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
             }
         } else {
             $math_content[$marker] = "<math>$content</math>";
         }
     }

     $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
     foreach( $pre_content as $marker => $content ){
         if( $render ){
             $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
         } else {
             $pre_content[$marker] = "<pre>$content</pre>";
         }
     }

     if($stripcomments) {
         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
         foreach( $comment_content as $marker => $content ){
             $comment_content[$marker] = "<!--$content-->";
         }
     }

     # Merge with the pre-existing state, if there is one. Each category is
     # merged separately so that a state lacking some categories (for example
     # one started by insertStripItem()) is completed instead of causing an
     # "array + null" error.
     $extracted = array(
       'nowiki' => $nowiki_content,
       'hiero' => $hiero_content,
       'timeline' => $timeline_content,
       'math' => $math_content,
       'pre' => $pre_content,
       'comment' => $comment_content
     );
     if ( !is_array( $state ) ) {
         $state = array();
     }
     foreach ( $extracted as $category => $items ) {
         if ( isset( $state[$category] ) && is_array( $state[$category] ) ) {
             $state[$category] = $state[$category] + $items;
         } else {
             $state[$category] = $items;
         }
     }
     return $text;
 }
 288
 # Put the stored replacement texts back in place of their markers.
 #
 #   $text   text containing markers
 #   $state  strip state: map of category => (marker => replacement)
 #
 # Returns the text with all known markers replaced. A state that is not an
 # array (nothing was ever stripped) leaves the text untouched.
 function unstrip( $text, &$state )
 {
     if ( !is_array( $state ) ) {
         return $text;
     }

     # Must expand in reverse order, otherwise nested tags will be corrupted:
     # categories last-to-first, and markers last-to-first within a category.
     foreach ( array_reverse( $state, true ) as $contentDict ) {
         if ( !is_array( $contentDict ) ) {
             continue;
         }
         foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
             $text = str_replace( $marker, $content, $text );
         }
     }

     return $text;
 }
 301
 302         # Add an item to the strip state
 303         # Returns the unique tag which must be inserted into the stripped text
 304         # The tag will be replaced with the original text in unstrip()
 305
 #   $text   text to store
 #   $state  in/out strip state; created when empty
 #
 # Returns the marker under which $text was stored in $state['item'].
 function insertStripItem( $text, &$state )
 {
     $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
     if ( !$state ) {
         # Create the same categories, in the same order, as strip() does, so
         # a later strip() on this state finds every category it merges into.
         $state = array(
           'nowiki' => array(),
           'hiero' => array(),
           'timeline' => array(),
           'math' => array(),
           'pre' => array(),
           'comment' => array()
         );
     }
     $state['item'][$rnd] = $text;
     return $rnd;
 }
 320
 321         # This method generates the list of subcategories and pages for a category
 # Returns an HTML fragment listing subcategories and member pages, or ""
 # when category magic is switched off or the current title is not in the
 # category namespace.
 #
 # The skin comes from the parser options, like in internalParse(), so that
 # the parser does not depend on $wgUser.
 function categoryMagic ()
 {
     global $wgLang ;
     if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

     $cns = Namespace::getCategory() ;
     if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

     $r = "<br style=\"clear:both;\"/>\n";

     $sk =& $this->mOptions->getSkin() ;

     $articles = array() ;
     $children = array() ;
     $data = array () ;

     # FIXME: add limits
     $t = wfStrencode( $this->mTitle->getDBKey() );
     $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
     $res = wfQuery ( $sql, DB_READ ) ;
     while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
     # Every row has been copied out, so the result can be released right away
     wfFreeResult ( $res ) ;

     # For all pages that link to this category
     foreach ( $data AS $x )
     {
         # Build the prefixed title: "<namespace>:<title>", or just the title
         # when the namespace text is empty
         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
         if ( $t != "" ) $t .= ":" ;
         $t .= $x->cur_title ;

         if ( $x->cur_namespace == $cns ) {
             array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
         } else {
             array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
         }
     }

     # Showing subcategories
     if ( count ( $children ) > 0 ) {
         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
         $r .= implode ( ", " , $children ) ;
     }

     # Showing pages in this category
     if ( count ( $articles ) > 0 ) {
         $ti = $this->mTitle->getText() ;
         $h =  wfMsg( "category_header", $ti );
         $r .= "<h2>{$h}</h2>\n" ;
         $r .= implode ( ", " , $articles ) ;
     }

     return $r ;
 }
 378
 # Returns the list of attribute names that fixTagAttributes() lets through.
 # Each name appears exactly once ("width" was previously listed twice).
 function getHTMLattrs ()
 {
     $htmlattrs = array( # Allowed attributes--no scripting, etc.
         "title", "align", "lang", "dir", "width", "height",
         "bgcolor", "clear", /* BR */ "noshade", /* HR */
         "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
         /* FONT */ "type", "start", "value", "compact",
         /* For various lists, mostly deprecated but safe */
         "summary", "border", "frame", "rules",
         "cellspacing", "cellpadding", "valign", "char",
         "charoff", "colgroup", "col", "span", "abbr", "axis",
         "headers", "scope", "rowspan", "colspan", /* Tables */
         "id", "class", "name", "style" /* For CSS */
         );
     return $htmlattrs ;
 }
 395
 # Sanitise the attribute part of an HTML tag.
 #
 #   $t  raw attribute text, e.g. 'border="1" onclick="..."'
 #
 # Returns the trimmed attribute text with every attribute that is not in
 # getHTMLattrs() removed, or "" when a style attribute looks dangerous.
 function fixTagAttributes ( $t )
 {
     if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

     # Strip non-approved attributes from the tag. A callback is used rather
     # than the /e modifier: /e evaluates the matched (user supplied) text
     # inside a double-quoted PHP string, which lets input such as
     # {${...}} run code.
     $t = preg_replace_callback(
         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
         array( $this, 'filterTagAttribute' ),
         $t);

     # Strip javascript "expression" from stylesheets. Brute force approach:
     # If anything offensive is found, all attributes of the HTML tag are dropped
     if( preg_match(
         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
         wfMungeToUtf8( $t ) ) )
     {
         $t="";
     }

     return trim ( $t ) ;
 }

 # preg_replace_callback() helper for fixTagAttributes().
 #
 #   $m  match array: $m[1] is the attribute name, $m[3] (when present) its
 #       value exactly as written, including any quotes
 #
 # Returns "name", "name=value", or "" for a name that is not allowed.
 /* private */ function filterTagAttribute( $m )
 {
     if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
         return '';
     }
     # The value group is absent for attributes written without "=..."
     $value = isset( $m[3] ) ? $m[3] : '';
     if ( $value !== '' ) {
         return $m[1] . '=' . $value;
     }
     return $m[1];
 }
 418
 419         /* interface with html tidy, used if $wgUseTidy = true */
 # Pipe $text, wrapped in a minimal XHTML document, through the external
 # tidy binary and return what tidy writes to its standard output.
 #
 #   $text  HTML fragment to clean up
 #
 # If tidy yields nothing for a non-empty input, the original fragment is
 # returned with a trailing HTML comment noting the failure.
 function tidy ( $text ) {
     global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
     global $wgInputEncoding, $wgOutputEncoding;
     $fname = "Parser::tidy";
     wfProfileIn( $fname );

     $cleansource = '';

     # Build the option string in a local copy; appending to the global
     # itself would add another encoding flag on every call.
     $opts = $wgTidyOpts;
     switch(strtoupper($wgOutputEncoding)) {
         case 'ISO-8859-1':
             $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
             break;
         case 'UTF-8':
             $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
             break;
         default:
             $opts .= ' -raw';
     }

     # Only the wrapped copy is sent to tidy; $text stays untouched so the
     # failure path below can hand back the original fragment.
     $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
         ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
         '<head><title>test</title></head><body>'.$text.'</body></html>';
     $descriptorspec = array(
         0 => array("pipe", "r"),
         1 => array("pipe", "w"),
         2 => array("file", "/dev/null", "a")
     );
     $process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
     if (is_resource($process)) {
         fwrite($pipes[0], $wrappedtext);
         fclose($pipes[0]);
         while (!feof($pipes[1])) {
             $cleansource .= fgets($pipes[1], 1024);
         }
         fclose($pipes[1]);
         proc_close($process);
     }

     wfProfileOut( $fname );

     if( $cleansource == '' && $text != '') {
         wfDebug( "Tidy error detected!\n" );
         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
     } else {
         return $cleansource;
     }
 }
 466
 # Convert wiki table markup to HTML table markup, line by line.
 #
 # Recognised line starts (after trimming):
 #   {|   open a table; the rest of the line is its attributes
 #   |}   close the current table
 #   |-   start a new row; the rest of the line (after the dashes) is the row's attributes
 #   |+   caption
 #   |    data cell(s), several per line separated by "||"
 #   !    header cell(s), separated by "!!" or "||"
 # Inside a cell, text before the first single "|" is taken as attributes.
 # Lines outside any table are left alone. Tables still open at the end of
 # the text are closed.
 #
 #   $t  text to convert
 #
 # Returns the converted text.
 function doTableStuff ( $t )
 {
     $t = explode ( "\n" , $t ) ;
     # One stack entry per currently open (possibly nested) table:
     $td = array () ; # Is currently a td tag open?
     $ltd = array () ; # Was it TD or TH (or caption)?
     $tr = array () ; # Is currently a tr tag open?
     $ltr = array () ; # tr attributes
     foreach ( $t AS $k => $x )
     {
         $x = trim ( $x ) ;
         $fc = substr ( $x , 0 , 1 ) ;
         $fc2 = substr ( $x , 0 , 2 ) ;
         if ( "{|" == $fc2 )
         {
             # Everything after the two-character "{|" is attribute text;
             # fixTagAttributes() trims it, so a separating blank is optional.
             $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $tr , false ) ;
             array_push ( $ltr , "" ) ;
         }
         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
         else if ( "|}" == $fc2 )
         {
             $z = "</table>\n" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
         }
         else if ( "|-" == $fc2 ) # Allows for |---------------
         {
             $x = substr ( $x , 1 ) ;
             while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
             # Close whatever cell/row is open; the new <tr> itself is only
             # written once the first cell of the row shows up.
             $z = "" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
             array_push ( $tr , false ) ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
         }
         else if ( "|" == $fc || "!" == $fc ) # Cells; "|+" is a caption
         {
             if ( "|+" == $fc2 )
             {
                 $fc = "+" ;
                 $x = substr ( $x , 1 ) ;
             }
             $after = substr ( $x , 1 ) ;
             if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
             $after = explode ( "||" , $after ) ;
             $t[$k] = "" ;
             foreach ( $after AS $theline )
             {
                 $z = "" ;
                 if ( $fc != "+" )
                 {
                     # Open the row now if it is not open yet
                     $tra = array_pop ( $ltr ) ;
                     if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                     array_push ( $tr , true ) ;
                     array_push ( $ltr , "" ) ;
                 }

                 # Close the previous cell, then remember the new cell's tag
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                 if ( $fc == "|" ) $l = "td" ;
                 else if ( $fc == "!" ) $l = "th" ;
                 else if ( $fc == "+" ) $l = "caption" ;
                 else $l = "" ;
                 array_push ( $ltd , $l ) ;

                 # "attributes|content" or just "content"
                 $y = explode ( "|" , $theline , 2 ) ;
                 if ( count ( $y ) == 1 ) $cell = "{$z}<{$l}>{$y[0]}" ;
                 else $cell = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                 $t[$k] .= $cell ;
                 array_push ( $td , true ) ;
             }
         }
     }

     # Closing open td, tr && table. The cell is closed with the tag it was
     # opened with, which may be th or caption rather than td.
     while ( count ( $td ) > 0 )
     {
         $l = array_pop ( $ltd ) ;
         if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
         $t[] = "</table>" ;
     }

     $t = implode ( "\n" , $t ) ;
     return $t ;
 }
 566
 567         # Parses the text and adds the result to the strip state
 568         # Returns the strip tag
 #   $text       wiki text to parse
 #   $linestart  passed on to internalParse(); when true the parsed result
 #               is stored with a newline in front of it
 #   $args       passed on to internalParse()
 function stripParse( $text, $linestart, $args )
 {
     $stripped = $this->strip( $text, $this->mStripState );
     # The final "false" marks this as a non-main parse
     $parsed = $this->internalParse( $stripped, $linestart, $args, false );
     $item = $linestart ? "\n" . $parsed : $parsed;
     return $this->insertStripItem( $item, $this->mStripState );
 }
 578
 # Run the main conversion pipeline over text whose protected sections have
 # already been stripped. The steps run in a fixed order.
 #
 #   $text       stripped wiki text
 #   $linestart  not used by this method itself
 #   $args       handed to replaceVariables()
 #   $isMain     handed to formatHeadings()
 #
 # Returns the converted text. On the first call for this object the output
 # of categoryMagic() is appended.
 function internalParse( $text, $linestart, $args = array(), $isMain=true )
 {
     $fname = "Parser::internalParse";
     wfProfileIn( $fname );

     $text = $this->replaceVariables( $this->removeHTMLtags( $text ), $args );

     # Four or more dashes at the start of the text or of a line become a rule
     $text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );

     $text = $this->doHeadings( $text );
     if ( $this->mOptions->getUseDynamicDates() ) {
         global $wgDateFormatter;
         $dateFormat = $this->mOptions->getDateFormat();
         $text = $wgDateFormatter->reformat( $dateFormat, $text );
     }
     $text = $this->doAllQuotes( $text );
     $text = $this->replaceExternalLinks( $text );
     # Internal links are replaced in two consecutive passes.
     # NOTE(review): presumably the second pass picks up links left over by
     # the first one -- confirm before collapsing into a single call.
     $text = $this->replaceInternalLinks ( $this->replaceInternalLinks ( $text ) );
     $text = $this->doTableStuff ( $text ) ;
     $text = $this->magicISBN( $text );
     $text = $this->magicRFC( $text );
     $text = $this->formatHeadings( $text, $isMain );

     $skin =& $this->mOptions->getSkin();
     $text = $skin->transformContent( $text );

     # The category listing is appended once per parser object; the flag is
     # never unset in this method.
     if ( !isset ( $this->categoryMagicDone ) ) {
         $text .= $this->categoryMagic () ;
         $this->categoryMagicDone = true ;
     }

     wfProfileOut( $fname );
     return $text;
 }
 614
 615
 # Turn "== Heading ==" style lines into <hN> elements, where N is the
 # number of "=" signs on each side (1 to 6).
 #
 #   $text  text to convert
 #
 # Returns the converted text.
 /* private */ function doHeadings( $text )
 {
     # Work from the deepest level down so a run of many "=" is not
     # consumed by a shallower heading pattern first.
     $level = 6;
     while ( $level >= 1 ) {
         $marks = str_repeat( "=", $level );
         $pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
         $replacement = "<h{$level}>\\1</h{$level}>\\2";
         $text = preg_replace( $pattern, $replacement, $text );
         --$level;
     }
     return $text;
 }
 625
 626         /* private */ function doAllQuotes( $text )
 627         {
 628                 $outtext = "";
 629                 $lines = explode( "\n", $text );
 630                 foreach ( $lines as $line ) {
 631                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\n";
 632                 }
 633                 return substr($outtext, 0,-1);
 634         }
 635
             # Recursively convert wiki quote markup: '' toggles <em>, ''' toggles <strong>.
             # doAllQuotes() calls this once per line with $pre = "" and $mode = "".
             #
             # $pre  - text already consumed that cannot be emitted yet because the
             #         nesting order of the open tags is not known
             # $text - remaining text to scan
             # $mode - which markup is currently open:
             #           ""         nothing
             #           "em"       '' is open
             #           "strong"   ''' is open
             #           "emstrong" '' was opened first, then '''; $pre is the text between them
             #           "strongem" ''' was opened first, then ''; $pre is the text between them
             #           "both"     '' and ''' were opened with no text between them
             #
             # Returns the converted text. Markup still open when $text runs out is
             # closed implicitly by the final else branch.
 636         /* private */ function doQuotes( $pre, $text, $mode )
 637         {
                     # Ungreedy match: $m[1] is the text before the first '' and $m[2]
                     # everything after it.
 638                 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
 639                         $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
 640                         $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
                             # A third apostrophe right after '' makes this a ''' (strong) marker.
 641                         if ( substr ($m[2], 0, 1) == "'" ) {
 642                                 $m[2] = substr ($m[2], 1);
 643                                 if ($mode == "em") {
 644                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
 645                                 } else if ($mode == "strong") {
 646                                         return $m1_strong . $this->doQuotes ( "", $m[2], "" );
 647                                 } else if (($mode == "emstrong") || ($mode == "both")) {
 648                                         return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
 649                                 } else if ($mode == "strongem") {
 650                                         return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
 651                                 } else {
 652                                         return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
 653                                 }
 654                         } else {
                                     # Plain '' (em) marker.
 655                                 if ($mode == "strong") {
 656                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
 657                                 } else if ($mode == "em") {
 658                                         return $m1_em . $this->doQuotes ( "", $m[2], "" );
 659                                 } else if ($mode == "emstrong") {
 660                                         return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
 661                                 } else if (($mode == "strongem") || ($mode == "both")) {
 662                                         return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
 663                                 } else {
 664                                         return $m[1] . $this->doQuotes ( "", $m[2], "em" );
 665                                 }
 666                         }
 667                 } else {
                             # No quote marker left: wrap the remaining text according to
                             # whatever markup is still open.
 668                         $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
 669                         $text_em = ($text == "") ? "" : "<em>{$text}</em>";
 670                         if ($mode == "") {
 671                                 return $pre . $text;
 672                         } else if ($mode == "em") {
 673                                 return $pre . $text_em;
 674                         } else if ($mode == "strong") {
 675                                 return $pre . $text_strong;
 676                         } else if ($mode == "strongem") {
 677                                 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
 678                         } else {
                                     # "emstrong" and "both" end up here.
 679                                 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
 680                         }
 681                 }
 682         }
 683
 684         # Note: we have to do external links before the internal ones,
 685         # and otherwise take great care in the order of things here, so
 686         # that we don't end up interpreting some URLs twice.
 687
 688         /* private */ function replaceExternalLinks( $text )
 689         {
 690                 $fname = "Parser::replaceExternalLinks";
 691                 wfProfileIn( $fname );
 692                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 693                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 694                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 695                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 696                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 697                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 698                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 699                 wfProfileOut( $fname );
 700                 return $text;
 701         }
 702
             # Turn URLs of one protocol inside $s into HTML links.
             #
             # $s          - text to process
             # $protocol   - scheme without the colon, e.g. "http"
             # $autonumber - if true, bracketed links without a description are
             #               labelled "[1]", "[2]", ... using $this->mAutonumber;
             #               image URLs are converted to images only when this is
             #               true and the parser options allow external images
             #
             # Pass 1 handles free-standing URLs (not directly preceded by "["),
             # pass 2 handles the bracketed forms "[url]" and "[url description]".
             # Returns the processed text.
 703         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 704         {
                     # Placeholder written instead of the protocol into generated markup and
                     # swapped back by the str_replace() below; presumably this keeps URLs
                     # that were already converted from being matched again by $e2.
 705                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
                     # Characters allowed inside a URL.
 706                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 707
 708                 # this is  the list of separators that should be ignored if they
 709                 # are the last character of an URL but that should be included
 710                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 711                 # in this case, the last comma should not become part of the URL,
 712                 # but in "www.foo.com/123,2342,32.htm" it should.
 713                 $sep = ",;\.:";
                     # Characters allowed in the file-name part of an image URL.
 714                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 715                 $images = "gif|png|jpg|jpeg";
 716
 717                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 718                 # they are interpreted as part of the string (used to tell PHP
 719                 # that the content of the string should be inserted there).
                     # $e1: free-standing URL whose last path segment ends in an image extension.
 720                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 721                   "((?i){$images})([^{$uc}]|$)/";
 722
                     # $e2: any other free-standing URL; a trailing separator is left outside the link.
 723                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 724                 $sk =& $this->mOptions->getSkin();
 725
 726                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 727                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 728                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 729                 }
 730                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 731                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 732                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 733                   "</a>\\5", $s );
 734                 $s = str_replace( $unique, $protocol, $s );
 735
                     # Pass 2: split on "[protocol:". A space is prepended before splitting
                     # and stripped again from the first chunk afterwards.
 736                 $a = explode( "[{$protocol}:", " " . $s );
 737                 $s = array_shift( $a );
 738                 $s = substr( $s, 1 );
 739
                     # From here on: $e1 matches "url]rest", $e2 matches "url description]rest".
 740                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 741                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 742
 743                 foreach ( $a as $line ) {
 744                         if ( preg_match( $e1, $line, $m ) ) {
 745                                 $link = "{$protocol}:{$m[1]}";
 746                                 $trail = $m[2];
 747                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 748                                 else { $text = wfEscapeHTML( $link ); }
 749                         } else if ( preg_match( $e2, $line, $m ) ) {
 750                                 $link = "{$protocol}:{$m[1]}";
 751                                 $text = $m[2];
 752                                 $trail = $m[3];
 753                         } else {
                                     # Not a well-formed bracketed link: put the text back unchanged.
 754                                 $s .= "[{$protocol}:" . $line;
 755                                 continue;
 756                         }
                             # No URL expansion when the link text already is the URL, or when the
                             # URL ends in "protocol://<text>" optionally followed by a slash.
 757                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 758                                 $paren = "";
 759                         } else {
 760                                 # Expand the URL for printable version
 761                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 762                         }
 763                         $la = $sk->getExternalLinkAttributes( $link, $text );
 764                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 765
 766                 }
 767                 return $s;
 768         }
 769
 770
 771         /* private */ function replaceInternalLinks( $s )
 772         {
 773                 global $wgLang, $wgLinkCache;
 774                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 775                 static $fname = "Parser::replaceInternalLink" ;
 776                 wfProfileIn( $fname );
 777
 778                 wfProfileIn( "$fname-setup" );
 779                 static $tc = FALSE;
 780                 # the % is needed to support urlencoded titles as well
 781                 if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
 782                 $sk =& $this->mOptions->getSkin();
 783
 784                 $a = explode( "[[", " " . $s );
 785                 $s = array_shift( $a );
 786                 $s = substr( $s, 1 );
 787
 788                 # Match a link having the form [[namespace:link|alternate]]trail
 789                 static $e1 = FALSE;
 790                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 791                 # Match the end of a line for a word that's not followed by whitespace,
 792                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 793                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 794                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 795                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 796
 797
 798                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 799                 static $image = FALSE;
 800                 static $special = FALSE;
 801                 static $media = FALSE;
 802                 static $category = FALSE;
 803                 if ( !$image ) { $image = Namespace::getImage(); }
 804                 if ( !$special ) { $special = Namespace::getSpecial(); }
 805                 if ( !$media ) { $media = Namespace::getMedia(); }
 806                 if ( !$category ) { $category = Namespace::getCategory(); }
 807
 808                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 809
 810                 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
 811                         $new_prefix = $m[2];
 812                         $s = $m[1];
 813                 } else {
 814                         $new_prefix="";
 815                 }
 816
 817                 wfProfileOut( "$fname-setup" );
 818
 819                 foreach ( $a as $line ) {
 820                         $prefix = $new_prefix;
 821
 822                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 823                                 $text = $m[2];
 824                                 # fix up urlencoded title texts
 825                                 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
 826                                 $trail = $m[3];
 827                         } else { # Invalid form; output directly
 828                                 $s .= $prefix . "[[" . $line ;
 829                                 wfProfileOut( $fname );
 830                                 continue;
 831                         }
 832
 833                         /* Valid link forms:
 834                         Foobar -- normal
 835                         :Foobar -- override special treatment of prefix (images, language links)
 836                         /Foobar -- convert to CurrentPage/Foobar
 837                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 838                         */
 839                         $c = substr($m[1],0,1);
 840                         $noforce = ($c != ":");
 841                         if( $c == "/" ) { # subpage
 842                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 843                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 844                                         $noslash=$m[1];
 845                                 } else {
 846                                         $noslash=substr($m[1],1);
 847                                 }
 848                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
 849                                         $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 850                                         if( "" == $text ) {
 851                                                 $text= $m[1];
 852                                         } # this might be changed for ugliness reasons
 853                                 } else {
 854                                         $link = $noslash; # no subpage allowed, use standard link
 855                                 }
 856                         } elseif( $noforce ) { # no subpage
 857                                 $link = $m[1];
 858                         } else {
 859                                 $link = substr( $m[1], 1 );
 860                         }
 861                         $wasblank = ( "" == $text );
 862                         if( $wasblank )
 863                         $text = $link;
 864
 865                         $nt = Title::newFromText( $link );
 866                         if( !$nt ) {
 867                                 $s .= $prefix . "[[" . $line;
 868                                 wfProfileOut( $fname );
 869                                 continue;
 870                         }
 871                         $ns = $nt->getNamespace();
 872                         $iw = $nt->getInterWiki();
 873                         if( $noforce ) {
 874                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 875                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 876                                         $tmp = $prefix . $trail ;
 877                                         wfProfileOut( $fname );
 878                                         $s .= (trim($tmp) == '')? '': $tmp;
 879                                         continue;
 880                                 }
 881                                 if ( $ns == $image ) {
 882                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 883                                         $wgLinkCache->addImageLinkObj( $nt );
 884                                         wfProfileOut( $fname );
 885                                         continue;
 886                                 }
 887                                 if ( $ns == $category ) {
 888                                         $t = $nt->getText() ;
 889                                         $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 890
 891                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
 892                                         $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 893                                         $wgLinkCache->resume();
 894
 895                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
 896                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
 897                                         $this->mOutput->mCategoryLinks[] = $t ;
 898                                         $s .= $prefix . $trail ;
 899                                         wfProfileOut( $fname );
 900                                         continue;
 901                                 }
 902                         }
 903                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 904                         ( strpos( $link, "#" ) == FALSE ) ) {
 905                                 # Self-links are handled specially; generally de-link and change to bold.
 906                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 907                                 wfProfileOut( $fname );
 908                                 continue;
 909                         }
 910
 911                         if( $ns == $media ) {
 912                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 913                                 $wgLinkCache->addImageLinkObj( $nt );
 914                                 wfProfileOut( $fname );
 915                                 continue;
 916                         } elseif( $ns == $special ) {
 917                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 918                                 wfProfileOut( $fname );
 919                                 continue;
 920                         }
 921                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 922                 }
 923                 wfProfileOut( $fname );
 924                 return $s;
 925         }
 926
 927         # Some functions here used by doBlockLevels()
 928         #
 929         /* private */ function closeParagraph()
 930         {
 931                 $result = "";
 932                 if ( '' != $this->mLastSection ) {
 933                         $result = "</" . $this->mLastSection  . ">\n";
 934                 }
 935                 $this->mInPre = false;
 936                 $this->mLastSection = "";
 937                 return $result;
 938         }
 939         # getCommon() returns the length of the longest common prefix
 940         # of both arguments, i.e. how many leading characters they share.
 941         #
 942         /* private */ function getCommon( $st1, $st2 )
 943         {
 944                 $fl = strlen( $st1 );
 945                 $shorter = strlen( $st2 );
 946                 if ( $fl < $shorter ) { $shorter = $fl; }
 947
 948                 for ( $i = 0; $i < $shorter; ++$i ) {
 949                         if ( $st1{$i} != $st2{$i} ) { break; }
 950                 }
 951                 return $i;
 952         }
 953         # These next three functions open, continue, and close the list
 954         # element appropriate to the prefix character passed into them.
 955         #
 956         /* private */ function openList( $char )
 957     {
 958                 $result = $this->closeParagraph();
 959
 960                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 961                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 962                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 963                 else if ( ";" == $char ) {
 964                         $result .= "<dl><dt>";
 965                         $this->mDTopen = true;
 966                 }
 967                 else { $result = "<!-- ERR 1 -->"; }
 968
 969                 return $result;
 970         }
 971
 972         /* private */ function nextItem( $char )
 973         {
 974                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 975                 else if ( ":" == $char || ";" == $char ) {
 976                         $close = "</dd>";
 977                         if ( $this->mDTopen ) { $close = "</dt>"; }
 978                         if ( ";" == $char ) {
 979                                 $this->mDTopen = true;
 980                                 return $close . "<dt>";
 981                         } else {
 982                                 $this->mDTopen = false;
 983                                 return $close . "<dd>";
 984                         }
 985                 }
 986                 return "<!-- ERR 2 -->";
 987         }
 988
 989         /* private */function closeList( $char )
 990         {
 991                 if ( "*" == $char ) { $text = "</li></ul>"; }
 992                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 993                 else if ( ":" == $char ) {
 994                         if ( $this->mDTopen ) {
 995                                 $this->mDTopen = false;
 996                                 $text = "</dt></dl>";
 997                         } else {
 998                                 $text = "</dd></dl>";
 999                         }
1000                 }
1001                 else {  return "<!-- ERR 3 -->"; }
1002                 return $text."\n";
1003         }
1004
1005         /* private */ function doBlockLevels( $text, $linestart ) {
1006                 $fname = "Parser::doBlockLevels";
1007                 wfProfileIn( $fname );
1008
1009                 # Parsing through the text line by line.  The main thing
1010                 # happening here is handling of block-level elements p, pre,
1011                 # and making lists from lines starting with * # : etc.
1012                 #
1013                 $textLines = explode( "\n", $text );
1014
1015                 $lastPrefix = $output = $lastLine = '';
1016                 $this->mDTopen = $inBlockElem = false;
1017                 $prefixLength = 0;
1018                 $paragraphStack = false;
1019
1020                 if ( !$linestart ) {
1021                         $output .= array_shift( $textLines );
1022                 }
1023                 foreach ( $textLines as $oLine ) {
1024                         $lastPrefixLength = strlen( $lastPrefix );
1025                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1026                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1027                         if (!$this->mInPre) {
1028                                 $this->mInPre = !empty($preOpenMatch);
1029                         }
1030                         if ( !$this->mInPre ) {
1031                                 # Multiple prefixes may abut each other for nested lists.
1032                                 $prefixLength = strspn( $oLine, "*#:;" );
1033                                 $pref = substr( $oLine, 0, $prefixLength );
1034
1035                                 # eh?
1036                                 $pref2 = str_replace( ";", ":", $pref );
1037                                 $t = substr( $oLine, $prefixLength );
1038                         } else {
1039                                 # Don't interpret any other prefixes in preformatted text
1040                                 $prefixLength = 0;
1041                                 $pref = $pref2 = '';
1042                                 $t = $oLine;
1043                         }
1044
1045                         # List generation
1046                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1047                                 # Same as the last item, so no need to deal with nesting or opening stuff
1048                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1049                                 $paragraphStack = false;
1050
1051                                 if ( ";" == substr( $pref, -1 ) ) {
1052                                         # The one nasty exception: definition lists work like this:
1053                                         # ; title : definition text
1054                                         # So we check for : in the remainder text to split up the
1055                                         # title and definition, without b0rking links.
1056                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1057                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1058                                                 $term = $match[1];
1059                                                 $output .= $term . $this->nextItem( ":" );
1060                                                 $t = $match[2];
1061                                         }
1062                                 }
1063                         } elseif( $prefixLength || $lastPrefixLength ) {
1064                                 # Either open or close a level...
1065                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1066                                 $paragraphStack = false;
1067
1068                                 while( $commonPrefixLength < $lastPrefixLength ) {
1069                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1070                                         --$lastPrefixLength;
1071                                 }
1072                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1073                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1074                                 }
1075                                 while ( $prefixLength > $commonPrefixLength ) {
1076                                         $char = substr( $pref, $commonPrefixLength, 1 );
1077                                         $output .= $this->openList( $char );
1078
1079                                         if ( ";" == $char ) {
1080                                                 # FIXME: This is dupe of code above
1081                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1082                                                         $term = $match[1];
1083                                                         $output .= $term . $this->nextItem( ":" );
1084                                                         $t = $match[2];
1085                                                 }
1086                                         }
1087                                         ++$commonPrefixLength;
1088                                 }
1089                                 $lastPrefix = $pref2;
1090                         }
1091                         if( 0 == $prefixLength ) {
1092                                 # No prefix (not in list)--go to paragraph mode
1093                                 $uniq_prefix = UNIQ_PREFIX;
1094                                 // XXX: use a stack for nestable elements like span, table and div
1095                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1096                                 $closematch = preg_match(
1097                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1098                                         "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1099                                 if ( $openmatch or $closematch ) {
1100                                         $paragraphStack = false;
1101                                         $output .= $this->closeParagraph();
1102                                         if($preOpenMatch and !$preCloseMatch) {
1103                                                 $this->mInPre = true;
1104                                         }
1105                                         if ( $closematch  ) {
1106                                                 $inBlockElem = false;
1107                                         } else {
1108                                                 $inBlockElem = true;
1109                                         }
1110                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1111                                         if ( " " == $t{0} and trim($t) != '' ) {
1112                                                 // pre
1113                                                 if ($this->mLastSection != 'pre') {
1114                                                         $paragraphStack = false;
1115                                                         $output .= $this->closeParagraph().'<pre>';
1116                                                         $this->mLastSection = 'pre';
1117                                                 }
1118                                         } else {
1119                                                 // paragraph
1120                                                 if ( '' == trim($t) ) {
1121                                                         if ( $paragraphStack ) {
1122                                                                 $output .= $paragraphStack.'<br />';
1123                                                                 $paragraphStack = false;
1124                                                                 $this->mLastSection = 'p';
1125                                                         } else {
1126                                                                 if ($this->mLastSection != 'p' ) {
1127                                                                         $output .= $this->closeParagraph();
1128                                                                         $this->mLastSection = '';
1129                                                                         $paragraphStack = "<p>";
1130                                                                 } else {
1131                                                                         $paragraphStack = '</p><p>';
1132                                                                 }
1133                                                         }
1134                                                 } else {
1135                                                         if ( $paragraphStack ) {
1136                                                                 $output .= $paragraphStack;
1137                                                                 $paragraphStack = false;
1138                                                                 $this->mLastSection = 'p';
1139                                                         } else if ($this->mLastSection != 'p') {
1140                                                                 $output .= $this->closeParagraph().'<p>';
1141                                                                 $this->mLastSection = 'p';
1142                                                         }
1143                                                 }
1144                                         }
1145                                 }
1146                         }
1147                         if ($paragraphStack === false) {
1148                                 $output .= $t."\n";
1149                         }
1150                 }
1151                 while ( $prefixLength ) {
1152                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1153                         --$prefixLength;
1154                 }
1155                 if ( "" != $this->mLastSection ) {
1156                         $output .= "</" . $this->mLastSection . ">";
1157                         $this->mLastSection = "";
1158                 }
1159
1160                 wfProfileOut( $fname );
1161                 return $output;
1162         }
1163
1164         function getVariableValue( $index ) {
1165                 global $wgLang, $wgSitename, $wgServer;
1166
1167                 switch ( $index ) {
1168                         case MAG_CURRENTMONTH:
1169                                 return date( "m" );
1170                         case MAG_CURRENTMONTHNAME:
1171                                 return $wgLang->getMonthName( date("n") );
1172                         case MAG_CURRENTMONTHNAMEGEN:
1173                                 return $wgLang->getMonthNameGen( date("n") );
1174                         case MAG_CURRENTDAY:
1175                                 return date("j");
1176                         case MAG_PAGENAME:
1177                                 return $this->mTitle->getText();
1178                         case MAG_NAMESPACE:
1179                                 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1180                                 return $wgLang->getNsText($this->mTitle->getNamespace()); // Patch  by Dori
1181                         case MAG_CURRENTDAYNAME:
1182                                 return $wgLang->getWeekdayName( date("w")+1 );
1183                         case MAG_CURRENTYEAR:
1184                                 return date( "Y" );
1185                         case MAG_CURRENTTIME:
1186                                 return $wgLang->time( wfTimestampNow(), false );
1187                         case MAG_NUMBEROFARTICLES:
1188                                 return wfNumberOfArticles();
1189                         case MAG_SITENAME:
1190                                 return $wgSitename;
1191                         case MAG_SERVER:
1192                                 return $wgServer;
1193                         default:
1194                                 return NULL;
1195                 }
1196         }
1197
1198         function initialiseVariables()
1199         {
1200                 global $wgVariableIDs;
1201                 $this->mVariables = array();
1202                 foreach ( $wgVariableIDs as $id ) {
1203                         $mw =& MagicWord::get( $id );
1204                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1205                 }
1206         }
1207
1208         /* private */ function replaceVariables( $text, $args = array() )
1209         {
1210                 global $wgLang, $wgScript, $wgArticlePath;
1211
1212                 $fname = "Parser::replaceVariables";
1213                 wfProfileIn( $fname );
1214
1215                 $bail = false;
1216                 if ( !$this->mVariables ) {
1217                         $this->initialiseVariables();
1218                 }
1219                 $titleChars = Title::legalChars();
1220                 $nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );
1221
1222                 # This function is called recursively. To keep track of arguments we need a stack:
1223                 array_push( $this->mArgStack, $args );
1224
1225                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1226                 $GLOBALS['wgCurParser'] =& $this;
1227
1228
1229                 if ( $this->mOutputType == OT_HTML ) {
1230                         # Variable substitution
1231                         $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );
1232
1233                         # Argument substitution
1234                         $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
1235                 }
1236                 # Template substitution
1237                 $regex = "/(\\n?){{([$titleChars]*)(\\|.*?|)}}/s";
1238                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1239
1240                 array_pop( $this->mArgStack );
1241
1242                 wfProfileOut( $fname );
1243                 return $text;
1244         }
1245
1246         function variableSubstitution( $matches )
1247         {
1248                 if ( array_key_exists( $matches[1], $this->mVariables ) ) {
1249                         $text = $this->mVariables[$matches[1]];
1250                         $this->mOutput->mContainsOldMagic = true;
1251                 } else {
1252                         $text = $matches[0];
1253                 }
1254                 return $text;
1255         }
1256
1257         function braceSubstitution( $matches )
1258         {
1259                 global $wgLinkCache, $wgLang;
1260                 $fname = "Parser::braceSubstitution";
1261                 $found = false;
1262                 $nowiki = false;
1263                 $noparse = false;
1264
1265                 $title = NULL;
1266
1267                 # $newline is an optional newline character before the braces
1268                 # $part1 is the bit before the first |, and must contain only title characters
1269                 # $args is a list of arguments, starting from index 0, not including $part1
1270
1271                 $newline = $matches[1];
1272                 $part1 = $matches[2];
1273                 # If the third subpattern matched anything, it will start with |
1274                 if ( $matches[3] !== "" ) {
1275                         $args = explode( "|", substr( $matches[3], 1 ) );
1276                 } else {
1277                         $args = array();
1278                 }
1279                 $argc = count( $args );
1280
1281                 # {{{}}}
1282                 if ( strpos( $matches[0], "{{{" ) !== false ) {
1283                         $text = $matches[0];
1284                         $found = true;
1285                         $noparse = true;
1286                 }
1287
1288                 # SUBST
1289                 if ( !$found ) {
1290                         $mwSubst =& MagicWord::get( MAG_SUBST );
1291                         if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1292                                 if ( $this->mOutputType != OT_WIKI ) {
1293                                         # Invalid SUBST not replaced at PST time
1294                                         # Return without further processing
1295                                         $text = $matches[0];
1296                                         $found = true;
1297                                         $noparse= true;
1298                                 }
1299                         } elseif ( $this->mOutputType == OT_WIKI ) {
1300                                 # SUBST not found in PST pass, do nothing
1301                                 $text = $matches[0];
1302                                 $found = true;
1303                         }
1304                 }
1305
1306                 # MSG, MSGNW and INT
1307                 if ( !$found ) {
1308                         # Check for MSGNW:
1309                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1310                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1311                                 $nowiki = true;
1312                         } else {
1313                                 # Remove obsolete MSG:
1314                                 $mwMsg =& MagicWord::get( MAG_MSG );
1315                                 $mwMsg->matchStartAndRemove( $part1 );
1316                         }
1317
1318                         # Check if it is an internal message
1319                         $mwInt =& MagicWord::get( MAG_INT );
1320                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1321                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1322                                         $text = wfMsgReal( $part1, $args, true );
1323                                         $found = true;
1324                                 }
1325                         }
1326                 }
1327
1328                 # NS
1329                 if ( !$found ) {
1330                         # Check for NS: (namespace expansion)
1331                         $mwNs = MagicWord::get( MAG_NS );
1332                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1333                                 if ( intval( $part1 ) ) {
1334                                         $text = $wgLang->getNsText( intval( $part1 ) );
1335                                         $found = true;
1336                                 } else {
1337                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1338                                         if ( !is_null( $index ) ) {
1339                                                 $text = $wgLang->getNsText( $index );
1340                                                 $found = true;
1341                                         }
1342                                 }
1343                         }
1344                 }
1345
1346                 # LOCALURL and LOCALURLE
1347                 if ( !$found ) {
1348                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1349                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1350
1351                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1352                                 $func = 'getLocalURL';
1353                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1354                                 $func = 'escapeLocalURL';
1355                         } else {
1356                                 $func = '';
1357                         }
1358
1359                         if ( $func !== '' ) {
1360                                 $title = Title::newFromText( $part1 );
1361                                 if ( !is_null( $title ) ) {
1362                                         if ( $argc > 0 ) {
1363                                                 $text = $title->$func( $args[0] );
1364                                         } else {
1365                                                 $text = $title->$func();
1366                                         }
1367                                         $found = true;
1368                                 }
1369                         }
1370                 }
1371
1372                 # Internal variables
1373                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1374                         $text = $this->mVariables[$part1];
1375                         $found = true;
1376                         $this->mOutput->mContainsOldMagic = true;
1377                 }
1378 /*
1379                 # Arguments input from the caller
1380                 $inputArgs = end( $this->mArgStack );
1381                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1382                         $text = $inputArgs[$part1];
1383                         $found = true;
1384                 }
1385 */
1386                 # Load from database
1387                 if ( !$found ) {
1388                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1389                         if ( !is_null( $title ) && !$title->isExternal() ) {
1390                                 # Check for excessive inclusion
1391                                 $dbk = $title->getPrefixedDBkey();
1392                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1393                                         $article = new Article( $title );
1394                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1395                                         if ( $articleContent !== false ) {
1396                                                 $found = true;
1397                                                 $text = $articleContent;
1398
1399                                         }
1400                                 }
1401
1402                                 # If the title is valid but undisplayable, make a link to it
1403                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1404                                         $text = "[[" . $title->getPrefixedText() . "]]";
1405                                         $found = true;
1406                                 }
1407                         }
1408                 }
1409
1410                 # Recursive parsing, escaping and link table handling
1411                 # Only for HTML output
1412                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1413                         $text = wfEscapeWikiText( $text );
1414                 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1415                         # Clean up argument array
1416                         $assocArgs = array();
1417                         $index = 1;
1418                         foreach( $args as $arg ) {
1419                                 $eqpos = strpos( $arg, "=" );
1420                                 if ( $eqpos === false ) {
1421                                         $assocArgs[$index++] = $arg;
1422                                 } else {
1423                                         $name = trim( substr( $arg, 0, $eqpos ) );
1424                                         $value = trim( substr( $arg, $eqpos+1 ) );
1425                                         if ( $value === false ) {
1426                                                 $value = "";
1427                                         }
1428                                         if ( $name !== false ) {
1429                                                 $assocArgs[$name] = $value;
1430                                         }
1431                                 }
1432                         }
1433
1434                         # Do not enter included links in link table
1435                         if ( !is_null( $title ) ) {
1436                                 $wgLinkCache->suspend();
1437                         }
1438
1439                         # Run full parser on the included text
1440                         $text = $this->stripParse( $text, (bool)$newline, $assocArgs );
1441
1442                         # Resume the link cache and register the inclusion as a link
1443                         if ( !is_null( $title ) ) {
1444                                 $wgLinkCache->resume();
1445                                 $wgLinkCache->addLinkObj( $title );
1446                         }
1447                 }
1448
1449                 if ( !$found ) {
1450                         return $matches[0];
1451                 } else {
1452                         return $text;
1453                 }
1454         }
1455
1456         # Triple brace replacement -- used for template arguments
1457         function argSubstitution( $matches )
1458         {
1459                 $newline = $matches[1];
1460                 $arg = trim( $matches[2] );
1461                 $text = $matches[0];
1462                 $inputArgs = end( $this->mArgStack );
1463
1464                 if ( array_key_exists( $arg, $inputArgs ) ) {
1465                         $text = $this->stripParse( $inputArgs[$arg], (bool)$newline, array() );
1466                 }
1467
1468                 return $text;
1469         }
1470
1471         # Returns true if the function is allowed to include this entity
1472         function incrementIncludeCount( $dbk )
1473         {
1474                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1475                         $this->mIncludeCount[$dbk] = 0;
1476                 }
1477                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1478                         return true;
1479                 } else {
1480                         return false;
1481                 }
1482         }
1483
1484
1485         # Cleans up HTML, removes dangerous tags and attributes
1486         /* private */ function removeHTMLtags( $text )
1487         {
1488                 global $wgUseTidy, $wgUserHtml;
1489                 $fname = "Parser::removeHTMLtags";
1490                 wfProfileIn( $fname );
1491
1492                 if( $wgUserHtml ) {
1493                         $htmlpairs = array( # Tags that must be closed
1494                                 "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1495                                 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1496                                 "strike", "strong", "tt", "var", "div", "center",
1497                                 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1498                                 "ruby", "rt" , "rb" , "rp", "p"
1499                         );
1500                         $htmlsingle = array(
1501                                 "br", "hr", "li", "dt", "dd"
1502                         );
1503                         $htmlnest = array( # Tags that can be nested--??
1504                                 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1505                                 "dl", "font", "big", "small", "sub", "sup"
1506                         );
1507                         $tabletags = array( # Can only appear inside table
1508                                 "td", "th", "tr"
1509                         );
1510                 } else {
1511                         $htmlpairs = array();
1512                         $htmlsingle = array();
1513                         $htmlnest = array();
1514                         $tabletags = array();
1515                 }
1516
1517                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1518                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1519
1520                 $htmlattrs = $this->getHTMLattrs () ;
1521
1522                 # Remove HTML comments
1523                 $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );
1524
1525                 $bits = explode( "<", $text );
1526                 $text = array_shift( $bits );
1527                 if(!$wgUseTidy) {
1528                         $tagstack = array(); $tablestack = array();
1529                         foreach ( $bits as $x ) {
1530                                 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1531                                 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1532                                 $x, $regs );
1533                                 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1534                                 error_reporting( $prev );
1535
1536                                 $badtag = 0 ;
1537                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1538                                         # Check our stack
1539                                         if ( $slash ) {
1540                                                 # Closing a tag...
1541                                                 if ( ! in_array( $t, $htmlsingle ) &&
1542                                                 ( count($tagstack) && $ot = array_pop( $tagstack ) ) != $t ) {
1543                                                         if(!empty($ot)) array_push( $tagstack, $ot );
1544                                                         $badtag = 1;
1545                                                 } else {
1546                                                         if ( $t == "table" ) {
1547                                                                 $tagstack = array_pop( $tablestack );
1548                                                         }
1549                                                         $newparams = "";
1550                                                 }
1551                                         } else {
1552                                                 # Keep track for later
1553                                                 if ( in_array( $t, $tabletags ) &&
1554                                                 ! in_array( "table", $tagstack ) ) {
1555                                                         $badtag = 1;
1556                                                 } else if ( in_array( $t, $tagstack ) &&
1557                                                 ! in_array ( $t , $htmlnest ) ) {
1558                                                         $badtag = 1 ;
1559                                                 } else if ( ! in_array( $t, $htmlsingle ) ) {
1560                                                         if ( $t == "table" ) {
1561                                                                 array_push( $tablestack, $tagstack );
1562                                                                 $tagstack = array();
1563                                                         }
1564                                                         array_push( $tagstack, $t );
1565                                                 }
1566                                                 # Strip non-approved attributes from the tag
1567                                                 $newparams = $this->fixTagAttributes($params);
1568
1569                                         }
1570                                         if ( ! $badtag ) {
1571                                                 $rest = str_replace( ">", "&gt;", $rest );
1572                                                 $text .= "<$slash$t $newparams$brace$rest";
1573                                                 continue;
1574                                         }
1575                                 }
1576                                 $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1577                         }
1578                         # Close off any remaining tags
1579                         while ( $t = array_pop( $tagstack ) ) {
1580                                 $text .= "</$t>\n";
1581                                 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1582                         }
1583                 } else {
1584                         # this might be possible using tidy itself
1585                         foreach ( $bits as $x ) {
1586                                 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1587                                 $x, $regs );
1588                                 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1589                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1590                                         $newparams = $this->fixTagAttributes($params);
1591                                         $rest = str_replace( ">", "&gt;", $rest );
1592                                         $text .= "<$slash$t $newparams$brace$rest";
1593                                 } else {
1594                                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1595                                 }
1596                         }
1597                 }
1598                 wfProfileOut( $fname );
1599                 return $text;
1600         }
1601
1602
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections if the title can be edited and the
 *    option is enabled
 * 3) Add a table of contents after the first block of text if the option is
 *    enabled and the page has at least four headings (or forces it)
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text    rendered HTML containing <h1>..<h6> elements
 * $isMain  the table of contents is only inserted when this is true
 *
 * Returns the HTML with the rebuilt headlines.
 *
 */

        /* private */ function formatHeadings( $text, $isMain=true )
        {
                global $wgInputEncoding;

                $doNumberHeadings = $this->mOptions->getNumberHeadings();
                $doShowToc = $this->mOptions->getShowToc();
                if( !$this->mTitle->userCanEdit() ) {
                        $showEditLink = 0;
                        $rightClickHack = 0;
                } else {
                        $showEditLink = $this->mOptions->getEditSection();
                        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
                }

                # Inhibit editsection links if requested in the page
                $esw =& MagicWord::get( MAG_NOEDITSECTION );
                if( $esw->matchAndRemove( $text ) ) {
                        $showEditLink = 0;
                }
                # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
                # do not add TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if( $mw->matchAndRemove( $text ) ) {
                        $doShowToc = 0;
                }

                # never add the TOC to the Main Page. This is an entry page that should not
                # be more than 1-2 screens large anyway
                if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
                        $doShowToc = 0;
                }

                # Get all headlines for numbering them and adding funky stuff like [edit]
                # links - this is for later, but we need the number of headlines right now
                # $matches[1] holds the levels, [2] the rest of the opening tag, [3] the contents
                $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

                # if there are fewer than 4 headlines in the article, do not show TOC
                if( $numMatches < 4 ) {
                        $doShowToc = 0;
                }

                # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
                # override above conditions and always show TOC
                $mw =& MagicWord::get( MAG_FORCETOC );
                if ($mw->matchAndRemove( $text ) ) {
                        $doShowToc = 1;
                }


                # We need this to perform operations on the HTML
                $sk =& $this->mOptions->getSkin();

                # Numbers are computed both for numbered headings and for the TOC
                $needNumbering = ( $doNumberHeadings || $doShowToc );

                # headline counter
                $headlineCount = 0;

                # Ugh .. the TOC should have neat indentation levels which can be
                # passed to the skin functions. These are determined here
                $toclevel = 0;
                $toc = "";
                $full = "";
                $head = array();
                $sublevelCount = array();
                # number of times each canonized headline has been seen so far
                $refers = array();
                $level = 0;
                $prevlevel = 0;
                foreach( $matches[3] as $headline ) {
                        $numbering = "";
                        if( $level ) {
                                $prevlevel = $level;
                        }
                        $level = $matches[1][$headlineCount];
                        if( $needNumbering && $prevlevel && $level > $prevlevel ) {
                                # reset when we enter a new level
                                $sublevelCount[$level] = 0;
                                $toc .= $sk->tocIndent( $level - $prevlevel );
                                $toclevel += $level - $prevlevel;
                        }
                        if( $needNumbering && $level < $prevlevel ) {
                                # reset when we step back a level
                                $sublevelCount[$level+1]=0;
                                $toc .= $sk->tocUnindent( $prevlevel - $level );
                                $toclevel -= $prevlevel - $level;
                        }
                        # count number of headlines for each level
                        if( !isset( $sublevelCount[$level] ) ) {
                                $sublevelCount[$level] = 0;
                        }
                        $sublevelCount[$level]++;
                        if( $needNumbering ) {
                                # Join the counters of all levels up to this one with dots,
                                # leaving out levels that have no count
                                $dot = 0;
                                for( $i = 1; $i <= $level; $i++ ) {
                                        if( !empty( $sublevelCount[$i] ) ) {
                                                if( $dot ) {
                                                        $numbering .= ".";
                                                }
                                                $numbering .= $sublevelCount[$i];
                                                $dot = 1;
                                        }
                                }
                        }

                        # The canonized header is a version of the header text safe to use for links
                        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

                        # strip out HTML
                        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                        $tocline = trim( $canonized_headline );
                        $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
                        # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
                        $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);

                        # count how many in assoc. array so we can track dupes in anchors
                        if( !isset( $refers[$canonized_headline] ) ) {
                                $refers[$canonized_headline] = 0;
                        }
                        $refers[$canonized_headline]++;
                        $dupeCount = $refers[$canonized_headline];

                        # Prepend the number to the heading text

                        if( $needNumbering ) {
                                $tocline = $numbering . " " . $tocline;

                                # Don't number the heading if it is the only one (looks silly)
                                if( $doNumberHeadings && count( $matches[3] ) > 1) {
                                        # the two are different if the line contains a link
                                        $headline=$numbering . " " . $headline;
                                }
                        }

                        # Create the anchor for linking from the TOC to the section
                        # Repeated headlines get _2, _3, ... appended
                        $anchor = $canonized_headline;
                        if( $dupeCount > 1 ) {
                                $anchor .= "_" . $dupeCount;
                        }
                        if( $doShowToc ) {
                                $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                        }

                        # Assemble the replacement for this headline: optional edit link,
                        # then the anchor and the heading itself
                        $head[$headlineCount] = "";
                        if( $showEditLink ) {
                                $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
                        }

                        # Add the edit section span
                        if( $rightClickHack ) {
                                $headline = $sk->editSectionScript($headlineCount+1,$headline);
                        }

                        # give headline the correct <h#> tag
                        $head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

                        $headlineCount++;
                }

                if( $doShowToc ) {
                        # Close the indentation levels that are still open and wrap the lines
                        $toc .= $sk->tocUnindent( $toclevel );
                        $toc = $sk->tocTable( $toc );
                }

                # split up and insert constructed headlines

                $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
                $i = 0;

                foreach( $blocks as $block ) {
                        # An [edit] link for the top block of text used to be added here
                        # when section editing is enabled. It is disabled because it broke
                        # block formatting, for example a bullet point in the top line:
                        # $full .= $sk->editSectionLink(0);
                        $full .= $block;
                        if( $doShowToc && !$i && $isMain) {
                                # Top anchor now in skin
                                $full .= $toc;
                        }

                        # Block $i is followed by the rebuilt headline $i, if there is one
                        if( !empty( $head[$i] ) ) {
                                $full .= $head[$i];
                        }
                        $i++;
                }

                return $full;
        }
1801
# Turn each "ISBN <number>" occurrence in $text into a link to the
# Special:Booksources page for that number.
#
# $text - text to scan
#
# Returns the text with every recognised ISBN wrapped in an
# <a class="internal"> link. An "ISBN " that is not followed by a token
# made of digits, dashes or upper-case letters is copied through unchanged.
/* private */ function magicISBN( $text )
{
        # The delimiter is a literal string, so explode() is sufficient; no
        # regular-expression split is needed.
        $a = explode( "ISBN ", $text );
        if ( count( $a ) < 2 ) return $text;
        $text = array_shift( $a );
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        foreach ( $a as $x ) {
                # Remember the blanks after "ISBN " so that unlinked text can be
                # reproduced exactly as it was written.
                $rest = ltrim( $x, " " );
                $blank = str_repeat( " ", strlen( $x ) - strlen( $rest ) );

                # strspn() measures the leading run of ISBN characters and is
                # well defined for an empty remainder, unlike indexing $x{0}.
                $isbnLen = strspn( $rest, $valid );
                $isbn = (string)substr( $rest, 0, $isbnLen );
                $x = (string)substr( $rest, $isbnLen );

                # The dashes are presentation only; the query uses the bare number
                $num = str_replace( "-", "", $isbn );

                if ( $num === "" ) {
                        # Nothing usable followed "ISBN ": put the text back untouched
                        $text .= "ISBN $blank$x";
                } else {
                        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
                        $text .= "<a href=\"" .
                                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                                "\" class=\"internal\">ISBN $isbn</a>";
                        $text .= $x;
                }
        }
        return $text;
}
# Turn each "RFC <digits>" occurrence in $text into an external link.
# The target URL is the "rfcurl" message with "$1" replaced by the number.
#
# $text - text to scan
#
# Returns the text with every recognised RFC number wrapped in an <a> link.
# An "RFC " that is not followed by at least one digit is copied through
# unchanged.
/* private */ function magicRFC( $text )
{
        # The delimiter is a literal string, so explode() is sufficient; no
        # regular-expression split is needed.
        $a = explode( "RFC ", $text );
        if ( count( $a ) < 2 ) return $text;
        $text = array_shift( $a );
        $valid = "0123456789";

        foreach ( $a as $x ) {
                # Remember the blanks after "RFC " so that unlinked text can be
                # reproduced exactly as it was written.
                $rest = ltrim( $x, " " );
                $blank = str_repeat( " ", strlen( $x ) - strlen( $rest ) );

                # strspn() measures the leading run of digits and is well defined
                # for an empty remainder, unlike indexing $x{0}.
                $rfcLen = strspn( $rest, $valid );
                $rfc = (string)substr( $rest, 0, $rfcLen );
                $x = (string)substr( $rest, $rfcLen );

                if ( $rfc === "" ) {
                        # No number followed "RFC ": put the text back untouched
                        $text .= "RFC $blank$x";
                } else {
                        $url = wfmsg( "rfcurl" );
                        $url = str_replace( "$1", $rfc, $url);
                        $sk =& $this->mOptions->getSkin();
                        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
                        $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
                }
        }
        return $text;
}
1868
# Save-time entry point: returns altered wiki markup rather than HTML.
#
# $text       - wikitext as submitted
# $title      - title object, stored by reference in $this->mTitle
# $user       - user object, handed on to pstPass2()
# $options    - options object, stored in $this->mOptions
# $clearState - when true (the default) clearState() is called first
#
# The text has CRLF line endings converted to LF and is then passed through
# strip(), pstPass2() and unstrip(), in that order.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOutputType = OT_WIKI;
        $this->mTitle =& $title;
        $this->mOptions = $options;

        if ( $clearState ) {
                $this->clearState();
        }

        # Normalise Windows line endings
        $text = str_replace( "\r\n", "\n", $text );

        # The same $stripState variable is given to strip() and to unstrip()
        $stripState = false;
        $stripped = $this->strip( $text, $stripState, false );
        $transformed = $this->pstPass2( $stripped, $user );

        return $this->unstrip( $transformed, $stripState );
}
1897
# Second pass of the pre-save transform, run on the text between strip()
# and unstrip().
#
# $text - wikitext to transform
# $user - user object; supplies the name and "nickname" option for signatures
#
# In order: replaces variables, expands the ~~~ / ~~~~ / ~~~~~ signature
# markers, completes "pipe trick" links such as [[|name]] and
# [[name (context)|]], trims trailing whitespace and removes the MAG_END
# magic word. Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone, $wgCurParser;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }
        if(isset($wgLocaltimezone)) {
                $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
        }
        /* Note: this is an ugly timezone hack for the European wikis */
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

        # The markers are fixed strings, so plain string replacement is used.
        # With preg_replace() the name, nickname and date would be treated as
        # a replacement pattern, and any "$1" or "\1" they contain would be
        # read as a back-reference and dropped from the saved text.
        # Longest marker first, so ~~~~~ is not consumed as ~~~~ plus ~.
        $userLink = "[[" . $wgLang->getNsText(
          Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # The context is the parenthesised suffix of the current page title,
        # if it has one. It can only contain characters from $tc, which has
        # neither "$" nor "\", so it is safe inside a replacement pattern.
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        /*
        $mw =& MagicWord::get( MAG_SUBST );
        $wgCurParser = $this->fork();
        $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
        $this->merge( $wgCurParser );
        */

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1968
1969         # Set up some variables which are usually set up in parse()
1970         # so that an external function can call some class members with confidence
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        # Store the title (by reference), the options and the output type
        # on the parser so later member calls find them in place.
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        # Callers may pass $clearState = false to skip clearState()
        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1980
# Run variable replacement over a message text in OT_MSG mode.
#
# $text    - message text
# $options - options object, stored in $this->mOptions
#
# Returns the text after replaceVariables(). If a call arrives while an
# earlier call is still running, the text is returned unchanged.
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $executing = false;

        # Guard against infinite recursion: only enter when no other
        # transformMsg() call is in progress.
        if ( !$executing ) {
                $executing = true;

                $this->mOutputType = OT_MSG;
                $this->mOptions = $options;
                $this->mTitle = $wgTitle;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $executing = false;
        }

        return $text;
}
2000 }
2001
# Value object holding the result of a parse: the output text, the
# language and category links gathered with it, an "old magic" flag and
# a cache timestamp.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # Constructor. Every argument is optional; the cache time always
        # starts out as an empty string.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = "";
        }

        # Read accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Write accessors; each returns whatever wfSetVar() returns
        # (presumably the previous value -- confirm against wfSetVar()).
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Fold the metadata of another ParserOutput into this one.
        # The language links and the category links of $other are appended
        # to the matching list of this object, and the "old magic" flag
        # becomes true if it is set on either side. The text and the cache
        # time are left alone.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Categories must come from $other's category list, not from
                # this object's own language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2035
# Bundle of the settings the Parser consults while rendering.
# initialiseFromUser() copies the site-wide switches from globals and the
# personal preferences from a User object.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors, one get/set pair per option. Setters go through
        # wfSetVar() (wfSetRef() for the skin) and return its result.

        function getUseTeX()
        {
                return $this->mUseTeX;
        }

        function setUseTeX( $x )
        {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function getUseCategoryMagic()
        {
                return $this->mUseCategoryMagic;
        }

        function setUseCategoryMagic( $x )
        {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        function getUseDynamicDates()
        {
                return $this->mUseDynamicDates;
        }

        function setUseDynamicDates( $x )
        {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function getInterwikiMagic()
        {
                return $this->mInterwikiMagic;
        }

        function setInterwikiMagic( $x )
        {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function getAllowExternalImages()
        {
                return $this->mAllowExternalImages;
        }

        function setAllowExternalImages( $x )
        {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function getSkin()
        {
                return $this->mSkin;
        }

        function setSkin( $x )
        {
                return wfSetRef( $this->mSkin, $x );
        }

        function getDateFormat()
        {
                return $this->mDateFormat;
        }

        function setDateFormat( $x )
        {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function getEditSection()
        {
                return $this->mEditSection;
        }

        function setEditSection( $x )
        {
                return wfSetVar( $this->mEditSection, $x );
        }

        function getEditSectionOnRightClick()
        {
                return $this->mEditSectionOnRightClick;
        }

        function setEditSectionOnRightClick( $x )
        {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function getNumberHeadings()
        {
                return $this->mNumberHeadings;
        }

        function setNumberHeadings( $x )
        {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function getShowToc()
        {
                return $this->mShowToc;
        }

        function setShowToc( $x )
        {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Build a new options object and fill it in from $user
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Populate every option.
        # $userInput - user object to read preferences from; when it is
        #              empty a fresh User marked as loaded is used instead.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide switches, taken from configuration globals
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user settings
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }

}
2108
2109 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches )
{
        # Plain-function callback: hands the match array to the
        # braceSubstitution() method of the parser held in $wgCurParser.
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->braceSubstitution( $matches );
}
2115
function wfArgSubstitution( $matches )
{
        # Plain-function callback: hands the match array to the
        # argSubstitution() method of the parser held in $wgCurParser.
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->argSubstitution( $matches );
}
2121
function wfVariableSubstitution( $matches )
{
        # Plain-function callback: hands the match array to the
        # variableSubstitution() method of the parser held in $wgCurParser.
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->variableSubstitution( $matches );
}
2127
2128 ?>