includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         require_once('extensions/wikihiero/wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         require_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  40 define( "MAX_INCLUDE_REPEAT", 5 );
  41
  42 # Allowed values for $mOutputType
  43 define( "OT_HTML", 1 );
  44 define( "OT_WIKI", 2 );
  45 define( "OT_MSG", 3 );
  46
  47 # string parameter for extractTags which will cause it
  48 # to strip HTML comments in addition to regular
  49 # <XML>-style tags. This should not be anything we
  50 # may want to use in wikisyntax
  51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
  52
  53 # prefix for escaping, used in two functions at least
  54 define( "UNIQ_PREFIX", "NaodW29");
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  65         function Parser()
  66         {
  67                 $this->clearState();
  68         }
  69
  70         function clearState()
  71         {
  72                 $this->mOutput = new ParserOutput;
  73                 $this->mAutonumber = 0;
  74                 $this->mLastSection = "";
  75                 $this->mDTopen = false;
  76                 $this->mVariables = false;
  77                 $this->mIncludeCount = array();
  78                 $this->mStripState = array();
  79                 $this->mArgStack = array();
  80                 $this->mInPre = false;
  81                 $this->mInNowiki = false;
  82         }
  83
  84         # First pass--just handle <nowiki> sections, pass the rest off
  85         # to internalParse() which does all the real work.
  86         #
  87         # Returns a ParserOutput
  88         #
  89         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  90         {
  91                 global $wgUseTidy;
  92                 $fname = "Parser::parse";
  93                 wfProfileIn( $fname );
  94
  95                 if ( $clearState ) {
  96                         $this->clearState();
  97                 }
  98
  99                 $this->mOptions = $options;
 100                 $this->mTitle =& $title;
 101                 $this->mOutputType = OT_HTML;
 102
 103                 $stripState = NULL;
 104                 $text = $this->strip( $text, $this->mStripState );
 105                 $text = $this->internalParse( $text, $linestart );
 106                 $text = $this->unstrip( $text, $this->mStripState );
 107                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 108                 if(!$wgUseTidy) {
 109                         $fixtags = array(
 110                                 # french spaces, last one Guillemet-left
 111                                 # only if there is something before the space
 112                                 "/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
 113                                 # french spaces, Guillemet-right
 114                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 115                                 "/<hr *>/i" => '<hr />',
 116                                 "/<br *>/i" => '<br />',
 117                                 "/<center *>/i"=>'<div class="center">',
 118                                 "/<\\/center *>/i" => '</div>',
 119                                 # Clean up spare ampersands; note that we probably ought to be
 120                                 # more careful about named entities.
 121                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 122                         );
 123                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 124                 } else {
 125                         $fixtags = array(
 126                                 # french spaces, last one Guillemet-left
 127                                 "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
 128                                 # french spaces, Guillemet-right
 129                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 130                                 "/<center *>/i"=>'<div class="center">',
 131                                 "/<\\/center *>/i" => '</div>'
 132                         );
 133                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 134                 }
 135                 # only once and last
 136                 $text = $this->doBlockLevels( $text, $linestart );
 137                 if($wgUseTidy) {
 138                         $text = $this->tidy($text);
 139                 }
 140                 $this->mOutput->setText( $text );
 141                 wfProfileOut( $fname );
 142                 return $this->mOutput;
 143         }
 144
 145         /* static */ function getRandomString()
 146         {
 147                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 148         }
 149
 150         # Replaces all occurrences of <$tag>content</$tag> in the text
 151         # with a random marker and returns the new text. the output parameter
 152         # $content will be an associative array filled with data on the form
 153         # $unique_marker => content.
 154
 155         # If $content is already set, the additional entries will be appended
 156
 157         # If $tag is set to STRIP_COMMENTS, the function will extract
 158         # <!-- HTML comments -->
 159
 160         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 161                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 162                 if ( !$content ) {
 163                         $content = array( );
 164                 }
 165                 $n = 1;
 166                 $stripped = "";
 167
 168                 while ( "" != $text ) {
 169                         if($tag==STRIP_COMMENTS) {
 170                                 $p = preg_split( "/<!--/i", $text, 2 );
 171                         } else {
 172                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 173                         }
 174                         $stripped .= $p[0];
 175                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 176                                 $text = "";
 177                         } else {
 178                                 if($tag==STRIP_COMMENTS) {
 179                                         $q = preg_split( "/-->/i", $p[1], 2 );
 180                                 } else {
 181                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 182                                 }
 183                                 $marker = $rnd . sprintf("%08X", $n++);
 184                                 $content[$marker] = $q[0];
 185                                 $stripped .= $marker;
 186                                 $text = $q[1];
 187                         }
 188                 }
 189                 return $stripped;
 190         }
 191
 192         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 193         # If $render is set, performs necessary rendering operations on plugins
 194         # Returns the text, and fills an array with data needed in unstrip()
 195         # If the $state is already a valid strip state, it adds to the state
 196
 197         # When $stripcomments is set, HTML comments <!-- like this -->
 198         # will be stripped in addition to other tags. This is important
 199         # for section editing, where these comments cause confusion when
 200         # counting the sections in the wikisource
 201         function strip( $text, &$state, $stripcomments = false )
 202         {
 203                 $render = ($this->mOutputType == OT_HTML);
 204                 $nowiki_content = array();
 205                 $hiero_content = array();
 206                 $timeline_content = array();
 207                 $math_content = array();
 208                 $pre_content = array();
 209                 $comment_content = array();
 210
 211                 # Replace any instances of the placeholders
 212                 $uniq_prefix = UNIQ_PREFIX;
 213                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 214
 215                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 216                 foreach( $nowiki_content as $marker => $content ){
 217                         //if( $render ){
 218                                 //# use span to mark nowiki areas, note the trailing whitespace in span to avoid collisions with other spans
 219                                 //$nowiki_content[$marker] = '<span class="nowiki">'.wfEscapeHTMLTagsOnly( $content )."</span  >";
 220                         //} else {
 221                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 222                         //}
 223                 }
 224
 225                 $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 226                 foreach( $hiero_content as $marker => $content ){
 227                         if( $render && $GLOBALS['wgUseWikiHiero']){
 228                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 229                         } else {
 230                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
 231                         }
 232                 }
 233
 234                 $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
 235                 foreach( $timeline_content as $marker => $content ){
 236                         if( $render && $GLOBALS['wgUseTimeline']){
 237                                 $timeline_content[$marker] = renderTimeline( $content );
 238                         } else {
 239                                 $timeline_content[$marker] = "<timeline>$content</timeline>";
 240                         }
 241                 }
 242
 243                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 244                 foreach( $math_content as $marker => $content ){
 245                         if( $render ) {
 246                                 if( $this->mOptions->getUseTeX() ) {
 247                                         $math_content[$marker] = renderMath( $content );
 248                                 } else {
 249                                         $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
 250                                 }
 251                         } else {
 252                                 $math_content[$marker] = "<math>$content</math>";
 253                         }
 254                 }
 255
 256                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 257                 foreach( $pre_content as $marker => $content ){
 258                         if( $render ){
 259                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 260                         } else {
 261                                 $pre_content[$marker] = "<pre>$content</pre>";
 262                         }
 263                 }
 264                 if($stripcomments) {
 265                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 266                         foreach( $comment_content as $marker => $content ){
 267                                 $comment_content[$marker] = "<!--$content-->";
 268                         }
 269                 }
 270
 271                 # Merge state with the pre-existing state, if there is one
 272                 if ( $state ) {
 273                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 274                         $state['hiero'] = $state['hiero'] + $hiero_content;
 275                         $state['timeline'] = $state['timeline'] + $timeline_content;
 276                         $state['math'] = $state['math'] + $math_content;
 277                         $state['pre'] = $state['pre'] + $pre_content;
 278                         $state['comment'] = $state['comment'] + $comment_content;
 279                 } else {
 280                         $state = array(
 281                           'nowiki' => $nowiki_content,
 282                           'hiero' => $hiero_content,
 283                           'timeline' => $timeline_content,
 284                           'math' => $math_content,
 285                           'pre' => $pre_content,
 286                           'comment' => $comment_content
 287                         );
 288                 }
 289                 return $text;
 290         }
 291
 292         function unstrip( $text, &$state )
 293         {
 294                 # Must expand in reverse order, otherwise nested tags will be corrupted
 295                 $contentDict = end( $state );
 296                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 297                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 298                                 $text = str_replace( key( $contentDict ), $content, $text );
 299                         }
 300                 }
 301
 302                 return $text;
 303         }
 304
 305         # Add an item to the strip state
 306         # Returns the unique tag which must be inserted into the stripped text
 307         # The tag will be replaced with the original text in unstrip()
 308
 309         function insertStripItem( $text, &$state )
 310         {
 311                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 312                 if ( !$state ) {
 313                         $state = array(
 314                           'nowiki' => array(),
 315                           'hiero' => array(),
 316                           'math' => array(),
 317                           'pre' => array()
 318                         );
 319                 }
 320                 $state['item'][$rnd] = $text;
 321                 return $rnd;
 322         }
 323
 324         # This method generates the list of subcategories and pages for a category
 325         function categoryMagic ()
 326         {
 327                 global $wgLang , $wgUser ;
 328                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 329
 330                 $cns = Namespace::getCategory() ;
 331                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 332
 333                 $r = "<br style=\"clear:both;\"/>\n";
 334
 335
 336                 $sk =& $wgUser->getSkin() ;
 337
 338                 $articles = array() ;
 339                 $children = array() ;
 340                 $data = array () ;
 341                 $id = $this->mTitle->getArticleID() ;
 342
 343                 # FIXME: add limits
 344                 $t = wfStrencode( $this->mTitle->getDBKey() );
 345                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 346                 $res = wfQuery ( $sql, DB_READ ) ;
 347                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 348
 349                 # For all pages that link to this category
 350                 foreach ( $data AS $x )
 351                 {
 352                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 353                         if ( $t != "" ) $t .= ":" ;
 354                         $t .= $x->cur_title ;
 355
 356                         if ( $x->cur_namespace == $cns ) {
 357                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 358                         } else {
 359                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 360                         }
 361                 }
 362                 wfFreeResult ( $res ) ;
 363
 364                 # Showing subcategories
 365                 if ( count ( $children ) > 0 ) {
 366                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 367                         $r .= implode ( ", " , $children ) ;
 368                 }
 369
 370                 # Showing pages in this category
 371                 if ( count ( $articles ) > 0 ) {
 372                         $ti = $this->mTitle->getText() ;
 373                         $h =  wfMsg( "category_header", $ti );
 374                         $r .= "<h2>{$h}</h2>\n" ;
 375                         $r .= implode ( ", " , $articles ) ;
 376                 }
 377
 378
 379                 return $r ;
 380         }
 381
 382         function getHTMLattrs ()
 383         {
 384                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 385                                 "title", "align", "lang", "dir", "width", "height",
 386                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 387                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 388                                 /* FONT */ "type", "start", "value", "compact",
 389                                 /* For various lists, mostly deprecated but safe */
 390                                 "summary", "width", "border", "frame", "rules",
 391                                 "cellspacing", "cellpadding", "valign", "char",
 392                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 393                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 394                                 "id", "class", "name", "style" /* For CSS */
 395                                 );
 396                 return $htmlattrs ;
 397         }
 398
 399         function fixTagAttributes ( $t )
 400         {
 401                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 402                 $htmlattrs = $this->getHTMLattrs() ;
 403
 404                 # Strip non-approved attributes from the tag
 405                 $t = preg_replace(
 406                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 407                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 408                         $t);
 409                 # Strip javascript "expression" from stylesheets. Brute force approach:
 410                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 411
 412                 if( preg_match(
 413                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 414                         wfMungeToUtf8( $t ) ) )
 415                 {
 416                         $t="";
 417                 }
 418
 419                 return trim ( $t ) ;
 420         }
 421
 422         /* interface with html tidy, used if $wgUseTidy = true */
 423         function tidy ( $text ) {
 424                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 425                 global $wgInputEncoding, $wgOutputEncoding;
 426                 $fname = "Parser::tidy";
 427                 wfProfileIn( $fname );
 428
 429                 $cleansource = '';
 430                 switch(strtoupper($wgOutputEncoding)) {
 431                         case 'ISO-8859-1':
 432                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 433                                 break;
 434                         case 'UTF-8':
 435                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 436                                 break;
 437                         default:
 438                                 $wgTidyOpts .= ' -raw';
 439                         }
 440
 441                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 442 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 443 '<head><title>test</title></head><body>'.$text.'</body></html>';
 444                 $descriptorspec = array(
 445                         0 => array("pipe", "r"),
 446                         1 => array("pipe", "w"),
 447                         2 => array("file", "/dev/null", "a")
 448                 );
 449                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 450                 if (is_resource($process)) {
 451                         fwrite($pipes[0], $wrappedtext);
 452                         fclose($pipes[0]);
 453                         while (!feof($pipes[1])) {
 454                                 $cleansource .= fgets($pipes[1], 1024);
 455                         }
 456                         fclose($pipes[1]);
 457                         $return_value = proc_close($process);
 458                 }
 459
 460                 wfProfileOut( $fname );
 461
 462                 if( $cleansource == '' && $text != '') {
 463                         wfDebug( "Tidy error detected!\n" );
 464                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 465                 } else {
 466                         return $cleansource;
 467                 }
 468         }
 469
 470         function doTableStuff ( $t )
 471         {
 472                 $t = explode ( "\n" , $t ) ;
 473                 $td = array () ; # Is currently a td tag open?
 474                         $ltd = array () ; # Was it TD or TH?
 475                         $tr = array () ; # Is currently a tr tag open?
 476                         $ltr = array () ; # tr attributes
 477                         foreach ( $t AS $k => $x )
 478                         {
 479                                 $x = trim ( $x ) ;
 480                                 $fc = substr ( $x , 0 , 1 ) ;
 481                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 482                                 {
 483                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 484                                         array_push ( $td , false ) ;
 485                                         array_push ( $ltd , "" ) ;
 486                                         array_push ( $tr , false ) ;
 487                                         array_push ( $ltr , "" ) ;
 488                                 }
 489                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 490                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 491                                 {
 492                                         $z = "</table>\n" ;
 493                                         $l = array_pop ( $ltd ) ;
 494                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 495                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 496                                         array_pop ( $ltr ) ;
 497                                         $t[$k] = $z ;
 498                                 }
 499                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 500                                                 {
 501                                                 $z = trim ( substr ( $x , 2 ) ) ;
 502                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 503                                                 }*/
 504                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 505                                 {
 506                                         $x = substr ( $x , 1 ) ;
 507                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 508                                         $z = "" ;
 509                                         $l = array_pop ( $ltd ) ;
 510                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 511                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 512                                         array_pop ( $ltr ) ;
 513                                         $t[$k] = $z ;
 514                                         array_push ( $tr , false ) ;
 515                                         array_push ( $td , false ) ;
 516                                         array_push ( $ltd , "" ) ;
 517                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 518                                 }
 519                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 520                                 {
 521                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 522                                         {
 523                                                 $fc = "+" ;
 524                                                 $x = substr ( $x , 1 ) ;
 525                                         }
 526                                         $after = substr ( $x , 1 ) ;
 527                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 528                                         $after = explode ( "||" , $after ) ;
 529                                         $t[$k] = "" ;
 530                                         foreach ( $after AS $theline )
 531                                         {
 532                                                 $z = "" ;
 533                                                 if ( $fc != "+" )
 534                                                 {
 535                                                         $tra = array_pop ( $ltr ) ;
 536                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 537                                                         array_push ( $tr , true ) ;
 538                                                         array_push ( $ltr , "" ) ;
 539                                                 }
 540
 541                                                 $l = array_pop ( $ltd ) ;
 542                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 543                                                 if ( $fc == "|" ) $l = "td" ;
 544                                                 else if ( $fc == "!" ) $l = "th" ;
 545                                                 else if ( $fc == "+" ) $l = "caption" ;
 546                                                 else $l = "" ;
 547                                                 array_push ( $ltd , $l ) ;
 548                                                 $y = explode ( "|" , $theline , 2 ) ;
 549                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 550                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 551                                                 $t[$k] .= $y ;
 552                                                 array_push ( $td , true ) ;
 553                                         }
 554                                 }
 555                         }
 556
 557                 # Closing open td, tr && table
 558                 while ( count ( $td ) > 0 )
 559                 {
 560                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 561                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 562                         $t[] = "</table>" ;
 563                 }
 564
 565                 $t = implode ( "\n" , $t ) ;
 566                 #               $t = $this->removeHTMLtags( $t );
 567                 return $t ;
 568         }
 569
 570         # Parses the text and adds the result to the strip state
 571         # Returns the strip tag
 572         function stripParse( $text, $newline, $args )
 573         {
 574                 $text = $this->strip( $text, $this->mStripState );
 575                 $text = $this->internalParse( $text, (bool)$newline, $args, false );
 576                 return $newline.$this->insertStripItem( $text, $this->mStripState );
 577         }
 578
 579         function internalParse( $text, $linestart, $args = array(), $isMain=true )
 580         {
 581                 $fname = "Parser::internalParse";
 582                 wfProfileIn( $fname );
 583
 584                 $text = $this->removeHTMLtags( $text );
 585                 $text = $this->replaceVariables( $text, $args );
 586
 587                 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );
 588
 589                 $text = $this->doHeadings( $text );
 590                 if($this->mOptions->getUseDynamicDates()) {
 591                         global $wgDateFormatter;
 592                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 593                 }
 594                 $text = $this->doAllQuotes( $text );
 595                 $text = $this->replaceExternalLinks( $text );
 596                 $text = $this->replaceInternalLinks ( $text );
 597                 $text = $this->replaceInternalLinks ( $text );
 598                 //$text = $this->doTokenizedParser ( $text );
 599                 $text = $this->doTableStuff ( $text ) ;
 600                 $text = $this->magicISBN( $text );
 601                 $text = $this->magicRFC( $text );
 602                 $text = $this->formatHeadings( $text, $isMain );
 603                 $sk =& $this->mOptions->getSkin();
 604                 $text = $sk->transformContent( $text );
 605
 606                 if ( !isset ( $this->categoryMagicDone ) ) {
 607                         $text .= $this->categoryMagic () ;
 608                         $this->categoryMagicDone = true ;
 609                 }
 610
 611                 wfProfileOut( $fname );
 612                 return $text;
 613         }
 614
 615
 616         /* private */ function doHeadings( $text )
 617         {
 618                 for ( $i = 6; $i >= 1; --$i ) {
 619                         $h = substr( "======", 0, $i );
 620                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 621                           "<h{$i}>\\1</h{$i}>\\2", $text );
 622                 }
 623                 return $text;
 624         }
 625
 626         /* private */ function doAllQuotes( $text )
 627         {
 628                 $outtext = "";
 629                 $lines = explode( "\n", $text );
 630                 foreach ( $lines as $line ) {
 631                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\n";
 632                 }
 633                 return substr($outtext, 0,-1);
 634         }
 635
 636         /* private */ function doQuotes( $pre, $text, $mode )
 637         {
 638                 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
 639                         $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
 640                         $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
 641                         if ( substr ($m[2], 0, 1) == "'" ) {
 642                                 $m[2] = substr ($m[2], 1);
 643                                 if ($mode == "em") {
 644                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
 645                                 } else if ($mode == "strong") {
 646                                         return $m1_strong . $this->doQuotes ( "", $m[2], "" );
 647                                 } else if (($mode == "emstrong") || ($mode == "both")) {
 648                                         return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
 649                                 } else if ($mode == "strongem") {
 650                                         return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
 651                                 } else {
 652                                         return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
 653                                 }
 654                         } else {
 655                                 if ($mode == "strong") {
 656                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
 657                                 } else if ($mode == "em") {
 658                                         return $m1_em . $this->doQuotes ( "", $m[2], "" );
 659                                 } else if ($mode == "emstrong") {
 660                                         return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
 661                                 } else if (($mode == "strongem") || ($mode == "both")) {
 662                                         return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
 663                                 } else {
 664                                         return $m[1] . $this->doQuotes ( "", $m[2], "em" );
 665                                 }
 666                         }
 667                 } else {
 668                         $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
 669                         $text_em = ($text == "") ? "" : "<em>{$text}</em>";
 670                         if ($mode == "") {
 671                                 return $pre . $text;
 672                         } else if ($mode == "em") {
 673                                 return $pre . $text_em;
 674                         } else if ($mode == "strong") {
 675                                 return $pre . $text_strong;
 676                         } else if ($mode == "strongem") {
 677                                 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
 678                         } else {
 679                                 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
 680                         }
 681                 }
 682         }
 683
 684         # Note: we have to do external links before the internal ones,
 685         # and otherwise take great care in the order of things here, so
 686         # that we don't end up interpreting some URLs twice.
 687
 688         /* private */ function replaceExternalLinks( $text )
 689         {
 690                 $fname = "Parser::replaceExternalLinks";
 691                 wfProfileIn( $fname );
 692                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 693                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 694                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 695                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 696                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 697                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 698                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 699                 wfProfileOut( $fname );
 700                 return $text;
 701         }
 702
 703         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 704         {
 705                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 706                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 707
 708                 # this is  the list of separators that should be ignored if they
 709                 # are the last character of an URL but that should be included
 710                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 711                 # in this case, the last comma should not become part of the URL,
 712                 # but in "www.foo.com/123,2342,32.htm" it should.
 713                 $sep = ",;\.:";
 714                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 715                 $images = "gif|png|jpg|jpeg";
 716
 717                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 718                 # they are interpreted as part of the string (used to tell PHP
 719                 # that the content of the string should be inserted there).
 720                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 721                   "((?i){$images})([^{$uc}]|$)/";
 722
 723                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 724                 $sk =& $this->mOptions->getSkin();
 725
 726                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 727                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 728                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 729                 }
 730                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 731                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 732                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 733                   "</a>\\5", $s );
 734                 $s = str_replace( $unique, $protocol, $s );
 735
 736                 $a = explode( "[{$protocol}:", " " . $s );
 737                 $s = array_shift( $a );
 738                 $s = substr( $s, 1 );
 739
 740                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 741                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 742
 743                 foreach ( $a as $line ) {
 744                         if ( preg_match( $e1, $line, $m ) ) {
 745                                 $link = "{$protocol}:{$m[1]}";
 746                                 $trail = $m[2];
 747                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 748                                 else { $text = wfEscapeHTML( $link ); }
 749                         } else if ( preg_match( $e2, $line, $m ) ) {
 750                                 $link = "{$protocol}:{$m[1]}";
 751                                 $text = $m[2];
 752                                 $trail = $m[3];
 753                         } else {
 754                                 $s .= "[{$protocol}:" . $line;
 755                                 continue;
 756                         }
 757                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 758                                 $paren = "";
 759                         } else {
 760                                 # Expand the URL for printable version
 761                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 762                         }
 763                         $la = $sk->getExternalLinkAttributes( $link, $text );
 764                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 765
 766                 }
 767                 return $s;
 768         }
 769
 770
 771         /* private */ function replaceInternalLinks( $s )
 772         {
 773                 global $wgLang, $wgLinkCache;
 774                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 775                 static $fname = "Parser::replaceInternalLink" ;
 776                 wfProfileIn( $fname );
 777
 778                 wfProfileIn( "$fname-setup" );
 779                 static $tc = FALSE;
 780                 # the % is needed to support urlencoded titles as well
 781                 if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
 782                 $sk =& $this->mOptions->getSkin();
 783
 784                 $a = explode( "[[", " " . $s );
 785                 $s = array_shift( $a );
 786                 $s = substr( $s, 1 );
 787
 788                 # Match a link having the form [[namespace:link|alternate]]trail
 789                 static $e1 = FALSE;
 790                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 791                 # Match the end of a line for a word that's not followed by whitespace,
 792                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 793                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 794                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 795                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 796
 797
 798                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 799                 static $image = FALSE;
 800                 static $special = FALSE;
 801                 static $media = FALSE;
 802                 static $category = FALSE;
 803                 if ( !$image ) { $image = Namespace::getImage(); }
 804                 if ( !$special ) { $special = Namespace::getSpecial(); }
 805                 if ( !$media ) { $media = Namespace::getMedia(); }
 806                 if ( !$category ) { $category = Namespace::getCategory(); }
 807
 808                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 809
 810                 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
 811                         $new_prefix = $m[2];
 812                         $s = $m[1];
 813                 } else {
 814                         $new_prefix="";
 815                 }
 816
 817                 wfProfileOut( "$fname-setup" );
 818
 819                 foreach ( $a as $line ) {
 820                         $prefix = $new_prefix;
 821
 822                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 823                                 $text = $m[2];
 824                                 # fix up urlencoded title texts
 825                                 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
 826                                 $trail = $m[3];
 827                         } else { # Invalid form; output directly
 828                                 $s .= $prefix . "[[" . $line ;
 829                                 wfProfileOut( $fname );
 830                                 continue;
 831                         }
 832
 833                         /* Valid link forms:
 834                         Foobar -- normal
 835                         :Foobar -- override special treatment of prefix (images, language links)
 836                         /Foobar -- convert to CurrentPage/Foobar
 837                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 838                         */
 839                         $c = substr($m[1],0,1);
 840                         $noforce = ($c != ":");
 841                         if( $c == "/" ) { # subpage
 842                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 843                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 844                                         $noslash=$m[1];
 845                                 } else {
 846                                         $noslash=substr($m[1],1);
 847                                 }
 848                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
 849                                         $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 850                                         if( "" == $text ) {
 851                                                 $text= $m[1];
 852                                         } # this might be changed for ugliness reasons
 853                                 } else {
 854                                         $link = $noslash; # no subpage allowed, use standard link
 855                                 }
 856                         } elseif( $noforce ) { # no subpage
 857                                 $link = $m[1];
 858                         } else {
 859                                 $link = substr( $m[1], 1 );
 860                         }
 861                         $wasblank = ( "" == $text );
 862                         if( $wasblank )
 863                         $text = $link;
 864
 865                         $nt = Title::newFromText( $link );
 866                         if( !$nt ) {
 867                                 $s .= $prefix . "[[" . $line;
 868                                 wfProfileOut( $fname );
 869                                 continue;
 870                         }
 871                         $ns = $nt->getNamespace();
 872                         $iw = $nt->getInterWiki();
 873                         if( $noforce ) {
 874                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 875                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 876                                         $tmp = $prefix . $trail ;
 877                                         wfProfileOut( $fname );
 878                                         $s .= (trim($tmp) == '')? '': $tmp;
 879                                         continue;
 880                                 }
 881                                 if ( $ns == $image ) {
 882                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 883                                         $wgLinkCache->addImageLinkObj( $nt );
 884                                         wfProfileOut( $fname );
 885                                         continue;
 886                                 }
 887                                 if ( $ns == $category ) {
 888                                         $t = $nt->getText() ;
 889                                         $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 890
 891                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
 892                                         $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 893                                         $wgLinkCache->resume();
 894
 895                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
 896                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
 897                                         $this->mOutput->mCategoryLinks[] = $t ;
 898                                         $s .= $prefix . $trail ;
 899                                         wfProfileOut( $fname );
 900                                         continue;
 901                                 }
 902                         }
 903                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 904                         ( strpos( $link, "#" ) == FALSE ) ) {
 905                                 # Self-links are handled specially; generally de-link and change to bold.
 906                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 907                                 wfProfileOut( $fname );
 908                                 continue;
 909                         }
 910
 911                         if( $ns == $media ) {
 912                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 913                                 $wgLinkCache->addImageLinkObj( $nt );
 914                                 wfProfileOut( $fname );
 915                                 continue;
 916                         } elseif( $ns == $special ) {
 917                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 918                                 wfProfileOut( $fname );
 919                                 continue;
 920                         }
 921                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 922                 }
 923                 wfProfileOut( $fname );
 924                 return $s;
 925         }
 926
 927         # Some functions here used by doBlockLevels()
 928         #
 929         /* private */ function closeParagraph()
 930         {
 931                 $result = "";
 932                 if ( '' != $this->mLastSection ) {
 933                         $result = "</" . $this->mLastSection  . ">\n";
 934                 }
 935                 $this->mInPre = false;
 936                 $this->mLastSection = "";
 937                 return $result;
 938         }
        # getCommon() returns the length of the longest common prefix
        # of both arguments, i.e. how many leading characters they share.
 941         #
 942         /* private */ function getCommon( $st1, $st2 )
 943         {
 944                 $fl = strlen( $st1 );
 945                 $shorter = strlen( $st2 );
 946                 if ( $fl < $shorter ) { $shorter = $fl; }
 947
 948                 for ( $i = 0; $i < $shorter; ++$i ) {
 949                         if ( $st1{$i} != $st2{$i} ) { break; }
 950                 }
 951                 return $i;
 952         }
 953         # These next three functions open, continue, and close the list
 954         # element appropriate to the prefix character passed into them.
 955         #
 956         /* private */ function openList( $char )
 957     {
 958                 $result = $this->closeParagraph();
 959
 960                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 961                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 962                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 963                 else if ( ";" == $char ) {
 964                         $result .= "<dl><dt>";
 965                         $this->mDTopen = true;
 966                 }
 967                 else { $result = "<!-- ERR 1 -->"; }
 968
 969                 return $result;
 970         }
 971
 972         /* private */ function nextItem( $char )
 973         {
 974                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 975                 else if ( ":" == $char || ";" == $char ) {
 976                         $close = "</dd>";
 977                         if ( $this->mDTopen ) { $close = "</dt>"; }
 978                         if ( ";" == $char ) {
 979                                 $this->mDTopen = true;
 980                                 return $close . "<dt>";
 981                         } else {
 982                                 $this->mDTopen = false;
 983                                 return $close . "<dd>";
 984                         }
 985                 }
 986                 return "<!-- ERR 2 -->";
 987         }
 988
 989         /* private */function closeList( $char )
 990         {
 991                 if ( "*" == $char ) { $text = "</li></ul>"; }
 992                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 993                 else if ( ":" == $char ) {
 994                         if ( $this->mDTopen ) {
 995                                 $this->mDTopen = false;
 996                                 $text = "</dt></dl>";
 997                         } else {
 998                                 $text = "</dd></dl>";
 999                         }
1000                 }
1001                 else {  return "<!-- ERR 3 -->"; }
1002                 return $text."\n";
1003         }
1004
1005         /* private */ function doBlockLevels( $text, $linestart ) {
1006                 $fname = "Parser::doBlockLevels";
1007                 wfProfileIn( $fname );
1008
1009                 # Parsing through the text line by line.  The main thing
1010                 # happening here is handling of block-level elements p, pre,
1011                 # and making lists from lines starting with * # : etc.
1012                 #
1013
1014                 // Strip nowiki's again.
1015                 $text = $this->strip($text,$dblStripState);
1016                 $textLines = explode( "\n", $text );
1017
1018                 $lastPrefix = $output = $lastLine = '';
1019                 $this->mDTopen = $inBlockElem = false;
1020                 $prefixLength = 0;
1021                 $paragraphStack = false;
1022
1023                 if ( !$linestart ) {
1024                         $output .= array_shift( $textLines );
1025                 }
1026                 foreach ( $textLines as $oLine ) {
1027                         $lastPrefixLength = strlen( $lastPrefix );
1028                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1029                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1030                         $nowikiOpenMatch = preg_match("/<span class=\"nowiki\"/", $oLine );
1031                         $nowikiCloseMatch = preg_match("/<\\/span  >/", $oLine );
1032                         if($nowikiOpenMatch) $nowikiFullMatch = preg_match("/^(.*)<span class=\"nowiki\"/", $oLine, $nowikiOpenMatches );
1033                         if (!$this->mInPre) {
1034                                 $this->mInPre = !empty($preOpenMatch);
1035                         }
1036                         if (!$this->mInNowiki) {
1037                                 $this->mInNowiki = !empty($nowikiOpenMatch);
1038                         }
1039                         if (
1040                                 !$this->mInPre && (!$this->mInNowiki ||
1041                                 ($nowikiOpenMatch && strlen($nowikiOpenMatches[1]) > 0) )
1042                         )
1043                         {
1044                                 # Multiple prefixes may abut each other for nested lists.
1045                                 $prefixLength = strspn( $oLine, "*#:;" );
1046                                 $pref = substr( $oLine, 0, $prefixLength );
1047
1048                                 # eh?
1049                                 $pref2 = str_replace( ";", ":", $pref );
1050                                 $t = substr( $oLine, $prefixLength );
1051                         } else {
1052                                 # Don't interpret any other prefixes in preformatted text
1053                                 $prefixLength = 0;
1054                                 $pref = $pref2 = '';
1055                                 $t = $oLine;
1056                         }
1057
1058                         # List generation
1059                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1060                                 # Same as the last item, so no need to deal with nesting or opening stuff
1061                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1062                                 $paragraphStack = false;
1063
1064                                 if ( ";" == substr( $pref, -1 ) ) {
1065                                         # The one nasty exception: definition lists work like this:
1066                                         # ; title : definition text
1067                                         # So we check for : in the remainder text to split up the
1068                                         # title and definition, without b0rking links.
1069                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1070                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1071                                                 $term = $match[1];
1072                                                 $output .= $term . $this->nextItem( ":" );
1073                                                 $t = $match[2];
1074                                         }
1075                                 }
1076                         } elseif( $prefixLength || $lastPrefixLength ) {
1077                                 # Either open or close a level...
1078                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1079                                 $paragraphStack = false;
1080
1081                                 while( $commonPrefixLength < $lastPrefixLength ) {
1082                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1083                                         --$lastPrefixLength;
1084                                 }
1085                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1086                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1087                                 }
1088                                 while ( $prefixLength > $commonPrefixLength ) {
1089                                         $char = substr( $pref, $commonPrefixLength, 1 );
1090                                         $output .= $this->openList( $char );
1091
1092                                         if ( ";" == $char ) {
1093                                                 # FIXME: This is dupe of code above
1094                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1095                                                         $term = $match[1];
1096                                                         $output .= $term . $this->nextItem( ":" );
1097                                                         $t = $match[2];
1098                                                 }
1099                                         }
1100                                         ++$commonPrefixLength;
1101                                 }
1102                                 $lastPrefix = $pref2;
1103                         }
1104                         if( 0 == $prefixLength ) {
1105                                 # No prefix (not in list)--go to paragraph mode
1106                                 $uniq_prefix = UNIQ_PREFIX;
1107                                 // XXX: use a stack for nestable elements like span, table and div
1108                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/td|<\\/th)/i", $t );
1109                                 $closematch = preg_match(
1110                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1111                                         "<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1112                                 if ( $openmatch or $closematch ) {
1113                                         $paragraphStack = false;
1114                                         $output .= $this->closeParagraph();
1115                                         if($preOpenMatch and !$preCloseMatch) {
1116                                                 $this->mInPre = true;
1117                                         }
1118                                         if ( $closematch  ) {
1119                                                 $inBlockElem = false;
1120                                         } else {
1121                                                 $inBlockElem = true;
1122                                         }
1123                                 } else if (
1124                                         !$inBlockElem && !$this->mInPre &&
1125                                         (!$this->mInNowiki || ($nowikiOpenMatch && trim($nowikiOpenMatches[1]) == ''  ) ) )
1126                                         {
1127                                         if ( " " == $t{0} and trim($t) != '' and (!$this->mInNowiki || $nowikiOpenMatch && strlen($nowikiOpenMatches[1]) > 0 ) ) {
1128                                                 // pre
1129                                                 if ($this->mLastSection != 'pre') {
1130                                                         $paragraphStack = false;
1131                                                         $output .= $this->closeParagraph().'<pre>';
1132                                                         $this->mLastSection = 'pre';
1133                                                 }
1134                                         } else {
1135                                                 // paragraph
1136                                                 if ( '' == trim($t) ) {
1137                                                         if ( $paragraphStack ) {
1138                                                                 $output .= $paragraphStack.'<br />';
1139                                                                 $paragraphStack = false;
1140                                                                 $this->mLastSection = 'p';
1141                                                         } else {
1142                                                                 if ($this->mLastSection != 'p' ) {
1143                                                                         $output .= $this->closeParagraph();
1144                                                                         $this->mLastSection = '';
1145                                                                         $paragraphStack = "<p>";
1146                                                                 } else {
1147                                                                         $paragraphStack = '</p><p>';
1148                                                                 }
1149                                                         }
1150                                                 } else {
1151                                                         if ( $paragraphStack ) {
1152                                                                 $output .= $paragraphStack;
1153                                                                 $paragraphStack = false;
1154                                                                 $this->mLastSection = 'p';
1155                                                         } else if ($this->mLastSection != 'p') {
1156                                                                 $output .= $this->closeParagraph().'<p>';
1157                                                                 $this->mLastSection = 'p';
1158                                                         }
1159                                                 }
1160                                         }
1161                                 }
1162                         }
1163                         if($nowikiCloseMatch) $this->mInNowiki = false;
1164                         if ($paragraphStack === false) {
1165                                 $output .= $t."\n";
1166                         }
1167                 }
1168                 while ( $prefixLength ) {
1169                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1170                         --$prefixLength;
1171                 }
1172                 if ( "" != $this->mLastSection ) {
1173                         $output .= "</" . $this->mLastSection . ">";
1174                         $this->mLastSection = "";
1175                 }
1176                 $output = $this->unstrip( $output, $dblStripState );
1177
1178                 wfProfileOut( $fname );
1179                 return $output;
1180         }
1181
1182         function getVariableValue( $index ) {
1183                 global $wgLang, $wgSitename, $wgServer;
1184
1185                 switch ( $index ) {
1186                         case MAG_CURRENTMONTH:
1187                                 return date( "m" );
1188                         case MAG_CURRENTMONTHNAME:
1189                                 return $wgLang->getMonthName( date("n") );
1190                         case MAG_CURRENTMONTHNAMEGEN:
1191                                 return $wgLang->getMonthNameGen( date("n") );
1192                         case MAG_CURRENTDAY:
1193                                 return date("j");
1194                         case MAG_PAGENAME:
1195                                 return $this->mTitle->getText();
1196                         case MAG_NAMESPACE:
1197                                 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1198                                 return $wgLang->getNsText($this->mTitle->getNamespace()); // Patch  by Dori
1199                         case MAG_CURRENTDAYNAME:
1200                                 return $wgLang->getWeekdayName( date("w")+1 );
1201                         case MAG_CURRENTYEAR:
1202                                 return date( "Y" );
1203                         case MAG_CURRENTTIME:
1204                                 return $wgLang->time( wfTimestampNow(), false );
1205                         case MAG_NUMBEROFARTICLES:
1206                                 return wfNumberOfArticles();
1207                         case MAG_SITENAME:
1208                                 return $wgSitename;
1209                         case MAG_SERVER:
1210                                 return $wgServer;
1211                         default:
1212                                 return NULL;
1213                 }
1214         }
1215
1216         function initialiseVariables()
1217         {
1218                 global $wgVariableIDs;
1219                 $this->mVariables = array();
1220                 foreach ( $wgVariableIDs as $id ) {
1221                         $mw =& MagicWord::get( $id );
1222                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1223                 }
1224         }
1225
1226         /* private */ function replaceVariables( $text, $args = array() )
1227         {
1228                 global $wgLang, $wgScript, $wgArticlePath;
1229
1230                 $fname = "Parser::replaceVariables";
1231                 wfProfileIn( $fname );
1232
1233                 $bail = false;
1234                 if ( !$this->mVariables ) {
1235                         $this->initialiseVariables();
1236                 }
1237                 $titleChars = Title::legalChars();
1238                 $nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );
1239
1240                 # This function is called recursively. To keep track of arguments we need a stack:
1241                 array_push( $this->mArgStack, $args );
1242
1243                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1244                 $GLOBALS['wgCurParser'] =& $this;
1245
1246
1247                 if ( $this->mOutputType == OT_HTML ) {
1248                         # Variable substitution
1249                         $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );
1250
1251                         # Argument substitution
1252                         $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
1253                 }
1254                 # Template substitution
1255                 $regex = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";
1256                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1257
1258                 array_pop( $this->mArgStack );
1259
1260                 wfProfileOut( $fname );
1261                 return $text;
1262         }
1263
1264         function variableSubstitution( $matches )
1265         {
1266                 if ( array_key_exists( $matches[1], $this->mVariables ) ) {
1267                         $text = $this->mVariables[$matches[1]];
1268                         $this->mOutput->mContainsOldMagic = true;
1269                 } else {
1270                         $text = $matches[0];
1271                 }
1272                 return $text;
1273         }
1274
1275         function braceSubstitution( $matches )
1276         {
1277                 global $wgLinkCache, $wgLang;
1278                 $fname = "Parser::braceSubstitution";
1279                 $found = false;
1280                 $nowiki = false;
1281                 $noparse = false;
1282
1283                 $title = NULL;
1284
1285                 # $newline is an optional newline character before the braces
1286                 # $part1 is the bit before the first |, and must contain only title characters
1287                 # $args is a list of arguments, starting from index 0, not including $part1
1288
1289                 $newline = $matches[1];
1290                 $part1 = $matches[2];
1291                 # If the third subpattern matched anything, it will start with |
1292                 if ( $matches[3] !== "" ) {
1293                         $args = explode( "|", substr( $matches[3], 1 ) );
1294                 } else {
1295                         $args = array();
1296                 }
1297                 $argc = count( $args );
1298
1299                 # {{{}}}
1300                 if ( strpos( $matches[0], "{{{" ) !== false ) {
1301                         $text = $matches[0];
1302                         $found = true;
1303                         $noparse = true;
1304                 }
1305
1306                 # SUBST
1307                 if ( !$found ) {
1308                         $mwSubst =& MagicWord::get( MAG_SUBST );
1309                         if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1310                                 if ( $this->mOutputType != OT_WIKI ) {
1311                                         # Invalid SUBST not replaced at PST time
1312                                         # Return without further processing
1313                                         $text = $matches[0];
1314                                         $found = true;
1315                                         $noparse= true;
1316                                 }
1317                         } elseif ( $this->mOutputType == OT_WIKI ) {
1318                                 # SUBST not found in PST pass, do nothing
1319                                 $text = $matches[0];
1320                                 $found = true;
1321                         }
1322                 }
1323
1324                 # MSG, MSGNW and INT
1325                 if ( !$found ) {
1326                         # Check for MSGNW:
1327                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1328                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1329                                 $nowiki = true;
1330                         } else {
1331                                 # Remove obsolete MSG:
1332                                 $mwMsg =& MagicWord::get( MAG_MSG );
1333                                 $mwMsg->matchStartAndRemove( $part1 );
1334                         }
1335
1336                         # Check if it is an internal message
1337                         $mwInt =& MagicWord::get( MAG_INT );
1338                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1339                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1340                                         $text = wfMsgReal( $part1, $args, true );
1341                                         $found = true;
1342                                 }
1343                         }
1344                 }
1345
1346                 # NS
1347                 if ( !$found ) {
1348                         # Check for NS: (namespace expansion)
1349                         $mwNs = MagicWord::get( MAG_NS );
1350                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1351                                 if ( intval( $part1 ) ) {
1352                                         $text = $wgLang->getNsText( intval( $part1 ) );
1353                                         $found = true;
1354                                 } else {
1355                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1356                                         if ( !is_null( $index ) ) {
1357                                                 $text = $wgLang->getNsText( $index );
1358                                                 $found = true;
1359                                         }
1360                                 }
1361                         }
1362                 }
1363
1364                 # LOCALURL and LOCALURLE
1365                 if ( !$found ) {
1366                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1367                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1368
1369                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1370                                 $func = 'getLocalURL';
1371                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1372                                 $func = 'escapeLocalURL';
1373                         } else {
1374                                 $func = '';
1375                         }
1376
1377                         if ( $func !== '' ) {
1378                                 $title = Title::newFromText( $part1 );
1379                                 if ( !is_null( $title ) ) {
1380                                         if ( $argc > 0 ) {
1381                                                 $text = $title->$func( $args[0] );
1382                                         } else {
1383                                                 $text = $title->$func();
1384                                         }
1385                                         $found = true;
1386                                 }
1387                         }
1388                 }
1389
1390                 # Internal variables
1391                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1392                         $text = $this->mVariables[$part1];
1393                         $found = true;
1394                         $this->mOutput->mContainsOldMagic = true;
1395                 }
1396 /*
1397                 # Arguments input from the caller
1398                 $inputArgs = end( $this->mArgStack );
1399                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1400                         $text = $inputArgs[$part1];
1401                         $found = true;
1402                 }
1403 */
1404                 # Load from database
1405                 if ( !$found ) {
1406                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1407                         if ( !is_null( $title ) && !$title->isExternal() ) {
1408                                 # Check for excessive inclusion
1409                                 $dbk = $title->getPrefixedDBkey();
1410                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1411                                         $article = new Article( $title );
1412                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1413                                         if ( $articleContent !== false ) {
1414                                                 $found = true;
1415                                                 $text = $articleContent;
1416
1417                                         }
1418                                 }
1419
1420                                 # If the title is valid but undisplayable, make a link to it
1421                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1422                                         $text = "[[" . $title->getPrefixedText() . "]]";
1423                                         $found = true;
1424                                 }
1425                         }
1426                 }
1427
1428                 # Recursive parsing, escaping and link table handling
1429                 # Only for HTML output
1430                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1431                         $text = wfEscapeWikiText( $text );
1432                 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1433                         # Clean up argument array
1434                         $assocArgs = array();
1435                         $index = 1;
1436                         foreach( $args as $arg ) {
1437                                 $eqpos = strpos( $arg, "=" );
1438                                 if ( $eqpos === false ) {
1439                                         $assocArgs[$index++] = $arg;
1440                                 } else {
1441                                         $name = trim( substr( $arg, 0, $eqpos ) );
1442                                         $value = trim( substr( $arg, $eqpos+1 ) );
1443                                         if ( $value === false ) {
1444                                                 $value = "";
1445                                         }
1446                                         if ( $name !== false ) {
1447                                                 $assocArgs[$name] = $value;
1448                                         }
1449                                 }
1450                         }
1451
1452                         # Do not enter included links in link table
1453                         if ( !is_null( $title ) ) {
1454                                 $wgLinkCache->suspend();
1455                         }
1456
1457                         # Run full parser on the included text
1458                         $text = $this->stripParse( $text, $newline, $assocArgs );
1459
1460                         # Resume the link cache and register the inclusion as a link
1461                         if ( !is_null( $title ) ) {
1462                                 $wgLinkCache->resume();
1463                                 $wgLinkCache->addLinkObj( $title );
1464                         }
1465                 }
1466
1467                 if ( !$found ) {
1468                         return $matches[0];
1469                 } else {
1470                         return $text;
1471                 }
1472         }
1473
1474         # Triple brace replacement -- used for template arguments
1475         function argSubstitution( $matches )
1476         {
1477                 $newline = $matches[1];
1478                 $arg = trim( $matches[2] );
1479                 $text = $matches[0];
1480                 $inputArgs = end( $this->mArgStack );
1481
1482                 if ( array_key_exists( $arg, $inputArgs ) ) {
1483                         $text = $this->stripParse( $inputArgs[$arg], $newline, array() );
1484                 }
1485
1486                 return $text;
1487         }
1488
1489         # Returns true if the function is allowed to include this entity
1490         function incrementIncludeCount( $dbk )
1491         {
1492                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1493                         $this->mIncludeCount[$dbk] = 0;
1494                 }
1495                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1496                         return true;
1497                 } else {
1498                         return false;
1499                 }
1500         }
1501
1502
1503         # Cleans up HTML, removes dangerous tags and attributes
1504         /* private */ function removeHTMLtags( $text )
1505         {
1506                 global $wgUseTidy, $wgUserHtml;
1507                 $fname = "Parser::removeHTMLtags";
1508                 wfProfileIn( $fname );
1509
1510                 if( $wgUserHtml ) {
1511                         $htmlpairs = array( # Tags that must be closed
1512                                 "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1513                                 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1514                                 "strike", "strong", "tt", "var", "div", "center",
1515                                 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1516                                 "ruby", "rt" , "rb" , "rp", "p"
1517                         );
1518                         $htmlsingle = array(
1519                                 "br", "hr", "li", "dt", "dd"
1520                         );
1521                         $htmlnest = array( # Tags that can be nested--??
1522                                 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1523                                 "dl", "font", "big", "small", "sub", "sup"
1524                         );
1525                         $tabletags = array( # Can only appear inside table
1526                                 "td", "th", "tr"
1527                         );
1528                 } else {
1529                         $htmlpairs = array();
1530                         $htmlsingle = array();
1531                         $htmlnest = array();
1532                         $tabletags = array();
1533                 }
1534
1535                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1536                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1537
1538                 $htmlattrs = $this->getHTMLattrs () ;
1539
1540                 # Remove HTML comments
1541                 $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );
1542
1543                 $bits = explode( "<", $text );
1544                 $text = array_shift( $bits );
1545                 if(!$wgUseTidy) {
1546                         $tagstack = array(); $tablestack = array();
1547                         foreach ( $bits as $x ) {
1548                                 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1549                                 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1550                                 $x, $regs );
1551                                 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1552                                 error_reporting( $prev );
1553
1554                                 $badtag = 0 ;
1555                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1556                                         # Check our stack
1557                                         if ( $slash ) {
1558                                                 # Closing a tag...
1559                                                 if ( ! in_array( $t, $htmlsingle ) &&
1560                                                 ( $ot = @array_pop( $tagstack ) ) != $t ) {
1561                                                         @array_push( $tagstack, $ot );
1562                                                         $badtag = 1;
1563                                                 } else {
1564                                                         if ( $t == "table" ) {
1565                                                                 $tagstack = array_pop( $tablestack );
1566                                                         }
1567                                                         $newparams = "";
1568                                                 }
1569                                         } else {
1570                                                 # Keep track for later
1571                                                 if ( in_array( $t, $tabletags ) &&
1572                                                 ! in_array( "table", $tagstack ) ) {
1573                                                         $badtag = 1;
1574                                                 } else if ( in_array( $t, $tagstack ) &&
1575                                                 ! in_array ( $t , $htmlnest ) ) {
1576                                                         $badtag = 1 ;
1577                                                 } else if ( ! in_array( $t, $htmlsingle ) ) {
1578                                                         if ( $t == "table" ) {
1579                                                                 array_push( $tablestack, $tagstack );
1580                                                                 $tagstack = array();
1581                                                         }
1582                                                         array_push( $tagstack, $t );
1583                                                 }
1584                                                 # Strip non-approved attributes from the tag
1585                                                 $newparams = $this->fixTagAttributes($params);
1586
1587                                         }
1588                                         if ( ! $badtag ) {
1589                                                 $rest = str_replace( ">", "&gt;", $rest );
1590                                                 $text .= "<$slash$t $newparams$brace$rest";
1591                                                 continue;
1592                                         }
1593                                 }
1594                                 $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1595                         }
1596                         # Close off any remaining tags
1597                         while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
1598                                 $text .= "</$t>\n";
1599                                 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1600                         }
1601                 } else {
1602                         # this might be possible using tidy itself
1603                         foreach ( $bits as $x ) {
1604                                 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1605                                 $x, $regs );
1606                                 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1607                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1608                                         $newparams = $this->fixTagAttributes($params);
1609                                         $rest = str_replace( ">", "&gt;", $rest );
1610                                         $text .= "<$slash$t $newparams$brace$rest";
1611                                 } else {
1612                                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1613                                 }
1614                         }
1615                 }
1616                 wfProfileOut( $fname );
1617                 return $text;
1618         }
1619
1620
1621 /*
1622  *
1623  * This function accomplishes several tasks:
1624  * 1) Auto-number headings if that option is enabled
1625  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1626  * 3) Add a Table of contents on the top for users who have enabled the option
1627  * 4) Auto-anchor headings
1628  *
1629  * It loops through all headlines, collects the necessary data, then splits up the
1630  * string and re-inserts the newly formatted headlines.
1631  *
1632  */
1633
1634         /* private */ function formatHeadings( $text, $isMain=true )
1635         {
1636                 global $wgInputEncoding;
1637
1638                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1639                 $doShowToc = $this->mOptions->getShowToc();
1640                 if( !$this->mTitle->userCanEdit() ) {
1641                         $showEditLink = 0;
1642                         $rightClickHack = 0;
1643                 } else {
1644                         $showEditLink = $this->mOptions->getEditSection();
1645                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1646                 }
1647
1648                 # Inhibit editsection links if requested in the page
1649                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1650                 if( $esw->matchAndRemove( $text ) ) {
1651                         $showEditLink = 0;
1652                 }
1653                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1654                 # do not add TOC
1655                 $mw =& MagicWord::get( MAG_NOTOC );
1656                 if( $mw->matchAndRemove( $text ) ) {
1657                         $doShowToc = 0;
1658                 }
1659
1660                 # never add the TOC to the Main Page. This is an entry page that should not
1661                 # be more than 1-2 screens large anyway
1662                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1663                         $doShowToc = 0;
1664                 }
1665
1666                 # Get all headlines for numbering them and adding funky stuff like [edit]
1667                 # links - this is for later, but we need the number of headlines right now
1668                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1669
1670                 # if there are fewer than 4 headlines in the article, do not show TOC
1671                 if( $numMatches < 4 ) {
1672                         $doShowToc = 0;
1673                 }
1674
1675                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1676                 # override above conditions and always show TOC
1677                 $mw =& MagicWord::get( MAG_FORCETOC );
1678                 if ($mw->matchAndRemove( $text ) ) {
1679                         $doShowToc = 1;
1680                 }
1681
1682
1683                 # We need this to perform operations on the HTML
1684                 $sk =& $this->mOptions->getSkin();
1685
1686                 # headline counter
1687                 $headlineCount = 0;
1688
1689                 # Ugh .. the TOC should have neat indentation levels which can be
1690                 # passed to the skin functions. These are determined here
1691                 $toclevel = 0;
1692                 $toc = "";
1693                 $full = "";
1694                 $head = array();
1695                 $sublevelCount = array();
1696                 $level = 0;
1697                 $prevlevel = 0;
1698                 foreach( $matches[3] as $headline ) {
1699                         $numbering = "";
1700                         if( $level ) {
1701                                 $prevlevel = $level;
1702                         }
1703                         $level = $matches[1][$headlineCount];
1704                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1705                                 # reset when we enter a new level
1706                                 $sublevelCount[$level] = 0;
1707                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1708                                 $toclevel += $level - $prevlevel;
1709                         }
1710                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1711                                 # reset when we step back a level
1712                                 $sublevelCount[$level+1]=0;
1713                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1714                                 $toclevel -= $prevlevel - $level;
1715                         }
1716                         # count number of headlines for each level
1717                         @$sublevelCount[$level]++;
1718                         if( $doNumberHeadings || $doShowToc ) {
1719                                 $dot = 0;
1720                                 for( $i = 1; $i <= $level; $i++ ) {
1721                                         if( !empty( $sublevelCount[$i] ) ) {
1722                                                 if( $dot ) {
1723                                                         $numbering .= ".";
1724                                                 }
1725                                                 $numbering .= $sublevelCount[$i];
1726                                                 $dot = 1;
1727                                         }
1728                                 }
1729                         }
1730
1731                         # The canonized header is a version of the header text safe to use for links
1732                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1733                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1734
1735                         # strip out HTML
1736                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1737                         $tocline = trim( $canonized_headline );
1738                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
1739                         # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
1740                         $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
1741                         $refer[$headlineCount] = $canonized_headline;
1742
1743                         # count how many in assoc. array so we can track dupes in anchors
1744                         @$refers[$canonized_headline]++;
1745                         $refcount[$headlineCount]=$refers[$canonized_headline];
1746
1747                         # Prepend the number to the heading text
1748
1749                         if( $doNumberHeadings || $doShowToc ) {
1750                                 $tocline = $numbering . " " . $tocline;
1751
1752                                 # Don't number the heading if it is the only one (looks silly)
1753                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1754                                         # the two are different if the line contains a link
1755                                         $headline=$numbering . " " . $headline;
1756                                 }
1757                         }
1758
1759                         # Create the anchor for linking from the TOC to the section
1760                         $anchor = $canonized_headline;
1761                         if($refcount[$headlineCount] > 1 ) {
1762                                 $anchor .= "_" . $refcount[$headlineCount];
1763                         }
1764                         if( $doShowToc ) {
1765                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1766                         }
1767                         if( $showEditLink ) {
1768                                 if ( empty( $head[$headlineCount] ) ) {
1769                                         $head[$headlineCount] = "";
1770                                 }
1771                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1772                         }
1773
1774                         # Add the edit section span
1775                         if( $rightClickHack ) {
1776                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1777                         }
1778
1779                         # give headline the correct <h#> tag
1780                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1781
1782                         $headlineCount++;
1783                 }
1784
1785                 if( $doShowToc ) {
1786                         $toclines = $headlineCount;
1787                         $toc .= $sk->tocUnindent( $toclevel );
1788                         $toc = $sk->tocTable( $toc );
1789                 }
1790
1791                 # split up and insert constructed headlines
1792
1793                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1794                 $i = 0;
1795
1796                 foreach( $blocks as $block ) {
1797                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1798                             # This is the [edit] link that appears for the top block of text when
1799                                 # section editing is enabled
1800
1801                                 # Disabled because it broke block formatting
1802                                 # For example, a bullet point in the top line
1803                                 # $full .= $sk->editSectionLink(0);
1804                         }
1805                         $full .= $block;
1806                         if( $doShowToc && !$i && $isMain) {
1807                         # Top anchor now in skin
1808                                 $full = $full.$toc;
1809                         }
1810
1811                         if( !empty( $head[$i] ) ) {
1812                                 $full .= $head[$i];
1813                         }
1814                         $i++;
1815                 }
1816
1817                 return $full;
1818         }
1819
# Turn every "ISBN <code>" occurrence in $text into an HTML link.
#
# The text is cut at each literal "ISBN " marker. For every piece after a
# marker, leading spaces are skipped and the longest following run of digits,
# hyphens and upper-case ASCII letters is taken as the code:
#   - no code found: the marker, the skipped spaces and the rest of the piece
#     are put back unchanged;
#   - code found: a link to the Special:Booksources title is emitted with the
#     query "isbn=<code without hyphens>" and the label "ISBN <code>", followed
#     by the rest of the piece (the skipped spaces are not re-emitted).
#
# @param  string $text  text to scan
# @return string        the text with ISBN references linked
/* private */ function magicISBN( $text )
{
    # Characters accepted as part of an ISBN code.
    $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    # A space is prepended so that a marker at the very start of the text
    # still leaves a first piece; that space is stripped again just below.
    # explode() is used because the marker is a literal string, not a pattern.
    $pieces = explode( "ISBN ", " $text" );
    if ( count( $pieces ) < 2 ) {
        return $text;
    }
    $text = (string)substr( array_shift( $pieces ), 1 );

    foreach ( $pieces as $x ) {
        # strspn() measures each run in one call and copes with an empty
        # remainder, so no character of $x is ever indexed past its end.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        $isbnLen = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $isbnLen );
        $x = (string)substr( $x, $isbnLen );

        # The code cannot contain spaces, so only hyphens need removing.
        $num = str_replace( "-", "", $isbn );

        if ( $num === "" ) {
            # Not an ISBN reference after all: restore the original text.
            $text .= "ISBN $blank$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
            $text .= "<a href=\"" .
                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>";
            $text .= $x;
        }
    }
    return $text;
}
# Turn every "RFC <number>" occurrence in $text into an external link.
#
# The text is cut at each literal "RFC " marker. For every piece after a
# marker, leading spaces are skipped and the longest following run of decimal
# digits is taken as the RFC number:
#   - no digits found: the marker, the skipped spaces and the rest of the
#     piece are put back unchanged;
#   - digits found: the "rfcurl" message is fetched, its "$1" placeholder is
#     replaced by the number, and a link labelled "RFC <number>" is emitted,
#     followed by the rest of the piece.
#
# @param  string $text  text to scan
# @return string        the text with RFC references linked
/* private */ function magicRFC( $text )
{
    # Characters accepted as part of an RFC number.
    $valid = "0123456789";

    # A space is prepended so that a marker at the very start of the text
    # still leaves a first piece; that space is stripped again just below.
    # explode() is used because the marker is a literal string, not a pattern.
    $pieces = explode( "RFC ", " $text" );
    if ( count( $pieces ) < 2 ) {
        return $text;
    }
    $text = (string)substr( array_shift( $pieces ), 1 );

    foreach ( $pieces as $x ) {
        # strspn() measures each run in one call and copes with an empty
        # remainder, so no character of $x is ever indexed past its end.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        $rfcLen = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $rfcLen );
        $x = (string)substr( $x, $rfcLen );

        if ( $rfc === "" ) {
            # Not an RFC reference after all: restore the original text.
            $text .= "RFC $blank$x";
        } else {
            $url = wfmsg( "rfcurl" );
            $url = str_replace( '$1', $rfc, $url );
            $sk =& $this->mOptions->getSkin();
            $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
            $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
        }
    }
    return $text;
}
1886
# Transform wiki markup before it is saved; the result is wiki markup again
# (output type OT_WIKI), not HTML.
#
# Steps: remember title/options, optionally reset parser state, convert
# "\r\n" line endings to "\n", then run pstPass2() on the text between a
# strip()/unstrip() pair that share one local strip state.
#
# @param string $text        wiki markup as submitted
# @param object $title       title of the page being saved (kept by reference)
# @param object $user        user performing the save, handed to pstPass2()
# @param object $options     parser options to use for this run
# @param bool   $clearState  whether to call clearState() first
# @return string             the transformed wiki markup
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
    $this->mOutputType = OT_WIKI;
    $this->mTitle =& $title;
    $this->mOptions = $options;

    if ( $clearState ) {
        $this->clearState();
    }

    # Normalise Windows line endings
    $text = str_replace( "\r\n", "\n", $text );

    $stripState = false;
    $stripped = $this->strip( $text, $stripState, false );
    $transformed = $this->pstPass2( $stripped, $user );
    $restored = $this->unstrip( $transformed, $stripState );

    return $restored;
}
1915
# Second pass of the pre-save transform. Works on wiki markup and returns
# wiki markup.
#
# In order:
#   1. replaceVariables() - with mOutputType set to OT_WIKI this only
#      processes {{subst:xxx}} type tags;
#   2. signature expansion: ~~~~~ becomes the date, ~~~~ a user link plus the
#      date, ~~~ the user link alone;
#   3. "context link" shortcuts such as [[page (context)|]], [[ns:page|]] and
#      [[|page]] are expanded to full piped links;
#   4. trailing whitespace is trimmed and the MAG_END magic word is removed.
#
# @param string $text  wiki markup
# @param object $user  user whose name and "nickname" option form the signature
# @return string       the transformed wiki markup
/* private */ function pstPass2( $text, &$user )
{
    global $wgLang, $wgLocaltimezone;

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( "nickname" );
    if ( "" == $k ) {
        # No nickname set: sign with the plain user name
        $k = $n;
    }

    /* Note: this is an ugly timezone hack for the European wikis */
    # TZ is switched only while the timestamp is being formatted
    if ( isset( $wgLocaltimezone ) ) {
        $oldtz = getenv( "TZ" );
        putenv( "TZ=$wgLocaltimezone" );
    }
    $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
        " (" . date( "T" ) . ")";
    if ( isset( $wgLocaltimezone ) ) {
        putenv( "TZ=$oldtz" );
    }

    # The tilde markers are literal strings, so plain string replacement is
    # used. With preg_replace() a "$1" or "\1" inside the user name or
    # nickname would be treated as a backreference and mangle the signature.
    # Longest marker first, so that ~~~~~ is not consumed as ~~~~ or ~~~.
    $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
    $text = str_replace( "~~~~~", $d, $text );
    $text = str_replace( "~~~~", "$userLink $d", $text );
    $text = str_replace( "~~~", $userLink, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
    $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
    $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                            # [[ns:page (cont)|]]

    # If the current title itself looks like "name (context)", that context is
    # reused when expanding [[|page]] below.
    $context = "";
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }

    # Most specific pattern first
    $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
    $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
    $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

    if ( "" == $context ) {
        $text = preg_replace( $p2, "[[\\1]]", $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
1986
# Prepare the parser for use by code outside parse(): stores the title (by
# reference), the options and the output type on the object, and optionally
# resets the parser state, so that individual class members can then be
# called directly.
#
# @param object $title       title to parse against
# @param object $options     parser options
# @param int    $outputType  value stored in mOutputType
# @param bool   $clearState  whether to call clearState()
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
    $this->mOutputType = $outputType;
    $this->mOptions = $options;
    $this->mTitle =& $title;

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
1998
# Run variable replacement over a message text with output type OT_MSG.
#
# The global $wgTitle is used as the title and the parser state is cleared
# before replaceVariables() runs. A static flag makes nested calls return
# their input untouched, which guards against infinite recursion.
#
# @param string $text     message text
# @param object $options  parser options
# @return string          the text after variable replacement, or the
#                         unchanged text when called re-entrantly
function transformMsg( $text, $options ) {
    global $wgTitle;
    static $executing = false;

    if ( !$executing ) {
        $executing = true;

        $this->mOutputType = OT_MSG;
        $this->mOptions = $options;
        $this->mTitle = $wgTitle;
        $this->clearState();

        $text = $this->replaceVariables( $text );

        $executing = false;
    }

    return $text;
}
2018 }
2019
# Value holder for the result of a parser run: the output text, the language
# links and category links collected for it, a flag telling whether old-style
# magic was present, and a cache timestamp.
class ParserOutput
{
    var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
    var $mCacheTime; # Used in ParserCache

    # Constructor. Every argument is optional; the cache time starts empty.
    function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
        $containsOldMagic = false )
    {
        $this->mText = $text;
        $this->mLanguageLinks = $languageLinks;
        $this->mCategoryLinks = $categoryLinks;
        $this->mContainsOldMagic = $containsOldMagic;
        $this->mCacheTime = "";
    }

    # Accessors
    function getText() { return $this->mText; }
    function getLanguageLinks() { return $this->mLanguageLinks; }
    function getCategoryLinks() { return $this->mCategoryLinks; }
    function getCacheTime() { return $this->mCacheTime; }
    function containsOldMagic() { return $this->mContainsOldMagic; }

    # Mutators; each returns whatever wfSetVar() returns
    function setText( $text ) { return wfSetVar( $this->mText, $text ); }
    function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
    function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
    function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
    function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

    # Fold the link lists and the old-magic flag of another ParserOutput into
    # this one. The text and the cache time are left untouched.
    function merge( $other ) {
        $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
        # Category links must come from $other; merging this object's own
        # language links here would drop $other's categories and pollute the
        # category list with language links.
        $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
        $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
    }

}
2053
# Settings that steer a parser run. Site-wide switches are copied from
# globals and per-user preferences are read from a User object; see
# initialiseFromUser(). Each setting has a getter and a setter.
class ParserOptions
{
    # All variables are private
    var $mUseTeX;                    # Use texvc to expand <math> tags
    var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
    var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
    var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
    var $mAllowExternalImages;       # Allow external images inline
    var $mSkin;                      # Reference to the preferred skin
    var $mDateFormat;                # Date format index
    var $mEditSection;               # Create "edit section" links
    var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
    var $mNumberHeadings;            # Automatically number headings
    var $mShowToc;                   # Show table of contents

    # --- Getters ---------------------------------------------------------

    function getUseTeX() {
        return $this->mUseTeX;
    }
    function getUseCategoryMagic() {
        return $this->mUseCategoryMagic;
    }
    function getUseDynamicDates() {
        return $this->mUseDynamicDates;
    }
    function getInterwikiMagic() {
        return $this->mInterwikiMagic;
    }
    function getAllowExternalImages() {
        return $this->mAllowExternalImages;
    }
    function getSkin() {
        return $this->mSkin;
    }
    function getDateFormat() {
        return $this->mDateFormat;
    }
    function getEditSection() {
        return $this->mEditSection;
    }
    function getEditSectionOnRightClick() {
        return $this->mEditSectionOnRightClick;
    }
    function getNumberHeadings() {
        return $this->mNumberHeadings;
    }
    function getShowToc() {
        return $this->mShowToc;
    }

    # --- Setters (each returns whatever wfSetVar()/wfSetRef() returns) ----

    function setUseTeX( $x ) {
        return wfSetVar( $this->mUseTeX, $x );
    }
    function setUseCategoryMagic( $x ) {
        return wfSetVar( $this->mUseCategoryMagic, $x );
    }
    function setUseDynamicDates( $x ) {
        return wfSetVar( $this->mUseDynamicDates, $x );
    }
    function setInterwikiMagic( $x ) {
        return wfSetVar( $this->mInterwikiMagic, $x );
    }
    function setAllowExternalImages( $x ) {
        return wfSetVar( $this->mAllowExternalImages, $x );
    }
    # The skin is the only setting stored through wfSetRef()
    function setSkin( $x ) {
        return wfSetRef( $this->mSkin, $x );
    }
    function setDateFormat( $x ) {
        return wfSetVar( $this->mDateFormat, $x );
    }
    function setEditSection( $x ) {
        return wfSetVar( $this->mEditSection, $x );
    }
    function setEditSectionOnRightClick( $x ) {
        return wfSetVar( $this->mEditSectionOnRightClick, $x );
    }
    function setNumberHeadings( $x ) {
        return wfSetVar( $this->mNumberHeadings, $x );
    }
    function setShowToc( $x ) {
        return wfSetVar( $this->mShowToc, $x );
    }

    # Factory: build a ParserOptions object initialised from the given user.
    /* static */ function newFromUser( &$user )
    {
        $options = new ParserOptions;
        $options->initialiseFromUser( $user );
        return $options;
    }

    # Fill every setting: site-wide switches come from the $wg* globals, the
    # skin and the remaining preferences from the user. When $userInput is
    # false/empty, a fresh User object flagged as loaded is used instead.
    function initialiseFromUser( &$userInput )
    {
        global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

        if ( $userInput ) {
            $user =& $userInput;
        } else {
            $user = new User;
            $user->setLoaded( true );
        }

        # Site configuration
        $this->mUseTeX = $wgUseTeX;
        $this->mUseCategoryMagic = $wgUseCategoryMagic;
        $this->mUseDynamicDates = $wgUseDynamicDates;
        $this->mInterwikiMagic = $wgInterwikiMagic;
        $this->mAllowExternalImages = $wgAllowExternalImages;

        # User preferences
        $this->mSkin =& $user->getSkin();

        # member variable => name of the user option it is loaded from
        $userOptions = array(
            'mDateFormat'              => "date",
            'mEditSection'             => "editsection",
            'mEditSectionOnRightClick' => "editsectiononrightclick",
            'mNumberHeadings'          => "numberheadings",
            'mShowToc'                 => "showtoc",
        );
        foreach ( $userOptions as $member => $optionName ) {
            $this->$member = $user->getOption( $optionName );
        }
    }


}
2126
2127 # Regex callbacks, used in Parser::replaceVariables
# Plain-function callback that forwards a regex match array to the
# braceSubstitution() method of the parser held in the global $wgCurParser.
#
# @param array $matches  match array handed over by the regex engine
# @return mixed          whatever braceSubstitution() returns
function wfBraceSubstitution( $matches )
{
    global $wgCurParser;

    $replacement = $wgCurParser->braceSubstitution( $matches );
    return $replacement;
}
2133
# Plain-function callback that forwards a regex match array to the
# argSubstitution() method of the parser held in the global $wgCurParser.
#
# @param array $matches  match array handed over by the regex engine
# @return mixed          whatever argSubstitution() returns
function wfArgSubstitution( $matches )
{
    global $wgCurParser;

    $replacement = $wgCurParser->argSubstitution( $matches );
    return $replacement;
}
2139
# Plain-function callback that forwards a regex match array to the
# variableSubstitution() method of the parser held in the global $wgCurParser.
#
# @param array $matches  match array handed over by the regex engine
# @return mixed          whatever variableSubstitution() returns
function wfVariableSubstitution( $matches )
{
    global $wgCurParser;

    $replacement = $wgCurParser->variableSubstitution( $matches );
    return $replacement;
}
2145
2146 ?>