includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         require_once('extensions/wikihiero/wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         require_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  40 define( "MAX_INCLUDE_REPEAT", 5 );
  41
  42 # Allowed values for $mOutputType
  43 define( "OT_HTML", 1 );
  44 define( "OT_WIKI", 2 );
  45 define( "OT_MSG", 3 );
  46
  47 # string parameter for extractTags which will cause it
  48 # to strip HTML comments in addition to regular
  49 # <XML>-style tags. This should not be anything we
  50 # may want to use in wikisyntax
  51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
  52
  53 # prefix for escaping, used in two functions at least
  54 define( "UNIQ_PREFIX", "NaodW29");
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  65         function Parser()
  66         {
  67                 $this->clearState();
  68         }
  69
  70         function clearState()
  71         {
  72                 $this->mOutput = new ParserOutput;
  73                 $this->mAutonumber = 0;
  74                 $this->mLastSection = "";
  75                 $this->mDTopen = false;
  76                 $this->mVariables = false;
  77                 $this->mIncludeCount = array();
  78                 $this->mStripState = array();
  79                 $this->mArgStack = array();
  80         }
  81
  82         # First pass--just handle <nowiki> sections, pass the rest off
  83         # to internalParse() which does all the real work.
  84         #
  85         # Returns a ParserOutput
  86         #
  87         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  88         {
  89                 global $wgUseTidy;
  90                 $fname = "Parser::parse";
  91                 wfProfileIn( $fname );
  92
  93                 if ( $clearState ) {
  94                         $this->clearState();
  95                 }
  96
  97                 $this->mOptions = $options;
  98                 $this->mTitle =& $title;
  99                 $this->mOutputType = OT_HTML;
 100
 101                 $stripState = NULL;
 102                 $text = $this->strip( $text, $this->mStripState );
 103                 $text = $this->internalParse( $text, $linestart );
 104                 $text = $this->unstrip( $text, $this->mStripState );
 105                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 106                 if(!$wgUseTidy) {
 107                         $fixtags = array(
 108                                 # french spaces, last one Guillemet-left
 109                                 "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
 110                                 # french spaces, Guillemet-right
 111                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 112                                 "/<hr *>/i" => '<hr/>',
 113                                 "/<br *>/i" => '<br/>',
 114                                 "/<center *>/i"=>'<div class="center">',
 115                                 "/<\\/center *>/i" => '</div>',
 116                                 # Clean up spare ampersands; note that we probably ought to be
 117                                 # more careful about named entities.
 118                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 119                         );
 120                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 121                 } else {
 122                         $fixtags = array(
 123                                 # french spaces, last one Guillemet-left
 124                                 "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
 125                                 # french spaces, Guillemet-right
 126                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 127                                 "/<center *>/i"=>'<div class="center">',
 128                                 "/<\\/center *>/i" => '</div>'
 129                         );
 130                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 131                 }
 132                 # only once and last
 133                 $text = $this->doBlockLevels( $text, $linestart );
 134                 if($wgUseTidy) {
 135                         $text = $this->tidy($text);
 136                 }
 137                 $this->mOutput->setText( $text );
 138                 wfProfileOut( $fname );
 139                 return $this->mOutput;
 140         }
 141
 142         /* static */ function getRandomString()
 143         {
 144                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 145         }
 146
 147         # Replaces all occurrences of <$tag>content</$tag> in the text
 148         # with a random marker and returns the new text. the output parameter
 149         # $content will be an associative array filled with data on the form
 150         # $unique_marker => content.
 151
 152         # If $content is already set, the additional entries will be appended
 153
 154         # If $tag is set to STRIP_COMMENTS, the function will extract
 155         # <!-- HTML comments -->
 156
 157         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 158                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 159                 if ( !$content ) {
 160                         $content = array( );
 161                 }
 162                 $n = 1;
 163                 $stripped = "";
 164
 165                 while ( "" != $text ) {
 166                         if($tag==STRIP_COMMENTS) {
 167                                 $p = preg_split( "/<!--/i", $text, 2 );
 168                         } else {
 169                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 170                         }
 171                         $stripped .= $p[0];
 172                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 173                                 $text = "";
 174                         } else {
 175                                 if($tag==STRIP_COMMENTS) {
 176                                         $q = preg_split( "/-->/i", $p[1], 2 );
 177                                 } else {
 178                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 179                                 }
 180                                 $marker = $rnd . sprintf("%08X", $n++);
 181                                 $content[$marker] = $q[0];
 182                                 $stripped .= $marker;
 183                                 $text = $q[1];
 184                         }
 185                 }
 186                 return $stripped;
 187         }
 188
 189         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 190         # If $render is set, performs necessary rendering operations on plugins
 191         # Returns the text, and fills an array with data needed in unstrip()
 192         # If the $state is already a valid strip state, it adds to the state
 193
 194         # When $stripcomments is set, HTML comments <!-- like this -->
 195         # will be stripped in addition to other tags. This is important
 196         # for section editing, where these comments cause confusion when
 197         # counting the sections in the wikisource
 198         function strip( $text, &$state, $stripcomments = false )
 199         {
 200                 $render = ($this->mOutputType == OT_HTML);
 201                 $nowiki_content = array();
 202                 $hiero_content = array();
 203                 $timeline_content = array();
 204                 $math_content = array();
 205                 $pre_content = array();
 206                 $comment_content = array();
 207
 208                 # Replace any instances of the placeholders
 209                 $uniq_prefix = UNIQ_PREFIX;
 210                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 211
 212                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 213                 foreach( $nowiki_content as $marker => $content ){
 214                         if( $render ){
 215                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 216                         } else {
 217                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 218                         }
 219                 }
 220
 221                 $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 222                 foreach( $hiero_content as $marker => $content ){
 223                         if( $render && $GLOBALS['wgUseWikiHiero']){
 224                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 225                         } else {
 226                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
 227                         }
 228                 }
 229
 230                 $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
 231                 foreach( $timeline_content as $marker => $content ){
 232                         if( $render && $GLOBALS['wgUseTimeline']){
 233                                 $timeline_content[$marker] = renderTimeline( $content );
 234                         } else {
 235                                 $timeline_content[$marker] = "<timeline>$content</timeline>";
 236                         }
 237                 }
 238
 239                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 240                 foreach( $math_content as $marker => $content ){
 241                         if( $render ) {
 242                                 if( $this->mOptions->getUseTeX() ) {
 243                                         $math_content[$marker] = renderMath( $content );
 244                                 } else {
 245                                         $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
 246                                 }
 247                         } else {
 248                                 $math_content[$marker] = "<math>$content</math>";
 249                         }
 250                 }
 251
 252                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 253                 foreach( $pre_content as $marker => $content ){
 254                         if( $render ){
 255                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 256                         } else {
 257                                 $pre_content[$marker] = "<pre>$content</pre>";
 258                         }
 259                 }
 260                 if($stripcomments) {
 261                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 262                         foreach( $comment_content as $marker => $content ){
 263                                 $comment_content[$marker] = "<!--$content-->";
 264                         }
 265                 }
 266
 267                 # Merge state with the pre-existing state, if there is one
 268                 if ( $state ) {
 269                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 270                         $state['hiero'] = $state['hiero'] + $hiero_content;
 271                         $state['timeline'] = $state['timeline'] + $timeline_content;
 272                         $state['math'] = $state['math'] + $math_content;
 273                         $state['pre'] = $state['pre'] + $pre_content;
 274                         $state['comment'] = $state['comment'] + $comment_content;
 275                 } else {
 276                         $state = array(
 277                           'nowiki' => $nowiki_content,
 278                           'hiero' => $hiero_content,
 279                           'timeline' => $timeline_content,
 280                           'math' => $math_content,
 281                           'pre' => $pre_content,
 282                           'comment' => $comment_content
 283                         );
 284                 }
 285                 return $text;
 286         }
 287
 288         function unstrip( $text, &$state )
 289         {
 290                 # Must expand in reverse order, otherwise nested tags will be corrupted
 291                 $contentDict = end( $state );
 292                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 293                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 294                                 $text = str_replace( key( $contentDict ), $content, $text );
 295                         }
 296                 }
 297
 298                 return $text;
 299         }
 300
 301         # Add an item to the strip state
 302         # Returns the unique tag which must be inserted into the stripped text
 303         # The tag will be replaced with the original text in unstrip()
 304
 305         function insertStripItem( $text, &$state )
 306         {
 307                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 308                 if ( !$state ) {
 309                         $state = array(
 310                           'nowiki' => array(),
 311                           'hiero' => array(),
 312                           'math' => array(),
 313                           'pre' => array()
 314                         );
 315                 }
 316                 $state['item'][$rnd] = $text;
 317                 return $rnd;
 318         }
 319
 320         # This method generates the list of subcategories and pages for a category
 321         function categoryMagic ()
 322         {
 323                 global $wgLang , $wgUser ;
 324                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 325
 326                 $cns = Namespace::getCategory() ;
 327                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 328
 329                 $r = "<br style=\"clear:both;\"/>\n";
 330
 331
 332                 $sk =& $wgUser->getSkin() ;
 333
 334                 $articles = array() ;
 335                 $children = array() ;
 336                 $data = array () ;
 337                 $id = $this->mTitle->getArticleID() ;
 338
 339                 # FIXME: add limits
 340                 $t = wfStrencode( $this->mTitle->getDBKey() );
 341                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 342                 $res = wfQuery ( $sql, DB_READ ) ;
 343                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 344
 345                 # For all pages that link to this category
 346                 foreach ( $data AS $x )
 347                 {
 348                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 349                         if ( $t != "" ) $t .= ":" ;
 350                         $t .= $x->cur_title ;
 351
 352                         if ( $x->cur_namespace == $cns ) {
 353                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 354                         } else {
 355                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 356                         }
 357                 }
 358                 wfFreeResult ( $res ) ;
 359
 360                 # Showing subcategories
 361                 if ( count ( $children ) > 0 ) {
 362                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 363                         $r .= implode ( ", " , $children ) ;
 364                 }
 365
 366                 # Showing pages in this category
 367                 if ( count ( $articles ) > 0 ) {
 368                         $ti = $this->mTitle->getText() ;
 369                         $h =  wfMsg( "category_header", $ti );
 370                         $r .= "<h2>{$h}</h2>\n" ;
 371                         $r .= implode ( ", " , $articles ) ;
 372                 }
 373
 374
 375                 return $r ;
 376         }
 377
 378         function getHTMLattrs ()
 379         {
 380                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 381                                 "title", "align", "lang", "dir", "width", "height",
 382                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 383                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 384                                 /* FONT */ "type", "start", "value", "compact",
 385                                 /* For various lists, mostly deprecated but safe */
 386                                 "summary", "width", "border", "frame", "rules",
 387                                 "cellspacing", "cellpadding", "valign", "char",
 388                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 389                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 390                                 "id", "class", "name", "style" /* For CSS */
 391                                 );
 392                 return $htmlattrs ;
 393         }
 394
 395         function fixTagAttributes ( $t )
 396         {
 397                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 398                 $htmlattrs = $this->getHTMLattrs() ;
 399
 400                 # Strip non-approved attributes from the tag
 401                 $t = preg_replace(
 402                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 403                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 404                         $t);
 405                 # Strip javascript "expression" from stylesheets. Brute force approach:
 406                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 407
 408                 if( preg_match(
 409                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 410                         wfMungeToUtf8( $t ) ) )
 411                 {
 412                         $t="";
 413                 }
 414
 415                 return trim ( $t ) ;
 416         }
 417
 418         /* interface with html tidy, used if $wgUseTidy = true */
 419         function tidy ( $text ) {
 420                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 421                 global $wgInputEncoding, $wgOutputEncoding;
 422                 $fname = "Parser::tidy";
 423                 wfProfileIn( $fname );
 424
 425                 $cleansource = '';
 426                 switch(strtoupper($wgOutputEncoding)) {
 427                         case 'ISO-8859-1':
 428                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 429                                 break;
 430                         case 'UTF-8':
 431                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 432                                 break;
 433                         default:
 434                                 $wgTidyOpts .= ' -raw';
 435                         }
 436
 437                 $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 438 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 439 '<head><title>test</title></head><body>'.$text.'</body></html>';
 440                 $descriptorspec = array(
 441                         0 => array("pipe", "r"),
 442                         1 => array("pipe", "w"),
 443                         2 => array("file", "/dev/null", "a")
 444                 );
 445                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 446                 if (is_resource($process)) {
 447                         fwrite($pipes[0], $text);
 448                         fclose($pipes[0]);
 449                         while (!feof($pipes[1])) {
 450                                 $cleansource .= fgets($pipes[1], 1024);
 451                         }
 452                         fclose($pipes[1]);
 453                         $return_value = proc_close($process);
 454                 }
 455
 456                 wfProfileOut( $fname );
 457
 458                 if( $cleansource == '' && $text != '') {
 459                         wfDebug( "Tidy error detected!\n" );
 460                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 461                 } else {
 462                         return $cleansource;
 463                 }
 464         }
 465
 466         function doTableStuff ( $t )
 467         {
 468                 $t = explode ( "\n" , $t ) ;
 469                 $td = array () ; # Is currently a td tag open?
 470                         $ltd = array () ; # Was it TD or TH?
 471                         $tr = array () ; # Is currently a tr tag open?
 472                         $ltr = array () ; # tr attributes
 473                         foreach ( $t AS $k => $x )
 474                         {
 475                                 $x = trim ( $x ) ;
 476                                 $fc = substr ( $x , 0 , 1 ) ;
 477                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 478                                 {
 479                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 480                                         array_push ( $td , false ) ;
 481                                         array_push ( $ltd , "" ) ;
 482                                         array_push ( $tr , false ) ;
 483                                         array_push ( $ltr , "" ) ;
 484                                 }
 485                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 486                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 487                                 {
 488                                         $z = "</table>\n" ;
 489                                         $l = array_pop ( $ltd ) ;
 490                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 491                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 492                                         array_pop ( $ltr ) ;
 493                                         $t[$k] = $z ;
 494                                 }
 495                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 496                                                 {
 497                                                 $z = trim ( substr ( $x , 2 ) ) ;
 498                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 499                                                 }*/
 500                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 501                                 {
 502                                         $x = substr ( $x , 1 ) ;
 503                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 504                                         $z = "" ;
 505                                         $l = array_pop ( $ltd ) ;
 506                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 507                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 508                                         array_pop ( $ltr ) ;
 509                                         $t[$k] = $z ;
 510                                         array_push ( $tr , false ) ;
 511                                         array_push ( $td , false ) ;
 512                                         array_push ( $ltd , "" ) ;
 513                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 514                                 }
 515                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 516                                 {
 517                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 518                                         {
 519                                                 $fc = "+" ;
 520                                                 $x = substr ( $x , 1 ) ;
 521                                         }
 522                                         $after = substr ( $x , 1 ) ;
 523                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 524                                         $after = explode ( "||" , $after ) ;
 525                                         $t[$k] = "" ;
 526                                         foreach ( $after AS $theline )
 527                                         {
 528                                                 $z = "" ;
 529                                                 if ( $fc != "+" )
 530                                                 {
 531                                                         $tra = array_pop ( $ltr ) ;
 532                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 533                                                         array_push ( $tr , true ) ;
 534                                                         array_push ( $ltr , "" ) ;
 535                                                 }
 536
 537                                                 $l = array_pop ( $ltd ) ;
 538                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 539                                                 if ( $fc == "|" ) $l = "td" ;
 540                                                 else if ( $fc == "!" ) $l = "th" ;
 541                                                 else if ( $fc == "+" ) $l = "caption" ;
 542                                                 else $l = "" ;
 543                                                 array_push ( $ltd , $l ) ;
 544                                                 $y = explode ( "|" , $theline , 2 ) ;
 545                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 546                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 547                                                 $t[$k] .= $y ;
 548                                                 array_push ( $td , true ) ;
 549                                         }
 550                                 }
 551                         }
 552
 553                 # Closing open td, tr && table
 554                 while ( count ( $td ) > 0 )
 555                 {
 556                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 557                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 558                         $t[] = "</table>" ;
 559                 }
 560
 561                 $t = implode ( "\n" , $t ) ;
 562                 #               $t = $this->removeHTMLtags( $t );
 563                 return $t ;
 564         }
 565
 566         # Parses the text and adds the result to the strip state
 567         # Returns the strip tag
 568         function stripParse( $text, $linestart, $args )
 569         {
 570                 $text = $this->strip( $text, $this->mStripState );
 571                 $text = $this->internalParse( $text, $linestart, $args, false );
 572                 if( $linestart ) {
 573                         $text = "\n" . $text;
 574                 }
 575                 return $this->insertStripItem( $text, $this->mStripState );
 576         }
 577
 578         function internalParse( $text, $linestart, $args = array(), $isMain=true )
 579         {
 580                 $fname = "Parser::internalParse";
 581                 wfProfileIn( $fname );
 582
 583                 $text = $this->removeHTMLtags( $text );
 584                 $text = $this->replaceVariables( $text, $args );
 585
 586                 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr/>", $text );
 587
 588                 $text = $this->doHeadings( $text );
 589                 if($this->mOptions->getUseDynamicDates()) {
 590                         global $wgDateFormatter;
 591                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 592                 }
 593                 $text = $this->doAllQuotes( $text );
 594                 $text = $this->replaceExternalLinks( $text );
 595                 $text = $this->replaceInternalLinks ( $text );
 596                 $text = $this->replaceInternalLinks ( $text );
 597                 //$text = $this->doTokenizedParser ( $text );
 598                 $text = $this->doTableStuff ( $text ) ;
 599                 $text = $this->magicISBN( $text );
 600                 $text = $this->magicRFC( $text );
 601                 $text = $this->formatHeadings( $text, $isMain );
 602                 $sk =& $this->mOptions->getSkin();
 603                 $text = $sk->transformContent( $text );
 604
 605                 if ( !isset ( $this->categoryMagicDone ) ) {
 606                         $text .= $this->categoryMagic () ;
 607                         $this->categoryMagicDone = true ;
 608                 }
 609
 610                 wfProfileOut( $fname );
 611                 return $text;
 612         }
 613
 614
 615         /* private */ function doHeadings( $text )
 616         {
 617                 for ( $i = 6; $i >= 1; --$i ) {
 618                         $h = substr( "======", 0, $i );
 619                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 620                           "<h{$i}>\\1</h{$i}>\\2", $text );
 621                 }
 622                 return $text;
 623         }
 624
 625         /* private */ function doAllQuotes( $text )
 626         {
 627                 $outtext = "";
 628                 $lines = explode( "\n", $text );
 629                 foreach ( $lines as $line ) {
 630                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\n";
 631                 }
 632                 return substr($outtext, 0,-1);
 633         }
 634
 635         /* private */ function doQuotes( $pre, $text, $mode )
 636         {
 637                 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
 638                         $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
 639                         $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
 640                         if ( substr ($m[2], 0, 1) == "'" ) {
 641                                 $m[2] = substr ($m[2], 1);
 642                                 if ($mode == "em") {
 643                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
 644                                 } else if ($mode == "strong") {
 645                                         return $m1_strong . $this->doQuotes ( "", $m[2], "" );
 646                                 } else if (($mode == "emstrong") || ($mode == "both")) {
 647                                         return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
 648                                 } else if ($mode == "strongem") {
 649                                         return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
 650                                 } else {
 651                                         return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
 652                                 }
 653                         } else {
 654                                 if ($mode == "strong") {
 655                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
 656                                 } else if ($mode == "em") {
 657                                         return $m1_em . $this->doQuotes ( "", $m[2], "" );
 658                                 } else if ($mode == "emstrong") {
 659                                         return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
 660                                 } else if (($mode == "strongem") || ($mode == "both")) {
 661                                         return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
 662                                 } else {
 663                                         return $m[1] . $this->doQuotes ( "", $m[2], "em" );
 664                                 }
 665                         }
 666                 } else {
 667                         $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
 668                         $text_em = ($text == "") ? "" : "<em>{$text}</em>";
 669                         if ($mode == "") {
 670                                 return $pre . $text;
 671                         } else if ($mode == "em") {
 672                                 return $pre . $text_em;
 673                         } else if ($mode == "strong") {
 674                                 return $pre . $text_strong;
 675                         } else if ($mode == "strongem") {
 676                                 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
 677                         } else {
 678                                 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
 679                         }
 680                 }
 681         }
 682
 683         # Note: we have to do external links before the internal ones,
 684         # and otherwise take great care in the order of things here, so
 685         # that we don't end up interpreting some URLs twice.
 686
 687         /* private */ function replaceExternalLinks( $text )
 688         {
 689                 $fname = "Parser::replaceExternalLinks";
 690                 wfProfileIn( $fname );
 691                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 692                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 693                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 694                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 695                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 696                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 697                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 698                 wfProfileOut( $fname );
 699                 return $text;
 700         }
 701
 702         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 703         {
 704                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 705                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 706
 707                 # this is  the list of separators that should be ignored if they
 708                 # are the last character of an URL but that should be included
 709                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 710                 # in this case, the last comma should not become part of the URL,
 711                 # but in "www.foo.com/123,2342,32.htm" it should.
 712                 $sep = ",;\.:";
 713                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 714                 $images = "gif|png|jpg|jpeg";
 715
 716                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 717                 # they are interpreted as part of the string (used to tell PHP
 718                 # that the content of the string should be inserted there).
 719                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 720                   "((?i){$images})([^{$uc}]|$)/";
 721
 722                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 723                 $sk =& $this->mOptions->getSkin();
 724
 725                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 726                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 727                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 728                 }
 729                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 730                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 731                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 732                   "</a>\\5", $s );
 733                 $s = str_replace( $unique, $protocol, $s );
 734
 735                 $a = explode( "[{$protocol}:", " " . $s );
 736                 $s = array_shift( $a );
 737                 $s = substr( $s, 1 );
 738
 739                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 740                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 741
 742                 foreach ( $a as $line ) {
 743                         if ( preg_match( $e1, $line, $m ) ) {
 744                                 $link = "{$protocol}:{$m[1]}";
 745                                 $trail = $m[2];
 746                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 747                                 else { $text = wfEscapeHTML( $link ); }
 748                         } else if ( preg_match( $e2, $line, $m ) ) {
 749                                 $link = "{$protocol}:{$m[1]}";
 750                                 $text = $m[2];
 751                                 $trail = $m[3];
 752                         } else {
 753                                 $s .= "[{$protocol}:" . $line;
 754                                 continue;
 755                         }
 756                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 757                                 $paren = "";
 758                         } else {
 759                                 # Expand the URL for printable version
 760                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 761                         }
 762                         $la = $sk->getExternalLinkAttributes( $link, $text );
 763                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 764
 765                 }
 766                 return $s;
 767         }
 768
 769
 770         /* private */ function replaceInternalLinks( $s )
 771         {
 772                 global $wgLang, $wgLinkCache;
 773                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 774                 static $fname = "Parser::replaceInternalLink" ;
 775                 wfProfileIn( $fname );
 776
 777                 wfProfileIn( "$fname-setup" );
 778                 static $tc = FALSE;
 779                 # the % is needed to support urlencoded titles as well
 780                 if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
 781                 $sk =& $this->mOptions->getSkin();
 782
 783                 $a = explode( "[[", " " . $s );
 784                 $s = array_shift( $a );
 785                 $s = substr( $s, 1 );
 786
 787                 # Match a link having the form [[namespace:link|alternate]]trail
 788                 static $e1 = FALSE;
 789                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 790                 # Match the end of a line for a word that's not followed by whitespace,
 791                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 792                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 793                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 794                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 795
 796
 797                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 798                 static $image = FALSE;
 799                 static $special = FALSE;
 800                 static $media = FALSE;
 801                 static $category = FALSE;
 802                 if ( !$image ) { $image = Namespace::getImage(); }
 803                 if ( !$special ) { $special = Namespace::getSpecial(); }
 804                 if ( !$media ) { $media = Namespace::getMedia(); }
 805                 if ( !$category ) { $category = Namespace::getCategory(); }
 806
 807                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 808
 809                 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
 810                         $new_prefix = $m[2];
 811                         $s = $m[1];
 812                 } else {
 813                         $new_prefix="";
 814                 }
 815
 816                 wfProfileOut( "$fname-setup" );
 817
 818                 foreach ( $a as $line ) {
 819                         $prefix = $new_prefix;
 820
 821                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 822                                 $text = $m[2];
 823                                 # fix up urlencoded title texts
 824                                 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
 825                                 $trail = $m[3];
 826                         } else { # Invalid form; output directly
 827                                 $s .= $prefix . "[[" . $line ;
 828                                 wfProfileOut( $fname );
 829                                 continue;
 830                         }
 831
 832                         /* Valid link forms:
 833                         Foobar -- normal
 834                         :Foobar -- override special treatment of prefix (images, language links)
 835                         /Foobar -- convert to CurrentPage/Foobar
 836                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 837                         */
 838                         $c = substr($m[1],0,1);
 839                         $noforce = ($c != ":");
 840                         if( $c == "/" ) { # subpage
 841                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 842                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 843                                         $noslash=$m[1];
 844                                 } else {
 845                                         $noslash=substr($m[1],1);
 846                                 }
 847                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
 848                                         $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 849                                         if( "" == $text ) {
 850                                                 $text= $m[1];
 851                                         } # this might be changed for ugliness reasons
 852                                 } else {
 853                                         $link = $noslash; # no subpage allowed, use standard link
 854                                 }
 855                         } elseif( $noforce ) { # no subpage
 856                                 $link = $m[1];
 857                         } else {
 858                                 $link = substr( $m[1], 1 );
 859                         }
 860                         $wasblank = ( "" == $text );
 861                         if( $wasblank )
 862                         $text = $link;
 863
 864                         $nt = Title::newFromText( $link );
 865                         if( !$nt ) {
 866                                 $s .= $prefix . "[[" . $line;
 867                                 wfProfileOut( $fname );
 868                                 continue;
 869                         }
 870                         $ns = $nt->getNamespace();
 871                         $iw = $nt->getInterWiki();
 872                         if( $noforce ) {
 873                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 874                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 875                                         $tmp = $prefix . $trail ;
 876                                         wfProfileOut( $fname );
 877                                         $s .= (trim($tmp) == '')? '': $tmp;
 878                                         continue;
 879                                 }
 880                                 if ( $ns == $image ) {
 881                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 882                                         $wgLinkCache->addImageLinkObj( $nt );
 883                                         wfProfileOut( $fname );
 884                                         continue;
 885                                 }
 886                                 if ( $ns == $category ) {
 887                                         $t = $nt->getText() ;
 888                                         $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 889
 890                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
 891                                         $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 892                                         $wgLinkCache->resume();
 893
 894                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
 895                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
 896                                         $this->mOutput->mCategoryLinks[] = $t ;
 897                                         $s .= $prefix . $trail ;
 898                                         wfProfileOut( $fname );
 899                                         continue;
 900                                 }
 901                         }
 902                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 903                         ( strpos( $link, "#" ) == FALSE ) ) {
 904                                 # Self-links are handled specially; generally de-link and change to bold.
 905                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 906                                 wfProfileOut( $fname );
 907                                 continue;
 908                         }
 909
 910                         if( $ns == $media ) {
 911                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 912                                 $wgLinkCache->addImageLinkObj( $nt );
 913                                 wfProfileOut( $fname );
 914                                 continue;
 915                         } elseif( $ns == $special ) {
 916                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 917                                 wfProfileOut( $fname );
 918                                 continue;
 919                         }
 920                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 921                 }
 922                 wfProfileOut( $fname );
 923                 return $s;
 924         }
 925
 926         # Some functions here used by doBlockLevels()
 927         #
 928         /* private */ function closeParagraph()
 929         {
 930                 $result = "";
 931                 if ( '' != $this->mLastSection ) {
 932                         $result = "</" . $this->mLastSection  . ">\n";
 933                 }
 934                 $this->mInPre = false;
 935                 $this->mLastSection = "";
 936                 return $result;
 937         }
 938         # getCommon() returns the length of the longest common substring
 939         # of both arguments, starting at the beginning of both.
 940         #
 941         /* private */ function getCommon( $st1, $st2 )
 942         {
 943                 $fl = strlen( $st1 );
 944                 $shorter = strlen( $st2 );
 945                 if ( $fl < $shorter ) { $shorter = $fl; }
 946
 947                 for ( $i = 0; $i < $shorter; ++$i ) {
 948                         if ( $st1{$i} != $st2{$i} ) { break; }
 949                 }
 950                 return $i;
 951         }
 952         # These next three functions open, continue, and close the list
 953         # element appropriate to the prefix character passed into them.
 954         #
 955         /* private */ function openList( $char )
 956     {
 957                 $result = $this->closeParagraph();
 958
 959                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 960                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 961                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 962                 else if ( ";" == $char ) {
 963                         $result .= "<dl><dt>";
 964                         $this->mDTopen = true;
 965                 }
 966                 else { $result = "<!-- ERR 1 -->"; }
 967
 968                 return $result;
 969         }
 970
 971         /* private */ function nextItem( $char )
 972         {
 973                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 974                 else if ( ":" == $char || ";" == $char ) {
 975                         $close = "</dd>";
 976                         if ( $this->mDTopen ) { $close = "</dt>"; }
 977                         if ( ";" == $char ) {
 978                                 $this->mDTopen = true;
 979                                 return $close . "<dt>";
 980                         } else {
 981                                 $this->mDTopen = false;
 982                                 return $close . "<dd>";
 983                         }
 984                 }
 985                 return "<!-- ERR 2 -->";
 986         }
 987
 988         /* private */function closeList( $char )
 989         {
 990                 if ( "*" == $char ) { $text = "</li></ul>"; }
 991                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 992                 else if ( ":" == $char ) {
 993                         if ( $this->mDTopen ) {
 994                                 $this->mDTopen = false;
 995                                 $text = "</dt></dl>";
 996                         } else {
 997                                 $text = "</dd></dl>";
 998                         }
 999                 }
1000                 else {  return "<!-- ERR 3 -->"; }
1001                 return $text."\n";
1002         }
1003
1004         /* private */ function doBlockLevels( $text, $linestart ) {
1005                 $fname = "Parser::doBlockLevels";
1006                 wfProfileIn( $fname );
1007
1008                 # Parsing through the text line by line.  The main thing
1009                 # happening here is handling of block-level elements p, pre,
1010                 # and making lists from lines starting with * # : etc.
1011                 #
1012                 $textLines = explode( "\n", $text );
1013
1014                 $lastPrefix = $output = $lastLine = '';
1015                 $this->mDTopen = $inBlockElem = false;
1016                 $prefixLength = 0;
1017                 $paragraphStack = false;
1018
1019                 if ( !$linestart ) {
1020                         $output .= array_shift( $textLines );
1021                 }
1022                 foreach ( $textLines as $oLine ) {
1023                         $lastPrefixLength = strlen( $lastPrefix );
1024                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1025                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1026                         if (!$this->mInPre) {
1027                                 $this->mInPre = !empty($preOpenMatch);
1028                         }
1029                         if ( !$this->mInPre ) {
1030                                 # Multiple prefixes may abut each other for nested lists.
1031                                 $prefixLength = strspn( $oLine, "*#:;" );
1032                                 $pref = substr( $oLine, 0, $prefixLength );
1033
1034                                 # eh?
1035                                 $pref2 = str_replace( ";", ":", $pref );
1036                                 $t = substr( $oLine, $prefixLength );
1037                         } else {
1038                                 # Don't interpret any other prefixes in preformatted text
1039                                 $prefixLength = 0;
1040                                 $pref = $pref2 = '';
1041                                 $t = $oLine;
1042                         }
1043
1044                         # List generation
1045                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1046                                 # Same as the last item, so no need to deal with nesting or opening stuff
1047                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1048                                 $paragraphStack = false;
1049
1050                                 if ( ";" == substr( $pref, -1 ) ) {
1051                                         # The one nasty exception: definition lists work like this:
1052                                         # ; title : definition text
1053                                         # So we check for : in the remainder text to split up the
1054                                         # title and definition, without b0rking links.
1055                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1056                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1057                                                 $term = $match[1];
1058                                                 $output .= $term . $this->nextItem( ":" );
1059                                                 $t = $match[2];
1060                                         }
1061                                 }
1062                         } elseif( $prefixLength || $lastPrefixLength ) {
1063                                 # Either open or close a level...
1064                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1065                                 $paragraphStack = false;
1066
1067                                 while( $commonPrefixLength < $lastPrefixLength ) {
1068                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1069                                         --$lastPrefixLength;
1070                                 }
1071                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1072                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1073                                 }
1074                                 while ( $prefixLength > $commonPrefixLength ) {
1075                                         $char = substr( $pref, $commonPrefixLength, 1 );
1076                                         $output .= $this->openList( $char );
1077
1078                                         if ( ";" == $char ) {
1079                                                 # FIXME: This is dupe of code above
1080                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1081                                                         $term = $match[1];
1082                                                         $output .= $term . $this->nextItem( ":" );
1083                                                         $t = $match[2];
1084                                                 }
1085                                         }
1086                                         ++$commonPrefixLength;
1087                                 }
1088                                 $lastPrefix = $pref2;
1089                         }
1090                         if( 0 == $prefixLength ) {
1091                                 # No prefix (not in list)--go to paragraph mode
1092                                 $uniq_prefix = UNIQ_PREFIX;
1093                                 // XXX: use a stack for nestable elements like span, table and div
1094                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1095                                 $closematch = preg_match(
1096                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1097                                         "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1098                                 if ( $openmatch or $closematch ) {
1099                                         $paragraphStack = false;
1100                                         $output .= $this->closeParagraph();
1101                                         if($preOpenMatch and !$preCloseMatch) {
1102                                                 $this->mInPre = true;
1103                                         }
1104                                         if ( $closematch  ) {
1105                                                 $inBlockElem = false;
1106                                         } else {
1107                                                 $inBlockElem = true;
1108                                         }
1109                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1110                                         if ( " " == $t{0} and trim($t) != '' ) {
1111                                                 // pre
1112                                                 if ($this->mLastSection != 'pre') {
1113                                                         $paragraphStack = false;
1114                                                         $output .= $this->closeParagraph().'<pre>';
1115                                                         $this->mLastSection = 'pre';
1116                                                 }
1117                                         } else {
1118                                                 // paragraph
1119                                                 if ( '' == trim($t) ) {
1120                                                         if ( $paragraphStack ) {
1121                                                                 $output .= $paragraphStack.'<br/>';
1122                                                                 $paragraphStack = false;
1123                                                                 $this->mLastSection = 'p';
1124                                                         } else {
1125                                                                 if ($this->mLastSection != 'p' ) {
1126                                                                         $output .= $this->closeParagraph();
1127                                                                         $this->mLastSection = '';
1128                                                                         $paragraphStack = "<p>";
1129                                                                 } else {
1130                                                                         $paragraphStack = '</p><p>';
1131                                                                 }
1132                                                         }
1133                                                 } else {
1134                                                         if ( $paragraphStack ) {
1135                                                                 $output .= $paragraphStack;
1136                                                                 $paragraphStack = false;
1137                                                                 $this->mLastSection = 'p';
1138                                                         } else if ($this->mLastSection != 'p') {
1139                                                                 $output .= $this->closeParagraph().'<p>';
1140                                                                 $this->mLastSection = 'p';
1141                                                         }
1142                                                 }
1143                                         }
1144                                 }
1145                         }
1146                         if ($paragraphStack === false) {
1147                                 $output .= $t."\n";
1148                         }
1149                 }
1150                 while ( $prefixLength ) {
1151                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1152                         --$prefixLength;
1153                 }
1154                 if ( "" != $this->mLastSection ) {
1155                         $output .= "</" . $this->mLastSection . ">";
1156                         $this->mLastSection = "";
1157                 }
1158
1159                 wfProfileOut( $fname );
1160                 return $output;
1161         }
1162
# Compute the current value of one magic variable.
#
# $index : one of the MAG_* identifiers handled below
#
# Returns the value for that variable, or NULL when $index is not one of
# the variables known to this function.
function getVariableValue( $index ) {
    global $wgLang, $wgSitename, $wgServer;

    # Stays NULL unless one of the cases below recognises $index
    $value = NULL;

    switch ( $index ) {
        case MAG_CURRENTMONTH:
            # Month number with leading zero
            $value = date( "m" );
            break;
        case MAG_CURRENTMONTHNAME:
            $value = $wgLang->getMonthName( date("n") );
            break;
        case MAG_CURRENTMONTHNAMEGEN:
            $value = $wgLang->getMonthNameGen( date("n") );
            break;
        case MAG_CURRENTDAY:
            # Day of the month without leading zero
            $value = date("j");
            break;
        case MAG_PAGENAME:
            $value = $this->mTitle->getText();
            break;
        case MAG_NAMESPACE:
            # Localised namespace name as provided by the language object
            $value = $wgLang->getNsText($this->mTitle->getNamespace());
            break;
        case MAG_CURRENTDAYNAME:
            # date("w") is 0 (Sunday) .. 6; getWeekdayName() is presumably
            # 1-based, hence the +1 -- confirm against the language class
            $value = $wgLang->getWeekdayName( date("w")+1 );
            break;
        case MAG_CURRENTYEAR:
            $value = date( "Y" );
            break;
        case MAG_CURRENTTIME:
            $value = $wgLang->time( wfTimestampNow(), false );
            break;
        case MAG_NUMBEROFARTICLES:
            $value = wfNumberOfArticles();
            break;
        case MAG_SITENAME:
            $value = $wgSitename;
            break;
        case MAG_SERVER:
            $value = $wgServer;
            break;
    }

    return $value;
}
1196
# (Re)build $this->mVariables.
#
# For every ID in the global $wgVariableIDs the matching MagicWord object
# is fetched and asked (via addToArray) to enter the variable's current
# value into $this->mVariables. How the entries are keyed is decided by
# MagicWord::addToArray() -- presumably by the magic word's text forms,
# since the substitution callbacks look matches up by name.
function initialiseVariables()
{
    global $wgVariableIDs;

    $this->mVariables = array();

    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $currentValue = $this->getVariableValue( $variableId );
        $magicWord->addToArray( $this->mVariables, $currentValue );
    }
}
1206
# Expand double- and triple-brace constructs in $text.
#
# $text : wiki text to process
# $args : arguments of the inclusion currently being expanded; they are
#         pushed on $this->mArgStack for the duration of the call so that
#         argSubstitution() can read them from the top of the stack
#
# In HTML output mode, {{variable}} and {{{argument}}} occurrences are
# substituted first. In every output mode the remaining {{...}} constructs
# are then handed to the brace substitution callback.
#
# Returns the text with all substitutions applied.
#
# The callbacks are given to preg_replace_callback() by name; they are
# global functions defined outside this method which presumably forward
# to the parser stored in $GLOBALS['wgCurParser'].
/* private */ function replaceVariables( $text, $args = array() )
{
    $fname = "Parser::replaceVariables";
    wfProfileIn( $fname );

    # Lazily build the variable table on first use
    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }
    $titleChars = Title::legalChars();
    # Same character class, minus the braces themselves
    $nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );

    # This function is called recursively. To keep track of arguments we need a stack:
    array_push( $this->mArgStack, $args );

    # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
    $GLOBALS['wgCurParser'] =& $this;

    if ( $this->mOutputType == OT_HTML ) {
        # Variable substitution
        $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );

        # Argument substitution
        $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
    }

    # Template substitution
    $regex = "/(\\n?){{([$titleChars]*)(\\|.*?|)}}/s";
    $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

    # Leave this recursion level: drop the arguments pushed above
    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
1244
# Replacement logic for one {{variable}} match.
#
# $matches : regex match array; [0] is the whole construct including the
#            braces, [1] the text between them
#
# Returns the variable's value when the name is present in
# $this->mVariables, otherwise the matched text unchanged. A successful
# substitution also flags the output object as containing "old magic".
function variableSubstitution( $matches )
{
    $name = $matches[1];

    if ( !array_key_exists( $name, $this->mVariables ) ) {
        # Unknown name: leave the construct exactly as written
        return $matches[0];
    }

    $this->mOutput->mContainsOldMagic = true;
    return $this->mVariables[$name];
}
1255
# Expand one {{...}} construct found by the template pattern used in
# replaceVariables().
#
# $matches : regex match array
#            [0] the whole match
#            [1] an optional newline character before the braces
#            [2] the bit before the first |, containing only title characters
#            [3] the remainder, starting with | when present, else ""
#
# The handlers below are tried in order until one produces $text:
# triple braces (left alone), SUBST, MSG/MSGNW/INT, NS, LOCALURL/LOCALURLE,
# internal variables and finally loading a page via the template
# namespace. In HTML output mode the result is then either escaped (MSGNW)
# or run through the parser again with the cleaned-up argument list.
#
# Returns the replacement text, or $matches[0] unchanged when nothing
# applied.
function braceSubstitution( $matches )
{
    global $wgLinkCache, $wgLang;

    $found = false;    # true once a handler has produced $text
    $nowiki = false;   # MSGNW: escape the result instead of parsing it
    $noparse = false;  # hand $text back verbatim, no recursive parsing
    $title = NULL;     # Title object when the construct names a page

    $newline = $matches[1];
    $part1 = $matches[2];
    # If the third subpattern matched anything, it will start with |
    if ( $matches[3] !== "" ) {
        $args = explode( "|", substr( $matches[3], 1 ) );
    } else {
        $args = array();
    }
    $argc = count( $args );

    # {{{}}} -- triple braces are not handled here; pass them through
    if ( strpos( $matches[0], "{{{" ) !== false ) {
        $text = $matches[0];
        $found = true;
        $noparse = true;
    }

    # SUBST
    if ( !$found ) {
        $mwSubst =& MagicWord::get( MAG_SUBST );
        if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
            if ( $this->mOutputType != OT_WIKI ) {
                # Invalid SUBST not replaced at PST time
                # Return without further processing
                $text = $matches[0];
                $found = true;
                $noparse = true;
            }
        } elseif ( $this->mOutputType == OT_WIKI ) {
            # SUBST not found in PST pass, do nothing
            $text = $matches[0];
            $found = true;
        }
    }

    # MSG, MSGNW and INT
    if ( !$found ) {
        # Check for MSGNW:
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $nowiki = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg =& MagicWord::get( MAG_MSG );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
            if ( $this->incrementIncludeCount( "int:$part1" ) ) {
                $text = wfMsgReal( $part1, $args, true );
                $found = true;
            }
        }
    }

    # NS
    if ( !$found ) {
        # Check for NS: (namespace expansion)
        $mwNs =& MagicWord::get( MAG_NS );
        if ( $mwNs->matchStartAndRemove( $part1 ) ) {
            # intval() alone cannot tell namespace 0 from "not a number",
            # so the literal "0" has to be accepted explicitly
            if ( intval( $part1 ) || $part1 == "0" ) {
                $text = $wgLang->getNsText( intval( $part1 ) );
                $found = true;
            } else {
                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                if ( !is_null( $index ) ) {
                    $text = $wgLang->getNsText( $index );
                    $found = true;
                }
            }
        }
    }

    # LOCALURL and LOCALURLE
    if ( !$found ) {
        $mwLocal =& MagicWord::get( MAG_LOCALURL );
        $mwLocalE =& MagicWord::get( MAG_LOCALURLE );

        if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
            $func = 'getLocalURL';
        } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
            $func = 'escapeLocalURL';
        } else {
            $func = '';
        }

        if ( $func !== '' ) {
            $title = Title::newFromText( $part1 );
            if ( !is_null( $title ) ) {
                # The first argument, if any, is passed on to the URL method
                if ( $argc > 0 ) {
                    $text = $title->$func( $args[0] );
                } else {
                    $text = $title->$func();
                }
                $found = true;
            }
        }
    }

    # Internal variables
    if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
        $text = $this->mVariables[$part1];
        $found = true;
        $this->mOutput->mContainsOldMagic = true;
    }

    # Load from database
    if ( !$found ) {
        $title = Title::newFromText( $part1, NS_TEMPLATE );
        if ( !is_null( $title ) && !$title->isExternal() ) {
            # Check for excessive inclusion
            $dbk = $title->getPrefixedDBkey();
            if ( $this->incrementIncludeCount( $dbk ) ) {
                $article = new Article( $title );
                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                if ( $articleContent !== false ) {
                    $found = true;
                    $text = $articleContent;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( $this->mOutputType == OT_HTML && !$found ) {
                $text = "[[" . $title->getPrefixedText() . "]]";
                $found = true;
            }
        }
    }

    # Recursive parsing, escaping and link table handling
    # Only for HTML output
    if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
        $text = wfEscapeWikiText( $text );
    } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse ) {
        # Clean up argument array: positional arguments are numbered from 1,
        # "name=value" arguments are stored under their trimmed name
        $assocArgs = array();
        $index = 1;
        foreach( $args as $arg ) {
            $eqpos = strpos( $arg, "=" );
            if ( $eqpos === false ) {
                $assocArgs[$index++] = $arg;
            } else {
                # trim() always yields a string, so no further checks needed
                $name = trim( substr( $arg, 0, $eqpos ) );
                $value = trim( substr( $arg, $eqpos+1 ) );
                $assocArgs[$name] = $value;
            }
        }

        # Do not enter included links in link table
        if ( !is_null( $title ) ) {
            $wgLinkCache->suspend();
        }

        # Run full parser on the included text
        $text = $this->stripParse( $text, (bool)$newline, $assocArgs );

        # Resume the link cache and register the inclusion as a link
        if ( !is_null( $title ) ) {
            $wgLinkCache->resume();
            $wgLinkCache->addLinkObj( $title );
        }
    }

    if ( !$found ) {
        return $matches[0];
    }
    return $text;
}
1454
# Triple brace replacement -- used for template arguments.
#
# $matches : regex match array; [0] the whole match, [1] an optional
#            newline before the braces, [2] the argument name
#
# The trimmed name is looked up in the argument set on top of
# $this->mArgStack. A known argument is run through stripParse() and the
# result returned; an unknown one leaves the matched text untouched.
function argSubstitution( $matches )
{
    $argName = trim( $matches[2] );
    $inputArgs = end( $this->mArgStack );

    if ( !array_key_exists( $argName, $inputArgs ) ) {
        return $matches[0];
    }

    # Tell stripParse() whether a newline preceded the braces
    $hadNewline = (bool)$matches[1];
    return $this->stripParse( $inputArgs[$argName], $hadNewline, array() );
}
1469
# Count one more inclusion of the entity identified by $dbk and report
# whether it is still allowed.
#
# $dbk : key identifying the included entity (callers in this file pass a
#        prefixed DB key or an "int:..." message key)
#
# Returns true while the running count for $dbk, including this call, does
# not exceed MAX_INCLUDE_REPEAT, false afterwards. The counter is
# incremented either way.
function incrementIncludeCount( $dbk )
{
    $count = array_key_exists( $dbk, $this->mIncludeCount )
        ? $this->mIncludeCount[$dbk]
        : 0;

    $this->mIncludeCount[$dbk] = ++$count;

    return ( $count <= MAX_INCLUDE_REPEAT );
}
1482
1483
# Cleans up HTML, removes dangerous tags and attributes.
#
# $text : text possibly containing HTML tags
#
# HTML comments are removed first. The text is then split at every "<";
# each piece that starts with a whitelisted tag is re-emitted with its
# attributes passed through fixTagAttributes(), every other piece gets its
# "<" and ">" escaped. When $wgUserHtml is off the whitelist is empty, so
# every tag is escaped.
#
# Without tidy ($wgUseTidy false) the function additionally keeps a stack
# of open tags: closing tags that do not match the innermost open tag,
# table cells/rows outside a table and illegal nesting are escaped, and
# tags still open at the end are closed. With tidy, only the whitelist and
# attribute filtering are applied here.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
    global $wgUseTidy, $wgUserHtml;
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );

    if( $wgUserHtml ) {
        $htmlpairs = array( # Tags that must be closed
            "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
            "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
            "strike", "strong", "tt", "var", "div", "center",
            "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
            "ruby", "rt" , "rb" , "rp", "p"
        );
        $htmlsingle = array(
            "br", "hr", "li", "dt", "dd"
        );
        $htmlnest = array( # Tags that can be nested--??
            "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
            "dl", "font", "big", "small", "sub", "sup"
        );
        $tabletags = array( # Can only appear inside table
            "td", "th", "tr"
        );
    } else {
        $htmlpairs = array();
        $htmlsingle = array();
        $htmlnest = array();
        $tabletags = array();
    }

    # Table cells and rows need no closing tag either
    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # NOTE(review): the result is not used in this function; the call is
    # kept in case getHTMLattrs() has side effects -- confirm and remove.
    $htmlattrs = $this->getHTMLattrs () ;

    # Remove HTML comments (a comment alone on a line goes together with
    # its line break)
    $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

    # Pieces: optional "/", tag name, attributes, closing bracket, trailing text
    $tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

    $bits = explode( "<", $text );
    $text = array_shift( $bits );
    if( !$wgUseTidy ) {
        $tagstack = array();   # currently open tags, innermost last
        $tablestack = array(); # saved tag stacks of enclosing tables
        foreach ( $bits as $x ) {
            # A piece that does not look like a tag leaves all parts empty
            # and is escaped below
            $slash = $t = $params = $brace = $rest = "";
            if ( preg_match( $tagPattern, $x, $regs ) ) {
                list( , $slash, $t, $params, $brace, $rest ) = $regs;
            }

            $badtag = 0;
            $t = strtolower( $t );
            if ( in_array( $t, $htmlelements ) ) {
                # Check our stack
                if ( $slash ) {
                    # Closing a tag...
                    if ( in_array( $t, $htmlsingle ) ) {
                        # Never pushed on the stack, so nothing to match
                        $newparams = "";
                    } else {
                        # Must match the innermost open tag. The popped
                        # name itself is compared, and it is only restored
                        # when something was actually popped.
                        $ot = count( $tagstack ) ? array_pop( $tagstack ) : "";
                        if ( $ot !== $t ) {
                            if ( $ot !== "" ) {
                                array_push( $tagstack, $ot );
                            }
                            $badtag = 1;
                        } else {
                            if ( $t == "table" ) {
                                # Back to the tag stack of the enclosing context
                                $tagstack = array_pop( $tablestack );
                            }
                            $newparams = "";
                        }
                    }
                } else {
                    # Keep track for later
                    if ( in_array( $t, $tabletags ) &&
                         !in_array( "table", $tagstack ) ) {
                        $badtag = 1;
                    } else if ( in_array( $t, $tagstack ) &&
                                !in_array( $t, $htmlnest ) ) {
                        $badtag = 1;
                    } else if ( !in_array( $t, $htmlsingle ) ) {
                        if ( $t == "table" ) {
                            # A table starts a fresh nesting context
                            array_push( $tablestack, $tagstack );
                            $tagstack = array();
                        }
                        array_push( $tagstack, $t );
                    }
                    # Strip non-approved attributes from the tag
                    $newparams = $this->fixTagAttributes($params);
                }
                if ( !$badtag ) {
                    $rest = str_replace( ">", "&gt;", $rest );
                    $text .= "<$slash$t $newparams$brace$rest";
                    continue;
                }
            }
            $text .= "&lt;" . str_replace( ">", "&gt;", $x);
        }
        # Close off any remaining tags
        while ( $t = array_pop( $tagstack ) ) {
            $text .= "</$t>\n";
            if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
        }
    } else {
        # this might be possible using tidy itself
        foreach ( $bits as $x ) {
            $slash = $t = $params = $brace = $rest = "";
            if ( preg_match( $tagPattern, $x, $regs ) ) {
                list( , $slash, $t, $params, $brace, $rest ) = $regs;
            }
            $t = strtolower( $t );
            if ( in_array( $t, $htmlelements ) ) {
                $newparams = $this->fixTagAttributes($params);
                $rest = str_replace( ">", "&gt;", $rest );
                $text .= "<$slash$t $newparams$brace$rest";
            } else {
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
            }
        }
    }
    wfProfileOut( $fname );
    return $text;
}
1600
1601
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections when the title is editable and the
 *    option is enabled
 * 3) Add a table of contents on the top when the option is enabled
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits
 * up the string and re-inserts the newly formatted headlines.
 *
 * $text   : rendered HTML containing the headings
 * $isMain : when false, no table of contents is inserted into the output
 *
 * Returns the HTML with reformatted headings (and the TOC, if shown).
 *
 */
/* private */ function formatHeadings( $text, $isMain=true )
{
    global $wgInputEncoding;

    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    $doShowToc = $this->mOptions->getShowToc();
    if( !$this->mTitle->userCanEdit() ) {
        $showEditLink = 0;
        $rightClickHack = 0;
    } else {
        $showEditLink = $this->mOptions->getEditSection();
        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
    }

    # Inhibit editsection links if requested in the page
    $esw =& MagicWord::get( MAG_NOEDITSECTION );
    if( $esw->matchAndRemove( $text ) ) {
        $showEditLink = 0;
    }
    # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
    # do not add TOC
    $mw =& MagicWord::get( MAG_NOTOC );
    if( $mw->matchAndRemove( $text ) ) {
        $doShowToc = 0;
    }

    # never add the TOC to the Main Page. This is an entry page that should not
    # be more than 1-2 screens large anyway
    if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
        $doShowToc = 0;
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now
    $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

    # if there are fewer than 4 headlines in the article, do not show TOC
    if( $numMatches < 4 ) {
        $doShowToc = 0;
    }

    # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
    # override above conditions and always show TOC
    $mw =& MagicWord::get( MAG_FORCETOC );
    if ( $mw->matchAndRemove( $text ) ) {
        $doShowToc = 1;
    }

    # We need this to perform operations on the HTML
    $sk =& $this->mOptions->getSkin();

    # headline counter
    $headlineCount = 0;

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    $toclevel = 0;
    $toc = "";
    $full = "";
    $head = array();          # replacement HTML per headline, by position
    $sublevelCount = array(); # running headline count per heading level
    $refers = array();        # how often each canonized headline occurred
    $level = 0;
    $prevlevel = 0;
    foreach( $matches[3] as $headline ) {
        $numbering = "";
        if( $level ) {
            $prevlevel = $level;
        }
        $level = $matches[1][$headlineCount];
        if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
            # reset when we enter a new level
            $sublevelCount[$level] = 0;
            $toc .= $sk->tocIndent( $level - $prevlevel );
            $toclevel += $level - $prevlevel;
        }
        if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
            # reset when we step back a level
            $sublevelCount[$level+1] = 0;
            $toc .= $sk->tocUnindent( $prevlevel - $level );
            $toclevel -= $prevlevel - $level;
        }
        # count number of headlines for each level
        if( !isset( $sublevelCount[$level] ) ) {
            $sublevelCount[$level] = 0;
        }
        $sublevelCount[$level]++;
        if( $doNumberHeadings || $doShowToc ) {
            # Join the non-empty counters of all levels up to this one
            # with dots, e.g. "2.1.3"
            $dot = 0;
            for( $i = 1; $i <= $level; $i++ ) {
                if( !empty( $sublevelCount[$i] ) ) {
                    if( $dot ) {
                        $numbering .= ".";
                    }
                    $numbering .= $sublevelCount[$i];
                    $dot = 1;
                }
            }
        }

        # The canonized header is a version of the header text safe to use for links
        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

        # strip out HTML
        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
        $tocline = trim( $canonized_headline );
        $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
        # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
        $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);

        # count occurrences so we can track dupes in anchors
        if( !isset( $refers[$canonized_headline] ) ) {
            $refers[$canonized_headline] = 0;
        }
        $refers[$canonized_headline]++;
        $occurrence = $refers[$canonized_headline];

        # Prepend the number to the heading text
        if( $doNumberHeadings || $doShowToc ) {
            $tocline = $numbering . " " . $tocline;

            # Don't number the heading if it is the only one (looks silly)
            if( $doNumberHeadings && count( $matches[3] ) > 1 ) {
                # the two are different if the line contains a link
                $headline = $numbering . " " . $headline;
            }
        }

        # Create the anchor for linking from the TOC to the section;
        # repeated headlines get "_2", "_3", ... appended
        $anchor = $canonized_headline;
        if( $occurrence > 1 ) {
            $anchor .= "_" . $occurrence;
        }
        if( $doShowToc ) {
            $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
        }

        if( !isset( $head[$headlineCount] ) ) {
            $head[$headlineCount] = "";
        }
        if( $showEditLink ) {
            $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
        }

        # Add the edit section span
        if( $rightClickHack ) {
            $headline = $sk->editSectionScript($headlineCount+1,$headline);
        }

        # give headline the correct <h#> tag
        $head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

        $headlineCount++;
    }

    if( $doShowToc ) {
        # Close any indentation still open, then wrap the lines up
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    # split up and insert constructed headlines
    $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
    $i = 0;

    foreach( $blocks as $block ) {
        # An [edit] link for the top block of text used to be emitted here;
        # it is disabled because it broke block formatting (for example, a
        # bullet point in the top line).
        $full .= $block;
        if( $doShowToc && !$i && $isMain ) {
            # The TOC goes right after the first block; top anchor now in skin
            $full = $full.$toc;
        }

        if( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }

    return $full;
}
1800
# Turn "ISBN <number>" occurrences in $text into links to the
# Special:Booksources page.
#
# The text is cut at every literal "ISBN " marker. For each piece that
# follows a marker, leading spaces are skipped and the longest run of
# characters from $valid is taken as the ISBN. Dashes are removed from
# that run to build the "isbn=" query value. If nothing usable follows
# the marker (no characters, or only dashes), the marker and the piece
# are put back exactly as they were found.
#
# Parameters:
#   $text - string to scan
# Returns:
#   the string with every recognised ISBN wrapped in an <a> element
/* private */ function magicISBN( $text )
{
        # The marker is a literal string, so explode() is sufficient; no
        # regular expression matching is needed.
        $pieces = explode( "ISBN ", $text );
        if ( count( $pieces ) < 2 ) {
                return $text;
        }

        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        $out = array_shift( $pieces );

        foreach ( $pieces as $piece ) {
                # Measure the runs with strspn() instead of peeling off one
                # character at a time; this is also safe on an empty piece.
                $blankLen = strspn( $piece, " " );
                $afterBlank = (string)substr( $piece, $blankLen );
                $isbnLen = strspn( $afterBlank, $valid );
                $isbn = (string)substr( $afterBlank, 0, $isbnLen );
                $rest = (string)substr( $afterBlank, $isbnLen );

                $num = str_replace( "-", "", $isbn );

                if ( "" === $num ) {
                        # Not an ISBN after all: restore the whole piece, including
                        # any dashes that were scanned as part of the candidate.
                        $out .= "ISBN " . $piece;
                } else {
                        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
                        $out .= "<a href=\"" .
                                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                                "\" class=\"internal\">ISBN $isbn</a>";
                        $out .= $rest;
                }
        }
        return $out;
}
# Turn "RFC <number>" occurrences in $text into external links.
#
# The text is cut at every literal "RFC " marker. For each piece that
# follows a marker, leading spaces are skipped and the longest run of
# decimal digits is taken as the RFC number. The link target is the
# "rfcurl" message with "$1" replaced by that number. If no digit
# follows the marker, the marker and the piece are put back unchanged.
#
# Parameters:
#   $text - string to scan
# Returns:
#   the string with every recognised RFC reference wrapped in an <a> element
/* private */ function magicRFC( $text )
{
        # The marker is a literal string, so explode() is sufficient; no
        # regular expression matching is needed.
        $pieces = explode( "RFC ", $text );
        if ( count( $pieces ) < 2 ) {
                return $text;
        }

        $valid = "0123456789";
        $out = array_shift( $pieces );

        foreach ( $pieces as $piece ) {
                # Measure the runs with strspn() instead of peeling off one
                # character at a time; this is also safe on an empty piece.
                $blankLen = strspn( $piece, " " );
                $afterBlank = (string)substr( $piece, $blankLen );
                $rfcLen = strspn( $afterBlank, $valid );
                $rfc = (string)substr( $afterBlank, 0, $rfcLen );
                $rest = (string)substr( $afterBlank, $rfcLen );

                if ( "" === $rfc ) {
                        # No number: restore the marker and the untouched piece
                        $out .= "RFC " . $piece;
                } else {
                        $url = wfmsg( "rfcurl" );
                        $url = str_replace( '$1', $rfc, $url );
                        $sk =& $this->mOptions->getSkin();
                        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
                        $out .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$rest}";
                }
        }
        return $out;
}
1867
# Entry point for the pre-save pass: takes wiki markup and returns altered
# wiki markup (output type OT_WIKI), not HTML.
#
# Parameters:
#   $text       - wiki markup to transform
#   $title      - title object, stored by reference in $this->mTitle
#   $user       - user object, handed on to pstPass2()
#   $options    - parser options, stored in $this->mOptions
#   $clearState - when true (the default) clearState() is called first
# Returns:
#   the transformed wiki markup
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        # Configure the parser for a wikitext-to-wikitext run
        $this->mOutputType = OT_WIKI;
        $this->mTitle =& $title;
        $this->mOptions = $options;
        if ( $clearState ) {
                $this->clearState();
        }

        # Normalise Windows line endings
        $text = str_replace( "\r\n", "\n", $text );

        # Regex-based <br> clean-up, currently disabled:
        /*
        $pairs = array(
                "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
                "/<br *?>/i" => "<br/>",
        );
        $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
        */

        # strip() fills $stripState, which unstrip() needs afterwards
        $stripState = false;
        $stripped = $this->strip( $text, $stripState, false );
        $transformed = $this->pstPass2( $stripped, $user );
        return $this->unstrip( $transformed, $stripState );
}
1896
# Second stage of the pre-save transform. In order, it:
#   1. runs replaceVariables() on the text,
#   2. expands signature shortcuts (~~~~~, ~~~~ and ~~~),
#   3. completes "context" links such as [[|name]] and [[name (context)|]],
#   4. trims trailing whitespace and removes the MAG_END magic word.
#
# Parameters:
#   $text - wiki markup (already stripped by the caller)
#   $user - user object; getName() and getOption( "nickname" ) are read
# Returns:
#   the transformed wiki markup
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }

        /* Note: this is an ugly timezone hack for the European wikis */
        # NOTE(review): if TZ was not set before, getenv() returns false and
        # the restore below sets TZ to an empty string rather than unsetting
        # it - confirm whether that matters for callers.
        if ( isset( $wgLocaltimezone ) ) {
                $oldtz = getenv( "TZ" );
                putenv( "TZ=$wgLocaltimezone" );
        }
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if ( isset( $wgLocaltimezone ) ) {
                putenv( "TZ=$oldtz" );
        }

        # The tilde patterns are literal, so plain string replacement is used.
        # With preg_replace() a name or nickname containing "$1" or "\1" would
        # be interpreted as a back-reference and silently altered.
        # Longest shortcut first, so ~~~~~ is not consumed as ~~~~ or ~~~.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # If the current title itself looks like "name (context)", remember
        # the context so that [[|page]] can inherit it.
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1967
# Prime the member variables that parse() would normally initialise, so
# that outside code can call individual parser methods directly.
#
# Parameters:
#   $title      - title object, stored by reference in $this->mTitle
#   $options    - parser options, stored in $this->mOptions
#   $outputType - stored in $this->mOutputType (one of the OT_* constants)
#   $clearState - when true (the default) clearState() is called afterwards
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1979
# Run replaceVariables() over a message text with output type OT_MSG.
#
# The global $wgTitle is used as the title and the parser state is always
# cleared. A static flag marks a call in progress: a nested call made
# while the flag is set returns its input unchanged, which guards against
# infinite recursion.
#
# Parameters:
#   $text    - message text
#   $options - parser options, stored in $this->mOptions
# Returns:
#   the text after variable replacement (or unchanged on a nested call)
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $executing = false;

        if ( !$executing ) {
                $executing = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $executing = false;
        }

        return $text;
}
1999 }
2000
# Container for the result of a parse: the output text plus the language
# links, category links and "contains old magic" flag collected with it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # Constructor. All arguments are optional; the cache time starts
        # out as an empty string.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = "";
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Fold the link lists and the old-magic flag of another ParserOutput
        # into this one. The text and cache time are left untouched.
        #
        # Parameters:
        #   $other - the ParserOutput whose data is appended
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links (not from
                # this object's language links).
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2034
# Bundle of settings that control a parser run. Values are either set one
# by one through the setters or filled in from globals and user
# preferences by initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # --- Getters: each returns the matching member unchanged ---

        function getUseTeX()
        {
                return $this->mUseTeX;
        }

        function getUseCategoryMagic()
        {
                return $this->mUseCategoryMagic;
        }

        function getUseDynamicDates()
        {
                return $this->mUseDynamicDates;
        }

        function getInterwikiMagic()
        {
                return $this->mInterwikiMagic;
        }

        function getAllowExternalImages()
        {
                return $this->mAllowExternalImages;
        }

        function getSkin()
        {
                return $this->mSkin;
        }

        function getDateFormat()
        {
                return $this->mDateFormat;
        }

        function getEditSection()
        {
                return $this->mEditSection;
        }

        function getEditSectionOnRightClick()
        {
                return $this->mEditSectionOnRightClick;
        }

        function getNumberHeadings()
        {
                return $this->mNumberHeadings;
        }

        function getShowToc()
        {
                return $this->mShowToc;
        }

        # --- Setters: each delegates to wfSetVar() (wfSetRef() for the
        #     skin) and returns that helper's result ---

        function setUseTeX( $x )
        {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function setUseCategoryMagic( $x )
        {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        function setUseDynamicDates( $x )
        {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function setInterwikiMagic( $x )
        {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function setAllowExternalImages( $x )
        {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function setSkin( $x )
        {
                return wfSetRef( $this->mSkin, $x );
        }

        function setDateFormat( $x )
        {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function setEditSection( $x )
        {
                return wfSetVar( $this->mEditSection, $x );
        }

        function setEditSectionOnRightClick( $x )
        {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function setNumberHeadings( $x )
        {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function setShowToc( $x )
        {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Factory: build a ParserOptions object initialised from $user.
        /* static */ function newFromUser( &$user )
        {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fill every member from the site-wide globals and from the
        # preferences of the given user. If $userInput is empty, a fresh
        # User object marked as loaded is used instead.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user settings
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }

}
2107
2108 # Regex callbacks, used in Parser::replaceVariables
# Callback wrapper: forwards the match array to braceSubstitution() on
# the parser held in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        $parser =& $GLOBALS['wgCurParser'];
        $result = $parser->braceSubstitution( $matches );
        return $result;
}
2114
# Callback wrapper: forwards the match array to argSubstitution() on
# the parser held in the global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
        $parser =& $GLOBALS['wgCurParser'];
        $result = $parser->argSubstitution( $matches );
        return $result;
}
2120
# Callback wrapper: forwards the match array to variableSubstitution() on
# the parser held in the global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches )
{
        $parser =& $GLOBALS['wgCurParser'];
        $result = $parser->variableSubstitution( $matches );
        return $result;
}
2126
2127 ?>