includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   # Optional extensions: load a renderer only when the site switches it on.
   # !empty() treats an unset switch as "off" instead of raising an
   # undefined-index notice.
   if ( !empty( $GLOBALS['wgUseWikiHiero'] ) ) {
       require_once( 'extensions/wikihiero/wikihiero.php' );
   }
   if ( !empty( $GLOBALS['wgUseTimeline'] ) ) {
       require_once( 'extensions/timeline/Timeline.php' );
   }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  # Cap used to limit how often a single page may be included (see the
  # O(N^2) inclusion attack described in the file header).
  define( 'MAX_INCLUDE_REPEAT', 5 );

  # Legal values for Parser::$mOutputType
  define( 'OT_HTML', 1 );
  define( 'OT_WIKI', 2 );
  define( 'OT_MSG', 3 );

  # Pseudo tag name for extractTags(): passing it makes extractTags() pull
  # out <!-- HTML comments --> instead of an <XML>-style tag pair. The value
  # should never be something usable in wiki syntax.
  define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

  # Common prefix of the strip markers; shared by the marker-generating
  # functions.
  define( 'UNIQ_PREFIX', 'NaodW29' );
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  # PHP 4 style constructor: a new parser starts out with a clean
  # per-parse state.
  function Parser() {
      $this->clearState();
  }
  69
  # Reset every per-parse member to its initial value and attach a fresh
  # ParserOutput object.
  function clearState() {
      # Bookkeeping containers
      $this->mStripState   = array();
      $this->mArgStack     = array();
      $this->mIncludeCount = array();

      # Scalar flags and counters
      $this->mInPre       = false;
      $this->mDTopen      = false;
      $this->mVariables   = false;
      $this->mLastSection = "";
      $this->mAutonumber  = 0;

      # Result container
      $this->mOutput = new ParserOutput;
  }
  82
  83         # First pass--just handle <nowiki> sections, pass the rest off
  84         # to internalParse() which does all the real work.
  85         #
  86         # Returns a ParserOutput
  87         #
  # Render wiki markup to HTML.
  #
  #   $text       wiki markup to convert
  #   $title      title object, kept by reference in $this->mTitle
  #   $options    parser options object, kept in $this->mOptions
  #   $linestart  forwarded to internalParse() and doBlockLevels()
  #   $clearState call clearState() before parsing (default: true)
  #
  # Returns $this->mOutput with the generated HTML stored via setText().
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
      global $wgUseTidy;
      $fname = "Parser::parse";
      wfProfileIn( $fname );

      if ( $clearState ) {
          $this->clearState();
      }

      $this->mOptions = $options;
      $this->mTitle =& $title;
      $this->mOutputType = OT_HTML;

      $text = $this->strip( $text, $this->mStripState );
      $text = $this->internalParse( $text, $linestart );
      $text = $this->unstrip( $text, $this->mStripState );

      # Clean up special characters: runs exactly once, right before
      # doBlockLevels(). preg_replace() applies the rules in array order.
      if ( !$wgUseTidy ) {
          $fixtags = array(
              # french spaces, last one Guillemet-left
              # only if there is something before the space
              "/(.) (\\?|:|!|\\302\\273)/i" => "\\1&nbsp;\\2",
              # french spaces, Guillemet-right
              "/(\\302\\253) /i" => "\\1&nbsp;",
              "/<hr *>/i" => '<hr />',
              "/<br *>/i" => '<br />',
              "/<center *>/i" => '<div class="center">',
              "/<\\/center *>/i" => '</div>',
              # Escape every ampersand that does not start a character
              # reference. Hex references accept only real hex digits
              # ([0-9A-Fa-f]); named entities are still matched loosely.
              '/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
          );
      } else {
          # With tidy enabled the <hr>/<br>/ampersand rules are not applied
          # here (presumably left to tidy() below).
          $fixtags = array(
              # french spaces, last one Guillemet-left
              "/ (\\?|:|!|\\302\\273)/i" => "&nbsp;\\1",
              # french spaces, Guillemet-right
              "/(\\302\\253) /i" => "\\1&nbsp;",
              "/<center *>/i" => '<div class="center">',
              "/<\\/center *>/i" => '</div>'
          );
      }
      $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

      # only once and last
      $text = $this->doBlockLevels( $text, $linestart );
      # <nowiki> content goes back in after block-level processing
      $text = $this->unstripNoWiki( $text, $this->mStripState );
      if ( $wgUseTidy ) {
          $text = $this->tidy( $text );
      }

      $this->mOutput->setText( $text );
      wfProfileOut( $fname );
      return $this->mOutput;
  }
 144
 # Build a random hexadecimal token from two successive mt_rand() draws.
 # The callers in this class use it to make strip markers unique.
 /* static */ function getRandomString()
 {
     $first  = mt_rand( 0, 0x7fffffff );
     $second = mt_rand( 0, 0x7fffffff );
     return dechex( $first ) . dechex( $second );
 }
 149
 150         # Replaces all occurrences of <$tag>content</$tag> in the text
 151         # with a random marker and returns the new text. the output parameter
 152         # $content will be an associative array filled with data on the form
 153         # $unique_marker => content.
 154
 155         # If $content is already set, the additional entries will be appended
 156
 157         # If $tag is set to STRIP_COMMENTS, the function will extract
 158         # <!-- HTML comments -->
 159
 # Replace every <$tag>...</$tag> section of $text by a unique marker.
 #
 #   $tag          tag name, or STRIP_COMMENTS to extract <!-- ... -->
 #   $text         text to scan
 #   $content      by reference: receives marker => inner text; existing
 #                 entries are kept, an empty value is replaced by array()
 #   $uniq_prefix  string the generated markers start with
 #
 # A section whose closing tag is missing extends to the end of the text.
 # Returns the text with markers substituted for the extracted sections.
 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
     $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
     if ( !$content ) {
         $content = array( );
     }

     # The delimiters do not change inside the loop, so build them once.
     if ( $tag == STRIP_COMMENTS ) {
         $openPattern  = "/<!--/i";
         $closePattern = "/-->/i";
     } else {
         $openPattern  = "/<\\s*$tag\\s*>/i";
         $closePattern = "/<\\/\\s*$tag\\s*>/i";
     }

     $n = 1;
     $stripped = "";

     while ( "" != $text ) {
         $p = preg_split( $openPattern, $text, 2 );
         $stripped .= $p[0];
         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
             # No further opening tag, or nothing follows it: done.
             break;
         }

         $q = preg_split( $closePattern, $p[1], 2 );
         $marker = $rnd . sprintf( "%08X", $n++ );
         $content[$marker] = $q[0];
         $stripped .= $marker;

         # Without a closing tag preg_split() yields a single element;
         # treat the remainder as consumed instead of reading an
         # undefined offset.
         $text = isset( $q[1] ) ? $q[1] : "";
     }
     return $stripped;
 }
 191
 192         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 193         # If $render is set, performs necessary rendering operations on plugins
 194         # Returns the text, and fills an array with data needed in unstrip()
 195         # If the $state is already a valid strip state, it adds to the state
 196
 197         # When $stripcomments is set, HTML comments <!-- like this -->
 198         # will be stripped in addition to other tags. This is important
 199         # for section editing, where these comments cause confusion when
 200         # counting the sections in the wikisource
 # Replace <nowiki>, <hiero>, <timeline>, <math> and <pre> sections (and,
 # on request, HTML comments) by unique markers.
 #
 #   $text           text to process
 #   $state          strip state, by reference; created when empty,
 #                   extended otherwise
 #   $stripcomments  also strip <!-- comments -->
 #
 # When the output type is OT_HTML the stored replacement is the rendered
 # or HTML-escaped form; otherwise the original markup is stored so that
 # unstrip() restores the source unchanged.
 #
 # Returns the text with markers in place of the stripped sections.
 function strip( $text, &$state, $stripcomments = false )
 {
     $render = ( $this->mOutputType == OT_HTML );
     $uniq_prefix = UNIQ_PREFIX;

     $nowiki_content = array();
     $hiero_content = array();
     $timeline_content = array();
     $math_content = array();
     $pre_content = array();
     $comment_content = array();

     $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
     foreach( $nowiki_content as $marker => $content ){
         if( $render ){
             $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
         } else {
             $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
         }
     }

     $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
     foreach( $hiero_content as $marker => $content ){
         if( !$render ){
             $hiero_content[$marker] = "<hiero>$content</hiero>";
         } elseif( !empty( $GLOBALS['wgUseWikiHiero'] ) ){
             $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
         } else {
             # Extension switched off: show the source, escaped. The stored
             # text is re-inserted after removeHTMLtags() has run, so raw
             # user markup must not be stored for HTML output.
             $hiero_content[$marker] = "&lt;hiero&gt;" . wfEscapeHTMLTagsOnly( $content ) . "&lt;/hiero&gt;";
         }
     }

     $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
     foreach( $timeline_content as $marker => $content ){
         if( !$render ){
             $timeline_content[$marker] = "<timeline>$content</timeline>";
         } elseif( !empty( $GLOBALS['wgUseTimeline'] ) ){
             $timeline_content[$marker] = renderTimeline( $content );
         } else {
             # Same reasoning as for <hiero> above.
             $timeline_content[$marker] = "&lt;timeline&gt;" . wfEscapeHTMLTagsOnly( $content ) . "&lt;/timeline&gt;";
         }
     }

     $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
     foreach( $math_content as $marker => $content ){
         if( !$render ){
             $math_content[$marker] = "<math>$content</math>";
         } elseif( $this->mOptions->getUseTeX() ){
             $math_content[$marker] = renderMath( $content );
         } else {
             # TeX disabled: display the escaped source with a proper
             # closing tag.
             $math_content[$marker] = "&lt;math&gt;" . wfEscapeHTMLTagsOnly( $content ) . "&lt;/math&gt;";
         }
     }

     $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
     foreach( $pre_content as $marker => $content ){
         if( $render ){
             $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
         } else {
             $pre_content[$marker] = "<pre>$content</pre>";
         }
     }

     if( $stripcomments ) {
         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
         foreach( $comment_content as $marker => $content ){
             $comment_content[$marker] = "<!--$content-->";
         }
     }

     $stripped = array(
       'nowiki' => $nowiki_content,
       'hiero' => $hiero_content,
       'timeline' => $timeline_content,
       'math' => $math_content,
       'pre' => $pre_content,
       'comment' => $comment_content
     );

     # Merge with the pre-existing state, if there is one. A state created
     # by insertStripItem() may lack some buckets; adding an array to a
     # missing (null) entry would be a fatal error, so create those on
     # demand. Buckets this function does not know (e.g. 'item') are left
     # untouched.
     if ( $state ) {
         foreach ( $stripped as $type => $items ) {
             if ( isset( $state[$type] ) ) {
                 $state[$type] = $state[$type] + $items;
             } else {
                 $state[$type] = $items;
             }
         }
     } else {
         $state = $stripped;
     }
     return $text;
 }
 290
 291         # always call unstripNoWiki() after this one
 # Put the stripped sections back into $text, except for the 'nowiki'
 # bucket, which is handled by unstripNoWiki().
 #
 #   $text   text containing strip markers
 #   $state  strip state: bucket name => ( marker => replacement )
 #
 # Returns the text with the markers expanded.
 function unstrip( $text, &$state )
 {
     # Buckets are visited last to first, and the markers inside a bucket
     # last to first: nested tags would be corrupted otherwise.
     foreach ( array_reverse( $state, true ) as $type => $markers ) {
         if ( $type == 'nowiki' ) {
             continue;
         }
         foreach ( array_reverse( $markers, true ) as $marker => $replacement ) {
             $text = str_replace( $marker, $replacement, $text );
         }
     }

     return $text;
 }
 306         # always call this after unstrip() to preserve the order
 # Expand the markers of the 'nowiki' bucket of the strip state.
 #
 #   $text   text containing nowiki markers
 #   $state  strip state; only $state['nowiki'] is read
 #
 # Returns the text with the nowiki markers replaced.
 function unstripNoWiki( $text, &$state )
 {
     # Last marker first: nested tags would be corrupted otherwise.
     foreach ( array_reverse( $state['nowiki'], true ) as $marker => $replacement ) {
         $text = str_replace( $marker, $replacement, $text );
     }

     return $text;
 }
 316
 317         # Add an item to the strip state
 318         # Returns the unique tag which must be inserted into the stripped text
 319         # The tag will be replaced with the original text in unstrip()
 320
 # Store $text in the 'item' bucket of the strip state under a fresh
 # marker.
 #
 #   $text   text to be restored later by unstrip()
 #   $state  strip state, by reference; initialised when empty
 #
 # Returns the marker that has to be inserted into the stripped text.
 function insertStripItem( $text, &$state )
 {
     $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
     if ( !$state ) {
         # Same buckets, in the same order, as strip() creates, plus
         # 'item'. strip() adds arrays to these buckets when it merges
         # into an existing state, so none of them may be missing.
         $state = array(
           'nowiki' => array(),
           'hiero' => array(),
           'timeline' => array(),
           'math' => array(),
           'pre' => array(),
           'comment' => array(),
           'item' => array()
         );
     }
     $state['item'][$rnd] = $text;
     return $rnd;
 }
 335
 336         # This method generates the list of subcategories and pages for a category
 # Build the HTML listing of subcategories and member pages for the
 # current title.
 #
 # Returns "" when category magic is disabled in the parser options or
 # when the current title is not in the category namespace; otherwise an
 # HTML fragment.
 function categoryMagic ()
 {
     global $wgLang ;

     # Doesn't use categories at all
     if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;

     # This ain't a category page
     $cns = Namespace::getCategory() ;
     if ( $this->mTitle->getNamespace() != $cns ) return "" ;

     $r = "<br style=\"clear:both;\"/>\n";

     # Take the skin from the parser options, as internalParse() does;
     # the parser is meant to stay away from $wgUser.
     $sk =& $this->mOptions->getSkin() ;

     $articles = array() ;
     $children = array() ;

     # FIXME: add limits
     $dbKey = wfStrencode( $this->mTitle->getDBKey() );
     $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$dbKey' AND cl_from=cur_id ORDER BY cl_sortkey" ;
     $res = wfQuery ( $sql, DB_READ ) ;

     # For all pages that link to this category
     while ( $x = wfFetchObject ( $res ) )
     {
         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
         if ( $t != "" ) $t .= ":" ;
         $t .= $x->cur_title ;

         if ( $x->cur_namespace == $cns ) {
             $children[] = $sk->makeLink ( $t ) ; # Subcategory
         } else {
             $articles[] = $sk->makeLink ( $t ) ; # Page in this category
         }
     }
     wfFreeResult ( $res ) ;

     # Showing subcategories
     if ( count ( $children ) > 0 ) {
         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
         $r .= implode ( ", " , $children ) ;
     }

     # Showing pages in this category
     if ( count ( $articles ) > 0 ) {
         $ti = $this->mTitle->getText() ;
         $h =  wfMsg( "category_header", $ti );
         $r .= "<h2>{$h}</h2>\n" ;
         $r .= implode ( ", " , $articles ) ;
     }

     return $r ;
 }
 393
 # Return the list of attribute names that may appear on HTML tags in
 # wiki text (no scripting attributes). The order of the list, including
 # the second "width" entry, is kept as it always was.
 function getHTMLattrs ()
 {
     $general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" );
     $br = array( "clear" );
     $hr = array( "noshade" );
     $quote = array( "cite" ); # BLOCKQUOTE, Q
     $font = array( "size", "face", "color" );
     # For various lists, mostly deprecated but safe
     $lists = array( "type", "start", "value", "compact" );
     $tables = array(
         "summary", "width", "border", "frame", "rules",
         "cellspacing", "cellpadding", "valign", "char",
         "charoff", "colgroup", "col", "span", "abbr", "axis",
         "headers", "scope", "rowspan", "colspan"
     );
     $css = array( "id", "class", "name", "style" );

     return array_merge( $general, $br, $hr, $quote, $font, $lists, $tables, $css );
 }
 410
 # Reduce the attribute string of an HTML tag to the approved attributes.
 #
 #   $t  raw attribute text (everything between the tag name and ">")
 #
 # Returns the filtered, trimmed attribute string. The result is "" when
 # the input is blank or when the style check below rejects the tag.
 function fixTagAttributes ( $t )
 {
     if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

     # Strip non-approved attributes from the tag. This uses a callback
     # rather than the /e modifier: /e evaluated the user-supplied
     # attribute text inside a double-quoted PHP string.
     $t = preg_replace_callback(
         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
         array( $this, 'fixTagAttributesCallback' ),
         $t );

     # Strip javascript "expression" from stylesheets. Brute force approach:
     # If anything offensive is found, all attributes of the HTML tag are dropped
     if( preg_match(
         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
         wfMungeToUtf8( $t ) ) )
     {
         $t="";
     }

     return trim ( $t ) ;
 }

 # preg_replace_callback() helper for fixTagAttributes().
 #
 #   $m  match array: [1] attribute name, [3] value (only set when the
 #       attribute has one)
 #
 # Returns name, or name=value, for an approved attribute and "" for
 # anything else.
 /* private */ function fixTagAttributesCallback ( $m )
 {
     if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
         return '' ;
     }
     $value = isset( $m[3] ) ? $m[3] : '' ;
     if ( $value !== '' ) {
         return $m[1] . '=' . $value ;
     }
     return $m[1] ;
 }
 433
 434         /* interface with html tidy, used if $wgUseTidy = true */
 # Pipe $text through the external tidy binary (used if $wgUseTidy = true).
 #
 #   $text  HTML fragment; it is wrapped into a minimal XHTML document
 #          before being written to tidy's standard input
 #
 # Returns whatever tidy writes to standard output. If that is empty for
 # a non-empty input, returns the original text followed by an HTML
 # comment that reports the failure.
 function tidy ( $text ) {
     global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
     global $wgInputEncoding, $wgOutputEncoding;
     $fname = "Parser::tidy";
     wfProfileIn( $fname );

     # Pick the encoding switch for this run. It is combined with the
     # configured options in a local variable: appending to the global
     # $wgTidyOpts would add one more switch on every call.
     $sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );
     switch ( strtoupper( $wgOutputEncoding ) ) {
         case 'ISO-8859-1':
             $encodingOpt = $sameEncoding ? ' -latin1' : ' -raw';
             break;
         case 'UTF-8':
             $encodingOpt = $sameEncoding ? ' -utf8' : ' -raw';
             break;
         default:
             $encodingOpt = ' -raw';
     }
     $opts = $wgTidyOpts . $encodingOpt;

     $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
         ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
         '<head><title>test</title></head><body>'.$text.'</body></html>';
     $descriptorspec = array(
         0 => array("pipe", "r"),               # tidy's stdin
         1 => array("pipe", "w"),               # tidy's stdout
         2 => array("file", "/dev/null", "a")   # discard diagnostics
     );

     $cleansource = '';
     $process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
     if (is_resource($process)) {
         fwrite($pipes[0], $wrappedtext);
         fclose($pipes[0]);
         while (!feof($pipes[1])) {
             $cleansource .= fgets($pipes[1], 1024);
         }
         fclose($pipes[1]);
         proc_close($process);
     }

     wfProfileOut( $fname );

     if( $cleansource == '' && $text != '') {
         wfDebug( "Tidy error detected!\n" );
         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
     } else {
         return $cleansource;
     }
 }
 481
 # Translate wiki table markup into HTML table markup, line by line.
 #
 # Lines are recognised by their first characters after trimming:
 #   {|  opens a table; the rest of the line is the attribute list
 #   |}  closes the current table
 #   |-  starts a new row; the text after the dashes is the row's
 #       attribute list
 #   |+  caption
 #   |   data cell(s), separated by ||
 #   !   header cell(s), separated by !! or ||
 # Inside a cell, "attributes|content" passes the attributes through
 # fixTagAttributes(). Lines outside any table are left untouched.
 #
 #   $t  text to convert
 #
 # Returns the converted text; tables still open at the end are closed.
 function doTableStuff ( $t )
 {
     $t = explode ( "\n" , $t ) ;

     # One entry per currently open (possibly nested) table:
     $td = array () ;  # Is currently a cell tag open?
     $ltd = array () ; # Name of that tag: td, th or caption
     $tr = array () ;  # Is currently a tr tag open?
     $ltr = array () ; # Attributes for the next tr tag

     foreach ( $t AS $k => $x )
     {
         $x = trim ( $x ) ;
         $fc = substr ( $x , 0 , 1 ) ;
         if ( "{|" == substr ( $x , 0 , 2 ) )
         {
             # Everything after the two-character "{|" is the attribute
             # list; fixTagAttributes() trims it.
             $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $tr , false ) ;
             array_push ( $ltr , "" ) ;
         }
         else if ( count ( $td ) == 0 ) { } # Not inside a table: don't do any of the following
         else if ( "|}" == substr ( $x , 0 , 2 ) )
         {
             $z = "</table>\n" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
         }
         else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
         {
             $x = substr ( $x , 1 ) ;
             while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
             $z = "" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
             # The <tr> itself is emitted together with the row's first cell
             array_push ( $tr , false ) ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
         }
         else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Cells and caption
         {
             if ( "|+" == substr ( $x , 0 , 2 ) )
             {
                 $fc = "+" ;
                 $x = substr ( $x , 1 ) ;
             }
             $after = substr ( $x , 1 ) ;
             if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
             $after = explode ( "||" , $after ) ;
             $t[$k] = "" ;
             foreach ( $after AS $theline )
             {
                 $z = "" ;
                 if ( $fc != "+" )
                 {
                     # Open the row first if this is its first cell
                     $tra = array_pop ( $ltr ) ;
                     if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                     array_push ( $tr , true ) ;
                     array_push ( $ltr , "" ) ;
                 }

                 # Close the previous cell, if any
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;

                 if ( $fc == "|" ) $l = "td" ;
                 else if ( $fc == "!" ) $l = "th" ;
                 else if ( $fc == "+" ) $l = "caption" ;
                 else $l = "" ;
                 array_push ( $ltd , $l ) ;

                 $y = explode ( "|" , $theline , 2 ) ;
                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                 $t[$k] .= $y ;
                 array_push ( $td , true ) ;
             }
         }
     }

     # Closing open cell, tr && table. The cell is closed with the tag
     # name that opened it (td, th or caption).
     while ( count ( $td ) > 0 )
     {
         $l = array_pop ( $ltd ) ;
         if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
         $t[] = "</table>" ;
     }

     $t = implode ( "\n" , $t ) ;
     return $t ;
 }
 581
 582         # Parses the text and adds the result to the strip state
 583         # Returns the strip tag
 # Parse $text on its own and park the result in the strip state.
 #
 #   $text     wiki markup to parse
 #   $newline  string put in front of the returned marker; its boolean
 #             value is handed to internalParse() as $linestart
 #   $args     argument array handed to internalParse()
 #
 # Returns $newline followed by the strip marker of the parsed text.
 function stripParse( $text, $newline, $args )
 {
     $stripped = $this->strip( $text, $this->mStripState );
     $atLineStart = (bool)$newline;
     $parsed = $this->internalParse( $stripped, $atLineStart, $args, false );
     $marker = $this->insertStripItem( $parsed, $this->mStripState );
     return $newline . $marker;
 }
 590
 # Run the main conversion passes over already stripped text.
 #
 #   $text       stripped wiki markup
 #   $linestart  accepted for the callers' sake; not read in this function
 #   $args       handed to replaceVariables()
 #   $isMain     handed to formatHeadings()
 #
 # The passes run in a fixed order. The first call on a parser object
 # also appends the output of categoryMagic().
 #
 # Returns the converted text.
 function internalParse( $text, $linestart, $args = array(), $isMain=true )
 {
     $fname = "Parser::internalParse";
     wfProfileIn( $fname );

     # Sanitising and variable expansion
     $text = $this->removeHTMLtags( $text );
     $text = $this->replaceVariables( $text, $args );

     # Horizontal rules and headings
     $text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );
     $text = $this->doHeadings( $text );

     if ( $this->mOptions->getUseDynamicDates() ) {
         global $wgDateFormatter;
         $dateFormat = $this->mOptions->getDateFormat();
         $text = $wgDateFormatter->reformat( $dateFormat, $text );
     }

     # Inline markup and links. replaceInternalLinks() is run twice on
     # purpose here -- NOTE(review): presumably to catch links nested
     # inside other links; confirm before changing.
     $text = $this->doAllQuotes( $text );
     $text = $this->replaceExternalLinks( $text );
     $text = $this->replaceInternalLinks( $text );
     $text = $this->replaceInternalLinks( $text );

     # Tables, magic links, heading post-processing
     $text = $this->doTableStuff( $text );
     $text = $this->magicISBN( $text );
     $text = $this->magicRFC( $text );
     $text = $this->formatHeadings( $text, $isMain );

     $skin =& $this->mOptions->getSkin();
     $text = $skin->transformContent( $text );

     # The flag is never reset in this function, so the category listing
     # is appended by the first call only.
     if ( !isset( $this->categoryMagicDone ) ) {
         $text .= $this->categoryMagic();
         $this->categoryMagicDone = true;
     }

     wfProfileOut( $fname );
     return $text;
 }
 626
 627
 628         /* private */ function doHeadings( $text )
 629         {
 630                 $fname = "Parser::doHeadings";
 631                 wfProfileIn( $fname );
 632                 for ( $i = 6; $i >= 1; --$i ) {
 633                         $h = substr( "======", 0, $i );
 634                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 635                           "<h{$i}>\\1</h{$i}>\\2", $text );
 636                 }
 637                 wfProfileOut( $fname );
 638                 return $text;
 639         }
 640
 641         /* private */ function doAllQuotes( $text )
 642         {
 643                 $fname = "Parser::doAllQuotes";
 644                 wfProfileIn( $fname );
 645                 $outtext = "";
 646                 $lines = explode( "\n", $text );
 647                 foreach ( $lines as $line ) {
 648                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\n";
 649                 }
 650                 $outtext = substr($outtext, 0,-1);
 651                 wfProfileOut( $fname );
 652                 return $outtext;
 653         }
 654
 655         /* private */ function doQuotes( $pre, $text, $mode )
 656         {
 657                 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
 658                         $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
 659                         $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
 660                         if ( substr ($m[2], 0, 1) == "'" ) {
 661                                 $m[2] = substr ($m[2], 1);
 662                                 if ($mode == "em") {
 663                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
 664                                 } else if ($mode == "strong") {
 665                                         return $m1_strong . $this->doQuotes ( "", $m[2], "" );
 666                                 } else if (($mode == "emstrong") || ($mode == "both")) {
 667                                         return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
 668                                 } else if ($mode == "strongem") {
 669                                         return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
 670                                 } else {
 671                                         return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
 672                                 }
 673                         } else {
 674                                 if ($mode == "strong") {
 675                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
 676                                 } else if ($mode == "em") {
 677                                         return $m1_em . $this->doQuotes ( "", $m[2], "" );
 678                                 } else if ($mode == "emstrong") {
 679                                         return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
 680                                 } else if (($mode == "strongem") || ($mode == "both")) {
 681                                         return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
 682                                 } else {
 683                                         return $m[1] . $this->doQuotes ( "", $m[2], "em" );
 684                                 }
 685                         }
 686                 } else {
 687                         $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
 688                         $text_em = ($text == "") ? "" : "<em>{$text}</em>";
 689                         if ($mode == "") {
 690                                 return $pre . $text;
 691                         } else if ($mode == "em") {
 692                                 return $pre . $text_em;
 693                         } else if ($mode == "strong") {
 694                                 return $pre . $text_strong;
 695                         } else if ($mode == "strongem") {
 696                                 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
 697                         } else {
 698                                 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
 699                         }
 700                 }
 701         }
 702
 703         # Note: we have to do external links before the internal ones,
 704         # and otherwise take great care in the order of things here, so
 705         # that we don't end up interpreting some URLs twice.
 706
 707         /* private */ function replaceExternalLinks( $text )
 708         {
 709                 $fname = "Parser::replaceExternalLinks";
 710                 wfProfileIn( $fname );
 711                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 712                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 713                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 714                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 715                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 716                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 717                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 718                 wfProfileOut( $fname );
 719                 return $text;
 720         }
 721
        # Replaces external links of a single protocol in $s and returns the
        # resulting text.
        #
        # $s          - text to process
        # $protocol   - URL scheme without the colon, e.g. "http"
        # $autonumber - if true, a bracketed link without a description is
        #               labelled "[n]" using the running $this->mAutonumber
        #               counter; this flag is also one of the two conditions
        #               for turning bare image URLs into images
        #
        # Works in two phases: first bare URLs (not preceded by "[") are
        # converted with regular expressions, then the text is split on
        # "[protocol:" to handle bracketed links.
        /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
        {
                # Placeholder written in place of the protocol in the markup
                # generated below; it is swapped back to $protocol afterwards.
                # Presumably this keeps already-converted URLs from being
                # matched again by the second pattern.
                $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
                # Characters accepted inside a URL
                $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

                # this is  the list of separators that should be ignored if they
                # are the last character of an URL but that should be included
                # if they occur within the URL, e.g. "go to www.foo.com, where .."
                # in this case, the last comma should not become part of the URL,
                # but in "www.foo.com/123,2342,32.htm" it should.
                $sep = ",;\.:";
                # Characters accepted in the file name part of an image URL
                $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
                # File extensions (matched case-insensitively) treated as images
                $images = "gif|png|jpg|jpeg";

                # PLEASE NOTE: The curly braces { } are not part of the regex,
                # they are interpreted as part of the string (used to tell PHP
                # that the content of the string should be inserted there).
                # $e1: bare URL whose last path component ends in an image extension
                $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
                  "((?i){$images})([^{$uc}]|$)/";

                # $e2: any other bare URL; a separator is only part of the URL
                # when it is followed by another URL character
                $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
                $sk =& $this->mOptions->getSkin();

                if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
                        $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
                          "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
                }
                $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
                  $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
                  "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
                  "</a>\\5", $s );
                # Put the real protocol back in place of the placeholder
                $s = str_replace( $unique, $protocol, $s );

                # Second phase: split on "[protocol:". A space is prepended so the
                # first piece is never empty, and removed again below.
                $a = explode( "[{$protocol}:", " " . $s );
                $s = array_shift( $a );
                $s = substr( $s, 1 );

                # $e1: "url]trail"            -- bracketed link without description
                # $e2: "url description]trail" -- bracketed link with description
                $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
                $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

                foreach ( $a as $line ) {
                        if ( preg_match( $e1, $line, $m ) ) {
                                $link = "{$protocol}:{$m[1]}";
                                $trail = $m[2];
                                if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
                                else { $text = wfEscapeHTML( $link ); }
                        } else if ( preg_match( $e2, $line, $m ) ) {
                                $link = "{$protocol}:{$m[1]}";
                                $text = $m[2];
                                $trail = $m[3];
                        } else {
                                # Not a well-formed bracketed link: restore the text unchanged
                                $s .= "[{$protocol}:" . $line;
                                continue;
                        }
                        # Skip the URL expansion when the link text already is the
                        # URL (or the URL without its "protocol://" prefix and an
                        # optional trailing slash)
                        if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
                                $paren = "";
                        } else {
                                # Expand the URL for printable version
                                $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
                        }
                        $la = $sk->getExternalLinkAttributes( $link, $text );
                        $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";

                }
                return $s;
        }
 788
 789
 790         /* private */ function replaceInternalLinks( $s )
 791         {
 792                 global $wgLang, $wgLinkCache;
 793                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 794                 static $fname = "Parser::replaceInternalLinks" ;
 795                 wfProfileIn( $fname );
 796
 797                 wfProfileIn( "$fname-setup" );
 798                 static $tc = FALSE;
 799                 # the % is needed to support urlencoded titles as well
 800                 if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
 801                 $sk =& $this->mOptions->getSkin();
 802
 803                 $a = explode( "[[", " " . $s );
 804                 $s = array_shift( $a );
 805                 $s = substr( $s, 1 );
 806
 807                 # Match a link having the form [[namespace:link|alternate]]trail
 808                 static $e1 = FALSE;
 809                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 810                 # Match the end of a line for a word that's not followed by whitespace,
 811                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 812                 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
 813
 814                 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
 815                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 816                 static $image = FALSE;
 817                 static $special = FALSE;
 818                 static $media = FALSE;
 819                 static $category = FALSE;
 820                 if ( !$image ) { $image = Namespace::getImage(); }
 821                 if ( !$special ) { $special = Namespace::getSpecial(); }
 822                 if ( !$media ) { $media = Namespace::getMedia(); }
 823                 if ( !$category ) { $category = Namespace::getCategory(); }
 824
 825                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 826
 827                 if ( $useLinkPrefixExtension ) {
 828                         if ( preg_match( $e2, $s, $m ) ) {
 829                                 $first_prefix = $m[2];
 830                                 $s = $m[1];
 831                         } else {
 832                                 $first_prefix = false;
 833                         }
 834                 } else {
 835                         $prefix = '';
 836                 }
 837
 838                 wfProfileOut( "$fname-setup" );
 839
 840                 foreach ( $a as $line ) {
 841                         wfProfileIn( "$fname-prefixhandling" );
 842                         if ( $useLinkPrefixExtension ) {
 843                                 if ( preg_match( $e2, $s, $m ) ) {
 844                                         $prefix = $m[2];
 845                                         $s = $m[1];
 846                                 } else {
 847                                         $prefix='';
 848                                 }
 849                                 # first link
 850                                 if($first_prefix) {
 851                                         $prefix = $first_prefix;
 852                                         $first_prefix = false;
 853                                 }
 854                         }
 855                         wfProfileOut( "$fname-prefixhandling" );
 856
 857                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 858                                 $text = $m[2];
 859                                 # fix up urlencoded title texts
 860                                 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
 861                                 $trail = $m[3];
 862                         } else { # Invalid form; output directly
 863                                 $s .= $prefix . "[[" . $line ;
 864                                 continue;
 865                         }
 866
 867                         /* Valid link forms:
 868                         Foobar -- normal
 869                         :Foobar -- override special treatment of prefix (images, language links)
 870                         /Foobar -- convert to CurrentPage/Foobar
 871                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 872                         */
 873                         $c = substr($m[1],0,1);
 874                         $noforce = ($c != ":");
 875                         if( $c == "/" ) { # subpage
 876                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 877                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 878                                         $noslash=$m[1];
 879                                 } else {
 880                                         $noslash=substr($m[1],1);
 881                                 }
 882                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
 883                                         $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 884                                         if( "" == $text ) {
 885                                                 $text= $m[1];
 886                                         } # this might be changed for ugliness reasons
 887                                 } else {
 888                                         $link = $noslash; # no subpage allowed, use standard link
 889                                 }
 890                         } elseif( $noforce ) { # no subpage
 891                                 $link = $m[1];
 892                         } else {
 893                                 $link = substr( $m[1], 1 );
 894                         }
 895                         $wasblank = ( "" == $text );
 896                         if( $wasblank )
 897                         $text = $link;
 898
 899                         $nt = Title::newFromText( $link );
 900                         if( !$nt ) {
 901                                 $s .= $prefix . "[[" . $line;
 902                                 continue;
 903                         }
 904                         $ns = $nt->getNamespace();
 905                         $iw = $nt->getInterWiki();
 906                         if( $noforce ) {
 907                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 908                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 909                                         $tmp = $prefix . $trail ;
 910                                         $s .= (trim($tmp) == '')? '': $tmp;
 911                                         continue;
 912                                 }
 913                                 if ( $ns == $image ) {
 914                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 915                                         $wgLinkCache->addImageLinkObj( $nt );
 916                                         continue;
 917                                 }
 918                                 if ( $ns == $category ) {
 919                                         $t = $nt->getText() ;
 920                                         $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 921
 922                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
 923                                         $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 924                                         $wgLinkCache->resume();
 925
 926                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
 927                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
 928                                         $this->mOutput->mCategoryLinks[] = $t ;
 929                                         $s .= $prefix . $trail ;
 930                                         continue;
 931                                 }
 932                         }
 933                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 934                         ( strpos( $link, "#" ) == FALSE ) ) {
 935                                 # Self-links are handled specially; generally de-link and change to bold.
 936                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 937                                 continue;
 938                         }
 939
 940                         if( $ns == $media ) {
 941                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 942                                 $wgLinkCache->addImageLinkObj( $nt );
 943                                 continue;
 944                         } elseif( $ns == $special ) {
 945                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 946                                 continue;
 947                         }
 948                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail, $prefix );
 949                 }
 950                 wfProfileOut( $fname );
 951                 return $s;
 952         }
 953
 954         # Some functions here used by doBlockLevels()
 955         #
 956         /* private */ function closeParagraph()
 957         {
 958                 $result = "";
 959                 if ( '' != $this->mLastSection ) {
 960                         $result = "</" . $this->mLastSection  . ">\n";
 961                 }
 962                 $this->mInPre = false;
 963                 $this->mLastSection = "";
 964                 return $result;
 965         }
 966         # getCommon() returns the length of the longest common substring
 967         # of both arguments, starting at the beginning of both.
 968         #
 969         /* private */ function getCommon( $st1, $st2 )
 970         {
 971                 $fl = strlen( $st1 );
 972                 $shorter = strlen( $st2 );
 973                 if ( $fl < $shorter ) { $shorter = $fl; }
 974
 975                 for ( $i = 0; $i < $shorter; ++$i ) {
 976                         if ( $st1{$i} != $st2{$i} ) { break; }
 977                 }
 978                 return $i;
 979         }
 980         # These next three functions open, continue, and close the list
 981         # element appropriate to the prefix character passed into them.
 982         #
 983         /* private */ function openList( $char )
 984     {
 985                 $result = $this->closeParagraph();
 986
 987                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 988                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 989                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 990                 else if ( ";" == $char ) {
 991                         $result .= "<dl><dt>";
 992                         $this->mDTopen = true;
 993                 }
 994                 else { $result = "<!-- ERR 1 -->"; }
 995
 996                 return $result;
 997         }
 998
 999         /* private */ function nextItem( $char )
1000         {
1001                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
1002                 else if ( ":" == $char || ";" == $char ) {
1003                         $close = "</dd>";
1004                         if ( $this->mDTopen ) { $close = "</dt>"; }
1005                         if ( ";" == $char ) {
1006                                 $this->mDTopen = true;
1007                                 return $close . "<dt>";
1008                         } else {
1009                                 $this->mDTopen = false;
1010                                 return $close . "<dd>";
1011                         }
1012                 }
1013                 return "<!-- ERR 2 -->";
1014         }
1015
1016         /* private */function closeList( $char )
1017         {
1018                 if ( "*" == $char ) { $text = "</li></ul>"; }
1019                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1020                 else if ( ":" == $char ) {
1021                         if ( $this->mDTopen ) {
1022                                 $this->mDTopen = false;
1023                                 $text = "</dt></dl>";
1024                         } else {
1025                                 $text = "</dd></dl>";
1026                         }
1027                 }
1028                 else {  return "<!-- ERR 3 -->"; }
1029                 return $text."\n";
1030         }
1031
1032         /* private */ function doBlockLevels( $text, $linestart ) {
1033                 $fname = "Parser::doBlockLevels";
1034                 wfProfileIn( $fname );
1035
1036                 # Parsing through the text line by line.  The main thing
1037                 # happening here is handling of block-level elements p, pre,
1038                 # and making lists from lines starting with * # : etc.
1039                 #
1040                 $textLines = explode( "\n", $text );
1041
1042                 $lastPrefix = $output = $lastLine = '';
1043                 $this->mDTopen = $inBlockElem = false;
1044                 $prefixLength = 0;
1045                 $paragraphStack = false;
1046
1047                 if ( !$linestart ) {
1048                         $output .= array_shift( $textLines );
1049                 }
1050                 foreach ( $textLines as $oLine ) {
1051                         $lastPrefixLength = strlen( $lastPrefix );
1052                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1053                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1054                         if (!$this->mInPre) {
1055                                 $this->mInPre = !empty($preOpenMatch);
1056                         }
1057                         if ( !$this->mInPre ) {
1058                                 # Multiple prefixes may abut each other for nested lists.
1059                                 $prefixLength = strspn( $oLine, "*#:;" );
1060                                 $pref = substr( $oLine, 0, $prefixLength );
1061
1062                                 # eh?
1063                                 $pref2 = str_replace( ";", ":", $pref );
1064                                 $t = substr( $oLine, $prefixLength );
1065                         } else {
1066                                 # Don't interpret any other prefixes in preformatted text
1067                                 $prefixLength = 0;
1068                                 $pref = $pref2 = '';
1069                                 $t = $oLine;
1070                         }
1071
1072                         # List generation
1073                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1074                                 # Same as the last item, so no need to deal with nesting or opening stuff
1075                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1076                                 $paragraphStack = false;
1077
1078                                 if ( ";" == substr( $pref, -1 ) ) {
1079                                         # The one nasty exception: definition lists work like this:
1080                                         # ; title : definition text
1081                                         # So we check for : in the remainder text to split up the
1082                                         # title and definition, without b0rking links.
1083                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1084                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1085                                                 $term = $match[1];
1086                                                 $output .= $term . $this->nextItem( ":" );
1087                                                 $t = $match[2];
1088                                         }
1089                                 }
1090                         } elseif( $prefixLength || $lastPrefixLength ) {
1091                                 # Either open or close a level...
1092                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1093                                 $paragraphStack = false;
1094
1095                                 while( $commonPrefixLength < $lastPrefixLength ) {
1096                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1097                                         --$lastPrefixLength;
1098                                 }
1099                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1100                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1101                                 }
1102                                 while ( $prefixLength > $commonPrefixLength ) {
1103                                         $char = substr( $pref, $commonPrefixLength, 1 );
1104                                         $output .= $this->openList( $char );
1105
1106                                         if ( ";" == $char ) {
1107                                                 # FIXME: This is dupe of code above
1108                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1109                                                         $term = $match[1];
1110                                                         $output .= $term . $this->nextItem( ":" );
1111                                                         $t = $match[2];
1112                                                 }
1113                                         }
1114                                         ++$commonPrefixLength;
1115                                 }
1116                                 $lastPrefix = $pref2;
1117                         }
1118                         if( 0 == $prefixLength ) {
1119                                 # No prefix (not in list)--go to paragraph mode
1120                                 $uniq_prefix = UNIQ_PREFIX;
1121                                 // XXX: use a stack for nestable elements like span, table and div
1122                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i", $t );
1123                                 $closematch = preg_match(
1124                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1125                                         "<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1126                                 if ( $openmatch or $closematch ) {
1127                                         $paragraphStack = false;
1128                                         $output .= $this->closeParagraph();
1129                                         if($preOpenMatch and !$preCloseMatch) {
1130                                                 $this->mInPre = true;
1131                                         }
1132                                         if ( $closematch  ) {
1133                                                 $inBlockElem = false;
1134                                         } else {
1135                                                 $inBlockElem = true;
1136                                         }
1137                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1138                                         if ( " " == $t{0} and trim($t) != '' ) {
1139                                                 // pre
1140                                                 if ($this->mLastSection != 'pre') {
1141                                                         $paragraphStack = false;
1142                                                         $output .= $this->closeParagraph().'<pre>';
1143                                                         $this->mLastSection = 'pre';
1144                                                 }
1145                                         } else {
1146                                                 // paragraph
1147                                                 if ( '' == trim($t) ) {
1148                                                         if ( $paragraphStack ) {
1149                                                                 $output .= $paragraphStack.'<br />';
1150                                                                 $paragraphStack = false;
1151                                                                 $this->mLastSection = 'p';
1152                                                         } else {
1153                                                                 if ($this->mLastSection != 'p' ) {
1154                                                                         $output .= $this->closeParagraph();
1155                                                                         $this->mLastSection = '';
1156                                                                         $paragraphStack = "<p>";
1157                                                                 } else {
1158                                                                         $paragraphStack = '</p><p>';
1159                                                                 }
1160                                                         }
1161                                                 } else {
1162                                                         if ( $paragraphStack ) {
1163                                                                 $output .= $paragraphStack;
1164                                                                 $paragraphStack = false;
1165                                                                 $this->mLastSection = 'p';
1166                                                         } else if ($this->mLastSection != 'p') {
1167                                                                 $output .= $this->closeParagraph().'<p>';
1168                                                                 $this->mLastSection = 'p';
1169                                                         }
1170                                                 }
1171                                         }
1172                                 }
1173                         }
1174                         if ($paragraphStack === false) {
1175                                 $output .= $t."\n";
1176                         }
1177                 }
1178                 while ( $prefixLength ) {
1179                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1180                         --$prefixLength;
1181                 }
1182                 if ( "" != $this->mLastSection ) {
1183                         $output .= "</" . $this->mLastSection . ">";
1184                         $this->mLastSection = "";
1185                 }
1186
1187                 wfProfileOut( $fname );
1188                 return $output;
1189         }
1190
# Returns the current value of the magic variable identified by $index
# (one of the MAG_* IDs handled below), or NULL for any other ID.
# Date-based values are taken from date() at the moment of the call.
function getVariableValue( $index ) {
        global $wgLang, $wgSitename, $wgServer;

        $value = NULL;
        switch ( $index ) {
                case MAG_CURRENTMONTH:
                        # Month number with leading zero
                        $value = date( "m" );
                        break;
                case MAG_CURRENTMONTHNAME:
                        $value = $wgLang->getMonthName( date("n") );
                        break;
                case MAG_CURRENTMONTHNAMEGEN:
                        $value = $wgLang->getMonthNameGen( date("n") );
                        break;
                case MAG_CURRENTDAY:
                        # Day of the month without leading zero
                        $value = date("j");
                        break;
                case MAG_PAGENAME:
                        $value = $this->mTitle->getText();
                        break;
                case MAG_NAMESPACE:
                        # Namespace text as reported by the language object
                        $namespace = $this->mTitle->getNamespace();
                        $value = $wgLang->getNsText( $namespace );
                        break;
                case MAG_CURRENTDAYNAME:
                        # date("w") counts from 0 (Sunday); getWeekdayName() is
                        # given that number plus one
                        $weekday = date("w")+1;
                        $value = $wgLang->getWeekdayName( $weekday );
                        break;
                case MAG_CURRENTYEAR:
                        $value = date( "Y" );
                        break;
                case MAG_CURRENTTIME:
                        $value = $wgLang->time( wfTimestampNow(), false );
                        break;
                case MAG_NUMBEROFARTICLES:
                        $value = wfNumberOfArticles();
                        break;
                case MAG_SITENAME:
                        $value = $wgSitename;
                        break;
                case MAG_SERVER:
                        $value = $wgServer;
                        break;
        }
        return $value;
}
1224
# (Re)builds $this->mVariables from scratch.
# For every ID in $wgVariableIDs the matching MagicWord object is asked,
# via addToArray(), to enter the value reported by getVariableValue()
# into the table.
function initialiseVariables()
{
        global $wgVariableIDs;

        $this->mVariables = array();
        foreach ( $wgVariableIDs as $variableId ) {
                $magicWord =& MagicWord::get( $variableId );
                $currentValue = $this->getVariableValue( $variableId );
                $magicWord->addToArray( $this->mVariables, $currentValue );
        }
}
1234
# Expands the brace constructs in $text.
#
# For HTML output, {{VARIABLE}} and {{{argument}}} are substituted first;
# for every output type the template regex is then run, handing each
# {{name|arg|...}} match to wfBraceSubstitution.
#
# $text  wikitext to process
# $args  arguments of the enclosing inclusion; pushed on $this->mArgStack
#        for the duration of the call so that argSubstitution() can read
#        them, and popped again before returning
#
# Returns the text with all replacements applied.
/* private */ function replaceVariables( $text, $args = array() )
{
        $fname = "Parser::replaceVariables";
        wfProfileIn( $fname );

        if ( !$this->mVariables ) {
                $this->initialiseVariables();
        }

        # Character classes for the regexes below: anything legal in a title,
        # and the same set with the braces taken out
        $titleChars = Title::legalChars();
        $nonBraceChars = str_replace( array( "{", "}" ), "", $titleChars );

        # This function is called recursively. To keep track of arguments we need a stack:
        array_push( $this->mArgStack, $args );

        # The regex callbacks are plain functions, so the active parser is
        # published in a global for them (presumably they forward to it).
        # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
        $GLOBALS['wgCurParser'] =& $this;

        if ( $this->mOutputType == OT_HTML ) {
                # Variable substitution
                $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );

                # Argument substitution
                $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
        }

        # Template substitution
        $regex = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";
        $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

        array_pop( $this->mArgStack );

        wfProfileOut( $fname );
        return $text;
}
1272
# Replacement callback for a {{VARIABLE}} match.
# $matches[0] is the whole match, $matches[1] the text between the braces.
# Returns the stored value if that text is a key of $this->mVariables
# (flagging the output as containing old-style magic), otherwise the
# match unchanged.
function variableSubstitution( $matches )
{
        $name = $matches[1];
        if ( !array_key_exists( $name, $this->mVariables ) ) {
                # Not a known variable: leave the braces alone
                return $matches[0];
        }

        $this->mOutput->mContainsOldMagic = true;
        return $this->mVariables[$name];
}
1283
# Replacement callback for one {{...}} match of the template regex in
# replaceVariables().
#
# $matches is the preg match array:
#   [0] the whole match, including the optional leading newline
#   [1] the optional newline character before the braces
#   [2] the part before the first |
#   [3] the remainder, either empty or starting with |
#
# The handlers are tried in this order, the first one that applies wins:
# leftover {{{...}}}, SUBST:, MSGNW:/MSG:/INT:, NS:, LOCALURL:/LOCALURLE:,
# magic variables, and finally loading a page (Template: namespace by
# default). For HTML output the result is parsed recursively, or escaped
# when MSGNW: was used.
#
# Returns the replacement text, or $matches[0] unchanged if nothing applied.
# The leading newline captured by the regex is always kept in the result.
function braceSubstitution( $matches )
{
        global $wgLinkCache, $wgLang;
        $found = false;
        $nowiki = false;
        $noparse = false;
        # True when $text is a plain copy of $matches[0], which already
        # starts with the captured newline
        $verbatim = false;

        $title = NULL;

        # $newline is an optional newline character before the braces
        # $part1 is the bit before the first |, and must contain only title characters
        # $args is a list of arguments, starting from index 0, not including $part1

        $newline = $matches[1];
        $part1 = $matches[2];
        # If the third subpattern matched anything, it will start with |
        if ( $matches[3] !== "" ) {
                $args = explode( "|", substr( $matches[3], 1 ) );
        } else {
                $args = array();
        }
        $argc = count( $args );

        # {{{}}}
        if ( strpos( $matches[0], "{{{" ) !== false ) {
                $text = $matches[0];
                $found = true;
                $noparse = true;
                $verbatim = true;
        }

        # SUBST
        if ( !$found ) {
                $mwSubst =& MagicWord::get( MAG_SUBST );
                if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
                        if ( $this->mOutputType != OT_WIKI ) {
                                # Invalid SUBST not replaced at PST time
                                # Return without further processing
                                $text = $matches[0];
                                $found = true;
                                $noparse = true;
                                $verbatim = true;
                        }
                } elseif ( $this->mOutputType == OT_WIKI ) {
                        # SUBST not found in PST pass, do nothing
                        $text = $matches[0];
                        $found = true;
                        $verbatim = true;
                }
        }

        # MSG, MSGNW and INT
        if ( !$found ) {
                # Check for MSGNW:
                $mwMsgnw =& MagicWord::get( MAG_MSGNW );
                if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
                        $nowiki = true;
                } else {
                        # Remove obsolete MSG:
                        $mwMsg =& MagicWord::get( MAG_MSG );
                        $mwMsg->matchStartAndRemove( $part1 );
                }

                # Check if it is an internal message
                $mwInt =& MagicWord::get( MAG_INT );
                if ( $mwInt->matchStartAndRemove( $part1 ) ) {
                        if ( $this->incrementIncludeCount( "int:$part1" ) ) {
                                $text = wfMsgReal( $part1, $args, true );
                                $found = true;
                        }
                }
        }

        # NS
        if ( !$found ) {
                # Check for NS: (namespace expansion)
                $mwNs = MagicWord::get( MAG_NS );
                if ( $mwNs->matchStartAndRemove( $part1 ) ) {
                        if ( intval( $part1 ) ) {
                                # Numeric namespace index
                                $text = $wgLang->getNsText( intval( $part1 ) );
                                $found = true;
                        } else {
                                # Canonical namespace name
                                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                                if ( !is_null( $index ) ) {
                                        $text = $wgLang->getNsText( $index );
                                        $found = true;
                                }
                        }
                }
        }

        # LOCALURL and LOCALURLE
        if ( !$found ) {
                $mwLocal = MagicWord::get( MAG_LOCALURL );
                $mwLocalE = MagicWord::get( MAG_LOCALURLE );

                # Name of the Title method that produces the URL
                if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
                        $func = 'getLocalURL';
                } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
                        $func = 'escapeLocalURL';
                } else {
                        $func = '';
                }

                if ( $func !== '' ) {
                        $title = Title::newFromText( $part1 );
                        if ( !is_null( $title ) ) {
                                # The first argument, if any, is handed to the URL method
                                if ( $argc > 0 ) {
                                        $text = $title->$func( $args[0] );
                                } else {
                                        $text = $title->$func();
                                }
                                $found = true;
                        }
                }
        }

        # Internal variables
        if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
                $text = $this->mVariables[$part1];
                $found = true;
                $this->mOutput->mContainsOldMagic = true;
        }

        # Load from database
        if ( !$found ) {
                $title = Title::newFromText( $part1, NS_TEMPLATE );
                if ( !is_null( $title ) && !$title->isExternal() ) {
                        # Check for excessive inclusion
                        $dbk = $title->getPrefixedDBkey();
                        if ( $this->incrementIncludeCount( $dbk ) ) {
                                $article = new Article( $title );
                                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                                if ( $articleContent !== false ) {
                                        $found = true;
                                        $text = $articleContent;
                                }
                        }

                        # If the title is valid but undisplayable, make a link to it
                        if ( $this->mOutputType == OT_HTML && !$found ) {
                                $text = "[[" . $title->getPrefixedText() . "]]";
                                $found = true;
                        }
                }
        }

        if ( !$found ) {
                return $matches[0];
        }

        # Recursive parsing, escaping and link table handling
        if ( $nowiki && $this->mOutputType == OT_HTML ) {
                # The regex consumed the newline in front of the braces, put it back
                $text = $newline . wfEscapeWikiText( $text );
        } elseif ( $this->mOutputType == OT_HTML && !$noparse ) {
                # Clean up argument array: positional arguments are numbered
                # from 1, name=value arguments are stored under their name
                $assocArgs = array();
                $index = 1;
                foreach( $args as $arg ) {
                        $eqpos = strpos( $arg, "=" );
                        if ( $eqpos === false ) {
                                $assocArgs[$index++] = $arg;
                        } else {
                                # trim() always returns a string, so an empty name
                                # or value simply ends up as ""
                                $name = trim( substr( $arg, 0, $eqpos ) );
                                $value = trim( substr( $arg, $eqpos+1 ) );
                                $assocArgs[$name] = $value;
                        }
                }

                # Do not enter included links in link table
                if ( !is_null( $title ) ) {
                        $wgLinkCache->suspend();
                }

                # Run full parser on the included text; $newline is handed over
                # to stripParse(), so it is not re-added here
                $text = $this->stripParse( $text, $newline, $assocArgs );

                # Resume the link cache and register the inclusion as a link
                if ( !is_null( $title ) ) {
                        $wgLinkCache->resume();
                        $wgLinkCache->addLinkObj( $title );
                }
        } elseif ( !$verbatim ) {
                # Substituted text that is neither parsed nor escaped (e.g. SUBST
                # at pre-save time): keep the newline the regex consumed
                $text = $newline . $text;
        }

        return $text;
}
1482
# Triple brace replacement -- used for template arguments.
# $matches[1] is the optional newline before the braces, $matches[2] the
# argument name. If the arguments on top of $this->mArgStack contain that
# name (after trimming), its value is run through stripParse(); otherwise
# the match is returned untouched.
function argSubstitution( $matches )
{
        $currentArgs = end( $this->mArgStack );
        $argName = trim( $matches[2] );

        if ( !array_key_exists( $argName, $currentArgs ) ) {
                return $matches[0];
        }

        return $this->stripParse( $currentArgs[$argName], $matches[1], array() );
}
1497
# Counts one more inclusion of the entity $dbk and returns true if the
# function is still allowed to include it, i.e. if the count including
# this call does not exceed MAX_INCLUDE_REPEAT.
function incrementIncludeCount( $dbk )
{
        # First time we see this entity: start counting at zero
        if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
                $this->mIncludeCount[$dbk] = 0;
        }

        $this->mIncludeCount[$dbk]++;
        return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1510
1511
# Cleans up HTML, removes dangerous tags and attributes.
#
# HTML comments are dropped, then $text is split at every "<". A piece that
# starts with a whitelisted tag is re-emitted with its attributes run
# through fixTagAttributes(); for any other piece the "<" is output as
# &lt;. Every ">" outside an accepted tag is output as &gt;.
# The whitelist is empty unless $wgUserHtml is set.
#
# Without $wgUseTidy the function additionally keeps a stack of open tags:
# mismatched closing tags, table cells outside a table and illegal nesting
# are escaped instead of emitted, and tags still open at the end are closed.
# With $wgUseTidy no such balancing is done here.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
        global $wgUseTidy, $wgUserHtml;
        $fname = "Parser::removeHTMLtags";
        wfProfileIn( $fname );

        if( $wgUserHtml ) {
                $htmlpairs = array( # Tags that must be closed
                        "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
                        "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
                        "strike", "strong", "tt", "var", "div", "center",
                        "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
                        "ruby", "rt" , "rb" , "rp", "p"
                );
                $htmlsingle = array(
                        "br", "hr", "li", "dt", "dd"
                );
                $htmlnest = array( # Tags that can be nested--??
                        "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
                        "dl", "font", "big", "small", "sub", "sup"
                );
                $tabletags = array( # Can only appear inside table
                        "td", "th", "tr"
                );
        } else {
                $htmlpairs = array();
                $htmlsingle = array();
                $htmlnest = array();
                $tabletags = array();
        }

        # Table cells and rows are not tracked on the tag stack either
        $htmlsingle = array_merge( $tabletags, $htmlsingle );
        $htmlelements = array_merge( $htmlsingle, $htmlpairs );

        # NOTE(review): the result is not used in this function; kept because
        # getHTMLattrs() is not visible here -- confirm it has no side effects
        # before removing the call.
        $htmlattrs = $this->getHTMLattrs () ;

        # Remove HTML comments. A comment that sits on a line of its own is
        # removed together with the newline in front of it.
        $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

        # A piece following "<": optional slash, tag name, attributes,
        # closing bracket and the text up to the next "<"
        $tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

        $bits = explode( "<", $text );
        $text = array_shift( $bits );
        if(!$wgUseTidy) {
                $tagstack = array(); $tablestack = array();
                foreach ( $bits as $x ) {
                        if ( !preg_match( $tagPattern, $x, $regs ) ) {
                                # Does not look like a tag at all: escape it
                                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                                continue;
                        }
                        list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;

                        $badtag = 0 ;
                        $t = strtolower( $t );
                        if ( in_array( $t, $htmlelements ) ) {
                                # Check our stack
                                if ( $slash ) {
                                        # Closing a tag...
                                        if ( ! in_array( $t, $htmlsingle ) &&
                                        ( $ot = @array_pop( $tagstack ) ) != $t ) {
                                                # Not the innermost open tag: put it back
                                                @array_push( $tagstack, $ot );
                                                $badtag = 1;
                                        } else {
                                                if ( $t == "table" ) {
                                                        # Back to the stack of the enclosing context
                                                        $tagstack = array_pop( $tablestack );
                                                }
                                                $newparams = "";
                                        }
                                } else {
                                        # Keep track for later
                                        if ( in_array( $t, $tabletags ) &&
                                        ! in_array( "table", $tagstack ) ) {
                                                $badtag = 1;
                                        } else if ( in_array( $t, $tagstack ) &&
                                        ! in_array ( $t , $htmlnest ) ) {
                                                $badtag = 1 ;
                                        } else if ( ! in_array( $t, $htmlsingle ) ) {
                                                if ( $t == "table" ) {
                                                        # A table starts a fresh stack of its own
                                                        array_push( $tablestack, $tagstack );
                                                        $tagstack = array();
                                                }
                                                array_push( $tagstack, $t );
                                        }
                                        # Strip non-approved attributes from the tag
                                        $newparams = $this->fixTagAttributes($params);
                                }
                                if ( ! $badtag ) {
                                        $rest = str_replace( ">", "&gt;", $rest );
                                        $text .= "<$slash$t $newparams$brace$rest";
                                        continue;
                                }
                        }
                        $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                }
                # Close off any remaining tags
                while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
                        $text .= "</$t>\n";
                        if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
                }
        } else {
                # this might be possible using tidy itself
                foreach ( $bits as $x ) {
                        if ( !preg_match( $tagPattern, $x, $regs ) ) {
                                # Does not look like a tag at all: escape it
                                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                                continue;
                        }
                        list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;

                        $t = strtolower( $t );
                        if ( in_array( $t, $htmlelements ) ) {
                                $newparams = $this->fixTagAttributes($params);
                                $rest = str_replace( ">", "&gt;", $rest );
                                $text .= "<$slash$t $newparams$brace$rest";
                        } else {
                                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                        }
                }
        }
        wfProfileOut( $fname );
        return $text;
}
1628
1629
1630 /*
1631  *
1632  * This function accomplishes several tasks:
1633  * 1) Auto-number headings if that option is enabled
1634  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1635  * 3) Add a Table of contents on the top for users who have enabled the option
1636  * 4) Auto-anchor headings
1637  *
1638  * It loops through all headlines, collects the necessary data, then splits up the
1639  * string and re-inserts the newly formatted headlines.
1640  *
1641  */
1642
1643         /* private */ function formatHeadings( $text, $isMain=true )
1644         {
1645                 global $wgInputEncoding;
1646
1647                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1648                 $doShowToc = $this->mOptions->getShowToc();
1649                 if( !$this->mTitle->userCanEdit() ) {
1650                         $showEditLink = 0;
1651                         $rightClickHack = 0;
1652                 } else {
1653                         $showEditLink = $this->mOptions->getEditSection();
1654                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1655                 }
1656
1657                 # Inhibit editsection links if requested in the page
1658                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1659                 if( $esw->matchAndRemove( $text ) ) {
1660                         $showEditLink = 0;
1661                 }
1662                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1663                 # do not add TOC
1664                 $mw =& MagicWord::get( MAG_NOTOC );
1665                 if( $mw->matchAndRemove( $text ) ) {
1666                         $doShowToc = 0;
1667                 }
1668
1669                 # never add the TOC to the Main Page. This is an entry page that should not
1670                 # be more than 1-2 screens large anyway
1671                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1672                         $doShowToc = 0;
1673                 }
1674
1675                 # Get all headlines for numbering them and adding funky stuff like [edit]
1676                 # links - this is for later, but we need the number of headlines right now
1677                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1678
1679                 # if there are fewer than 4 headlines in the article, do not show TOC
1680                 if( $numMatches < 4 ) {
1681                         $doShowToc = 0;
1682                 }
1683
1684                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1685                 # override above conditions and always show TOC
1686                 $mw =& MagicWord::get( MAG_FORCETOC );
1687                 if ($mw->matchAndRemove( $text ) ) {
1688                         $doShowToc = 1;
1689                 }
1690
1691
1692                 # We need this to perform operations on the HTML
1693                 $sk =& $this->mOptions->getSkin();
1694
1695                 # headline counter
1696                 $headlineCount = 0;
1697
1698                 # Ugh .. the TOC should have neat indentation levels which can be
1699                 # passed to the skin functions. These are determined here
1700                 $toclevel = 0;
1701                 $toc = "";
1702                 $full = "";
1703                 $head = array();
1704                 $sublevelCount = array();
1705                 $level = 0;
1706                 $prevlevel = 0;
1707                 foreach( $matches[3] as $headline ) {
1708                         $numbering = "";
1709                         if( $level ) {
1710                                 $prevlevel = $level;
1711                         }
1712                         $level = $matches[1][$headlineCount];
1713                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1714                                 # reset when we enter a new level
1715                                 $sublevelCount[$level] = 0;
1716                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1717                                 $toclevel += $level - $prevlevel;
1718                         }
1719                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1720                                 # reset when we step back a level
1721                                 $sublevelCount[$level+1]=0;
1722                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1723                                 $toclevel -= $prevlevel - $level;
1724                         }
1725                         # count number of headlines for each level
1726                         @$sublevelCount[$level]++;
1727                         if( $doNumberHeadings || $doShowToc ) {
1728                                 $dot = 0;
1729                                 for( $i = 1; $i <= $level; $i++ ) {
1730                                         if( !empty( $sublevelCount[$i] ) ) {
1731                                                 if( $dot ) {
1732                                                         $numbering .= ".";
1733                                                 }
1734                                                 $numbering .= $sublevelCount[$i];
1735                                                 $dot = 1;
1736                                         }
1737                                 }
1738                         }
1739
1740                         # The canonized header is a version of the header text safe to use for links
1741                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1742                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1743                         $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState );
1744
1745                         # strip out HTML
1746                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1747                         $tocline = trim( $canonized_headline );
1748                         $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
1749                         $replacearray = array(
1750                                 '%3A' => ':',
1751                                 '%' => '.'
1752                         );
1753                         $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
1754                         $refer[$headlineCount] = $canonized_headline;
1755
1756                         # count how many in assoc. array so we can track dupes in anchors
1757                         @$refers[$canonized_headline]++;
1758                         $refcount[$headlineCount]=$refers[$canonized_headline];
1759
1760                         # Prepend the number to the heading text
1761
1762                         if( $doNumberHeadings || $doShowToc ) {
1763                                 $tocline = $numbering . " " . $tocline;
1764
1765                                 # Don't number the heading if it is the only one (looks silly)
1766                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1767                                         # the two are different if the line contains a link
1768                                         $headline=$numbering . " " . $headline;
1769                                 }
1770                         }
1771
1772                         # Create the anchor for linking from the TOC to the section
1773                         $anchor = $canonized_headline;
1774                         if($refcount[$headlineCount] > 1 ) {
1775                                 $anchor .= "_" . $refcount[$headlineCount];
1776                         }
1777                         if( $doShowToc ) {
1778                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1779                         }
1780                         if( $showEditLink ) {
1781                                 if ( empty( $head[$headlineCount] ) ) {
1782                                         $head[$headlineCount] = "";
1783                                 }
1784                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1785                         }
1786
1787                         # Add the edit section span
1788                         if( $rightClickHack ) {
1789                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1790                         }
1791
1792                         # give headline the correct <h#> tag
1793                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1794
1795                         $headlineCount++;
1796                 }
1797
1798                 if( $doShowToc ) {
1799                         $toclines = $headlineCount;
1800                         $toc .= $sk->tocUnindent( $toclevel );
1801                         $toc = $sk->tocTable( $toc );
1802                 }
1803
1804                 # split up and insert constructed headlines
1805
1806                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1807                 $i = 0;
1808
1809                 foreach( $blocks as $block ) {
1810                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1811                             # This is the [edit] link that appears for the top block of text when
1812                                 # section editing is enabled
1813
1814                                 # Disabled because it broke block formatting
1815                                 # For example, a bullet point in the top line
1816                                 # $full .= $sk->editSectionLink(0);
1817                         }
1818                         $full .= $block;
1819                         if( $doShowToc && !$i && $isMain) {
1820                         # Top anchor now in skin
1821                                 $full = $full.$toc;
1822                         }
1823
1824                         if( !empty( $head[$i] ) ) {
1825                                 $full .= $head[$i];
1826                         }
1827                         $i++;
1828                 }
1829
1830                 return $full;
1831         }
1832
# Link every "ISBN <number>" occurrence in $text to the Booksources special page.
#
# The text is cut at each literal "ISBN " marker. In every piece that follows a
# marker, leading spaces are skipped and the longest run of digits, hyphens and
# upper-case ASCII letters is taken as the ISBN. With the hyphens removed, that
# value becomes the "isbn" query parameter of the link target. A marker that is
# not followed by a usable number is written back exactly as it was found.
#
# Parameter: $text  the string to scan
# Returns:   the string with recognised ISBNs wrapped in <a class="internal"> links
/* private */ function magicISBN( $text )
{
        # The marker is a fixed string, so explode() is all that is needed.
        # One space is prepended before splitting and removed again from the
        # first piece right below.
        $pieces = explode( "ISBN ", " $text" );
        if ( count( $pieces ) < 2 ) {
                return $text;
        }
        # The (string) casts guard against substr() returning false on old PHP
        # versions when nothing is left to extract.
        $text = (string)substr( array_shift( $pieces ), 1 );
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        foreach ( $pieces as $x ) {
                # strspn() measures each prefix without reading past the end of
                # the string, so a marker at the very end of the text is safe.
                $blankLen = strspn( $x, " " );
                $blank = (string)substr( $x, 0, $blankLen );
                $x = (string)substr( $x, $blankLen );

                $isbnLen = strspn( $x, $valid );
                $isbn = (string)substr( $x, 0, $isbnLen );
                $x = (string)substr( $x, $isbnLen );

                $num = str_replace( "-", "", $isbn );

                if ( "" === $num ) {
                        # Not an ISBN. $isbn may still hold hyphens at this point;
                        # put them back so that no input characters are lost.
                        $text .= "ISBN $blank$isbn$x";
                } else {
                        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
                        $text .= "<a href=\"" .
                                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                                "\" class=\"internal\">ISBN $isbn</a>";
                        $text .= $x;
                }
        }
        return $text;
}
# Link every "RFC <number>" occurrence in $text to the URL given by the
# "rfcurl" message, with "$1" in that message replaced by the RFC number.
#
# The text is cut at each literal "RFC " marker. In every piece that follows a
# marker, leading spaces are skipped and the longest run of decimal digits is
# taken as the RFC number. A marker that is not followed by a digit is written
# back exactly as it was found.
#
# Parameter: $text  the string to scan
# Returns:   the string with recognised RFC references turned into links
/* private */ function magicRFC( $text )
{
        # The marker is a fixed string, so explode() is all that is needed.
        # One space is prepended before splitting and removed again from the
        # first piece right below.
        $pieces = explode( "RFC ", " $text" );
        if ( count( $pieces ) < 2 ) {
                return $text;
        }
        # The (string) casts guard against substr() returning false on old PHP
        # versions when nothing is left to extract.
        $text = (string)substr( array_shift( $pieces ), 1 );
        $valid = "0123456789";

        foreach ( $pieces as $x ) {
                # strspn() measures each prefix without reading past the end of
                # the string, so a marker at the very end of the text is safe.
                $blankLen = strspn( $x, " " );
                $blank = (string)substr( $x, 0, $blankLen );
                $x = (string)substr( $x, $blankLen );

                $rfcLen = strspn( $x, $valid );
                $rfc = (string)substr( $x, 0, $rfcLen );
                $x = (string)substr( $x, $rfcLen );

                if ( "" === $rfc ) {
                        # No number followed the marker: restore the original text.
                        $text .= "RFC $blank$x";
                } else {
                        $url = wfmsg( "rfcurl" );
                        $url = str_replace( '$1', $rfc, $url );
                        $sk =& $this->mOptions->getSkin();
                        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
                        $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
                }
        }
        return $text;
}
1899
# Transform wiki markup before it is saved; the result is still wiki markup
# (the output type is set to OT_WIKI).
#
# Parameters:
#   $text        the submitted wikitext
#   $title       title object, stored by reference in $this->mTitle
#   $user        user object, handed on to pstPass2()
#   $options     parser options, stored in $this->mOptions
#   $clearState  when true (the default) clearState() is called first
# Returns: the transformed wikitext
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOutputType = OT_WIKI;
        $this->mTitle =& $title;
        $this->mOptions = $options;

        if ( $clearState ) {
                $this->clearState();
        }

        # Normalise Windows line endings
        $text = str_replace( "\r\n", "\n", $text );

        # Regex-based normalisation, currently disabled:
        #   $pairs = array(
        #           "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
        #           "/<br *?>/i" => "<br />",
        #   );
        #   $text = preg_replace(array_keys($pairs), array_values($pairs), $text);

        # Stripped sections are kept out of the way while pstPass2() runs and
        # are restored afterwards from the same state variable.
        $state = false;
        $text = $this->strip( $text, $state, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $state );

        return $this->unstripNoWiki( $text, $state );
}
1929
# Second pass of the pre-save transform. In order, it:
#   1. runs replaceVariables() (with OT_WIKI output this only processes
#      {{subst:xxx}} type tags),
#   2. expands the signature shortcuts ~~~~~ (date), ~~~~ (user link and date)
#      and ~~~ (user link),
#   3. completes the "pipe trick" links [[|name]], [[name (context)|]],
#      [[namespace:page|]] and [[ns:page (cont)|]],
#   4. trims trailing whitespace and lets the MAG_END magic word remove itself.
#
# Parameters:
#   $text  wikitext to transform
#   $user  user object; getName() and getOption( "nickname" ) are read from it
# Returns: the transformed wikitext
/* private */ function pstPass2( $text, &$user )
{
        # $wgCurParser is only needed by the disabled block further down
        global $wgLang, $wgLocaltimezone, $wgCurParser;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }
        if(isset($wgLocaltimezone)) {
                $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
        }
        /* Note: this is an ugly timezone hack for the European wikis */
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

        # The tilde markers are fixed strings, so plain string replacement is
        # used. With preg_replace() a name or nickname containing "$1" or "\1"
        # would be read as a backreference and come out corrupted.
        # Longest marker first, so that ~~~~~ is not consumed as ~~~~ or ~~~.
        $sigLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$sigLink $d", $text );
        $text = str_replace( "~~~", $sigLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # If the current title has the form "name (context)", remember the
        # context so that [[|page]] can be completed with it.
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        /*
        $mw =& MagicWord::get( MAG_SUBST );
        $wgCurParser = $this->fork();
        $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
        $this->merge( $wgCurParser );
        */

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
2000
# Prepare the parser for use by an external caller: store the title, options
# and output type that parse() would normally set up, so that individual
# class members can be called afterwards.
#
# Parameters:
#   $title       title object, stored by reference in $this->mTitle
#   $options     parser options, stored in $this->mOptions
#   $outputType  stored in $this->mOutputType
#   $clearState  when true (the default) clearState() is called as well
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
2012
# Run variable replacement on a message text, with the output type set to
# OT_MSG and the global $wgTitle used as the title.
#
# A static flag, shared by every call of this method, guards against infinite
# recursion: while one call is in progress, a nested call returns its input
# unchanged.
#
# Parameters:
#   $text     message text to transform
#   $options  parser options, stored in $this->mOptions
# Returns: the transformed text, or $text untouched for a nested call
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $executing = false;

        if ( !$executing ) {
                $executing = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $executing = false;
        }

        return $text;
}
2032 }
2033
# Container for the result of a parser run: the generated text, the language
# and category links collected along the way, and a flag telling whether old
# magic was encountered.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # Constructor. Every argument is optional; the cache time starts out
        # as an empty string.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = "";
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators. Each one hands the member and the new value to wfSetVar()
        # and returns its result (presumably the previous value; see wfSetVar).
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Fold the link lists and the old-magic flag of another ParserOutput
        # into this one. The text and the cache time are left untouched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links must come from $other, not from our own
                # language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2067
# Settings that control a parser run. The values are taken from site-wide
# globals and from the preferences of a user (see initialiseFromUser()).
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        # Returned by reference: the skin is stored by reference and callers
        # bind the result with =&, so they must receive the stored skin itself
        # rather than a copy of it.
        function &getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators. Each one hands the member and the new value to wfSetVar()
        # (wfSetRef() for the skin) and returns its result.
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Create a new options object initialised from the given user
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill every option: the site-wide ones from their globals, the
        # remaining ones from the user's skin and preferences. When
        # $userInput is empty, a fresh User object marked as loaded is used
        # in its place.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2140
2141 # Regex callbacks, used in Parser::replaceVariables
# Forwards the regex match array to braceSubstitution() on the parser held in
# the global $wgCurParser and returns that method's result.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2147
# Forwards the regex match array to argSubstitution() on the parser held in
# the global $wgCurParser and returns that method's result.
function wfArgSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2153
# Forwards the regex match array to variableSubstitution() on the parser held
# in the global $wgCurParser and returns that method's result.
function wfVariableSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2159
2160 ?>