includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         require_once('extensions/wikihiero/wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         require_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  40 define( "MAX_INCLUDE_REPEAT", 5 );
  41
  42 # Allowed values for $mOutputType
  43 define( "OT_HTML", 1 );
  44 define( "OT_WIKI", 2 );
  45 define( "OT_MSG", 3 );
  46
  47 # string parameter for extractTags which will cause it
  48 # to strip HTML comments in addition to regular
  49 # <XML>-style tags. This should not be anything we
  50 # may want to use in wikisyntax
  51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
  52
  53 # prefix for escaping, used in two functions at least
  54 define( "UNIQ_PREFIX", "NaodW29");
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  65         function Parser()
  66         {
  67                 $this->clearState();
  68         }
  69
  70         function clearState()
  71         {
  72                 $this->mOutput = new ParserOutput;
  73                 $this->mAutonumber = 0;
  74                 $this->mLastSection = "";
  75                 $this->mDTopen = false;
  76                 $this->mVariables = false;
  77                 $this->mIncludeCount = array();
  78                 $this->mStripState = array();
  79                 $this->mArgStack = array();
  80                 $this->mInPre = false;
  81         }
  82
  83         # First pass--just handle <nowiki> sections, pass the rest off
  84         # to internalParse() which does all the real work.
  85         #
  86         # Returns a ParserOutput
  87         #
  88         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  89         {
  90                 global $wgUseTidy;
  91                 $fname = "Parser::parse";
  92                 wfProfileIn( $fname );
  93
  94                 if ( $clearState ) {
  95                         $this->clearState();
  96                 }
  97
  98                 $this->mOptions = $options;
  99                 $this->mTitle =& $title;
 100                 $this->mOutputType = OT_HTML;
 101
 102                 $stripState = NULL;
 103                 $text = $this->strip( $text, $this->mStripState );
 104                 $text = $this->internalParse( $text, $linestart );
 105                 $text = $this->unstrip( $text, $this->mStripState );
 106                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 107                 if(!$wgUseTidy) {
 108                         $fixtags = array(
 109                                 # french spaces, last one Guillemet-left
 110                                 # only if there is something before the space
 111                                 "/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
 112                                 # french spaces, Guillemet-right
 113                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 114                                 "/<hr *>/i" => '<hr />',
 115                                 "/<br *>/i" => '<br />',
 116                                 "/<center *>/i"=>'<div class="center">',
 117                                 "/<\\/center *>/i" => '</div>',
 118                                 # Clean up spare ampersands; note that we probably ought to be
 119                                 # more careful about named entities.
 120                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 121                         );
 122                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 123                 } else {
 124                         $fixtags = array(
 125                                 # french spaces, last one Guillemet-left
 126                                 "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
 127                                 # french spaces, Guillemet-right
 128                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 129                                 "/<center *>/i"=>'<div class="center">',
 130                                 "/<\\/center *>/i" => '</div>'
 131                         );
 132                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 133                 }
 134                 # only once and last
 135                 $text = $this->doBlockLevels( $text, $linestart );
 136                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 137                 if($wgUseTidy) {
 138                         $text = $this->tidy($text);
 139                 }
 140                 $this->mOutput->setText( $text );
 141                 wfProfileOut( $fname );
 142                 return $this->mOutput;
 143         }
 144
 145         /* static */ function getRandomString()
 146         {
 147                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 148         }
 149
 150         # Replaces all occurrences of <$tag>content</$tag> in the text
 151         # with a random marker and returns the new text. the output parameter
 152         # $content will be an associative array filled with data on the form
 153         # $unique_marker => content.
 154
 155         # If $content is already set, the additional entries will be appended
 156
 157         # If $tag is set to STRIP_COMMENTS, the function will extract
 158         # <!-- HTML comments -->
 159
 160         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 161                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 162                 if ( !$content ) {
 163                         $content = array( );
 164                 }
 165                 $n = 1;
 166                 $stripped = "";
 167
 168                 while ( "" != $text ) {
 169                         if($tag==STRIP_COMMENTS) {
 170                                 $p = preg_split( "/<!--/i", $text, 2 );
 171                         } else {
 172                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 173                         }
 174                         $stripped .= $p[0];
 175                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 176                                 $text = "";
 177                         } else {
 178                                 if($tag==STRIP_COMMENTS) {
 179                                         $q = preg_split( "/-->/i", $p[1], 2 );
 180                                 } else {
 181                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 182                                 }
 183                                 $marker = $rnd . sprintf("%08X", $n++);
 184                                 $content[$marker] = $q[0];
 185                                 $stripped .= $marker;
 186                                 $text = $q[1];
 187                         }
 188                 }
 189                 return $stripped;
 190         }
 191
 192         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 193         # If $render is set, performs necessary rendering operations on plugins
 194         # Returns the text, and fills an array with data needed in unstrip()
 195         # If the $state is already a valid strip state, it adds to the state
 196
 197         # When $stripcomments is set, HTML comments <!-- like this -->
 198         # will be stripped in addition to other tags. This is important
 199         # for section editing, where these comments cause confusion when
 200         # counting the sections in the wikisource
 201         function strip( $text, &$state, $stripcomments = false )
 202         {
 203                 $render = ($this->mOutputType == OT_HTML);
 204                 $nowiki_content = array();
 205                 $hiero_content = array();
 206                 $timeline_content = array();
 207                 $math_content = array();
 208                 $pre_content = array();
 209                 $comment_content = array();
 210
 211                 # Replace any instances of the placeholders
 212                 $uniq_prefix = UNIQ_PREFIX;
 213                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 214
 215                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 216                 foreach( $nowiki_content as $marker => $content ){
 217                         if( $render ){
 218                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 219                         } else {
 220                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 221                         }
 222                 }
 223
 224                 $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 225                 foreach( $hiero_content as $marker => $content ){
 226                         if( $render && $GLOBALS['wgUseWikiHiero']){
 227                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 228                         } else {
 229                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
 230                         }
 231                 }
 232
 233                 $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
 234                 foreach( $timeline_content as $marker => $content ){
 235                         if( $render && $GLOBALS['wgUseTimeline']){
 236                                 $timeline_content[$marker] = renderTimeline( $content );
 237                         } else {
 238                                 $timeline_content[$marker] = "<timeline>$content</timeline>";
 239                         }
 240                 }
 241
 242                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 243                 foreach( $math_content as $marker => $content ){
 244                         if( $render ) {
 245                                 if( $this->mOptions->getUseTeX() ) {
 246                                         $math_content[$marker] = renderMath( $content );
 247                                 } else {
 248                                         $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
 249                                 }
 250                         } else {
 251                                 $math_content[$marker] = "<math>$content</math>";
 252                         }
 253                 }
 254
 255                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 256                 foreach( $pre_content as $marker => $content ){
 257                         if( $render ){
 258                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 259                         } else {
 260                                 $pre_content[$marker] = "<pre>$content</pre>";
 261                         }
 262                 }
 263                 if($stripcomments) {
 264                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 265                         foreach( $comment_content as $marker => $content ){
 266                                 $comment_content[$marker] = "<!--$content-->";
 267                         }
 268                 }
 269
 270                 # Merge state with the pre-existing state, if there is one
 271                 if ( $state ) {
 272                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 273                         $state['hiero'] = $state['hiero'] + $hiero_content;
 274                         $state['timeline'] = $state['timeline'] + $timeline_content;
 275                         $state['math'] = $state['math'] + $math_content;
 276                         $state['pre'] = $state['pre'] + $pre_content;
 277                         $state['comment'] = $state['comment'] + $comment_content;
 278                 } else {
 279                         $state = array(
 280                           'nowiki' => $nowiki_content,
 281                           'hiero' => $hiero_content,
 282                           'timeline' => $timeline_content,
 283                           'math' => $math_content,
 284                           'pre' => $pre_content,
 285                           'comment' => $comment_content
 286                         );
 287                 }
 288                 return $text;
 289         }
 290
 291         # always call unstripNoWiki() after this one
 292         function unstrip( $text, &$state )
 293         {
 294                 # Must expand in reverse order, otherwise nested tags will be corrupted
 295                 $contentDict = end( $state );
 296                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 297                         if( key($state) != 'nowiki') {
 298                                 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 299                                         $text = str_replace( key( $contentDict ), $content, $text );
 300                                 }
 301                         }
 302                 }
 303
 304                 return $text;
 305         }
 306         # always call this after unstrip() to preserve the order
 307         function unstripNoWiki( $text, &$state )
 308         {
 309                 # Must expand in reverse order, otherwise nested tags will be corrupted
 310                 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
 311                         $text = str_replace( key( $state['nowiki'] ), $content, $text );
 312                 }
 313
 314                 return $text;
 315         }
 316
 317         # Add an item to the strip state
 318         # Returns the unique tag which must be inserted into the stripped text
 319         # The tag will be replaced with the original text in unstrip()
 320
 321         function insertStripItem( $text, &$state )
 322         {
 323                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 324                 if ( !$state ) {
 325                         $state = array(
 326                           'nowiki' => array(),
 327                           'hiero' => array(),
 328                           'math' => array(),
 329                           'pre' => array()
 330                         );
 331                 }
 332                 $state['item'][$rnd] = $text;
 333                 return $rnd;
 334         }
 335
 336         # This method generates the list of subcategories and pages for a category
 337         function categoryMagic ()
 338         {
 339                 global $wgLang , $wgUser ;
 340                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 341
 342                 $cns = Namespace::getCategory() ;
 343                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 344
 345                 $r = "<br style=\"clear:both;\"/>\n";
 346
 347
 348                 $sk =& $wgUser->getSkin() ;
 349
 350                 $articles = array() ;
 351                 $children = array() ;
 352                 $data = array () ;
 353                 $id = $this->mTitle->getArticleID() ;
 354
 355                 # FIXME: add limits
 356                 $t = wfStrencode( $this->mTitle->getDBKey() );
 357                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 358                 $res = wfQuery ( $sql, DB_READ ) ;
 359                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 360
 361                 # For all pages that link to this category
 362                 foreach ( $data AS $x )
 363                 {
 364                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 365                         if ( $t != "" ) $t .= ":" ;
 366                         $t .= $x->cur_title ;
 367
 368                         if ( $x->cur_namespace == $cns ) {
 369                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 370                         } else {
 371                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 372                         }
 373                 }
 374                 wfFreeResult ( $res ) ;
 375
 376                 # Showing subcategories
 377                 if ( count ( $children ) > 0 ) {
 378                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 379                         $r .= implode ( ", " , $children ) ;
 380                 }
 381
 382                 # Showing pages in this category
 383                 if ( count ( $articles ) > 0 ) {
 384                         $ti = $this->mTitle->getText() ;
 385                         $h =  wfMsg( "category_header", $ti );
 386                         $r .= "<h2>{$h}</h2>\n" ;
 387                         $r .= implode ( ", " , $articles ) ;
 388                 }
 389
 390
 391                 return $r ;
 392         }
 393
 394         function getHTMLattrs ()
 395         {
 396                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 397                                 "title", "align", "lang", "dir", "width", "height",
 398                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 399                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 400                                 /* FONT */ "type", "start", "value", "compact",
 401                                 /* For various lists, mostly deprecated but safe */
 402                                 "summary", "width", "border", "frame", "rules",
 403                                 "cellspacing", "cellpadding", "valign", "char",
 404                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 405                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 406                                 "id", "class", "name", "style" /* For CSS */
 407                                 );
 408                 return $htmlattrs ;
 409         }
 410
 411         function fixTagAttributes ( $t )
 412         {
 413                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 414                 $htmlattrs = $this->getHTMLattrs() ;
 415
 416                 # Strip non-approved attributes from the tag
 417                 $t = preg_replace(
 418                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 419                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 420                         $t);
 421                 # Strip javascript "expression" from stylesheets. Brute force approach:
 422                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 423
 424                 if( preg_match(
 425                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 426                         wfMungeToUtf8( $t ) ) )
 427                 {
 428                         $t="";
 429                 }
 430
 431                 return trim ( $t ) ;
 432         }
 433
 434         /* interface with html tidy, used if $wgUseTidy = true */
 435         function tidy ( $text ) {
 436                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 437                 global $wgInputEncoding, $wgOutputEncoding;
 438                 $fname = "Parser::tidy";
 439                 wfProfileIn( $fname );
 440
 441                 $cleansource = '';
 442                 switch(strtoupper($wgOutputEncoding)) {
 443                         case 'ISO-8859-1':
 444                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 445                                 break;
 446                         case 'UTF-8':
 447                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 448                                 break;
 449                         default:
 450                                 $wgTidyOpts .= ' -raw';
 451                         }
 452
 453                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 454 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 455 '<head><title>test</title></head><body>'.$text.'</body></html>';
 456                 $descriptorspec = array(
 457                         0 => array("pipe", "r"),
 458                         1 => array("pipe", "w"),
 459                         2 => array("file", "/dev/null", "a")
 460                 );
 461                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 462                 if (is_resource($process)) {
 463                         fwrite($pipes[0], $wrappedtext);
 464                         fclose($pipes[0]);
 465                         while (!feof($pipes[1])) {
 466                                 $cleansource .= fgets($pipes[1], 1024);
 467                         }
 468                         fclose($pipes[1]);
 469                         $return_value = proc_close($process);
 470                 }
 471
 472                 wfProfileOut( $fname );
 473
 474                 if( $cleansource == '' && $text != '') {
 475                         wfDebug( "Tidy error detected!\n" );
 476                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 477                 } else {
 478                         return $cleansource;
 479                 }
 480         }
 481
 482         function doTableStuff ( $t )
 483         {
 484                 $t = explode ( "\n" , $t ) ;
 485                 $td = array () ; # Is currently a td tag open?
 486                         $ltd = array () ; # Was it TD or TH?
 487                         $tr = array () ; # Is currently a tr tag open?
 488                         $ltr = array () ; # tr attributes
 489                         foreach ( $t AS $k => $x )
 490                         {
 491                                 $x = trim ( $x ) ;
 492                                 $fc = substr ( $x , 0 , 1 ) ;
 493                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 494                                 {
 495                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 496                                         array_push ( $td , false ) ;
 497                                         array_push ( $ltd , "" ) ;
 498                                         array_push ( $tr , false ) ;
 499                                         array_push ( $ltr , "" ) ;
 500                                 }
 501                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 502                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 503                                 {
 504                                         $z = "</table>\n" ;
 505                                         $l = array_pop ( $ltd ) ;
 506                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 507                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 508                                         array_pop ( $ltr ) ;
 509                                         $t[$k] = $z ;
 510                                 }
 511                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 512                                                 {
 513                                                 $z = trim ( substr ( $x , 2 ) ) ;
 514                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 515                                                 }*/
 516                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 517                                 {
 518                                         $x = substr ( $x , 1 ) ;
 519                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 520                                         $z = "" ;
 521                                         $l = array_pop ( $ltd ) ;
 522                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 523                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 524                                         array_pop ( $ltr ) ;
 525                                         $t[$k] = $z ;
 526                                         array_push ( $tr , false ) ;
 527                                         array_push ( $td , false ) ;
 528                                         array_push ( $ltd , "" ) ;
 529                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 530                                 }
 531                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 532                                 {
 533                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 534                                         {
 535                                                 $fc = "+" ;
 536                                                 $x = substr ( $x , 1 ) ;
 537                                         }
 538                                         $after = substr ( $x , 1 ) ;
 539                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 540                                         $after = explode ( "||" , $after ) ;
 541                                         $t[$k] = "" ;
 542                                         foreach ( $after AS $theline )
 543                                         {
 544                                                 $z = "" ;
 545                                                 if ( $fc != "+" )
 546                                                 {
 547                                                         $tra = array_pop ( $ltr ) ;
 548                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 549                                                         array_push ( $tr , true ) ;
 550                                                         array_push ( $ltr , "" ) ;
 551                                                 }
 552
 553                                                 $l = array_pop ( $ltd ) ;
 554                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 555                                                 if ( $fc == "|" ) $l = "td" ;
 556                                                 else if ( $fc == "!" ) $l = "th" ;
 557                                                 else if ( $fc == "+" ) $l = "caption" ;
 558                                                 else $l = "" ;
 559                                                 array_push ( $ltd , $l ) ;
 560                                                 $y = explode ( "|" , $theline , 2 ) ;
 561                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 562                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 563                                                 $t[$k] .= $y ;
 564                                                 array_push ( $td , true ) ;
 565                                         }
 566                                 }
 567                         }
 568
 569                 # Closing open td, tr && table
 570                 while ( count ( $td ) > 0 )
 571                 {
 572                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 573                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 574                         $t[] = "</table>" ;
 575                 }
 576
 577                 $t = implode ( "\n" , $t ) ;
 578                 #               $t = $this->removeHTMLtags( $t );
 579                 return $t ;
 580         }
 581
 582         # Parses the text and adds the result to the strip state
 583         # Returns the strip tag
 584         function stripParse( $text, $newline, $args )
 585         {
 586                 $text = $this->strip( $text, $this->mStripState );
 587                 $text = $this->internalParse( $text, (bool)$newline, $args, false );
 588                 return $newline.$this->insertStripItem( $text, $this->mStripState );
 589         }
 590
 591         function internalParse( $text, $linestart, $args = array(), $isMain=true )
 592         {
 593                 $fname = "Parser::internalParse";
 594                 wfProfileIn( $fname );
 595
 596                 $text = $this->removeHTMLtags( $text );
 597                 $text = $this->replaceVariables( $text, $args );
 598
 599                 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );
 600
 601                 $text = $this->doHeadings( $text );
 602                 if($this->mOptions->getUseDynamicDates()) {
 603                         global $wgDateFormatter;
 604                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 605                 }
 606                 $text = $this->doAllQuotes( $text );
 607                 $text = $this->replaceExternalLinks( $text );
 608                 $text = $this->replaceInternalLinks ( $text );
 609                 $text = $this->replaceInternalLinks ( $text );
 610                 //$text = $this->doTokenizedParser ( $text );
 611                 $text = $this->doTableStuff ( $text ) ;
 612                 $text = $this->magicISBN( $text );
 613                 $text = $this->magicRFC( $text );
 614                 $text = $this->formatHeadings( $text, $isMain );
 615                 $sk =& $this->mOptions->getSkin();
 616                 $text = $sk->transformContent( $text );
 617
 618                 if ( !isset ( $this->categoryMagicDone ) ) {
 619                         $text .= $this->categoryMagic () ;
 620                         $this->categoryMagicDone = true ;
 621                 }
 622
 623                 wfProfileOut( $fname );
 624                 return $text;
 625         }
 626
 627
 628         /* private */ function doHeadings( $text )
 629         {
 630                 for ( $i = 6; $i >= 1; --$i ) {
 631                         $h = substr( "======", 0, $i );
 632                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 633                           "<h{$i}>\\1</h{$i}>\\2", $text );
 634                 }
 635                 return $text;
 636         }
 637
 638         /* private */ function doAllQuotes( $text )
 639         {
 640                 $outtext = "";
 641                 $lines = explode( "\n", $text );
 642                 foreach ( $lines as $line ) {
 643                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\n";
 644                 }
 645                 return substr($outtext, 0,-1);
 646         }
 647
 648         /* private */ function doQuotes( $pre, $text, $mode )
 649         {
 650                 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
 651                         $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
 652                         $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
 653                         if ( substr ($m[2], 0, 1) == "'" ) {
 654                                 $m[2] = substr ($m[2], 1);
 655                                 if ($mode == "em") {
 656                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
 657                                 } else if ($mode == "strong") {
 658                                         return $m1_strong . $this->doQuotes ( "", $m[2], "" );
 659                                 } else if (($mode == "emstrong") || ($mode == "both")) {
 660                                         return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
 661                                 } else if ($mode == "strongem") {
 662                                         return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
 663                                 } else {
 664                                         return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
 665                                 }
 666                         } else {
 667                                 if ($mode == "strong") {
 668                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
 669                                 } else if ($mode == "em") {
 670                                         return $m1_em . $this->doQuotes ( "", $m[2], "" );
 671                                 } else if ($mode == "emstrong") {
 672                                         return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
 673                                 } else if (($mode == "strongem") || ($mode == "both")) {
 674                                         return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
 675                                 } else {
 676                                         return $m[1] . $this->doQuotes ( "", $m[2], "em" );
 677                                 }
 678                         }
 679                 } else {
 680                         $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
 681                         $text_em = ($text == "") ? "" : "<em>{$text}</em>";
 682                         if ($mode == "") {
 683                                 return $pre . $text;
 684                         } else if ($mode == "em") {
 685                                 return $pre . $text_em;
 686                         } else if ($mode == "strong") {
 687                                 return $pre . $text_strong;
 688                         } else if ($mode == "strongem") {
 689                                 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
 690                         } else {
 691                                 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
 692                         }
 693                 }
 694         }
 695
 696         # Note: we have to do external links before the internal ones,
 697         # and otherwise take great care in the order of things here, so
 698         # that we don't end up interpreting some URLs twice.
 699
 700         /* private */ function replaceExternalLinks( $text )
 701         {
 702                 $fname = "Parser::replaceExternalLinks";
 703                 wfProfileIn( $fname );
 704                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 705                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 706                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 707                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 708                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 709                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 710                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 711                 wfProfileOut( $fname );
 712                 return $text;
 713         }
 714
 715         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 716         {
 717                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 718                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 719
 720                 # this is  the list of separators that should be ignored if they
 721                 # are the last character of an URL but that should be included
 722                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 723                 # in this case, the last comma should not become part of the URL,
 724                 # but in "www.foo.com/123,2342,32.htm" it should.
 725                 $sep = ",;\.:";
 726                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 727                 $images = "gif|png|jpg|jpeg";
 728
 729                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 730                 # they are interpreted as part of the string (used to tell PHP
 731                 # that the content of the string should be inserted there).
 732                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 733                   "((?i){$images})([^{$uc}]|$)/";
 734
 735                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 736                 $sk =& $this->mOptions->getSkin();
 737
 738                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 739                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 740                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 741                 }
 742                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 743                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 744                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 745                   "</a>\\5", $s );
 746                 $s = str_replace( $unique, $protocol, $s );
 747
 748                 $a = explode( "[{$protocol}:", " " . $s );
 749                 $s = array_shift( $a );
 750                 $s = substr( $s, 1 );
 751
 752                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 753                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 754
 755                 foreach ( $a as $line ) {
 756                         if ( preg_match( $e1, $line, $m ) ) {
 757                                 $link = "{$protocol}:{$m[1]}";
 758                                 $trail = $m[2];
 759                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 760                                 else { $text = wfEscapeHTML( $link ); }
 761                         } else if ( preg_match( $e2, $line, $m ) ) {
 762                                 $link = "{$protocol}:{$m[1]}";
 763                                 $text = $m[2];
 764                                 $trail = $m[3];
 765                         } else {
 766                                 $s .= "[{$protocol}:" . $line;
 767                                 continue;
 768                         }
 769                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 770                                 $paren = "";
 771                         } else {
 772                                 # Expand the URL for printable version
 773                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 774                         }
 775                         $la = $sk->getExternalLinkAttributes( $link, $text );
 776                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 777
 778                 }
 779                 return $s;
 780         }
 781
 782
 783         /* private */ function replaceInternalLinks( $s )
 784         {
 785                 global $wgLang, $wgLinkCache;
 786                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 787                 static $fname = "Parser::replaceInternalLinks" ;
 788                 wfProfileIn( $fname );
 789
 790                 wfProfileIn( "$fname-setup" );
 791                 static $tc = FALSE;
 792                 # the % is needed to support urlencoded titles as well
 793                 if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
 794                 $sk =& $this->mOptions->getSkin();
 795
 796                 $a = explode( "[[", " " . $s );
 797                 $s = array_shift( $a );
 798                 $s = substr( $s, 1 );
 799
 800                 # Match a link having the form [[namespace:link|alternate]]trail
 801                 static $e1 = FALSE;
 802                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 803                 # Match the end of a line for a word that's not followed by whitespace,
 804                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 805                 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
 806
 807                 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
 808                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 809                 static $image = FALSE;
 810                 static $special = FALSE;
 811                 static $media = FALSE;
 812                 static $category = FALSE;
 813                 if ( !$image ) { $image = Namespace::getImage(); }
 814                 if ( !$special ) { $special = Namespace::getSpecial(); }
 815                 if ( !$media ) { $media = Namespace::getMedia(); }
 816                 if ( !$category ) { $category = Namespace::getCategory(); }
 817
 818                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 819
 820                 if ( $useLinkPrefixExtension ) {
 821                         if ( preg_match( $e2, $s, $m ) ) {
 822                                 $first_prefix = $m[2];
 823                                 $s = $m[1];
 824                         } else {
 825                                 $first_prefix = false;
 826                         }
 827                 } else {
 828                         $prefix = '';
 829                 }
 830
 831                 wfProfileOut( "$fname-setup" );
 832
 833                 foreach ( $a as $line ) {
 834                         wfProfileIn( "$fname-prefixhandling" );
 835                         if ( $useLinkPrefixExtension ) {
 836                                 if ( preg_match( $e2, $s, $m ) ) {
 837                                         $prefix = $m[2];
 838                                         $s = $m[1];
 839                                 } else {
 840                                         $prefix='';
 841                                 }
 842                                 # first link
 843                                 if($first_prefix) {
 844                                         $prefix = $first_prefix;
 845                                         $first_prefix = false;
 846                                 }
 847                         }
 848                         wfProfileOut( "$fname-prefixhandling" );
 849
 850                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 851                                 $text = $m[2];
 852                                 # fix up urlencoded title texts
 853                                 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
 854                                 $trail = $m[3];
 855                         } else { # Invalid form; output directly
 856                                 $s .= $prefix . "[[" . $line ;
 857                                 wfProfileOut( $fname );
 858                                 continue;
 859                         }
 860
 861                         /* Valid link forms:
 862                         Foobar -- normal
 863                         :Foobar -- override special treatment of prefix (images, language links)
 864                         /Foobar -- convert to CurrentPage/Foobar
 865                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 866                         */
 867                         $c = substr($m[1],0,1);
 868                         $noforce = ($c != ":");
 869                         if( $c == "/" ) { # subpage
 870                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 871                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 872                                         $noslash=$m[1];
 873                                 } else {
 874                                         $noslash=substr($m[1],1);
 875                                 }
 876                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
 877                                         $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 878                                         if( "" == $text ) {
 879                                                 $text= $m[1];
 880                                         } # this might be changed for ugliness reasons
 881                                 } else {
 882                                         $link = $noslash; # no subpage allowed, use standard link
 883                                 }
 884                         } elseif( $noforce ) { # no subpage
 885                                 $link = $m[1];
 886                         } else {
 887                                 $link = substr( $m[1], 1 );
 888                         }
 889                         $wasblank = ( "" == $text );
 890                         if( $wasblank )
 891                         $text = $link;
 892
 893                         $nt = Title::newFromText( $link );
 894                         if( !$nt ) {
 895                                 $s .= $prefix . "[[" . $line;
 896                                 wfProfileOut( $fname );
 897                                 continue;
 898                         }
 899                         $ns = $nt->getNamespace();
 900                         $iw = $nt->getInterWiki();
 901                         if( $noforce ) {
 902                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 903                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 904                                         $tmp = $prefix . $trail ;
 905                                         wfProfileOut( $fname );
 906                                         $s .= (trim($tmp) == '')? '': $tmp;
 907                                         continue;
 908                                 }
 909                                 if ( $ns == $image ) {
 910                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 911                                         $wgLinkCache->addImageLinkObj( $nt );
 912                                         wfProfileOut( $fname );
 913                                         continue;
 914                                 }
 915                                 if ( $ns == $category ) {
 916                                         $t = $nt->getText() ;
 917                                         $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 918
 919                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
 920                                         $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 921                                         $wgLinkCache->resume();
 922
 923                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
 924                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
 925                                         $this->mOutput->mCategoryLinks[] = $t ;
 926                                         $s .= $prefix . $trail ;
 927                                         wfProfileOut( $fname );
 928                                         continue;
 929                                 }
 930                         }
 931                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 932                         ( strpos( $link, "#" ) == FALSE ) ) {
 933                                 # Self-links are handled specially; generally de-link and change to bold.
 934                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 935                                 wfProfileOut( $fname );
 936                                 continue;
 937                         }
 938
 939                         if( $ns == $media ) {
 940                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 941                                 $wgLinkCache->addImageLinkObj( $nt );
 942                                 wfProfileOut( $fname );
 943                                 continue;
 944                         } elseif( $ns == $special ) {
 945                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 946                                 wfProfileOut( $fname );
 947                                 continue;
 948                         }
 949                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail, $prefix );
 950                 }
 951                 wfProfileOut( $fname );
 952                 return $s;
 953         }
 954
 955         # Some functions here used by doBlockLevels()
 956         #
 957         /* private */ function closeParagraph()
 958         {
 959                 $result = "";
 960                 if ( '' != $this->mLastSection ) {
 961                         $result = "</" . $this->mLastSection  . ">\n";
 962                 }
 963                 $this->mInPre = false;
 964                 $this->mLastSection = "";
 965                 return $result;
 966         }
 967         # getCommon() returns the length of the longest common substring
 968         # of both arguments, starting at the beginning of both.
 969         #
 970         /* private */ function getCommon( $st1, $st2 )
 971         {
 972                 $fl = strlen( $st1 );
 973                 $shorter = strlen( $st2 );
 974                 if ( $fl < $shorter ) { $shorter = $fl; }
 975
 976                 for ( $i = 0; $i < $shorter; ++$i ) {
 977                         if ( $st1{$i} != $st2{$i} ) { break; }
 978                 }
 979                 return $i;
 980         }
 981         # These next three functions open, continue, and close the list
 982         # element appropriate to the prefix character passed into them.
 983         #
 984         /* private */ function openList( $char )
 985     {
 986                 $result = $this->closeParagraph();
 987
 988                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 989                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 990                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 991                 else if ( ";" == $char ) {
 992                         $result .= "<dl><dt>";
 993                         $this->mDTopen = true;
 994                 }
 995                 else { $result = "<!-- ERR 1 -->"; }
 996
 997                 return $result;
 998         }
 999
1000         /* private */ function nextItem( $char )
1001         {
1002                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
1003                 else if ( ":" == $char || ";" == $char ) {
1004                         $close = "</dd>";
1005                         if ( $this->mDTopen ) { $close = "</dt>"; }
1006                         if ( ";" == $char ) {
1007                                 $this->mDTopen = true;
1008                                 return $close . "<dt>";
1009                         } else {
1010                                 $this->mDTopen = false;
1011                                 return $close . "<dd>";
1012                         }
1013                 }
1014                 return "<!-- ERR 2 -->";
1015         }
1016
1017         /* private */function closeList( $char )
1018         {
1019                 if ( "*" == $char ) { $text = "</li></ul>"; }
1020                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1021                 else if ( ":" == $char ) {
1022                         if ( $this->mDTopen ) {
1023                                 $this->mDTopen = false;
1024                                 $text = "</dt></dl>";
1025                         } else {
1026                                 $text = "</dd></dl>";
1027                         }
1028                 }
1029                 else {  return "<!-- ERR 3 -->"; }
1030                 return $text."\n";
1031         }
1032
1033         /* private */ function doBlockLevels( $text, $linestart ) {
1034                 $fname = "Parser::doBlockLevels";
1035                 wfProfileIn( $fname );
1036
1037                 # Parsing through the text line by line.  The main thing
1038                 # happening here is handling of block-level elements p, pre,
1039                 # and making lists from lines starting with * # : etc.
1040                 #
1041                 $textLines = explode( "\n", $text );
1042
1043                 $lastPrefix = $output = $lastLine = '';
1044                 $this->mDTopen = $inBlockElem = false;
1045                 $prefixLength = 0;
1046                 $paragraphStack = false;
1047
1048                 if ( !$linestart ) {
1049                         $output .= array_shift( $textLines );
1050                 }
1051                 foreach ( $textLines as $oLine ) {
1052                         $lastPrefixLength = strlen( $lastPrefix );
1053                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1054                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1055                         if (!$this->mInPre) {
1056                                 $this->mInPre = !empty($preOpenMatch);
1057                         }
1058                         if ( !$this->mInPre ) {
1059                                 # Multiple prefixes may abut each other for nested lists.
1060                                 $prefixLength = strspn( $oLine, "*#:;" );
1061                                 $pref = substr( $oLine, 0, $prefixLength );
1062
1063                                 # eh?
1064                                 $pref2 = str_replace( ";", ":", $pref );
1065                                 $t = substr( $oLine, $prefixLength );
1066                         } else {
1067                                 # Don't interpret any other prefixes in preformatted text
1068                                 $prefixLength = 0;
1069                                 $pref = $pref2 = '';
1070                                 $t = $oLine;
1071                         }
1072
1073                         # List generation
1074                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1075                                 # Same as the last item, so no need to deal with nesting or opening stuff
1076                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1077                                 $paragraphStack = false;
1078
1079                                 if ( ";" == substr( $pref, -1 ) ) {
1080                                         # The one nasty exception: definition lists work like this:
1081                                         # ; title : definition text
1082                                         # So we check for : in the remainder text to split up the
1083                                         # title and definition, without b0rking links.
1084                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1085                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1086                                                 $term = $match[1];
1087                                                 $output .= $term . $this->nextItem( ":" );
1088                                                 $t = $match[2];
1089                                         }
1090                                 }
1091                         } elseif( $prefixLength || $lastPrefixLength ) {
1092                                 # Either open or close a level...
1093                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1094                                 $paragraphStack = false;
1095
1096                                 while( $commonPrefixLength < $lastPrefixLength ) {
1097                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1098                                         --$lastPrefixLength;
1099                                 }
1100                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1101                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1102                                 }
1103                                 while ( $prefixLength > $commonPrefixLength ) {
1104                                         $char = substr( $pref, $commonPrefixLength, 1 );
1105                                         $output .= $this->openList( $char );
1106
1107                                         if ( ";" == $char ) {
1108                                                 # FIXME: This is dupe of code above
1109                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1110                                                         $term = $match[1];
1111                                                         $output .= $term . $this->nextItem( ":" );
1112                                                         $t = $match[2];
1113                                                 }
1114                                         }
1115                                         ++$commonPrefixLength;
1116                                 }
1117                                 $lastPrefix = $pref2;
1118                         }
1119                         if( 0 == $prefixLength ) {
1120                                 # No prefix (not in list)--go to paragraph mode
1121                                 $uniq_prefix = UNIQ_PREFIX;
1122                                 // XXX: use a stack for nestable elements like span, table and div
1123                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i", $t );
1124                                 $closematch = preg_match(
1125                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1126                                         "<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1127                                 if ( $openmatch or $closematch ) {
1128                                         $paragraphStack = false;
1129                                         $output .= $this->closeParagraph();
1130                                         if($preOpenMatch and !$preCloseMatch) {
1131                                                 $this->mInPre = true;
1132                                         }
1133                                         if ( $closematch  ) {
1134                                                 $inBlockElem = false;
1135                                         } else {
1136                                                 $inBlockElem = true;
1137                                         }
1138                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1139                                         if ( " " == $t{0} and trim($t) != '' ) {
1140                                                 // pre
1141                                                 if ($this->mLastSection != 'pre') {
1142                                                         $paragraphStack = false;
1143                                                         $output .= $this->closeParagraph().'<pre>';
1144                                                         $this->mLastSection = 'pre';
1145                                                 }
1146                                         } else {
1147                                                 // paragraph
1148                                                 if ( '' == trim($t) ) {
1149                                                         if ( $paragraphStack ) {
1150                                                                 $output .= $paragraphStack.'<br />';
1151                                                                 $paragraphStack = false;
1152                                                                 $this->mLastSection = 'p';
1153                                                         } else {
1154                                                                 if ($this->mLastSection != 'p' ) {
1155                                                                         $output .= $this->closeParagraph();
1156                                                                         $this->mLastSection = '';
1157                                                                         $paragraphStack = "<p>";
1158                                                                 } else {
1159                                                                         $paragraphStack = '</p><p>';
1160                                                                 }
1161                                                         }
1162                                                 } else {
1163                                                         if ( $paragraphStack ) {
1164                                                                 $output .= $paragraphStack;
1165                                                                 $paragraphStack = false;
1166                                                                 $this->mLastSection = 'p';
1167                                                         } else if ($this->mLastSection != 'p') {
1168                                                                 $output .= $this->closeParagraph().'<p>';
1169                                                                 $this->mLastSection = 'p';
1170                                                         }
1171                                                 }
1172                                         }
1173                                 }
1174                         }
1175                         if ($paragraphStack === false) {
1176                                 $output .= $t."\n";
1177                         }
1178                 }
1179                 while ( $prefixLength ) {
1180                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1181                         --$prefixLength;
1182                 }
1183                 if ( "" != $this->mLastSection ) {
1184                         $output .= "</" . $this->mLastSection . ">";
1185                         $this->mLastSection = "";
1186                 }
1187
1188                 wfProfileOut( $fname );
1189                 return $output;
1190         }
1191
# Returns the current value of one built-in wiki variable.
#
# $index - magic word ID; compared (loosely, as a switch would) against
#          the MAG_* constants below, in the order listed
#
# Returns the value for that ID, or NULL when the ID matches none of the
# variables handled here. Values are computed on demand, so only the
# requested one is evaluated.
function getVariableValue( $index ) {
    global $wgLang, $wgSitename, $wgServer;

    if ( $index == MAG_CURRENTMONTH ) {
        # Two-digit month number
        return date( "m" );
    }
    if ( $index == MAG_CURRENTMONTHNAME ) {
        return $wgLang->getMonthName( date( "n" ) );
    }
    if ( $index == MAG_CURRENTMONTHNAMEGEN ) {
        return $wgLang->getMonthNameGen( date( "n" ) );
    }
    if ( $index == MAG_CURRENTDAY ) {
        # Day of the month without leading zero
        return date( "j" );
    }
    if ( $index == MAG_PAGENAME ) {
        return $this->mTitle->getText();
    }
    if ( $index == MAG_NAMESPACE ) {
        # Namespace name as reported by $wgLang for the title's namespace index
        $nsIndex = $this->mTitle->getNamespace();
        return $wgLang->getNsText( $nsIndex );
    }
    if ( $index == MAG_CURRENTDAYNAME ) {
        # date("w") counts from 0; getWeekdayName() is handed that value plus one
        $weekday = date( "w" ) + 1;
        return $wgLang->getWeekdayName( $weekday );
    }
    if ( $index == MAG_CURRENTYEAR ) {
        return date( "Y" );
    }
    if ( $index == MAG_CURRENTTIME ) {
        return $wgLang->time( wfTimestampNow(), false );
    }
    if ( $index == MAG_NUMBEROFARTICLES ) {
        return wfNumberOfArticles();
    }
    if ( $index == MAG_SITENAME ) {
        return $wgSitename;
    }
    if ( $index == MAG_SERVER ) {
        return $wgServer;
    }

    # Unknown variable ID
    return NULL;
}
1225
# (Re)builds $this->mVariables from the IDs listed in $wgVariableIDs.
#
# For every ID, the matching MagicWord object is asked (addToArray) to
# file the variable's current value, as computed by getVariableValue(),
# into the table.
function initialiseVariables()
{
    global $wgVariableIDs;

    $table = array();
    foreach ( $wgVariableIDs as $varId ) {
        $magic =& MagicWord::get( $varId );
        $magic->addToArray( $table, $this->getVariableValue( $varId ) );
    }
    $this->mVariables = $table;
}
1235
# Expands variables, template arguments and templates in a piece of wikitext.
#
# $text - the wikitext to process
# $args - arguments belonging to the text being expanded (empty by
#         default); pushed on $this->mArgStack for the duration of the
#         call so that argSubstitution() can read them
#
# Returns $text with the substitutions applied.
#
# The three regex callbacks (wfVariableSubstitution, wfArgSubstitution,
# wfBraceSubstitution) are global functions defined outside this block;
# presumably they forward to the same-named methods of $wgCurParser,
# which is why that global is bound to $this below.
/* private */ function replaceVariables( $text, $args = array() )
{
    $fname = "Parser::replaceVariables";
    wfProfileIn( $fname );

    # Build the variable table lazily, on first use
    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }
    $titleChars = Title::legalChars();
    $nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );

    # This function is called recursively. To keep track of arguments we need a stack:
    array_push( $this->mArgStack, $args );

    # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
    $GLOBALS['wgCurParser'] =& $this;

    # Variables and arguments are only expanded when producing HTML
    if ( $this->mOutputType == OT_HTML ) {
        # Variable substitution
        $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );

        # Argument substitution
        $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
    }

    # Template substitution (runs for every output type)
    $regex = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";
    $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

    # Drop this call's arguments again
    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
1273
# Replaces one {{NAME}} match with the value of the built-in variable NAME.
# Presumably reached through the wfVariableSubstitution regex callback.
#
# $matches - regex match array: [0] is the whole match, [1] the name
#
# Returns the variable's value, or the matched text unchanged when the
# name is not a key of $this->mVariables.
function variableSubstitution( $matches )
{
    $name = $matches[1];

    if ( !array_key_exists( $name, $this->mVariables ) ) {
        # Not one of ours: leave the braces as they are
        return $matches[0];
    }

    # Flag on the output object that a variable was expanded
    $this->mOutput->mContainsOldMagic = true;
    return $this->mVariables[$name];
}
1284
# Expands a single {{...}} construct matched by the template regex in
# replaceVariables().
#
# $matches - regex match array:
#     [0] the whole matched text
#     [1] an optional newline preceding the braces
#     [2] the part before the first "|"
#     [3] the "|"-prefixed argument string, or "" when there are no arguments
#
# The rules below are tried in order and the first one that produces a
# result wins:
#   1. a triple brace inside the match  - text is returned untouched
#   2. SUBST:                           - only acted on for OT_WIKI output
#   3. MSGNW: / MSG: / INT:             - modifiers and interface messages
#   4. NS:                              - namespace name lookup
#   5. LOCALURL: / LOCALURLE:           - local URL of a title
#   6. built-in variables               - $this->mVariables
#   7. page text loaded through Article - title built with NS_TEMPLATE
#
# For HTML output the result is then either escaped (MSGNW:) or run
# through stripParse() with the cleaned-up arguments.
#
# Returns the replacement text, or $matches[0] when no rule applied.
function braceSubstitution( $matches )
{
    global $wgLinkCache, $wgLang;

    $found = false;     # set once some rule has produced $text
    $nowiki = false;    # MSGNW: seen - escape the result instead of parsing it
    $noparse = false;   # result must be returned verbatim
    $title = NULL;      # Title object, set by the LOCALURL and database steps

    # $newline is an optional newline character before the braces
    # $part1 is the bit before the first |, and must contain only title characters
    # $args is a list of arguments, starting from index 0, not including $part1
    $newline = $matches[1];
    $part1 = $matches[2];
    # If the third subpattern matched anything, it will start with |
    if ( $matches[3] !== "" ) {
        $args = explode( "|", substr( $matches[3], 1 ) );
    } else {
        $args = array();
    }
    $argc = count( $args );

    # {{{}}}
    # Anything containing a triple brace is left alone here
    if ( strpos( $matches[0], "{{{" ) !== false ) {
        $text = $matches[0];
        $found = true;
        $noparse = true;
    }

    # SUBST
    if ( !$found ) {
        $mwSubst =& MagicWord::get( MAG_SUBST );
        if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
            if ( $this->mOutputType != OT_WIKI ) {
                # Invalid SUBST not replaced at PST time
                # Return without further processing
                $text = $matches[0];
                $found = true;
                $noparse = true;
            }
        } elseif ( $this->mOutputType == OT_WIKI ) {
            # SUBST not found in PST pass, do nothing
            $text = $matches[0];
            $found = true;
        }
    }

    # MSG, MSGNW and INT
    if ( !$found ) {
        # Check for MSGNW:
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $nowiki = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg =& MagicWord::get( MAG_MSG );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
            # Subject to the same repeat limit as page inclusions
            if ( $this->incrementIncludeCount( "int:$part1" ) ) {
                $text = wfMsgReal( $part1, $args, true );
                $found = true;
            }
        }
    }

    # NS
    if ( !$found ) {
        # Check for NS: (namespace expansion)
        $mwNs = MagicWord::get( MAG_NS );
        if ( $mwNs->matchStartAndRemove( $part1 ) ) {
            # intval( "0" ) is falsy, so the literal "0" has to be accepted
            # explicitly; otherwise {{ns:0}} would fall through to the
            # later rules with $part1 reduced to "0".
            if ( intval( $part1 ) || $part1 === "0" ) {
                $text = $wgLang->getNsText( intval( $part1 ) );
                $found = true;
            } else {
                # Not a number: try it as a canonical namespace name
                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                if ( !is_null( $index ) ) {
                    $text = $wgLang->getNsText( $index );
                    $found = true;
                }
            }
        }
    }

    # LOCALURL and LOCALURLE
    if ( !$found ) {
        $mwLocal = MagicWord::get( MAG_LOCALURL );
        $mwLocalE = MagicWord::get( MAG_LOCALURLE );

        # Pick the Title method that produces the requested kind of URL
        if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
            $func = 'getLocalURL';
        } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
            $func = 'escapeLocalURL';
        } else {
            $func = '';
        }

        if ( $func !== '' ) {
            $title = Title::newFromText( $part1 );
            if ( !is_null( $title ) ) {
                # The first argument, if any, is handed on to the URL method
                if ( $argc > 0 ) {
                    $text = $title->$func( $args[0] );
                } else {
                    $text = $title->$func();
                }
                $found = true;
            }
        }
    }

    # Internal variables
    if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
        $text = $this->mVariables[$part1];
        $found = true;
        $this->mOutput->mContainsOldMagic = true;
    }

    # Load from database
    if ( !$found ) {
        $title = Title::newFromText( $part1, NS_TEMPLATE );
        if ( !is_null( $title ) && !$title->isExternal() ) {
            # Check for excessive inclusion
            $dbk = $title->getPrefixedDBkey();
            if ( $this->incrementIncludeCount( $dbk ) ) {
                $article = new Article( $title );
                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                if ( $articleContent !== false ) {
                    $found = true;
                    $text = $articleContent;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( $this->mOutputType == OT_HTML && !$found ) {
                $text = "[[" . $title->getPrefixedText() . "]]";
                $found = true;
            }
        }
    }

    # Recursive parsing, escaping and link table handling
    # Only for HTML output
    if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
        $text = wfEscapeWikiText( $text );
    } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse ) {
        # Clean up argument array: "name=value" arguments are stored under
        # their trimmed name, all others are numbered from 1 in order.
        $assocArgs = array();
        $index = 1;
        foreach ( $args as $arg ) {
            $eqpos = strpos( $arg, "=" );
            if ( $eqpos === false ) {
                $assocArgs[$index++] = $arg;
            } else {
                # trim() always yields a string, so no further checks are needed
                $name = trim( substr( $arg, 0, $eqpos ) );
                $value = trim( substr( $arg, $eqpos+1 ) );
                $assocArgs[$name] = $value;
            }
        }

        # Do not enter included links in link table
        if ( !is_null( $title ) ) {
            $wgLinkCache->suspend();
        }

        # Run full parser on the included text
        $text = $this->stripParse( $text, $newline, $assocArgs );

        # Resume the link cache and register the inclusion as a link
        if ( !is_null( $title ) ) {
            $wgLinkCache->resume();
            $wgLinkCache->addLinkObj( $title );
        }
    }

    if ( !$found ) {
        return $matches[0];
    } else {
        return $text;
    }
}
1483
# Triple brace replacement -- used for template arguments.
#
# $matches - regex match array: [0] the whole match, [1] an optional
#            leading newline, [2] the argument name
#
# Looks the trimmed name up in the argument set on top of
# $this->mArgStack. Returns the stripParse()d argument value when it
# exists, otherwise the matched text unchanged.
function argSubstitution( $matches )
{
    $name = trim( $matches[2] );
    $currentArgs = end( $this->mArgStack );

    if ( !array_key_exists( $name, $currentArgs ) ) {
        # Unknown argument: leave {{{name}}} in place
        return $matches[0];
    }

    return $this->stripParse( $currentArgs[$name], $matches[1], array() );
}
1498
# Counts one more inclusion of the entity identified by $dbk and reports
# whether it is still allowed.
#
# $dbk - key identifying the included entity (callers in this file pass
#        a prefixed DB key or an "int:..." message key)
#
# Returns true while the entity has been included at most
# MAX_INCLUDE_REPEAT times (this call included), false afterwards.
function incrementIncludeCount( $dbk )
{
    if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
        ++$this->mIncludeCount[$dbk];
    } else {
        # First time this entity is seen
        $this->mIncludeCount[$dbk] = 1;
    }

    return ( $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT );
}
1511
1512
# Cleans up HTML, removes dangerous tags and attributes.
#
# $text - wikitext that may contain HTML
#
# HTML comments are stripped first. The text is then split on "<" and
# each piece is checked: tags on the whitelist (which is only populated
# when $wgUserHtml is on) are kept, with their attributes passed through
# fixTagAttributes(); every other "<" becomes "&lt;". Any ">" in the text
# following a tag is turned into "&gt;".
#
# Unless $wgUseTidy is set, nesting is checked as well: a closing tag must
# match the most recently opened one, table rows/cells are only accepted
# inside a table, tags not listed as nestable may not be opened twice,
# and tags still open at the end are closed.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
    global $wgUseTidy, $wgUserHtml;
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );

    # The whitelists stay empty unless user HTML is enabled
    $htmlpairs = array();
    $htmlsingle = array();
    $htmlnest = array();
    $tabletags = array();
    if ( $wgUserHtml ) {
        # Tags that must be closed
        $htmlpairs = array(
            "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
            "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
            "strike", "strong", "tt", "var", "div", "center",
            "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
            "ruby", "rt" , "rb" , "rp", "p"
        );
        # Tags that need no closing counterpart
        $htmlsingle = array(
            "br", "hr", "li", "dt", "dd"
        );
        # Tags that can be nested--??
        $htmlnest = array(
            "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
            "dl", "font", "big", "small", "sub", "sup"
        );
        # Can only appear inside table
        $tabletags = array(
            "td", "th", "tr"
        );
    }

    # Table tags are treated like single tags for the stack checks below
    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # The result is not used in this function; the call itself is retained
    $this->getHTMLattrs();

    # Remove HTML comments
    $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );

    # Pieces: optional slash, tag name, attributes, closing bracket, trailing text
    $tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

    $bits = explode( "<", $text );
    $text = array_shift( $bits );

    if ( $wgUseTidy ) {
        # this might be possible using tidy itself
        foreach ( $bits as $chunk ) {
            preg_match( $tagPattern, $chunk, $m );
            @list( $whole, $close, $tag, $attribs, $end, $tail ) = $m;
            if ( in_array( $tag = strtolower( $tag ), $htmlelements ) ) {
                $cleanAttribs = $this->fixTagAttributes( $attribs );
                $tail = str_replace( ">", "&gt;", $tail );
                $text .= "<$close$tag $cleanAttribs$end$tail";
            } else {
                $text .= "&lt;" . str_replace( ">", "&gt;", $chunk );
            }
        }
    } else {
        $tagstack = array();
        $tablestack = array();
        foreach ( $bits as $chunk ) {
            # Notices and warnings are silenced while the match is unpacked
            $savedLevel = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
            preg_match( $tagPattern, $chunk, $m );
            list( $whole, $close, $tag, $attribs, $end, $tail ) = $m;
            error_reporting( $savedLevel );

            $badtag = false;
            if ( in_array( $tag = strtolower( $tag ), $htmlelements ) ) {
                # Check our stack
                if ( $close ) {
                    # Closing a tag...
                    if ( !in_array( $tag, $htmlsingle ) &&
                         ( $popped = @array_pop( $tagstack ) ) != $tag ) {
                        # Does not match the innermost open tag: put it back
                        @array_push( $tagstack, $popped );
                        $badtag = true;
                    } else {
                        if ( $tag == "table" ) {
                            # Leaving a table: restore the stack saved on entry
                            $tagstack = array_pop( $tablestack );
                        }
                        $cleanAttribs = "";
                    }
                } else {
                    # Keep track for later
                    if ( ( in_array( $tag, $tabletags ) && !in_array( "table", $tagstack ) ) ||
                         ( in_array( $tag, $tagstack ) && !in_array( $tag, $htmlnest ) ) ) {
                        $badtag = true;
                    } else if ( !in_array( $tag, $htmlsingle ) ) {
                        if ( $tag == "table" ) {
                            # Entering a table: save the stack and start a fresh one
                            array_push( $tablestack, $tagstack );
                            $tagstack = array();
                        }
                        array_push( $tagstack, $tag );
                    }
                    # Strip non-approved attributes from the tag
                    $cleanAttribs = $this->fixTagAttributes( $attribs );
                }
                if ( !$badtag ) {
                    $tail = str_replace( ">", "&gt;", $tail );
                    $text .= "<$close$tag $cleanAttribs$end$tail";
                    continue;
                }
            }
            # Unknown or rejected tag: neutralise it
            $text .= "&lt;" . str_replace( ">", "&gt;", $chunk );
        }
        # Close off any remaining tags
        while ( is_array( $tagstack ) && ( $open = array_pop( $tagstack ) ) ) {
            $text .= "</$open>\n";
            if ( $open == "table" ) {
                $tagstack = array_pop( $tablestack );
            }
        }
    }

    wfProfileOut( $fname );
    return $text;
}
1629
1630
1631 /*
1632  *
1633  * This function accomplishes several tasks:
1634  * 1) Auto-number headings if that option is enabled
1635  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1636  * 3) Add a Table of contents on the top for users who have enabled the option
1637  * 4) Auto-anchor headings
1638  *
1639  * It loops through all headlines, collects the necessary data, then splits up the
1640  * string and re-inserts the newly formatted headlines.
1641  *
1642  */
1643
1644         /* private */ function formatHeadings( $text, $isMain=true )
1645         {
1646                 global $wgInputEncoding;
1647
1648                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1649                 $doShowToc = $this->mOptions->getShowToc();
1650                 if( !$this->mTitle->userCanEdit() ) {
1651                         $showEditLink = 0;
1652                         $rightClickHack = 0;
1653                 } else {
1654                         $showEditLink = $this->mOptions->getEditSection();
1655                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1656                 }
1657
1658                 # Inhibit editsection links if requested in the page
1659                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1660                 if( $esw->matchAndRemove( $text ) ) {
1661                         $showEditLink = 0;
1662                 }
1663                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1664                 # do not add TOC
1665                 $mw =& MagicWord::get( MAG_NOTOC );
1666                 if( $mw->matchAndRemove( $text ) ) {
1667                         $doShowToc = 0;
1668                 }
1669
1670                 # never add the TOC to the Main Page. This is an entry page that should not
1671                 # be more than 1-2 screens large anyway
1672                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1673                         $doShowToc = 0;
1674                 }
1675
1676                 # Get all headlines for numbering them and adding funky stuff like [edit]
1677                 # links - this is for later, but we need the number of headlines right now
1678                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1679
1680                 # if there are fewer than 4 headlines in the article, do not show TOC
1681                 if( $numMatches < 4 ) {
1682                         $doShowToc = 0;
1683                 }
1684
1685                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1686                 # override above conditions and always show TOC
1687                 $mw =& MagicWord::get( MAG_FORCETOC );
1688                 if ($mw->matchAndRemove( $text ) ) {
1689                         $doShowToc = 1;
1690                 }
1691
1692
1693                 # We need this to perform operations on the HTML
1694                 $sk =& $this->mOptions->getSkin();
1695
1696                 # headline counter
1697                 $headlineCount = 0;
1698
1699                 # Ugh .. the TOC should have neat indentation levels which can be
1700                 # passed to the skin functions. These are determined here
1701                 $toclevel = 0;
1702                 $toc = "";
1703                 $full = "";
1704                 $head = array();
1705                 $sublevelCount = array();
1706                 $level = 0;
1707                 $prevlevel = 0;
1708                 foreach( $matches[3] as $headline ) {
1709                         $numbering = "";
1710                         if( $level ) {
1711                                 $prevlevel = $level;
1712                         }
1713                         $level = $matches[1][$headlineCount];
1714                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1715                                 # reset when we enter a new level
1716                                 $sublevelCount[$level] = 0;
1717                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1718                                 $toclevel += $level - $prevlevel;
1719                         }
1720                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1721                                 # reset when we step back a level
1722                                 $sublevelCount[$level+1]=0;
1723                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1724                                 $toclevel -= $prevlevel - $level;
1725                         }
1726                         # count number of headlines for each level
1727                         @$sublevelCount[$level]++;
1728                         if( $doNumberHeadings || $doShowToc ) {
1729                                 $dot = 0;
1730                                 for( $i = 1; $i <= $level; $i++ ) {
1731                                         if( !empty( $sublevelCount[$i] ) ) {
1732                                                 if( $dot ) {
1733                                                         $numbering .= ".";
1734                                                 }
1735                                                 $numbering .= $sublevelCount[$i];
1736                                                 $dot = 1;
1737                                         }
1738                                 }
1739                         }
1740
1741                         # The canonized header is a version of the header text safe to use for links
1742                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1743                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1744                         $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState );
1745
1746                         # strip out HTML
1747                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1748                         $tocline = trim( $canonized_headline );
1749                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
1750                         $canonized_headline = str_replace('%','.',$canonized_headline);
1751                         $refer[$headlineCount] = $canonized_headline;
1752
1753                         # count how many in assoc. array so we can track dupes in anchors
1754                         @$refers[$canonized_headline]++;
1755                         $refcount[$headlineCount]=$refers[$canonized_headline];
1756
1757                         # Prepend the number to the heading text
1758
1759                         if( $doNumberHeadings || $doShowToc ) {
1760                                 $tocline = $numbering . " " . $tocline;
1761
1762                                 # Don't number the heading if it is the only one (looks silly)
1763                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1764                                         # the two are different if the line contains a link
1765                                         $headline=$numbering . " " . $headline;
1766                                 }
1767                         }
1768
1769                         # Create the anchor for linking from the TOC to the section
1770                         $anchor = $canonized_headline;
1771                         if($refcount[$headlineCount] > 1 ) {
1772                                 $anchor .= "_" . $refcount[$headlineCount];
1773                         }
1774                         if( $doShowToc ) {
1775                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1776                         }
1777                         if( $showEditLink ) {
1778                                 if ( empty( $head[$headlineCount] ) ) {
1779                                         $head[$headlineCount] = "";
1780                                 }
1781                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1782                         }
1783
1784                         # Add the edit section span
1785                         if( $rightClickHack ) {
1786                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1787                         }
1788
1789                         # give headline the correct <h#> tag
1790                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1791
1792                         $headlineCount++;
1793                 }
1794
1795                 if( $doShowToc ) {
1796                         $toclines = $headlineCount;
1797                         $toc .= $sk->tocUnindent( $toclevel );
1798                         $toc = $sk->tocTable( $toc );
1799                 }
1800
1801                 # split up and insert constructed headlines
1802
1803                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1804                 $i = 0;
1805
1806                 foreach( $blocks as $block ) {
1807                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1808                             # This is the [edit] link that appears for the top block of text when
1809                                 # section editing is enabled
1810
1811                                 # Disabled because it broke block formatting
1812                                 # For example, a bullet point in the top line
1813                                 # $full .= $sk->editSectionLink(0);
1814                         }
1815                         $full .= $block;
1816                         if( $doShowToc && !$i && $isMain) {
1817                         # Top anchor now in skin
1818                                 $full = $full.$toc;
1819                         }
1820
1821                         if( !empty( $head[$i] ) ) {
1822                                 $full .= $head[$i];
1823                         }
1824                         $i++;
1825                 }
1826
1827                 return $full;
1828         }
1829
1830         /* private */ function magicISBN( $text )
1831         {
1832                 global $wgLang;
1833
1834                 $a = split( "ISBN ", " $text" );
1835                 if ( count ( $a ) < 2 ) return $text;
1836                 $text = substr( array_shift( $a ), 1);
1837                 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1838
1839                 foreach ( $a as $x ) {
1840                         $isbn = $blank = "" ;
1841                         while ( " " == $x{0} ) {
1842                                 $blank .= " ";
1843                                 $x = substr( $x, 1 );
1844                         }
1845                         while ( strstr( $valid, $x{0} ) != false ) {
1846                                 $isbn .= $x{0};
1847                                 $x = substr( $x, 1 );
1848                         }
1849                         $num = str_replace( "-", "", $isbn );
1850                         $num = str_replace( " ", "", $num );
1851
1852                         if ( "" == $num ) {
1853                                 $text .= "ISBN $blank$x";
1854                         } else {
1855                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1856                                 $text .= "<a href=\"" .
1857                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1858                                         "\" class=\"internal\">ISBN $isbn</a>";
1859                                 $text .= $x;
1860                         }
1861                 }
1862                 return $text;
1863         }
1864         /* private */ function magicRFC( $text )
1865         {
1866                 global $wgLang;
1867
1868                 $a = split( "RFC ", " $text" );
1869                 if ( count ( $a ) < 2 ) return $text;
1870                 $text = substr( array_shift( $a ), 1);
1871                 $valid = "0123456789";
1872
1873                 foreach ( $a as $x ) {
1874                         $rfc = $blank = "" ;
1875                         while ( " " == $x{0} ) {
1876                                 $blank .= " ";
1877                                 $x = substr( $x, 1 );
1878                         }
1879                         while ( strstr( $valid, $x{0} ) != false ) {
1880                                 $rfc .= $x{0};
1881                                 $x = substr( $x, 1 );
1882                         }
1883
1884                         if ( "" == $rfc ) {
1885                                 $text .= "RFC $blank$x";
1886                         } else {
1887                                 $url = wfmsg( "rfcurl" );
1888                                 $url = str_replace( "$1", $rfc, $url);
1889                                 $sk =& $this->mOptions->getSkin();
1890                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1891                                 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1892                         }
1893                 }
1894                 return $text;
1895         }
1896
1897         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1898         {
1899                 $this->mOptions = $options;
1900                 $this->mTitle =& $title;
1901                 $this->mOutputType = OT_WIKI;
1902
1903                 if ( $clearState ) {
1904                         $this->clearState();
1905                 }
1906
1907                 $stripState = false;
1908                 $pairs = array(
1909                         "\r\n" => "\n",
1910                         );
1911                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
1912                 // now with regexes
1913                 /*
1914                 $pairs = array(
1915                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1916                         "/<br *?>/i" => "<br />",
1917                 );
1918                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1919                 */
1920                 $text = $this->strip( $text, $stripState, false );
1921                 $text = $this->pstPass2( $text, $user );
1922                 $text = $this->unstrip( $text, $stripState );
1923                 $text = $this->unstripNoWiki( $text, $stripState );
1924                 return $text;
1925         }
1926
1927         /* private */ function pstPass2( $text, &$user )
1928         {
1929                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1930
1931                 # Variable replacement
1932                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1933                 $text = $this->replaceVariables( $text );
1934
1935                 # Signatures
1936                 #
1937                 $n = $user->getName();
1938                 $k = $user->getOption( "nickname" );
1939                 if ( "" == $k ) { $k = $n; }
1940                 if(isset($wgLocaltimezone)) {
1941                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1942                 }
1943                 /* Note: this is an ugly timezone hack for the European wikis */
1944                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1945                   " (" . date( "T" ) . ")";
1946                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1947
1948                 $text = preg_replace( "/~~~~~/", $d, $text );
1949                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1950                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1951                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1952                   Namespace::getUser() ) . ":$n|$k]]", $text );
1953
1954                 # Context links: [[|name]] and [[name (context)|]]
1955                 #
1956                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1957                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1958                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1959                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1960
1961                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1962                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1963                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1964                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1965                                                                                                                 # [[ns:page (cont)|]]
1966                 $context = "";
1967                 $t = $this->mTitle->getText();
1968                 if ( preg_match( $conpat, $t, $m ) ) {
1969                         $context = $m[2];
1970                 }
1971                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1972                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1973                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1974
1975                 if ( "" == $context ) {
1976                         $text = preg_replace( $p2, "[[\\1]]", $text );
1977                 } else {
1978                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1979                 }
1980
1981                 /*
1982                 $mw =& MagicWord::get( MAG_SUBST );
1983                 $wgCurParser = $this->fork();
1984                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1985                 $this->merge( $wgCurParser );
1986                 */
1987
1988                 # Trim trailing whitespace
1989                 # MAG_END (__END__) tag allows for trailing
1990                 # whitespace to be deliberately included
1991                 $text = rtrim( $text );
1992                 $mw =& MagicWord::get( MAG_END );
1993                 $mw->matchAndRemove( $text );
1994
1995                 return $text;
1996         }
1997
1998         # Set up some variables which are usually set up in parse()
1999         # so that an external function can call some class members with confidence
2000         function startExternalParse( &$title, $options, $outputType, $clearState = true )
2001         {
2002                 $this->mTitle =& $title;
2003                 $this->mOptions = $options;
2004                 $this->mOutputType = $outputType;
2005                 if ( $clearState ) {
2006                         $this->clearState();
2007                 }
2008         }
2009
2010         function transformMsg( $text, $options ) {
2011                 global $wgTitle;
2012                 static $executing = false;
2013
2014                 # Guard against infinite recursion
2015                 if ( $executing ) {
2016                         return $text;
2017                 }
2018                 $executing = true;
2019
2020                 $this->mTitle = $wgTitle;
2021                 $this->mOptions = $options;
2022                 $this->mOutputType = OT_MSG;
2023                 $this->clearState();
2024                 $text = $this->replaceVariables( $text );
2025
2026                 $executing = false;
2027                 return $text;
2028         }
2029 }
2030
# Container for the result of a parse: the output text together with the
# language links, category links and "contains old magic" flag that go with it,
# plus a cache timestamp.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # Constructor. Every argument is optional; mCacheTime always starts out
        # as an empty string.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = "";
        }

        # Plain accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators. Each one delegates to wfSetVar() and returns whatever that
        # helper returns (presumably the previous value - confirm against
        # wfSetVar()).
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Fold another ParserOutput into this one.
        #
        # The other object's language links and category links are appended to
        # this object's lists, and the "contains old magic" flag becomes true if
        # it is set on either object. mText and mCacheTime are left alone.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links must come from $other; appending this object's own
                # language links here would pollute the category list and lose the
                # other object's categories.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2064
# Bundle of settings handed to the parser. Site-wide values are copied from
# globals and per-user values from a User object by initialiseFromUser(); each
# setting has a getter and a setter.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        #
        # Getters: each returns the member of the same name
        #
        function getUseTeX() {
                return $this->mUseTeX;
        }
        function getUseCategoryMagic() {
                return $this->mUseCategoryMagic;
        }
        function getUseDynamicDates() {
                return $this->mUseDynamicDates;
        }
        function getInterwikiMagic() {
                return $this->mInterwikiMagic;
        }
        function getAllowExternalImages() {
                return $this->mAllowExternalImages;
        }
        function getSkin() {
                return $this->mSkin;
        }
        function getDateFormat() {
                return $this->mDateFormat;
        }
        function getEditSection() {
                return $this->mEditSection;
        }
        function getEditSectionOnRightClick() {
                return $this->mEditSectionOnRightClick;
        }
        function getNumberHeadings() {
                return $this->mNumberHeadings;
        }
        function getShowToc() {
                return $this->mShowToc;
        }

        #
        # Setters: each hands the member and the new value to wfSetVar() (or
        # wfSetRef() for the skin) and returns that helper's result
        #
        function setUseTeX( $x ) {
                return wfSetVar( $this->mUseTeX, $x );
        }
        function setUseCategoryMagic( $x ) {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }
        function setUseDynamicDates( $x ) {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }
        function setInterwikiMagic( $x ) {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }
        function setAllowExternalImages( $x ) {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }
        function setSkin( $x ) {
                return wfSetRef( $this->mSkin, $x );
        }
        function setDateFormat( $x ) {
                return wfSetVar( $this->mDateFormat, $x );
        }
        function setEditSection( $x ) {
                return wfSetVar( $this->mEditSection, $x );
        }
        function setEditSectionOnRightClick( $x ) {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }
        function setNumberHeadings( $x ) {
                return wfSetVar( $this->mNumberHeadings, $x );
        }
        function setShowToc( $x ) {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Build a ParserOptions object and initialise it from the given user
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill in every option.
        #
        # Site-wide switches are copied from the corresponding $wg* globals; the
        # skin and the remaining preferences are read from the user. When
        # $userInput is empty, a fresh User object marked as loaded is used
        # instead.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # User preferences
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2137
2138 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches )
{
        # Plain-function callback: forwards the match array to the parser
        # currently stored in the global $wgCurParser
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2144
function wfArgSubstitution( $matches )
{
        # Plain-function callback: forwards the match array to the parser
        # currently stored in the global $wgCurParser
        return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2150
function wfVariableSubstitution( $matches )
{
        # Plain-function callback: forwards the match array to the parser
        # currently stored in the global $wgCurParser
        return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2156
2157 ?>