includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         require_once('extensions/wikihiero/wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         require_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  40 define( "MAX_INCLUDE_REPEAT", 5 );
  41
  42 # Allowed values for $mOutputType
  43 define( "OT_HTML", 1 );
  44 define( "OT_WIKI", 2 );
  45 define( "OT_MSG", 3 );
  46
  47 # string parameter for extractTags which will cause it
  48 # to strip HTML comments in addition to regular
  49 # <XML>-style tags. This should not be anything we
  50 # may want to use in wikisyntax
  51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
  52
  53 # prefix for escaping, used in two functions at least
  54 define( "UNIQ_PREFIX", "NaodW29");
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  65         function Parser()
  66         {
  67                 $this->clearState();
  68         }
  69
  70         function clearState()
  71         {
  72                 $this->mOutput = new ParserOutput;
  73                 $this->mAutonumber = 0;
  74                 $this->mLastSection = "";
  75                 $this->mDTopen = false;
  76                 $this->mVariables = false;
  77                 $this->mIncludeCount = array();
  78                 $this->mStripState = array();
  79                 $this->mArgStack = array();
  80                 $this->mInPre = false;
  81         }
  82
  83         # First pass--just handle <nowiki> sections, pass the rest off
  84         # to internalParse() which does all the real work.
  85         #
  86         # Returns a ParserOutput
  87         #
  88         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  89         {
  90                 global $wgUseTidy;
  91                 $fname = "Parser::parse";
  92                 wfProfileIn( $fname );
  93
  94                 if ( $clearState ) {
  95                         $this->clearState();
  96                 }
  97
  98                 $this->mOptions = $options;
  99                 $this->mTitle =& $title;
 100                 $this->mOutputType = OT_HTML;
 101
 102                 $stripState = NULL;
 103                 $text = $this->strip( $text, $this->mStripState );
 104                 $text = $this->internalParse( $text, $linestart );
 105                 $text = $this->unstrip( $text, $this->mStripState );
 106                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 107                 if(!$wgUseTidy) {
 108                         $fixtags = array(
 109                                 # french spaces, last one Guillemet-left
 110                                 # only if there is something before the space
 111                                 "/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
 112                                 # french spaces, Guillemet-right
 113                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 114                                 "/<hr *>/i" => '<hr />',
 115                                 "/<br *>/i" => '<br />',
 116                                 "/<center *>/i"=>'<div class="center">',
 117                                 "/<\\/center *>/i" => '</div>',
 118                                 # Clean up spare ampersands; note that we probably ought to be
 119                                 # more careful about named entities.
 120                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 121                         );
 122                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 123                 } else {
 124                         $fixtags = array(
 125                                 # french spaces, last one Guillemet-left
 126                                 "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
 127                                 # french spaces, Guillemet-right
 128                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 129                                 "/<center *>/i"=>'<div class="center">',
 130                                 "/<\\/center *>/i" => '</div>'
 131                         );
 132                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 133                 }
 134                 # only once and last
 135                 $text = $this->doBlockLevels( $text, $linestart );
 136                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 137                 if($wgUseTidy) {
 138                         $text = $this->tidy($text);
 139                 }
 140                 $this->mOutput->setText( $text );
 141                 wfProfileOut( $fname );
 142                 return $this->mOutput;
 143         }
 144
 145         /* static */ function getRandomString()
 146         {
 147                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 148         }
 149
        # Replaces all occurrences of <$tag>content</$tag> in the text
        # with a random marker and returns the new text. The output parameter
        # $content will be an associative array filled with entries of the form
        # $unique_marker => content.
 154
 155         # If $content is already set, the additional entries will be appended
 156
 157         # If $tag is set to STRIP_COMMENTS, the function will extract
 158         # <!-- HTML comments -->
 159
 160         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 161                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 162                 if ( !$content ) {
 163                         $content = array( );
 164                 }
 165                 $n = 1;
 166                 $stripped = "";
 167
 168                 while ( "" != $text ) {
 169                         if($tag==STRIP_COMMENTS) {
 170                                 $p = preg_split( "/<!--/i", $text, 2 );
 171                         } else {
 172                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 173                         }
 174                         $stripped .= $p[0];
 175                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 176                                 $text = "";
 177                         } else {
 178                                 if($tag==STRIP_COMMENTS) {
 179                                         $q = preg_split( "/-->/i", $p[1], 2 );
 180                                 } else {
 181                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 182                                 }
 183                                 $marker = $rnd . sprintf("%08X", $n++);
 184                                 $content[$marker] = $q[0];
 185                                 $stripped .= $marker;
 186                                 $text = $q[1];
 187                         }
 188                 }
 189                 return $stripped;
 190         }
 191
        # Strips <nowiki>, <hiero>, <timeline>, <math> and <pre> sections
        # When the output type is OT_HTML, the stripped sections are also rendered
        # Returns the text, and fills an array with data needed in unstrip()
        # If $state is already a valid strip state, the new data is added to it
 196
 197         # When $stripcomments is set, HTML comments <!-- like this -->
 198         # will be stripped in addition to other tags. This is important
 199         # for section editing, where these comments cause confusion when
 200         # counting the sections in the wikisource
 201         function strip( $text, &$state, $stripcomments = false )
 202         {
 203                 $render = ($this->mOutputType == OT_HTML);
 204                 $nowiki_content = array();
 205                 $hiero_content = array();
 206                 $timeline_content = array();
 207                 $math_content = array();
 208                 $pre_content = array();
 209                 $comment_content = array();
 210
 211                 # Replace any instances of the placeholders
 212                 $uniq_prefix = UNIQ_PREFIX;
 213                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 214
 215                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 216                 foreach( $nowiki_content as $marker => $content ){
 217                         if( $render ){
 218                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 219                         } else {
 220                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 221                         }
 222                 }
 223
 224                 $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 225                 foreach( $hiero_content as $marker => $content ){
 226                         if( $render && $GLOBALS['wgUseWikiHiero']){
 227                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 228                         } else {
 229                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
 230                         }
 231                 }
 232
 233                 $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
 234                 foreach( $timeline_content as $marker => $content ){
 235                         if( $render && $GLOBALS['wgUseTimeline']){
 236                                 $timeline_content[$marker] = renderTimeline( $content );
 237                         } else {
 238                                 $timeline_content[$marker] = "<timeline>$content</timeline>";
 239                         }
 240                 }
 241
 242                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 243                 foreach( $math_content as $marker => $content ){
 244                         if( $render ) {
 245                                 if( $this->mOptions->getUseTeX() ) {
 246                                         $math_content[$marker] = renderMath( $content );
 247                                 } else {
 248                                         $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
 249                                 }
 250                         } else {
 251                                 $math_content[$marker] = "<math>$content</math>";
 252                         }
 253                 }
 254
 255                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 256                 foreach( $pre_content as $marker => $content ){
 257                         if( $render ){
 258                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 259                         } else {
 260                                 $pre_content[$marker] = "<pre>$content</pre>";
 261                         }
 262                 }
 263                 if($stripcomments) {
 264                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 265                         foreach( $comment_content as $marker => $content ){
 266                                 $comment_content[$marker] = "<!--$content-->";
 267                         }
 268                 }
 269
 270                 # Merge state with the pre-existing state, if there is one
 271                 if ( $state ) {
 272                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 273                         $state['hiero'] = $state['hiero'] + $hiero_content;
 274                         $state['timeline'] = $state['timeline'] + $timeline_content;
 275                         $state['math'] = $state['math'] + $math_content;
 276                         $state['pre'] = $state['pre'] + $pre_content;
 277                         $state['comment'] = $state['comment'] + $comment_content;
 278                 } else {
 279                         $state = array(
 280                           'nowiki' => $nowiki_content,
 281                           'hiero' => $hiero_content,
 282                           'timeline' => $timeline_content,
 283                           'math' => $math_content,
 284                           'pre' => $pre_content,
 285                           'comment' => $comment_content
 286                         );
 287                 }
 288                 return $text;
 289         }
 290
 291         # always call unstripNoWiki() after this one
 292         function unstrip( $text, &$state )
 293         {
 294                 # Must expand in reverse order, otherwise nested tags will be corrupted
 295                 $contentDict = end( $state );
 296                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 297                         if( key($state) != 'nowiki') {
 298                                 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 299                                         $text = str_replace( key( $contentDict ), $content, $text );
 300                                 }
 301                         }
 302                 }
 303
 304                 return $text;
 305         }
 306         # always call this after unstrip() to preserve the order
 307         function unstripNoWiki( $text, &$state )
 308         {
 309                 # Must expand in reverse order, otherwise nested tags will be corrupted
 310                 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
 311                         $text = str_replace( key( $state['nowiki'] ), $content, $text );
 312                 }
 313
 314                 return $text;
 315         }
 316
 317         # Add an item to the strip state
 318         # Returns the unique tag which must be inserted into the stripped text
 319         # The tag will be replaced with the original text in unstrip()
 320
 321         function insertStripItem( $text, &$state )
 322         {
 323                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 324                 if ( !$state ) {
 325                         $state = array(
 326                           'nowiki' => array(),
 327                           'hiero' => array(),
 328                           'math' => array(),
 329                           'pre' => array()
 330                         );
 331                 }
 332                 $state['item'][$rnd] = $text;
 333                 return $rnd;
 334         }
 335
 336         # This method generates the list of subcategories and pages for a category
 337         function categoryMagic ()
 338         {
 339                 global $wgLang , $wgUser ;
 340                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 341
 342                 $cns = Namespace::getCategory() ;
 343                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 344
 345                 $r = "<br style=\"clear:both;\"/>\n";
 346
 347
 348                 $sk =& $wgUser->getSkin() ;
 349
 350                 $articles = array() ;
 351                 $children = array() ;
 352                 $data = array () ;
 353                 $id = $this->mTitle->getArticleID() ;
 354
 355                 # FIXME: add limits
 356                 $t = wfStrencode( $this->mTitle->getDBKey() );
 357                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 358                 $res = wfQuery ( $sql, DB_READ ) ;
 359                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 360
 361                 # For all pages that link to this category
 362                 foreach ( $data AS $x )
 363                 {
 364                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 365                         if ( $t != "" ) $t .= ":" ;
 366                         $t .= $x->cur_title ;
 367
 368                         if ( $x->cur_namespace == $cns ) {
 369                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 370                         } else {
 371                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 372                         }
 373                 }
 374                 wfFreeResult ( $res ) ;
 375
 376                 # Showing subcategories
 377                 if ( count ( $children ) > 0 ) {
 378                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 379                         $r .= implode ( ", " , $children ) ;
 380                 }
 381
 382                 # Showing pages in this category
 383                 if ( count ( $articles ) > 0 ) {
 384                         $ti = $this->mTitle->getText() ;
 385                         $h =  wfMsg( "category_header", $ti );
 386                         $r .= "<h2>{$h}</h2>\n" ;
 387                         $r .= implode ( ", " , $articles ) ;
 388                 }
 389
 390
 391                 return $r ;
 392         }
 393
 394         function getHTMLattrs ()
 395         {
 396                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 397                                 "title", "align", "lang", "dir", "width", "height",
 398                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 399                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 400                                 /* FONT */ "type", "start", "value", "compact",
 401                                 /* For various lists, mostly deprecated but safe */
 402                                 "summary", "width", "border", "frame", "rules",
 403                                 "cellspacing", "cellpadding", "valign", "char",
 404                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 405                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 406                                 "id", "class", "name", "style" /* For CSS */
 407                                 );
 408                 return $htmlattrs ;
 409         }
 410
 411         function fixTagAttributes ( $t )
 412         {
 413                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 414                 $htmlattrs = $this->getHTMLattrs() ;
 415
 416                 # Strip non-approved attributes from the tag
 417                 $t = preg_replace(
 418                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 419                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 420                         $t);
 421                 # Strip javascript "expression" from stylesheets. Brute force approach:
 422                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 423
 424                 if( preg_match(
 425                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 426                         wfMungeToUtf8( $t ) ) )
 427                 {
 428                         $t="";
 429                 }
 430
 431                 return trim ( $t ) ;
 432         }
 433
 434         /* interface with html tidy, used if $wgUseTidy = true */
 435         function tidy ( $text ) {
 436                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 437                 global $wgInputEncoding, $wgOutputEncoding;
 438                 $fname = "Parser::tidy";
 439                 wfProfileIn( $fname );
 440
 441                 $cleansource = '';
 442                 switch(strtoupper($wgOutputEncoding)) {
 443                         case 'ISO-8859-1':
 444                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 445                                 break;
 446                         case 'UTF-8':
 447                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 448                                 break;
 449                         default:
 450                                 $wgTidyOpts .= ' -raw';
 451                         }
 452
 453                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 454 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 455 '<head><title>test</title></head><body>'.$text.'</body></html>';
 456                 $descriptorspec = array(
 457                         0 => array("pipe", "r"),
 458                         1 => array("pipe", "w"),
 459                         2 => array("file", "/dev/null", "a")
 460                 );
 461                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 462                 if (is_resource($process)) {
 463                         fwrite($pipes[0], $wrappedtext);
 464                         fclose($pipes[0]);
 465                         while (!feof($pipes[1])) {
 466                                 $cleansource .= fgets($pipes[1], 1024);
 467                         }
 468                         fclose($pipes[1]);
 469                         $return_value = proc_close($process);
 470                 }
 471
 472                 wfProfileOut( $fname );
 473
 474                 if( $cleansource == '' && $text != '') {
 475                         wfDebug( "Tidy error detected!\n" );
 476                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 477                 } else {
 478                         return $cleansource;
 479                 }
 480         }
 481
 482         function doTableStuff ( $t )
 483         {
 484                 $t = explode ( "\n" , $t ) ;
 485                 $td = array () ; # Is currently a td tag open?
 486                         $ltd = array () ; # Was it TD or TH?
 487                         $tr = array () ; # Is currently a tr tag open?
 488                         $ltr = array () ; # tr attributes
 489                         foreach ( $t AS $k => $x )
 490                         {
 491                                 $x = trim ( $x ) ;
 492                                 $fc = substr ( $x , 0 , 1 ) ;
 493                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 494                                 {
 495                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 496                                         array_push ( $td , false ) ;
 497                                         array_push ( $ltd , "" ) ;
 498                                         array_push ( $tr , false ) ;
 499                                         array_push ( $ltr , "" ) ;
 500                                 }
 501                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 502                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 503                                 {
 504                                         $z = "</table>\n" ;
 505                                         $l = array_pop ( $ltd ) ;
 506                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 507                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 508                                         array_pop ( $ltr ) ;
 509                                         $t[$k] = $z ;
 510                                 }
 511                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 512                                                 {
 513                                                 $z = trim ( substr ( $x , 2 ) ) ;
 514                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 515                                                 }*/
 516                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 517                                 {
 518                                         $x = substr ( $x , 1 ) ;
 519                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 520                                         $z = "" ;
 521                                         $l = array_pop ( $ltd ) ;
 522                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 523                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 524                                         array_pop ( $ltr ) ;
 525                                         $t[$k] = $z ;
 526                                         array_push ( $tr , false ) ;
 527                                         array_push ( $td , false ) ;
 528                                         array_push ( $ltd , "" ) ;
 529                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 530                                 }
 531                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 532                                 {
 533                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 534                                         {
 535                                                 $fc = "+" ;
 536                                                 $x = substr ( $x , 1 ) ;
 537                                         }
 538                                         $after = substr ( $x , 1 ) ;
 539                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 540                                         $after = explode ( "||" , $after ) ;
 541                                         $t[$k] = "" ;
 542                                         foreach ( $after AS $theline )
 543                                         {
 544                                                 $z = "" ;
 545                                                 if ( $fc != "+" )
 546                                                 {
 547                                                         $tra = array_pop ( $ltr ) ;
 548                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 549                                                         array_push ( $tr , true ) ;
 550                                                         array_push ( $ltr , "" ) ;
 551                                                 }
 552
 553                                                 $l = array_pop ( $ltd ) ;
 554                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 555                                                 if ( $fc == "|" ) $l = "td" ;
 556                                                 else if ( $fc == "!" ) $l = "th" ;
 557                                                 else if ( $fc == "+" ) $l = "caption" ;
 558                                                 else $l = "" ;
 559                                                 array_push ( $ltd , $l ) ;
 560                                                 $y = explode ( "|" , $theline , 2 ) ;
 561                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 562                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 563                                                 $t[$k] .= $y ;
 564                                                 array_push ( $td , true ) ;
 565                                         }
 566                                 }
 567                         }
 568
 569                 # Closing open td, tr && table
 570                 while ( count ( $td ) > 0 )
 571                 {
 572                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 573                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 574                         $t[] = "</table>" ;
 575                 }
 576
 577                 $t = implode ( "\n" , $t ) ;
 578                 #               $t = $this->removeHTMLtags( $t );
 579                 return $t ;
 580         }
 581
 582         # Parses the text and adds the result to the strip state
 583         # Returns the strip tag
 584         function stripParse( $text, $newline, $args )
 585         {
 586                 $text = $this->strip( $text, $this->mStripState );
 587                 $text = $this->internalParse( $text, (bool)$newline, $args, false );
 588                 return $newline.$this->insertStripItem( $text, $this->mStripState );
 589         }
 590
 591         function internalParse( $text, $linestart, $args = array(), $isMain=true )
 592         {
 593                 $fname = "Parser::internalParse";
 594                 wfProfileIn( $fname );
 595
 596                 $text = $this->removeHTMLtags( $text );
 597                 $text = $this->replaceVariables( $text, $args );
 598
 599                 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );
 600
 601                 $text = $this->doHeadings( $text );
 602                 if($this->mOptions->getUseDynamicDates()) {
 603                         global $wgDateFormatter;
 604                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 605                 }
 606                 $text = $this->doAllQuotes( $text );
 607                 $text = $this->replaceExternalLinks( $text );
 608                 $text = $this->replaceInternalLinks ( $text );
 609                 $text = $this->replaceInternalLinks ( $text );
 610                 //$text = $this->doTokenizedParser ( $text );
 611                 $text = $this->doTableStuff ( $text ) ;
 612                 $text = $this->magicISBN( $text );
 613                 $text = $this->magicRFC( $text );
 614                 $text = $this->formatHeadings( $text, $isMain );
 615                 $sk =& $this->mOptions->getSkin();
 616                 $text = $sk->transformContent( $text );
 617
 618                 if ( !isset ( $this->categoryMagicDone ) ) {
 619                         $text .= $this->categoryMagic () ;
 620                         $this->categoryMagicDone = true ;
 621                 }
 622
 623                 wfProfileOut( $fname );
 624                 return $text;
 625         }
 626
 627
 628         /* private */ function doHeadings( $text )
 629         {
 630                 for ( $i = 6; $i >= 1; --$i ) {
 631                         $h = substr( "======", 0, $i );
 632                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 633                           "<h{$i}>\\1</h{$i}>\\2", $text );
 634                 }
 635                 return $text;
 636         }
 637
 638         /* private */ function doAllQuotes( $text )
 639         {
 640                 $outtext = "";
 641                 $lines = explode( "\n", $text );
 642                 foreach ( $lines as $line ) {
 643                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\n";
 644                 }
 645                 return substr($outtext, 0,-1);
 646         }
 647
 648         /* private */ function doQuotes( $pre, $text, $mode )
 649         {
 650                 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
 651                         $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
 652                         $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
 653                         if ( substr ($m[2], 0, 1) == "'" ) {
 654                                 $m[2] = substr ($m[2], 1);
 655                                 if ($mode == "em") {
 656                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
 657                                 } else if ($mode == "strong") {
 658                                         return $m1_strong . $this->doQuotes ( "", $m[2], "" );
 659                                 } else if (($mode == "emstrong") || ($mode == "both")) {
 660                                         return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
 661                                 } else if ($mode == "strongem") {
 662                                         return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
 663                                 } else {
 664                                         return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
 665                                 }
 666                         } else {
 667                                 if ($mode == "strong") {
 668                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
 669                                 } else if ($mode == "em") {
 670                                         return $m1_em . $this->doQuotes ( "", $m[2], "" );
 671                                 } else if ($mode == "emstrong") {
 672                                         return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
 673                                 } else if (($mode == "strongem") || ($mode == "both")) {
 674                                         return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
 675                                 } else {
 676                                         return $m[1] . $this->doQuotes ( "", $m[2], "em" );
 677                                 }
 678                         }
 679                 } else {
 680                         $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
 681                         $text_em = ($text == "") ? "" : "<em>{$text}</em>";
 682                         if ($mode == "") {
 683                                 return $pre . $text;
 684                         } else if ($mode == "em") {
 685                                 return $pre . $text_em;
 686                         } else if ($mode == "strong") {
 687                                 return $pre . $text_strong;
 688                         } else if ($mode == "strongem") {
 689                                 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
 690                         } else {
 691                                 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
 692                         }
 693                 }
 694         }
 695
 696         # Note: we have to do external links before the internal ones,
 697         # and otherwise take great care in the order of things here, so
 698         # that we don't end up interpreting some URLs twice.
 699
 700         /* private */ function replaceExternalLinks( $text )
 701         {
 702                 $fname = "Parser::replaceExternalLinks";
 703                 wfProfileIn( $fname );
 704                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 705                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 706                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 707                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 708                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 709                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 710                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 711                 wfProfileOut( $fname );
 712                 return $text;
 713         }
 714
 715         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 716         {
 717                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 718                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 719
 720                 # this is  the list of separators that should be ignored if they
 721                 # are the last character of an URL but that should be included
 722                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 723                 # in this case, the last comma should not become part of the URL,
 724                 # but in "www.foo.com/123,2342,32.htm" it should.
 725                 $sep = ",;\.:";
 726                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 727                 $images = "gif|png|jpg|jpeg";
 728
 729                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 730                 # they are interpreted as part of the string (used to tell PHP
 731                 # that the content of the string should be inserted there).
 732                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 733                   "((?i){$images})([^{$uc}]|$)/";
 734
 735                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 736                 $sk =& $this->mOptions->getSkin();
 737
 738                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 739                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 740                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 741                 }
 742                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 743                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 744                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 745                   "</a>\\5", $s );
 746                 $s = str_replace( $unique, $protocol, $s );
 747
 748                 $a = explode( "[{$protocol}:", " " . $s );
 749                 $s = array_shift( $a );
 750                 $s = substr( $s, 1 );
 751
 752                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 753                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 754
 755                 foreach ( $a as $line ) {
 756                         if ( preg_match( $e1, $line, $m ) ) {
 757                                 $link = "{$protocol}:{$m[1]}";
 758                                 $trail = $m[2];
 759                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 760                                 else { $text = wfEscapeHTML( $link ); }
 761                         } else if ( preg_match( $e2, $line, $m ) ) {
 762                                 $link = "{$protocol}:{$m[1]}";
 763                                 $text = $m[2];
 764                                 $trail = $m[3];
 765                         } else {
 766                                 $s .= "[{$protocol}:" . $line;
 767                                 continue;
 768                         }
 769                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 770                                 $paren = "";
 771                         } else {
 772                                 # Expand the URL for printable version
 773                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 774                         }
 775                         $la = $sk->getExternalLinkAttributes( $link, $text );
 776                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 777
 778                 }
 779                 return $s;
 780         }
 781
 782
 783         /* private */ function replaceInternalLinks( $s )
 784         {
 785                 global $wgLang, $wgLinkCache;
 786                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 787                 static $fname = "Parser::replaceInternalLink" ;
 788                 wfProfileIn( $fname );
 789
 790                 wfProfileIn( "$fname-setup" );
 791                 static $tc = FALSE;
 792                 # the % is needed to support urlencoded titles as well
 793                 if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
 794                 $sk =& $this->mOptions->getSkin();
 795
 796                 $a = explode( "[[", " " . $s );
 797                 $s = array_shift( $a );
 798                 $s = substr( $s, 1 );
 799
 800                 # Match a link having the form [[namespace:link|alternate]]trail
 801                 static $e1 = FALSE;
 802                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 803                 # Match the end of a line for a word that's not followed by whitespace,
 804                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 805                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 806                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 807                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 808
 809
 810                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 811                 static $image = FALSE;
 812                 static $special = FALSE;
 813                 static $media = FALSE;
 814                 static $category = FALSE;
 815                 if ( !$image ) { $image = Namespace::getImage(); }
 816                 if ( !$special ) { $special = Namespace::getSpecial(); }
 817                 if ( !$media ) { $media = Namespace::getMedia(); }
 818                 if ( !$category ) { $category = Namespace::getCategory(); }
 819
 820                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 821
 822                 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
 823                         $new_prefix = $m[2];
 824                         $s = $m[1];
 825                 } else {
 826                         $new_prefix="";
 827                 }
 828
 829                 wfProfileOut( "$fname-setup" );
 830
 831                 foreach ( $a as $line ) {
 832                         $prefix = $new_prefix;
 833
 834                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 835                                 $text = $m[2];
 836                                 # fix up urlencoded title texts
 837                                 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
 838                                 $trail = $m[3];
 839                         } else { # Invalid form; output directly
 840                                 $s .= $prefix . "[[" . $line ;
 841                                 wfProfileOut( $fname );
 842                                 continue;
 843                         }
 844
 845                         /* Valid link forms:
 846                         Foobar -- normal
 847                         :Foobar -- override special treatment of prefix (images, language links)
 848                         /Foobar -- convert to CurrentPage/Foobar
 849                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 850                         */
 851                         $c = substr($m[1],0,1);
 852                         $noforce = ($c != ":");
 853                         if( $c == "/" ) { # subpage
 854                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 855                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 856                                         $noslash=$m[1];
 857                                 } else {
 858                                         $noslash=substr($m[1],1);
 859                                 }
 860                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
 861                                         $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 862                                         if( "" == $text ) {
 863                                                 $text= $m[1];
 864                                         } # this might be changed for ugliness reasons
 865                                 } else {
 866                                         $link = $noslash; # no subpage allowed, use standard link
 867                                 }
 868                         } elseif( $noforce ) { # no subpage
 869                                 $link = $m[1];
 870                         } else {
 871                                 $link = substr( $m[1], 1 );
 872                         }
 873                         $wasblank = ( "" == $text );
 874                         if( $wasblank )
 875                         $text = $link;
 876
 877                         $nt = Title::newFromText( $link );
 878                         if( !$nt ) {
 879                                 $s .= $prefix . "[[" . $line;
 880                                 wfProfileOut( $fname );
 881                                 continue;
 882                         }
 883                         $ns = $nt->getNamespace();
 884                         $iw = $nt->getInterWiki();
 885                         if( $noforce ) {
 886                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 887                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 888                                         $tmp = $prefix . $trail ;
 889                                         wfProfileOut( $fname );
 890                                         $s .= (trim($tmp) == '')? '': $tmp;
 891                                         continue;
 892                                 }
 893                                 if ( $ns == $image ) {
 894                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 895                                         $wgLinkCache->addImageLinkObj( $nt );
 896                                         wfProfileOut( $fname );
 897                                         continue;
 898                                 }
 899                                 if ( $ns == $category ) {
 900                                         $t = $nt->getText() ;
 901                                         $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 902
 903                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
 904                                         $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 905                                         $wgLinkCache->resume();
 906
 907                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
 908                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
 909                                         $this->mOutput->mCategoryLinks[] = $t ;
 910                                         $s .= $prefix . $trail ;
 911                                         wfProfileOut( $fname );
 912                                         continue;
 913                                 }
 914                         }
 915                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 916                         ( strpos( $link, "#" ) == FALSE ) ) {
 917                                 # Self-links are handled specially; generally de-link and change to bold.
 918                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 919                                 wfProfileOut( $fname );
 920                                 continue;
 921                         }
 922
 923                         if( $ns == $media ) {
 924                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 925                                 $wgLinkCache->addImageLinkObj( $nt );
 926                                 wfProfileOut( $fname );
 927                                 continue;
 928                         } elseif( $ns == $special ) {
 929                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 930                                 wfProfileOut( $fname );
 931                                 continue;
 932                         }
 933                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 934                 }
 935                 wfProfileOut( $fname );
 936                 return $s;
 937         }
 938
 939         # Some functions here used by doBlockLevels()
 940         #
 941         /* private */ function closeParagraph()
 942         {
 943                 $result = "";
 944                 if ( '' != $this->mLastSection ) {
 945                         $result = "</" . $this->mLastSection  . ">\n";
 946                 }
 947                 $this->mInPre = false;
 948                 $this->mLastSection = "";
 949                 return $result;
 950         }
 951         # getCommon() returns the length of the longest common substring
 952         # of both arguments, starting at the beginning of both.
 953         #
 954         /* private */ function getCommon( $st1, $st2 )
 955         {
 956                 $fl = strlen( $st1 );
 957                 $shorter = strlen( $st2 );
 958                 if ( $fl < $shorter ) { $shorter = $fl; }
 959
 960                 for ( $i = 0; $i < $shorter; ++$i ) {
 961                         if ( $st1{$i} != $st2{$i} ) { break; }
 962                 }
 963                 return $i;
 964         }
 965         # These next three functions open, continue, and close the list
 966         # element appropriate to the prefix character passed into them.
 967         #
 968         /* private */ function openList( $char )
 969     {
 970                 $result = $this->closeParagraph();
 971
 972                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 973                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 974                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 975                 else if ( ";" == $char ) {
 976                         $result .= "<dl><dt>";
 977                         $this->mDTopen = true;
 978                 }
 979                 else { $result = "<!-- ERR 1 -->"; }
 980
 981                 return $result;
 982         }
 983
 984         /* private */ function nextItem( $char )
 985         {
 986                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 987                 else if ( ":" == $char || ";" == $char ) {
 988                         $close = "</dd>";
 989                         if ( $this->mDTopen ) { $close = "</dt>"; }
 990                         if ( ";" == $char ) {
 991                                 $this->mDTopen = true;
 992                                 return $close . "<dt>";
 993                         } else {
 994                                 $this->mDTopen = false;
 995                                 return $close . "<dd>";
 996                         }
 997                 }
 998                 return "<!-- ERR 2 -->";
 999         }
1000
1001         /* private */function closeList( $char )
1002         {
1003                 if ( "*" == $char ) { $text = "</li></ul>"; }
1004                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1005                 else if ( ":" == $char ) {
1006                         if ( $this->mDTopen ) {
1007                                 $this->mDTopen = false;
1008                                 $text = "</dt></dl>";
1009                         } else {
1010                                 $text = "</dd></dl>";
1011                         }
1012                 }
1013                 else {  return "<!-- ERR 3 -->"; }
1014                 return $text."\n";
1015         }
1016
1017         /* private */ function doBlockLevels( $text, $linestart ) {
1018                 $fname = "Parser::doBlockLevels";
1019                 wfProfileIn( $fname );
1020
1021                 # Parsing through the text line by line.  The main thing
1022                 # happening here is handling of block-level elements p, pre,
1023                 # and making lists from lines starting with * # : etc.
1024                 #
1025                 $textLines = explode( "\n", $text );
1026
1027                 $lastPrefix = $output = $lastLine = '';
1028                 $this->mDTopen = $inBlockElem = false;
1029                 $prefixLength = 0;
1030                 $paragraphStack = false;
1031
1032                 if ( !$linestart ) {
1033                         $output .= array_shift( $textLines );
1034                 }
1035                 foreach ( $textLines as $oLine ) {
1036                         $lastPrefixLength = strlen( $lastPrefix );
1037                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1038                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1039                         if (!$this->mInPre) {
1040                                 $this->mInPre = !empty($preOpenMatch);
1041                         }
1042                         if ( !$this->mInPre ) {
1043                                 # Multiple prefixes may abut each other for nested lists.
1044                                 $prefixLength = strspn( $oLine, "*#:;" );
1045                                 $pref = substr( $oLine, 0, $prefixLength );
1046
1047                                 # eh?
1048                                 $pref2 = str_replace( ";", ":", $pref );
1049                                 $t = substr( $oLine, $prefixLength );
1050                         } else {
1051                                 # Don't interpret any other prefixes in preformatted text
1052                                 $prefixLength = 0;
1053                                 $pref = $pref2 = '';
1054                                 $t = $oLine;
1055                         }
1056
1057                         # List generation
1058                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1059                                 # Same as the last item, so no need to deal with nesting or opening stuff
1060                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1061                                 $paragraphStack = false;
1062
1063                                 if ( ";" == substr( $pref, -1 ) ) {
1064                                         # The one nasty exception: definition lists work like this:
1065                                         # ; title : definition text
1066                                         # So we check for : in the remainder text to split up the
1067                                         # title and definition, without b0rking links.
1068                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1069                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1070                                                 $term = $match[1];
1071                                                 $output .= $term . $this->nextItem( ":" );
1072                                                 $t = $match[2];
1073                                         }
1074                                 }
1075                         } elseif( $prefixLength || $lastPrefixLength ) {
1076                                 # Either open or close a level...
1077                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1078                                 $paragraphStack = false;
1079
1080                                 while( $commonPrefixLength < $lastPrefixLength ) {
1081                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1082                                         --$lastPrefixLength;
1083                                 }
1084                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1085                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1086                                 }
1087                                 while ( $prefixLength > $commonPrefixLength ) {
1088                                         $char = substr( $pref, $commonPrefixLength, 1 );
1089                                         $output .= $this->openList( $char );
1090
1091                                         if ( ";" == $char ) {
1092                                                 # FIXME: This is dupe of code above
1093                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1094                                                         $term = $match[1];
1095                                                         $output .= $term . $this->nextItem( ":" );
1096                                                         $t = $match[2];
1097                                                 }
1098                                         }
1099                                         ++$commonPrefixLength;
1100                                 }
1101                                 $lastPrefix = $pref2;
1102                         }
1103                         if( 0 == $prefixLength ) {
1104                                 # No prefix (not in list)--go to paragraph mode
1105                                 $uniq_prefix = UNIQ_PREFIX;
1106                                 // XXX: use a stack for nestable elements like span, table and div
1107                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/td|<\\/th)/i", $t );
1108                                 $closematch = preg_match(
1109                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1110                                         "<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1111                                 if ( $openmatch or $closematch ) {
1112                                         $paragraphStack = false;
1113                                         $output .= $this->closeParagraph();
1114                                         if($preOpenMatch and !$preCloseMatch) {
1115                                                 $this->mInPre = true;
1116                                         }
1117                                         if ( $closematch  ) {
1118                                                 $inBlockElem = false;
1119                                         } else {
1120                                                 $inBlockElem = true;
1121                                         }
1122                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1123                                         if ( " " == $t{0} and trim($t) != '' ) {
1124                                                 // pre
1125                                                 if ($this->mLastSection != 'pre') {
1126                                                         $paragraphStack = false;
1127                                                         $output .= $this->closeParagraph().'<pre>';
1128                                                         $this->mLastSection = 'pre';
1129                                                 }
1130                                         } else {
1131                                                 // paragraph
1132                                                 if ( '' == trim($t) ) {
1133                                                         if ( $paragraphStack ) {
1134                                                                 $output .= $paragraphStack.'<br />';
1135                                                                 $paragraphStack = false;
1136                                                                 $this->mLastSection = 'p';
1137                                                         } else {
1138                                                                 if ($this->mLastSection != 'p' ) {
1139                                                                         $output .= $this->closeParagraph();
1140                                                                         $this->mLastSection = '';
1141                                                                         $paragraphStack = "<p>";
1142                                                                 } else {
1143                                                                         $paragraphStack = '</p><p>';
1144                                                                 }
1145                                                         }
1146                                                 } else {
1147                                                         if ( $paragraphStack ) {
1148                                                                 $output .= $paragraphStack;
1149                                                                 $paragraphStack = false;
1150                                                                 $this->mLastSection = 'p';
1151                                                         } else if ($this->mLastSection != 'p') {
1152                                                                 $output .= $this->closeParagraph().'<p>';
1153                                                                 $this->mLastSection = 'p';
1154                                                         }
1155                                                 }
1156                                         }
1157                                 }
1158                         }
1159                         if ($paragraphStack === false) {
1160                                 $output .= $t."\n";
1161                         }
1162                 }
1163                 while ( $prefixLength ) {
1164                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1165                         --$prefixLength;
1166                 }
1167                 if ( "" != $this->mLastSection ) {
1168                         $output .= "</" . $this->mLastSection . ">";
1169                         $this->mLastSection = "";
1170                 }
1171
1172                 wfProfileOut( $fname );
1173                 return $output;
1174         }
1175
# Return the current value of one built-in magic variable.
#
# $index is one of the MAG_* variable identifiers listed in the switch.
# Date/time values are computed at call time, page values are read from
# $this->mTitle, and site values come from the $wgSitename / $wgServer
# globals. Returns NULL when $index is not one of the handled identifiers.
function getVariableValue( $index ) {
    global $wgLang, $wgSitename, $wgServer;

    $value = NULL;
    switch ( $index ) {
        case MAG_CURRENTMONTH:
            # Month number with leading zero
            $value = date( "m" );
            break;
        case MAG_CURRENTMONTHNAME:
            $value = $wgLang->getMonthName( date("n") );
            break;
        case MAG_CURRENTMONTHNAMEGEN:
            $value = $wgLang->getMonthNameGen( date("n") );
            break;
        case MAG_CURRENTDAY:
            # Day of the month without leading zero
            $value = date("j");
            break;
        case MAG_PAGENAME:
            $value = $this->mTitle->getText();
            break;
        case MAG_NAMESPACE:
            # Namespace name as provided by the language object
            $value = $wgLang->getNsText($this->mTitle->getNamespace());
            break;
        case MAG_CURRENTDAYNAME:
            # date("w") yields 0..6, so the weekday index passed on is 1..7
            $value = $wgLang->getWeekdayName( date("w")+1 );
            break;
        case MAG_CURRENTYEAR:
            $value = date( "Y" );
            break;
        case MAG_CURRENTTIME:
            $value = $wgLang->time( wfTimestampNow(), false );
            break;
        case MAG_NUMBEROFARTICLES:
            $value = wfNumberOfArticles();
            break;
        case MAG_SITENAME:
            $value = $wgSitename;
            break;
        case MAG_SERVER:
            $value = $wgServer;
            break;
        default:
            # Unknown identifier: $value stays NULL
            break;
    }
    return $value;
}
1209
# (Re)build $this->mVariables from the identifiers in $wgVariableIDs.
#
# For every identifier the matching MagicWord object is asked to add an
# entry for the variable's current value to $this->mVariables. The array is
# later looked up by the text found between braces (see
# variableSubstitution() and braceSubstitution()).
function initialiseVariables()
{
    global $wgVariableIDs;

    $this->mVariables = array();
    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $currentValue = $this->getVariableValue( $variableId );
        $magicWord->addToArray( $this->mVariables, $currentValue );
    }
}
1219
# Expand double- and triple-brace constructs in $text.
#
# $text  wikitext to process
# $args  arguments of the enclosing inclusion; pushed on $this->mArgStack
#        for the duration of the call so that argSubstitution() can resolve
#        {{{name}}} placeholders against them
#
# For HTML output three passes are run: plain {{VARIABLE}} substitution,
# {{{argument}}} substitution, then the general {{template|args}} pass. For
# other output types only the last pass is run.
#
# Returns the text with all substitutions applied.
/* private */ function replaceVariables( $text, $args = array() )
{
    $fname = "Parser::replaceVariables";
    wfProfileIn( $fname );

    # Build the variable table lazily on first use
    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }

    # Character classes for the regexes below: everything allowed in a
    # title, and the same set with the brace characters taken out
    $titleChars = Title::legalChars();
    $nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );

    # This function is called recursively. To keep track of arguments we need a stack:
    array_push( $this->mArgStack, $args );

    # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array.
    # The wf*Substitution callbacks below are plain functions defined
    # elsewhere; presumably they reach this parser through $wgCurParser.
    $GLOBALS['wgCurParser'] =& $this;

    if ( $this->mOutputType == OT_HTML ) {
        # Variable substitution
        $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );

        # Argument substitution
        $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
    }

    # Template substitution
    $regex = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";
    $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
1257
# Regex callback for a plain {{NAME}} construct.
#
# $matches[0] is the whole construct including braces, $matches[1] the text
# between them. When that text is a key of $this->mVariables the stored
# value is returned and the output is flagged as containing old-style
# magic; otherwise the construct is returned untouched.
function variableSubstitution( $matches )
{
    $name = $matches[1];

    if ( !array_key_exists( $name, $this->mVariables ) ) {
        # Not a known variable, leave the source text as it was
        return $matches[0];
    }

    $this->mOutput->mContainsOldMagic = true;
    return $this->mVariables[$name];
}
1268
# Regex callback for a {{...}} construct (template pass of replaceVariables()).
#
# $matches[0]  the whole match
# $matches[1]  optional newline character directly before the braces
# $matches[2]  the bit before the first "|"
# $matches[3]  empty string, or "|" followed by the "|"-separated arguments
#
# The handlers below are tried in order and the first one that sets $found
# wins: leftover triple braces, SUBST, MSG/MSGNW/INT, NS, LOCALURL/LOCALURLE,
# internal variables, and finally loading a page from the database. For
# HTML output the resulting text is then either escaped (MSGNW) or run
# through stripParse() with the cleaned-up argument list.
#
# Returns the replacement text, or $matches[0] unchanged if nothing applied.
function braceSubstitution( $matches )
{
    global $wgLinkCache, $wgLang;

    $found = false;
    $nowiki = false;
    $noparse = false;

    $title = NULL;

    # $newline is an optional newline character before the braces
    # $part1 is the bit before the first |, and must contain only title characters
    # $args is a list of arguments, starting from index 0, not including $part1
    $newline = $matches[1];
    $part1 = $matches[2];
    # If the third subpattern matched anything, it will start with |
    if ( $matches[3] !== "" ) {
        $args = explode( "|", substr( $matches[3], 1 ) );
    } else {
        $args = array();
    }
    $argc = count( $args );

    # {{{}}}
    # A triple brace inside the match is returned verbatim and never parsed
    if ( strpos( $matches[0], "{{{" ) !== false ) {
        $text = $matches[0];
        $found = true;
        $noparse = true;
    }

    # SUBST
    if ( !$found ) {
        $mwSubst =& MagicWord::get( MAG_SUBST );
        if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
            if ( $this->mOutputType != OT_WIKI ) {
                # Invalid SUBST not replaced at PST time
                # Return without further processing
                $text = $matches[0];
                $found = true;
                $noparse = true;
            }
        } elseif ( $this->mOutputType == OT_WIKI ) {
            # SUBST not found in PST pass, do nothing
            $text = $matches[0];
            $found = true;
        }
    }

    # MSG, MSGNW and INT
    if ( !$found ) {
        # Check for MSGNW:
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $nowiki = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg =& MagicWord::get( MAG_MSG );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
            # Internal messages count against the inclusion limit as well
            if ( $this->incrementIncludeCount( "int:$part1" ) ) {
                $text = wfMsgReal( $part1, $args, true );
                $found = true;
            }
        }
    }

    # NS
    if ( !$found ) {
        # Check for NS: (namespace expansion)
        # Taken by reference like every other MagicWord::get() call here
        $mwNs =& MagicWord::get( MAG_NS );
        if ( $mwNs->matchStartAndRemove( $part1 ) ) {
            if ( intval( $part1 ) ) {
                # Non-zero numeric namespace index
                $text = $wgLang->getNsText( intval( $part1 ) );
                $found = true;
            } else {
                # Otherwise try it as a canonical namespace name
                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                if ( !is_null( $index ) ) {
                    $text = $wgLang->getNsText( $index );
                    $found = true;
                }
            }
        }
    }

    # LOCALURL and LOCALURLE
    if ( !$found ) {
        $mwLocal =& MagicWord::get( MAG_LOCALURL );
        $mwLocalE =& MagicWord::get( MAG_LOCALURLE );

        # Pick the Title method to call, or '' if neither keyword matched
        if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
            $func = 'getLocalURL';
        } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
            $func = 'escapeLocalURL';
        } else {
            $func = '';
        }

        if ( $func !== '' ) {
            $title = Title::newFromText( $part1 );
            if ( !is_null( $title ) ) {
                # The first argument, if any, is handed to the URL method
                if ( $argc > 0 ) {
                    $text = $title->$func( $args[0] );
                } else {
                    $text = $title->$func();
                }
                $found = true;
            }
        }
    }

    # Internal variables
    if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
        $text = $this->mVariables[$part1];
        $found = true;
        $this->mOutput->mContainsOldMagic = true;
    }

    # Load from database
    if ( !$found ) {
        $title = Title::newFromText( $part1, NS_TEMPLATE );
        if ( !is_null( $title ) && !$title->isExternal() ) {
            # Check for excessive inclusion
            $dbk = $title->getPrefixedDBkey();
            if ( $this->incrementIncludeCount( $dbk ) ) {
                $article = new Article( $title );
                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                if ( $articleContent !== false ) {
                    $found = true;
                    $text = $articleContent;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( $this->mOutputType == OT_HTML && !$found ) {
                $text = "[[" . $title->getPrefixedText() . "]]";
                $found = true;
            }
        }
    }

    # Recursive parsing, escaping and link table handling
    # Only for HTML output
    if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
        $text = wfEscapeWikiText( $text );
    } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse ) {
        # Clean up argument array: positional arguments are numbered from 1,
        # "name=value" arguments are stored under their trimmed name.
        # trim() always yields a string, so no false-checks are needed.
        $assocArgs = array();
        $index = 1;
        foreach( $args as $arg ) {
            $eqpos = strpos( $arg, "=" );
            if ( $eqpos === false ) {
                $assocArgs[$index++] = $arg;
            } else {
                $name = trim( substr( $arg, 0, $eqpos ) );
                $value = trim( substr( $arg, $eqpos+1 ) );
                $assocArgs[$name] = $value;
            }
        }

        # Do not enter included links in link table
        if ( !is_null( $title ) ) {
            $wgLinkCache->suspend();
        }

        # Run full parser on the included text
        $text = $this->stripParse( $text, $newline, $assocArgs );

        # Resume the link cache and register the inclusion as a link
        if ( !is_null( $title ) ) {
            $wgLinkCache->resume();
            $wgLinkCache->addLinkObj( $title );
        }
    }

    if ( !$found ) {
        return $matches[0];
    } else {
        return $text;
    }
}
1467
# Triple brace replacement -- used for template arguments.
#
# Regex callback: $matches[1] is the optional newline before the braces,
# $matches[2] the argument name. The name is trimmed and looked up in the
# argument set on top of $this->mArgStack; a known argument is returned
# after being run through stripParse(), an unknown one leaves the source
# text ($matches[0]) untouched.
function argSubstitution( $matches )
{
    $callerArgs = end( $this->mArgStack );
    $argName = trim( $matches[2] );

    if ( !array_key_exists( $argName, $callerArgs ) ) {
        return $matches[0];
    }

    return $this->stripParse( $callerArgs[$argName], $matches[1], array() );
}
1482
# Count one more inclusion of $dbk and report whether it is still allowed.
#
# The per-key counter lives in $this->mIncludeCount and is bumped on every
# call, including calls that end up being refused. Returns true while the
# new count is at most MAX_INCLUDE_REPEAT, false afterwards.
function incrementIncludeCount( $dbk )
{
    if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
        $this->mIncludeCount[$dbk]++;
    } else {
        # First inclusion of this key
        $this->mIncludeCount[$dbk] = 1;
    }

    return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1495
1496
# Cleans up HTML, removes dangerous tags and attributes.
#
# $text  text that may contain user-supplied HTML
#
# HTML comments are removed first. The text is then split on "<" and each
# piece is checked: a piece that starts with a permitted tag is kept (with
# its attributes passed through fixTagAttributes()), anything else has its
# "<" and ">" turned into entities. When $wgUserHtml is off the permitted
# set is empty, so every tag is escaped. Unless $wgUseTidy is set, tag
# nesting is also tracked so that mismatched tags are escaped and tags left
# open at the end are closed.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
    global $wgUseTidy, $wgUserHtml;
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );

    if( $wgUserHtml ) {
        $htmlpairs = array( # Tags that must be closed
            "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
            "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
            "strike", "strong", "tt", "var", "div", "center",
            "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
            "ruby", "rt" , "rb" , "rp", "p"
        );
        $htmlsingle = array(
            "br", "hr", "li", "dt", "dd"
        );
        $htmlnest = array( # Tags that may be opened again while already open
            "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
            "dl", "font", "big", "small", "sub", "sup"
        );
        $tabletags = array( # Can only appear inside table
            "td", "th", "tr"
        );
    } else {
        $htmlpairs = array();
        $htmlsingle = array();
        $htmlnest = array();
        $tabletags = array();
    }

    # Table cell/row tags are treated like the other unpaired tags
    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # NOTE(review): the result is not used anywhere in this function; the
    # call is kept in case getHTMLattrs() has side effects -- confirm.
    $htmlattrs = $this->getHTMLattrs () ;

    # Remove HTML comments. A comment alone on its line is removed together
    # with the newline in front of it. The replacement is empty (the old
    # "$2" referred to a group that does not exist, which also expands to
    # nothing).
    $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

    # Pieces: optional slash, tag name, attributes, closing bracket, and the
    # text that follows up to the next "<"
    $tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

    $bits = explode( "<", $text );
    $text = array_shift( $bits );
    if(!$wgUseTidy) {
        $tagstack = array(); $tablestack = array();
        foreach ( $bits as $x ) {
            if ( !preg_match( $tagPattern, $x, $regs ) ) {
                # Does not look like a tag at all: escape it
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                continue;
            }
            list( , $slash, $t, $params, $brace, $rest ) = $regs;
            $t = strtolower( $t );

            $badtag = 0 ;
            if ( in_array( $t, $htmlelements ) ) {
                # Check our stack
                if ( $slash ) {
                    # Closing a tag...
                    if ( ! in_array( $t, $htmlsingle ) &&
                    ( $ot = @array_pop( $tagstack ) ) != $t ) {
                        # Does not close the innermost open tag: undo the pop
                        @array_push( $tagstack, $ot );
                        $badtag = 1;
                    } else {
                        if ( $t == "table" ) {
                            # Back to the tags that were open outside the table
                            $tagstack = array_pop( $tablestack );
                        }
                        $newparams = "";
                    }
                } else {
                    # Keep track for later
                    if ( in_array( $t, $tabletags ) &&
                    ! in_array( "table", $tagstack ) ) {
                        $badtag = 1;
                    } else if ( in_array( $t, $tagstack ) &&
                    ! in_array ( $t , $htmlnest ) ) {
                        $badtag = 1 ;
                    } else if ( ! in_array( $t, $htmlsingle ) ) {
                        if ( $t == "table" ) {
                            # A table starts a fresh stack; the outer one is saved
                            array_push( $tablestack, $tagstack );
                            $tagstack = array();
                        }
                        array_push( $tagstack, $t );
                    }
                    # Strip non-approved attributes from the tag
                    $newparams = $this->fixTagAttributes($params);
                }
                if ( ! $badtag ) {
                    $rest = str_replace( ">", "&gt;", $rest );
                    $text .= "<$slash$t $newparams$brace$rest";
                    continue;
                }
            }
            # Unknown or misplaced tag: escape it
            $text .= "&lt;" . str_replace( ">", "&gt;", $x);
        }
        # Close off any remaining tags
        while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
            $text .= "</$t>\n";
            if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
        }
    } else {
        # this might be possible using tidy itself
        foreach ( $bits as $x ) {
            if ( preg_match( $tagPattern, $x, $regs ) ) {
                list( , $slash, $t, $params, $brace, $rest ) = $regs;
                $t = strtolower( $t );
                if ( in_array( $t, $htmlelements ) ) {
                    $newparams = $this->fixTagAttributes($params);
                    $rest = str_replace( ">", "&gt;", $rest );
                    $text .= "<$slash$t $newparams$brace$rest";
                    continue;
                }
            }
            # Not a permitted tag: escape it
            $text .= "&lt;" . str_replace( ">", "&gt;", $x);
        }
    }
    wfProfileOut( $fname );
    return $text;
}
1613
1614
1615 /*
1616  *
1617  * This function accomplishes several tasks:
1618  * 1) Auto-number headings if that option is enabled
1619  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1620  * 3) Add a Table of contents on the top for users who have enabled the option
1621  * 4) Auto-anchor headings
1622  *
1623  * It loops through all headlines, collects the necessary data, then splits up the
1624  * string and re-inserts the newly formatted headlines.
1625  *
1626  */
1627
1628         /* private */ function formatHeadings( $text, $isMain=true )
1629         {
1630                 global $wgInputEncoding;
1631
1632                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1633                 $doShowToc = $this->mOptions->getShowToc();
1634                 if( !$this->mTitle->userCanEdit() ) {
1635                         $showEditLink = 0;
1636                         $rightClickHack = 0;
1637                 } else {
1638                         $showEditLink = $this->mOptions->getEditSection();
1639                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1640                 }
1641
1642                 # Inhibit editsection links if requested in the page
1643                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1644                 if( $esw->matchAndRemove( $text ) ) {
1645                         $showEditLink = 0;
1646                 }
1647                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1648                 # do not add TOC
1649                 $mw =& MagicWord::get( MAG_NOTOC );
1650                 if( $mw->matchAndRemove( $text ) ) {
1651                         $doShowToc = 0;
1652                 }
1653
1654                 # never add the TOC to the Main Page. This is an entry page that should not
1655                 # be more than 1-2 screens large anyway
1656                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1657                         $doShowToc = 0;
1658                 }
1659
1660                 # Get all headlines for numbering them and adding funky stuff like [edit]
1661                 # links - this is for later, but we need the number of headlines right now
1662                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1663
1664                 # if there are fewer than 4 headlines in the article, do not show TOC
1665                 if( $numMatches < 4 ) {
1666                         $doShowToc = 0;
1667                 }
1668
1669                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1670                 # override above conditions and always show TOC
1671                 $mw =& MagicWord::get( MAG_FORCETOC );
1672                 if ($mw->matchAndRemove( $text ) ) {
1673                         $doShowToc = 1;
1674                 }
1675
1676
1677                 # We need this to perform operations on the HTML
1678                 $sk =& $this->mOptions->getSkin();
1679
1680                 # headline counter
1681                 $headlineCount = 0;
1682
1683                 # Ugh .. the TOC should have neat indentation levels which can be
1684                 # passed to the skin functions. These are determined here
1685                 $toclevel = 0;
1686                 $toc = "";
1687                 $full = "";
1688                 $head = array();
1689                 $sublevelCount = array();
1690                 $level = 0;
1691                 $prevlevel = 0;
1692                 foreach( $matches[3] as $headline ) {
1693                         $numbering = "";
1694                         if( $level ) {
1695                                 $prevlevel = $level;
1696                         }
1697                         $level = $matches[1][$headlineCount];
1698                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1699                                 # reset when we enter a new level
1700                                 $sublevelCount[$level] = 0;
1701                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1702                                 $toclevel += $level - $prevlevel;
1703                         }
1704                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1705                                 # reset when we step back a level
1706                                 $sublevelCount[$level+1]=0;
1707                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1708                                 $toclevel -= $prevlevel - $level;
1709                         }
1710                         # count number of headlines for each level
1711                         @$sublevelCount[$level]++;
1712                         if( $doNumberHeadings || $doShowToc ) {
1713                                 $dot = 0;
1714                                 for( $i = 1; $i <= $level; $i++ ) {
1715                                         if( !empty( $sublevelCount[$i] ) ) {
1716                                                 if( $dot ) {
1717                                                         $numbering .= ".";
1718                                                 }
1719                                                 $numbering .= $sublevelCount[$i];
1720                                                 $dot = 1;
1721                                         }
1722                                 }
1723                         }
1724
1725                         # The canonized header is a version of the header text safe to use for links
1726                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1727                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1728                         $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState );
1729
1730                         # strip out HTML
1731                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1732                         $tocline = trim( $canonized_headline );
1733                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
1734                         # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
1735                         $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
1736                         $refer[$headlineCount] = $canonized_headline;
1737
1738                         # count how many in assoc. array so we can track dupes in anchors
1739                         @$refers[$canonized_headline]++;
1740                         $refcount[$headlineCount]=$refers[$canonized_headline];
1741
1742                         # Prepend the number to the heading text
1743
1744                         if( $doNumberHeadings || $doShowToc ) {
1745                                 $tocline = $numbering . " " . $tocline;
1746
1747                                 # Don't number the heading if it is the only one (looks silly)
1748                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1749                                         # the two are different if the line contains a link
1750                                         $headline=$numbering . " " . $headline;
1751                                 }
1752                         }
1753
1754                         # Create the anchor for linking from the TOC to the section
1755                         $anchor = $canonized_headline;
1756                         if($refcount[$headlineCount] > 1 ) {
1757                                 $anchor .= "_" . $refcount[$headlineCount];
1758                         }
1759                         if( $doShowToc ) {
1760                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1761                         }
1762                         if( $showEditLink ) {
1763                                 if ( empty( $head[$headlineCount] ) ) {
1764                                         $head[$headlineCount] = "";
1765                                 }
1766                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1767                         }
1768
1769                         # Add the edit section span
1770                         if( $rightClickHack ) {
1771                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1772                         }
1773
1774                         # give headline the correct <h#> tag
1775                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1776
1777                         $headlineCount++;
1778                 }
1779
1780                 if( $doShowToc ) {
1781                         $toclines = $headlineCount;
1782                         $toc .= $sk->tocUnindent( $toclevel );
1783                         $toc = $sk->tocTable( $toc );
1784                 }
1785
1786                 # split up and insert constructed headlines
1787
1788                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1789                 $i = 0;
1790
1791                 foreach( $blocks as $block ) {
1792                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1793                             # This is the [edit] link that appears for the top block of text when
1794                                 # section editing is enabled
1795
1796                                 # Disabled because it broke block formatting
1797                                 # For example, a bullet point in the top line
1798                                 # $full .= $sk->editSectionLink(0);
1799                         }
1800                         $full .= $block;
1801                         if( $doShowToc && !$i && $isMain) {
1802                         # Top anchor now in skin
1803                                 $full = $full.$toc;
1804                         }
1805
1806                         if( !empty( $head[$i] ) ) {
1807                                 $full .= $head[$i];
1808                         }
1809                         $i++;
1810                 }
1811
1812                 return $full;
1813         }
1814
# Turn every "ISBN <number>" reference in $text into a link to the
# "Booksources" special page.
#
# The text is cut at each literal "ISBN " marker. For every piece that follows
# a marker, leading blanks are skipped and the longest run of characters from
# [0-9A-Z-] is taken as the ISBN. Hyphens are removed from the number passed in
# the "isbn=" query string but kept in the visible link text.
# A marker that is not followed by a usable number is written back unchanged,
# including any hyphens that were scanned while looking for one.
#
# @param  string $text  text to scan
# @return string        text with ISBN references replaced by <a> elements
/* private */ function magicISBN( $text )
{
        # The marker is a plain string, so no regular expression is needed.
        $pieces = explode( "ISBN ", $text );
        if ( count( $pieces ) < 2 ) return $text;

        # Everything before the first marker is passed through untouched.
        $text = array_shift( $pieces );
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        foreach ( $pieces as $rest ) {
                # Blanks between the marker and the number. strspn() copes with
                # an empty piece, unlike indexing the first character.
                $skip = strspn( $rest, " " );
                $blank = str_repeat( " ", $skip );
                $rest = (string)substr( $rest, $skip );

                # Longest prefix made only of characters allowed in an ISBN
                $len = strspn( $rest, $valid );
                $isbn = (string)substr( $rest, 0, $len );
                $rest = (string)substr( $rest, $len );

                $num = str_replace( "-", "", $isbn );

                if ( "" === $num ) {
                        # Not a number after all (nothing, or hyphens only):
                        # restore exactly what was consumed.
                        $text .= "ISBN $blank$isbn$rest";
                } else {
                        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
                        $text .= "<a href=\"" .
                                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                                "\" class=\"internal\">ISBN $isbn</a>";
                        $text .= $rest;
                }
        }
        return $text;
}
# Turn every "RFC <digits>" reference in $text into an external link.
#
# The text is cut at each literal "RFC " marker. For every piece that follows
# a marker, leading blanks are skipped and the leading run of decimal digits is
# taken as the RFC number. The link target is the "rfcurl" message with "$1"
# replaced by that number; link attributes come from the skin held in the
# parser options. A marker that is not followed by digits is written back
# unchanged.
#
# @param  string $text  text to scan
# @return string        text with RFC references replaced by <a> elements
/* private */ function magicRFC( $text )
{
        # The marker is a plain string, so no regular expression is needed.
        $pieces = explode( "RFC ", $text );
        if ( count( $pieces ) < 2 ) return $text;

        # Everything before the first marker is passed through untouched.
        $text = array_shift( $pieces );

        foreach ( $pieces as $rest ) {
                # Blanks between the marker and the number. strspn() copes with
                # an empty piece, unlike indexing the first character.
                $skip = strspn( $rest, " " );
                $blank = str_repeat( " ", $skip );
                $rest = (string)substr( $rest, $skip );

                # Leading run of digits is the RFC number
                $len = strspn( $rest, "0123456789" );
                $rfc = (string)substr( $rest, 0, $len );
                $rest = (string)substr( $rest, $len );

                if ( "" === $rfc ) {
                        # No number follows: restore the original text
                        $text .= "RFC $blank$rest";
                } else {
                        $url = wfmsg( "rfcurl" );
                        $url = str_replace( "$1", $rfc, $url);
                        $sk =& $this->mOptions->getSkin();
                        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
                        $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$rest}";
                }
        }
        return $text;
}
1881
# Save-time entry point: returns altered wiki markup rather than HTML.
#
# Records the title and options on the parser, switches the output type to
# OT_WIKI, optionally clears parser state, normalises CRLF line endings to LF,
# and then runs strip() -> pstPass2() -> unstrip() -> unstripNoWiki() over the
# text with a strip state private to this call.
#
# @param string  $text        wikitext as submitted
# @param Title   $title       stored by reference in mTitle
# @param User    $user        handed on to pstPass2()
# @param object  $options     stored in mOptions
# @param bool    $clearState  call clearState() first (default true)
# @return string              transformed wikitext
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOutputType = OT_WIKI;
        $this->mTitle =& $title;
        $this->mOptions = $options;

        if ( $clearState ) {
                $this->clearState();
        }

        # Windows line endings become plain newlines
        $text = str_replace( "\r\n", "\n", $text );

        # NOTE: a regex pass normalising <br> variants used to follow here
        # and is currently disabled.

        # The strip state starts out as false; presumably strip() fills it in
        # for the two unstrip calls below -- confirm against strip().
        $state = false;
        $text = $this->strip( $text, $state, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $state );
        return $this->unstripNoWiki( $text, $state );
}
1911
# Second pass of the pre-save transform, run on text that has already been
# through strip().
#
# In order, it:
#  - runs replaceVariables() (with mOutputType == OT_WIKI this only handles
#    {{subst:xxx}} type tags),
#  - expands the signature shortcuts ~~~, ~~~~ and ~~~~~,
#  - completes "pipe trick" links such as [[page (context)|]] and [[|page]],
#  - trims trailing whitespace and removes the MAG_END magic word.
#
# @param  string $text  wikitext
# @param  User   $user  supplies the name and "nickname" option for signatures
# @return string        transformed wikitext
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }    # no nickname set: sign with the user name

        # Temporarily switch the process time zone so that date() reports the
        # wiki's local time and zone abbreviation.
        if(isset($wgLocaltimezone)) {
                $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
        }
        /* Note: this is an ugly timezone hack for the European wikis */
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

        # The tilde runs are literal strings and the replacements contain
        # user-controlled text, so use plain string replacement: with
        # preg_replace() a nickname containing "\1" or "$1" would be treated
        # as a backreference. Longest run first, so that ~~~~~ is not eaten
        # by the shorter forms.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # If the current title has the form "name (context)", remember the
        # context so that [[|page]] can be expanded to [[page (context)|page]].
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }

        # Most specific pattern first
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1982
# Prime the member variables that parse() would normally set up, so that
# outside code can call individual parser methods directly.
#
# @param Title  $title       stored by reference in mTitle
# @param object $options     stored in mOptions
# @param int    $outputType  stored in mOutputType
# @param bool   $clearState  call clearState() afterwards (default true)
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1994
# Expand variables in a message text.
#
# Sets the parser up with the global $wgTitle, the given options and output
# type OT_MSG, clears its state and runs replaceVariables() over $text.
# A static flag makes nested calls return their input unchanged, which
# guards against infinite recursion.
#
# @param  string $text     message text
# @param  object $options  stored in mOptions
# @return string           text after variable replacement, or the unchanged
#                          input when called re-entrantly
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $busy = false;

        if ( !$busy ) {
                $busy = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $busy = false;
        }
        return $text;
}
2014 }
2015
# Holds the result of a parse: the output text together with the language
# links, category links and "old magic" flag collected alongside it, plus a
# cache timestamp.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # @param string $text              output text
        # @param array  $languageLinks     language links
        # @param array  $categoryLinks     category links
        # @param bool   $containsOldMagic  "old magic" flag
        # The cache time always starts out as an empty string.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = "";
        }

        # Plain accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        # (presumably the previous value -- confirm against wfSetVar).
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Fold the bookkeeping of another ParserOutput into this one.
        # $other's language links and category links are appended to this
        # object's corresponding lists, and the "old magic" flag becomes true
        # if it is set on either object. Text and cache time are not touched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links must come from $other's category list, not
                # from this object's own language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2049
# Bundle of settings that influence a parse. The values are copied from site
# globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Plain accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        # Returned by reference: the skin is stored by reference and callers
        # bind the result with =&, which only yields a real reference when the
        # function itself is declared with &.
        function &getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators; each returns whatever wfSetVar() / wfSetRef() returns
        # (presumably the previous value -- confirm against those helpers).
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Build a new ParserOptions initialised from the given user.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill in every option: site-wide switches come from the $wg* globals,
        # per-user ones from the user's skin and preferences. When $userInput
        # is empty, a fresh User object marked as loaded is used instead.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2122
2123 # Regex callbacks, used in Parser::replaceVariables
# Callback shim: hands the match array to braceSubstitution() on the parser
# currently stored in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2129
# Callback shim: hands the match array to argSubstitution() on the parser
# currently stored in the global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2135
# Callback shim: hands the match array to variableSubstitution() on the parser
# currently stored in the global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2141
2142 ?>