includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         include_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  40 define( "MAX_INCLUDE_REPEAT", 5 );
  41
  42 # Allowed values for $mOutputType
  43 define( "OT_HTML", 1 );
  44 define( "OT_WIKI", 2 );
  45 define( "OT_MSG", 3 );
  46
  47 # string parameter for extractTags which will cause it
  48 # to strip HTML comments in addition to regular
  49 # <XML>-style tags. This should not be anything we
  50 # may want to use in wikisyntax
  51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
  52
  53 # prefix for escaping, used in two functions at least
  54 define( "UNIQ_PREFIX", "NaodW29");
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  65         function Parser()
  66         {
  67                 $this->clearState();
  68         }
  69
  70         function clearState()
  71         {
  72                 $this->mOutput = new ParserOutput;
  73                 $this->mAutonumber = 0;
  74                 $this->mLastSection = "";
  75                 $this->mDTopen = false;
  76                 $this->mVariables = false;
  77                 $this->mIncludeCount = array();
  78                 $this->mStripState = array();
  79                 $this->mArgStack = array();
  80         }
  81
  82         # First pass--just handle <nowiki> sections, pass the rest off
  83         # to internalParse() which does all the real work.
  84         #
  85         # Returns a ParserOutput
  86         #
  87         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  88         {
  89                 global $wgUseTidy;
  90                 $fname = "Parser::parse";
  91                 wfProfileIn( $fname );
  92
  93                 if ( $clearState ) {
  94                         $this->clearState();
  95                 }
  96
  97                 $this->mOptions = $options;
  98                 $this->mTitle =& $title;
  99                 $this->mOutputType = OT_HTML;
 100
 101                 $stripState = NULL;
 102                 $text = $this->strip( $text, $this->mStripState );
 103                 $text = $this->internalParse( $text, $linestart );
 104                 $text = $this->unstrip( $text, $this->mStripState );
 105                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 106                 if(!$wgUseTidy) {
 107                         $fixtags = array(
 108                                 "/<hr *>/i" => '<hr/>',
 109                                 "/<br *>/i" => '<br/>',
 110                                 "/<center *>/i"=>'<div class="center">',
 111                                 "/<\\/center *>/i" => '</div>',
 112                                 # Clean up spare ampersands; note that we probably ought to be
 113                                 # more careful about named entities.
 114                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 115                         );
 116                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 117                 } else {
 118                         $fixtags = array(
 119                                 "/<center *>/i"=>'<div class="center">',
 120                                 "/<\\/center *>/i" => '</div>'
 121                         );
 122                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 123                 }
 124                 # only once and last
 125                 $text = $this->doBlockLevels( $text, $linestart );
 126                 if($wgUseTidy) {
 127                         $text = $this->tidy($text);
 128                 }
 129                 $this->mOutput->setText( $text );
 130                 wfProfileOut( $fname );
 131                 return $this->mOutput;
 132         }
 133
 134         /* static */ function getRandomString()
 135         {
 136                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 137         }
 138
 139         # Replaces all occurrences of <$tag>content</$tag> in the text
 140         # with a random marker and returns the new text. the output parameter
 141         # $content will be an associative array filled with data on the form
 142         # $unique_marker => content.
 143
 144         # If $content is already set, the additional entries will be appended
 145
 146         # If $tag is set to STRIP_COMMENTS, the function will extract
 147         # <!-- HTML comments -->
 148
 149         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 150                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 151                 if ( !$content ) {
 152                         $content = array( );
 153                 }
 154                 $n = 1;
 155                 $stripped = "";
 156
 157                 while ( "" != $text ) {
 158                         if($tag==STRIP_COMMENTS) {
 159                                 $p = preg_split( "/<!--/i", $text, 2 );
 160                         } else {
 161                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 162                         }
 163                         $stripped .= $p[0];
 164                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 165                                 $text = "";
 166                         } else {
 167                                 if($tag==STRIP_COMMENTS) {
 168                                         $q = preg_split( "/-->/i", $p[1], 2 );
 169                                 } else {
 170                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 171                                 }
 172                                 $marker = $rnd . sprintf("%08X", $n++);
 173                                 $content[$marker] = $q[0];
 174                                 $stripped .= $marker;
 175                                 $text = $q[1];
 176                         }
 177                 }
 178                 return $stripped;
 179         }
 180
 181         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 182         # If $render is set, performs necessary rendering operations on plugins
 183         # Returns the text, and fills an array with data needed in unstrip()
 184         # If the $state is already a valid strip state, it adds to the state
 185
 186         # When $stripcomments is set, HTML comments <!-- like this -->
 187         # will be stripped in addition to other tags. This is important
 188         # for section editing, where these comments cause confusion when
 189         # counting the sections in the wikisource
 190         function strip( $text, &$state, $stripcomments = false )
 191         {
 192                 $render = ($this->mOutputType == OT_HTML);
 193                 $nowiki_content = array();
 194                 $hiero_content = array();
 195                 $math_content = array();
 196                 $pre_content = array();
 197                 $comment_content = array();
 198
 199                 # Replace any instances of the placeholders
 200                 $uniq_prefix = UNIQ_PREFIX;
 201                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 202
 203                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 204                 foreach( $nowiki_content as $marker => $content ){
 205                         if( $render ){
 206                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 207                         } else {
 208                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 209                         }
 210                 }
 211
 212                 $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 213                 foreach( $hiero_content as $marker => $content ){
 214                         if( $render && $GLOBALS['wgUseWikiHiero']){
 215                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 216                         } else {
 217                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
 218                         }
 219                 }
 220
 221                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 222                 foreach( $math_content as $marker => $content ){
 223                         if( $render && $this->mOptions->getUseTeX() ){
 224                                 $math_content[$marker] = renderMath( $content );
 225                         } else {
 226                                 $math_content[$marker] = "<math>$content</math>";
 227                         }
 228                 }
 229
 230                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 231                 foreach( $pre_content as $marker => $content ){
 232                         if( $render ){
 233                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 234                         } else {
 235                                 $pre_content[$marker] = "<pre>$content</pre>";
 236                         }
 237                 }
 238                 if($stripcomments) {
 239                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 240                         foreach( $comment_content as $marker => $content ){
 241                                 $comment_content[$marker] = "<!--$content-->";
 242                         }
 243                 }
 244
 245                 # Merge state with the pre-existing state, if there is one
 246                 if ( $state ) {
 247                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 248                         $state['hiero'] = $state['hiero'] + $hiero_content;
 249                         $state['math'] = $state['math'] + $math_content;
 250                         $state['pre'] = $state['pre'] + $pre_content;
 251                         $state['comment'] = $state['comment'] + $comment_content;
 252                 } else {
 253                         $state = array(
 254                           'nowiki' => $nowiki_content,
 255                           'hiero' => $hiero_content,
 256                           'math' => $math_content,
 257                           'pre' => $pre_content,
 258                           'comment' => $comment_content
 259                         );
 260                 }
 261                 return $text;
 262         }
 263
 264         function unstrip( $text, &$state )
 265         {
 266                 # Must expand in reverse order, otherwise nested tags will be corrupted
 267                 $contentDict = end( $state );
 268                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 269                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 270                                 $text = str_replace( key( $contentDict ), $content, $text );
 271                         }
 272                 }
 273
 274                 return $text;
 275         }
 276
 277         # Add an item to the strip state
 278         # Returns the unique tag which must be inserted into the stripped text
 279         # The tag will be replaced with the original text in unstrip()
 280
 281         function insertStripItem( $text, &$state )
 282         {
 283                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 284                 if ( !$state ) {
 285                         $state = array(
 286                           'nowiki' => array(),
 287                           'hiero' => array(),
 288                           'math' => array(),
 289                           'pre' => array()
 290                         );
 291                 }
 292                 $state['item'][$rnd] = $text;
 293                 return $rnd;
 294         }
 295
 296         # This method generates the list of subcategories and pages for a category
 297         function categoryMagic ()
 298         {
 299                 global $wgLang , $wgUser ;
 300                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 301
 302                 $cns = Namespace::getCategory() ;
 303                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 304
 305                 $r = "<br style=\"clear:both;\"/>\n";
 306
 307
 308                 $sk =& $wgUser->getSkin() ;
 309
 310                 $articles = array() ;
 311                 $children = array() ;
 312                 $data = array () ;
 313                 $id = $this->mTitle->getArticleID() ;
 314
 315                 # For existing categories
 316                 if( $id ) {
 317                         $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 318                         $res = wfQuery ( $sql, DB_READ ) ;
 319                         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 320                 } else {
 321                         # For non-existing categories
 322                         $t = wfStrencode( $this->mTitle->getPrefixedDBKey() );
 323                         $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ;
 324                         $res = wfQuery ( $sql, DB_READ ) ;
 325                         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 326                 }
 327
 328                 # For all pages that link to this category
 329                 foreach ( $data AS $x )
 330                 {
 331                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 332                         if ( $t != "" ) $t .= ":" ;
 333                         $t .= $x->cur_title ;
 334
 335                         if ( $x->cur_namespace == $cns ) {
 336                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 337                         } else {
 338                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 339                         }
 340                 }
 341                 wfFreeResult ( $res ) ;
 342
 343                 # Showing subcategories
 344                 if ( count ( $children ) > 0 )
 345                 {
 346                         asort ( $children ) ;
 347                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 348                         $r .= implode ( ", " , $children ) ;
 349                 }
 350
 351                 # Showing pages in this category
 352                 if ( count ( $articles ) > 0 )
 353                 {
 354                         $ti = $this->mTitle->getText() ;
 355                         asort ( $articles ) ;
 356                         $h =  wfMsg( "category_header", $ti );
 357                         $r .= "<h2>{$h}</h2>\n" ;
 358                         $r .= implode ( ", " , $articles ) ;
 359                 }
 360
 361
 362                 return $r ;
 363         }
 364
 365         function getHTMLattrs ()
 366         {
 367                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 368                                 "title", "align", "lang", "dir", "width", "height",
 369                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 370                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 371                                 /* FONT */ "type", "start", "value", "compact",
 372                                 /* For various lists, mostly deprecated but safe */
 373                                 "summary", "width", "border", "frame", "rules",
 374                                 "cellspacing", "cellpadding", "valign", "char",
 375                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 376                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 377                                 "id", "class", "name", "style" /* For CSS */
 378                                 );
 379                 return $htmlattrs ;
 380         }
 381
 382         function fixTagAttributes ( $t )
 383         {
 384                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 385                 $htmlattrs = $this->getHTMLattrs() ;
 386
 387                 # Strip non-approved attributes from the tag
 388                 $t = preg_replace(
 389                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 390                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 391                         $t);
 392                 # Strip javascript "expression" from stylesheets. Brute force approach:
 393                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 394
 395                 if( preg_match(
 396                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 397                         wfMungeToUtf8( $t ) ) )
 398                 {
 399                         $t="";
 400                 }
 401
 402                 return trim ( $t ) ;
 403         }
 404
 405         /* interface with html tidy, used if $wgUseTidy = true */
 406         function tidy ( $text ) {
 407                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 408                 $cleansource = '';
 409                 $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 410 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 411 '<head><title>test</title></head><body>'.$text.'</body></html>';
 412                 $descriptorspec = array(
 413                         0 => array("pipe", "r"),
 414                         1 => array("pipe", "w"),
 415                         2 => array("file", "/dev/null", "a")
 416                 );
 417                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 418                 if (is_resource($process)) {
 419                         fwrite($pipes[0], $text);
 420                         fclose($pipes[0]);
 421                         while (!feof($pipes[1])) {
 422                                 $cleansource .= fgets($pipes[1], 1024);
 423                         }
 424                         fclose($pipes[1]);
 425                         $return_value = proc_close($process);
 426                 }
 427                 if( $cleansource == '' && $text != '') {
 428                         return '<h2>'.wfMsg('seriousxhtmlerrors').'</h2><pre>'.htmlspecialchars($text).'</pre>';
 429                 } else {
 430                         return $cleansource;
 431                 }
 432         }
 433
 434         function doTableStuff ( $t )
 435         {
 436                 $t = explode ( "\n" , $t ) ;
 437                 $td = array () ; # Is currently a td tag open?
 438                         $ltd = array () ; # Was it TD or TH?
 439                         $tr = array () ; # Is currently a tr tag open?
 440                         $ltr = array () ; # tr attributes
 441                         foreach ( $t AS $k => $x )
 442                         {
 443                                 $x = trim ( $x ) ;
 444                                 $fc = substr ( $x , 0 , 1 ) ;
 445                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 446                                 {
 447                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 448                                         array_push ( $td , false ) ;
 449                                         array_push ( $ltd , "" ) ;
 450                                         array_push ( $tr , false ) ;
 451                                         array_push ( $ltr , "" ) ;
 452                                 }
 453                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 454                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 455                                 {
 456                                         $z = "</table>\n" ;
 457                                         $l = array_pop ( $ltd ) ;
 458                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 459                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 460                                         array_pop ( $ltr ) ;
 461                                         $t[$k] = $z ;
 462                                 }
 463                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 464                                                 {
 465                                                 $z = trim ( substr ( $x , 2 ) ) ;
 466                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 467                                                 }*/
 468                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 469                                 {
 470                                         $x = substr ( $x , 1 ) ;
 471                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 472                                         $z = "" ;
 473                                         $l = array_pop ( $ltd ) ;
 474                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 475                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 476                                         array_pop ( $ltr ) ;
 477                                         $t[$k] = $z ;
 478                                         array_push ( $tr , false ) ;
 479                                         array_push ( $td , false ) ;
 480                                         array_push ( $ltd , "" ) ;
 481                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 482                                 }
 483                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 484                                 {
 485                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 486                                         {
 487                                                 $fc = "+" ;
 488                                                 $x = substr ( $x , 1 ) ;
 489                                         }
 490                                         $after = substr ( $x , 1 ) ;
 491                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 492                                         $after = explode ( "||" , $after ) ;
 493                                         $t[$k] = "" ;
 494                                         foreach ( $after AS $theline )
 495                                         {
 496                                                 $z = "" ;
 497                                                 if ( $fc != "+" )
 498                                                 {
 499                                                         $tra = array_pop ( $ltr ) ;
 500                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 501                                                         array_push ( $tr , true ) ;
 502                                                         array_push ( $ltr , "" ) ;
 503                                                 }
 504
 505                                                 $l = array_pop ( $ltd ) ;
 506                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 507                                                 if ( $fc == "|" ) $l = "td" ;
 508                                                 else if ( $fc == "!" ) $l = "th" ;
 509                                                 else if ( $fc == "+" ) $l = "caption" ;
 510                                                 else $l = "" ;
 511                                                 array_push ( $ltd , $l ) ;
 512                                                 $y = explode ( "|" , $theline , 2 ) ;
 513                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 514                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 515                                                 $t[$k] .= $y ;
 516                                                 array_push ( $td , true ) ;
 517                                         }
 518                                 }
 519                         }
 520
 521                 # Closing open td, tr && table
 522                 while ( count ( $td ) > 0 )
 523                 {
 524                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 525                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 526                         $t[] = "</table>" ;
 527                 }
 528
 529                 $t = implode ( "\n" , $t ) ;
 530                 #               $t = $this->removeHTMLtags( $t );
 531                 return $t ;
 532         }
 533
 534         function internalParse( $text, $linestart, $args = array() )
 535         {
 536                 $fname = "Parser::internalParse";
 537                 wfProfileIn( $fname );
 538
 539                 $text = $this->removeHTMLtags( $text );
 540                 $text = $this->replaceVariables( $text, $args );
 541
 542                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 543
 544                 $text = $this->doHeadings( $text );
 545                 if($this->mOptions->getUseDynamicDates()) {
 546                         global $wgDateFormatter;
 547                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 548                 }
 549                 $text = $this->replaceExternalLinks( $text );
 550                 $text = $this->doTokenizedParser ( $text );
 551                 $text = $this->doTableStuff ( $text ) ;
 552                 $text = $this->formatHeadings( $text );
 553                 $sk =& $this->mOptions->getSkin();
 554                 $text = $sk->transformContent( $text );
 555
 556                 if ( !isset ( $this->categoryMagicDone ) ) {
 557                    $text .= $this->categoryMagic () ;
 558                    $this->categoryMagicDone = true ;
 559                    }
 560
 561                 wfProfileOut( $fname );
 562                 return $text;
 563         }
 564
 565
 566         /* private */ function doHeadings( $text )
 567         {
 568                 for ( $i = 6; $i >= 1; --$i ) {
 569                         $h = substr( "======", 0, $i );
 570                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 571                           "<h{$i}>\\1</h{$i}>\\2", $text );
 572                 }
 573                 return $text;
 574         }
 575
 576         # Note: we have to do external links before the internal ones,
 577         # and otherwise take great care in the order of things here, so
 578         # that we don't end up interpreting some URLs twice.
 579
 580         /* private */ function replaceExternalLinks( $text )
 581         {
 582                 $fname = "Parser::replaceExternalLinks";
 583                 wfProfileIn( $fname );
 584                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 585                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 586                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 587                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 588                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 589                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 590                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 591                 wfProfileOut( $fname );
 592                 return $text;
 593         }
 594
        # Convert external links for a single protocol into HTML.
        #
        # $s          - text to process
        # $protocol   - protocol name without the colon (e.g. "mailto"); it is
        #               interpolated unescaped into the regexes below
        # $autonumber - when true, bracketed links without a description are
        #               labelled "[n]" from $this->mAutonumber, and bare image
        #               URLs may be turned into images if the parser options
        #               allow external images
        #
        # Two passes are made: first over bare URLs (not preceded by "["),
        # then over bracketed links of the form [url] and [url description].
        # Returns the converted text.
        /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
        {
                # Placeholder written instead of the protocol into the HTML generated
                # by the first pass; it is swapped back by the str_replace() below.
                # NOTE(review): presumably this keeps already generated links from
                # being matched again by the bare-URL pattern - confirm.
                $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
                # Character-class body: characters accepted inside a URL
                $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

                # This is the list of separators that should be ignored if they
                # are the last character of a URL but that should be included
                # if they occur within the URL, e.g. "go to www.foo.com, where .."
                # in this case, the last comma should not become part of the URL,
                # but in "www.foo.com/123,2342,32.htm" it should.
                $sep = ",;\.:";
                # Character-class body: characters accepted in the file name part
                # of an image URL
                $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
                # File extensions (matched case-insensitively) treated as images
                $images = "gif|png|jpg|jpeg";

                # PLEASE NOTE: The curly braces { } are not part of the regex,
                # they are interpreted as part of the string (used to tell PHP
                # that the content of the string should be inserted there).
                #
                # $e1: bare image URL. Groups: 1 = preceding character (anything
                # but "["), 2 = "protocol:", 3 = path up to the last slash,
                # 4 = file name, 5 = extension, 6 = the character after the URL.
                $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
                  "((?i){$images})([^{$uc}]|$)/";

                # $e2: any bare URL. Group 3 is the URL body; a separator is only
                # consumed when a URL character follows it, so a trailing
                # separator ends up in group 5 together with the terminator.
                $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
                $sk =& $this->mOptions->getSkin();

                if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
                        $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
                          "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
                }
                # The skin and escaping helpers are called once, on strings that
                # still contain the literal back-references (\3 etc.); preg_replace
                # then substitutes the matched URL into the resulting template.
                $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
                  $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
                  "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
                  "</a>\\5", $s );
                $s = str_replace( $unique, $protocol, $s );

                # Second pass: bracketed links. Split on "[protocol:"; the first
                # chunk is the text before the first bracketed link. A space is
                # prepended before splitting and removed again afterwards, so the
                # first chunk is never empty when substr() is applied to it.
                $a = explode( "[{$protocol}:", " " . $s );
                $s = array_shift( $a );
                $s = substr( $s, 1 );

                # $e1: "url]rest"              -> 1 = url, 2 = rest
                # $e2: "url description]rest"  -> 1 = url, 2 = description, 3 = rest
                $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
                $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

                foreach ( $a as $line ) {
                        if ( preg_match( $e1, $line, $m ) ) {
                                # [url] without description: numbered or shown as the URL
                                $link = "{$protocol}:{$m[1]}";
                                $trail = $m[2];
                                if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
                                else { $text = wfEscapeHTML( $link ); }
                        } else if ( preg_match( $e2, $line, $m ) ) {
                                # [url description]; the description is used as given
                                $link = "{$protocol}:{$m[1]}";
                                $text = $m[2];
                                $trail = $m[3];
                        } else {
                                # Not a well-formed bracketed link: put the text back unchanged
                                $s .= "[{$protocol}:" . $line;
                                continue;
                        }
                        # No expansion is needed when the visible text already is the
                        # URL, or is the URL without its "protocol://" part (an optional
                        # trailing slash is tolerated).
                        if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
                                $paren = "";
                        } else {
                                # Expand the URL for printable version
                                $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
                        }
                        $la = $sk->getExternalLinkAttributes( $link, $text );
                        $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";

                }
                return $s;
        }
 661
 662         /* private */ function handle3Quotes( &$state, $token )
 663         {
 664                 if ( $state["strong"] !== false ) {
 665                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 666                         {
 667                                 # ''' lala ''lala '''
 668                                 $s = "</em></strong><em>";
 669                         } else {
 670                                 $s = "</strong>";
 671                         }
 672                         $state["strong"] = FALSE;
 673                 } else {
 674                         $s = "<strong>";
 675                         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
 676                 }
 677                 return $s;
 678         }
 679
 680         /* private */ function handle2Quotes( &$state, $token )
 681         {
 682                 if ( $state["em"] !== false ) {
 683                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 684                         {
 685                                 # ''lala'''lala'' ....'''
 686                                 $s = "</strong></em><strong>";
 687                         } else {
 688                                 $s = "</em>";
 689                         }
 690                         $state["em"] = FALSE;
 691                 } else {
 692                         $s = "<em>";
 693                         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 694
 695                 }
 696                 return $s;
 697         }
 698
 699         /* private */ function handle5Quotes( &$state, $token )
 700         {
 701                 $s = "";
 702                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 703                         if ( $state["em"] < $state["strong"] ) {
 704                                 $s .= "</strong></em>";
 705                         } else {
 706                                 $s .= "</em></strong>";
 707                         }
 708                         $state["strong"] = $state["em"] = FALSE;
 709                 } elseif ( $state["em"] !== false ) {
 710                         $s .= "</em><strong>";
 711                         $state["em"] = FALSE;
 712                         $state["strong"] = $token["pos"];
 713                 } elseif ( $state["strong"] !== false ) {
 714                         $s .= "</strong><em>";
 715                         $state["strong"] = FALSE;
 716                         $state["em"] = $token["pos"];
 717                 } else { # not $em and not $strong
 718                         $s .= "<strong><em>";
 719                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 720                 }
 721                 return $s;
 722         }
 723
        # Token-driven pass over the text: handles internal [[links]], the
        # quote markup ('', ''', '''''), ---- rules, the "RFC " and "ISBN "
        # magic words, <timeline> sections and language-specific tokens.
        #
        # $str - the text to process
        #
        # Returns the text with the recognised tokens replaced by their output.
        # While a [[ is open, nothing is appended to the result; the pieces are
        # collected on a stack and handed to handleInternalLink() at the ]].
        /* private */ function doTokenizedParser( $str )
        {
                global $wgLang; # for language specific parser hook
                global $wgUploadDirectory, $wgUseTimeline;

                $tokenizer=Tokenizer::newFromString( $str );
                # While a link is open: its opening token followed by the interior
                # pieces (re-typed as "text" tokens) collected so far.
                $tokenStack = array();

                # Accumulated output
                $s="";
                # FALSE while closed; see handle2Quotes()/handle3Quotes()/handle5Quotes()
                $state["em"]      = FALSE;
                $state["strong"]  = FALSE;
                # TRUE while at least one [[ is waiting for its ]]
                $tagIsOpen = FALSE;
                # TRUE after a [[[ until the next link is closed.
                # NOTE(review): this is a single flag, not one per stacked link.
                $threeopen = false;

                # The tokenizer splits the text into tokens and returns them one by one.
                # Every call to the tokenizer returns a new token.
                while ( $token = $tokenizer->nextToken() )
                {
                        # Each case sets $txt, the output produced by this token.
                        switch ( $token["type"] )
                        {
                                case "text":
                                        # simple text with no further markup
                                        $txt = $token["text"];
                                        break;
                                case "blank":
                                        # Text that contains blanks that have to be converted to
                                        # non-breakable spaces for French.
                                        # U+202F NARROW NO-BREAK SPACE might be a better choice, but
                                        # browser support for Unicode spacing is poor.
                                        $txt = str_replace( " ", "&nbsp;", $token["text"] );
                                        break;
                                case "[[[":
                                        # remember the tag opened with 3 [
                                        $threeopen = true;
                                        # (deliberate fall-through to "[[")
                                case "[[":
                                        # link opening tag.
                                        # FIXME : Treat orphaned open tags (stack not empty when text is over)
                                        $tagIsOpen = TRUE;
                                        array_push( $tokenStack, $token );
                                        $txt="";
                                        break;

                                case "]]]":
                                case "]]":
                                        # link close tag.
                                        # get text from stack, glue it together, and call the code to handle a
                                        # link

                                        if ( count( $tokenStack ) == 0 )
                                        {
                                                # stack empty. Found a ]] without an opening [[
                                                # NOTE(review): a "]]]" token is emitted as "]]" here as well.
                                                $txt = "]]";
                                        } else {
                                                # Pop back to the nearest opening token, rebuilding the
                                                # link interior in its original order.
                                                $linkText = "";
                                                $lastToken = array_pop( $tokenStack );
                                                while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
                                                {
                                                        if( !empty( $lastToken["text"] ) ) {
                                                                $linkText = $lastToken["text"] . $linkText;
                                                        }
                                                        $lastToken = array_pop( $tokenStack );
                                                }

                                                $txt = $linkText ."]]";

                                                # Text carried by the opening token, if any, is passed on
                                                # as the link prefix.
                                                if( isset( $lastToken["text"] ) ) {
                                                        $prefix = $lastToken["text"];
                                                } else {
                                                        $prefix = "";
                                                }
                                                # A text token directly after the ]] is appended so that
                                                # handleInternalLink() receives the link trail as well.
                                                $nextToken = $tokenizer->previewToken();
                                                if ( $nextToken["type"] == "text" )
                                                {
                                                        # Preview just looks at it. Now we have to fetch it.
                                                        $nextToken = $tokenizer->nextToken();
                                                        $txt .= $nextToken["text"];
                                                }
                                                $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );

                                                # did the tag start with 3 [ ?
                                                if($threeopen) {
                                                        # show the first as text
                                                        $txt = "[".$txt;
                                                        $threeopen=false;
                                                }

                                        }
                                        # Still inside an outer link if anything is left on the stack
                                        $tagIsOpen = (count( $tokenStack ) != 0);
                                        break;
                                case "----":
                                        $txt = "\n<hr />\n";
                                        break;
                                case "'''":
                                        # This and the three next ones handle quotes
                                        $txt = $this->handle3Quotes( $state, $token );
                                        break;
                                case "''":
                                        $txt = $this->handle2Quotes( $state, $token );
                                        break;
                                case "'''''":
                                        $txt = $this->handle5Quotes( $state, $token );
                                        break;
                                case "":
                                        # empty token
                                        $txt="";
                                        break;
                                case "RFC ":
                                        # Inside a link the magic word is kept as plain text
                                        if ( $tagIsOpen ) {
                                                $txt = "RFC ";
                                        } else {
                                                $txt = $this->doMagicRFC( $tokenizer );
                                        }
                                        break;
                                case "ISBN ":
                                        # Inside a link the magic word is kept as plain text
                                        if ( $tagIsOpen ) {
                                                $txt = "ISBN ";
                                        } else {
                                                $txt = $this->doMagicISBN( $tokenizer );
                                        }
                                        break;
                                case "<timeline>":
                                        # Note that the closing tag is looked for in its
                                        # HTML-escaped form. If timelines are disabled or
                                        # nothing could be read, the token text is emitted as is.
                                        if ( $wgUseTimeline &&
                                             "" != ( $timelinesrc = $tokenizer->readAllUntil("&lt;/timeline&gt;") ) )
                                        {
                                                $txt = renderTimeline( $timelinesrc );
                                        } else {
                                                $txt=$token["text"];
                                        }
                                        break;
                                default:
                                        # Call language specific Hook.
                                        $txt = $wgLang->processToken( $token, $tokenStack );
                                        if ( NULL == $txt ) {
                                                # An unknown token. Highlight.
                                                $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
                                                $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
                                        }
                                        break;
                        }
                        # If we're parsing the interior of a link, don't append the interior to $s,
                        # but push it to the stack so it can be processed when a ]] token is found.
                        if ( $tagIsOpen  && $txt != "" ) {
                                $token["type"] = "text";
                                $token["text"] = $txt;
                                array_push( $tokenStack, $token );
                        } else {
                                $s .= $txt;
                        }
                } #end while
                if ( count( $tokenStack ) != 0 )
                {
                        # still objects on stack. opened [[ tag without closing ]] tag.
                        # Emit the collected pieces in their original order; for a
                        # non-text token (the opening tag) its type string is emitted.
                        $txt = "";
                        while ( $lastToken = array_pop( $tokenStack ) )
                        {
                                if ( $lastToken["type"] == "text" )
                                {
                                        $txt = $lastToken["text"] . $txt;
                                } else {
                                        $txt = $lastToken["type"] . $txt;
                                }
                        }
                        $s .= $txt;
                }
                return $s;
        }
 890
 891         /* private */ function handleInternalLink( $line, $prefix )
 892         {
 893                 global $wgLang, $wgLinkCache;
 894                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 895                 static $fname = "Parser::handleInternalLink" ;
 896                 wfProfileIn( $fname );
 897
 898                 wfProfileIn( "$fname-setup" );
 899                 static $tc = FALSE;
 900                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 901                 $sk =& $this->mOptions->getSkin();
 902
 903                 # Match a link having the form [[namespace:link|alternate]]trail
 904                 static $e1 = FALSE;
 905                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 906                 # Match the end of a line for a word that's not followed by whitespace,
 907                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 908                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 909                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 910                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 911
 912
 913                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 914                 static $image = FALSE;
 915                 static $special = FALSE;
 916                 static $media = FALSE;
 917                 static $category = FALSE;
 918                 if ( !$image ) { $image = Namespace::getImage(); }
 919                 if ( !$special ) { $special = Namespace::getSpecial(); }
 920                 if ( !$media ) { $media = Namespace::getMedia(); }
 921                 if ( !$category ) { $category = Namespace::getCategory(); }
 922
 923                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 924
 925                 wfProfileOut( "$fname-setup" );
 926                 $s = "";
 927
 928                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 929                         $text = $m[2];
 930                         $trail = $m[3];
 931                 } else { # Invalid form; output directly
 932                         $s .= $prefix . "[[" . $line ;
 933                         return $s;
 934                 }
 935
 936                 /* Valid link forms:
 937                 Foobar -- normal
 938                 :Foobar -- override special treatment of prefix (images, language links)
 939                 /Foobar -- convert to CurrentPage/Foobar
 940                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 941                 */
 942                 $c = substr($m[1],0,1);
 943                 $noforce = ($c != ":");
 944                 if( $c == "/" ) { # subpage
 945                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 946                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 947                                 $noslash=$m[1];
 948                         } else {
 949                                 $noslash=substr($m[1],1);
 950                         }
 951                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 952                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 953                                 if( "" == $text ) {
 954                                         $text= $m[1];
 955                                 } # this might be changed for ugliness reasons
 956                         } else {
 957                                 $link = $noslash; # no subpage allowed, use standard link
 958                         }
 959                 } elseif( $noforce ) { # no subpage
 960                         $link = $m[1];
 961                 } else {
 962                         $link = substr( $m[1], 1 );
 963                 }
 964                 if( "" == $text )
 965                         $text = $link;
 966
 967                 $nt = Title::newFromText( $link );
 968                 if( !$nt ) {
 969                         $s .= $prefix . "[[" . $line;
 970                         return $s;
 971                 }
 972                 $ns = $nt->getNamespace();
 973                 $iw = $nt->getInterWiki();
 974                 if( $noforce ) {
 975                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 976                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 977                                 $s .= $prefix . $trail ;
 978                                 return (trim($s) == '')? '': $s;
 979                         }
 980                         if( $ns == $image ) {
 981                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 982                                 $wgLinkCache->addImageLinkObj( $nt );
 983                                 return $s;
 984                         }
 985                         if ( $ns == $category ) {
 986                                 $t = $nt->getText() ;
 987                                 $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 988                                 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 989                                 $this->mOutput->mCategoryLinks[] = $t ;
 990                                 $s .= $prefix . $trail ;
 991                                 return $s ;
 992                         }
 993                 }
 994                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 995                     ( strpos( $link, "#" ) == FALSE ) ) {
 996                         # Self-links are handled specially; generally de-link and change to bold.
 997                         $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 998                         return $s;
 999                 }
1000
1001                 if( $ns == $media ) {
1002                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1003                         $wgLinkCache->addImageLinkObj( $nt );
1004                         return $s;
1005                 } elseif( $ns == $special ) {
1006                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
1007                         return $s;
1008                 }
1009                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
1010
1011                 wfProfileOut( $fname );
1012                 return $s;
1013         }
1014
1015         # Some functions here used by doBlockLevels()
1016         #
1017         /* private */ function closeParagraph()
1018         {
1019                 $result = "";
1020                 if ( '' != $this->mLastSection ) {
1021                         $result = "</" . $this->mLastSection  . ">\n";
1022                 }
1023                 $this->mInPre = false;
1024                 $this->mLastSection = "";
1025                 return $result;
1026         }
        # getCommon() returns the length of the longest common prefix of both
        # arguments, i.e. the number of leading characters they share.
1029         #
1030         /* private */ function getCommon( $st1, $st2 )
1031         {
1032                 $fl = strlen( $st1 );
1033                 $shorter = strlen( $st2 );
1034                 if ( $fl < $shorter ) { $shorter = $fl; }
1035
1036                 for ( $i = 0; $i < $shorter; ++$i ) {
1037                         if ( $st1{$i} != $st2{$i} ) { break; }
1038                 }
1039                 return $i;
1040         }
1041         # These next three functions open, continue, and close the list
1042         # element appropriate to the prefix character passed into them.
1043         #
1044         /* private */ function openList( $char )
1045     {
1046                 $result = $this->closeParagraph();
1047
1048                 if ( "*" == $char ) { $result .= "<ul><li>"; }
1049                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
1050                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
1051                 else if ( ";" == $char ) {
1052                         $result .= "<dl><dt>";
1053                         $this->mDTopen = true;
1054                 }
1055                 else { $result = "<!-- ERR 1 -->"; }
1056
1057                 return $result;
1058         }
1059
1060         /* private */ function nextItem( $char )
1061         {
1062                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
1063                 else if ( ":" == $char || ";" == $char ) {
1064                         $close = "</dd>";
1065                         if ( $this->mDTopen ) { $close = "</dt>"; }
1066                         if ( ";" == $char ) {
1067                                 $this->mDTopen = true;
1068                                 return $close . "<dt>";
1069                         } else {
1070                                 $this->mDTopen = false;
1071                                 return $close . "<dd>";
1072                         }
1073                 }
1074                 return "<!-- ERR 2 -->";
1075         }
1076
1077         /* private */function closeList( $char )
1078         {
1079                 if ( "*" == $char ) { $text = "</li></ul>"; }
1080                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1081                 else if ( ":" == $char ) {
1082                         if ( $this->mDTopen ) {
1083                                 $this->mDTopen = false;
1084                                 $text = "</dt></dl>";
1085                         } else {
1086                                 $text = "</dd></dl>";
1087                         }
1088                 }
1089                 else {  return "<!-- ERR 3 -->"; }
1090                 return $text."\n";
1091         }
1092
1093         /* private */ function doBlockLevels( $text, $linestart ) {
1094                 $fname = "Parser::doBlockLevels";
1095                 wfProfileIn( $fname );
1096
1097                 # Parsing through the text line by line.  The main thing
1098                 # happening here is handling of block-level elements p, pre,
1099                 # and making lists from lines starting with * # : etc.
1100                 #
1101                 $textLines = explode( "\n", $text );
1102
1103                 $lastPrefix = $output = $lastLine = '';
1104                 $this->mDTopen = $inBlockElem = false;
1105                 $prefixLength = 0;
1106                 $paragraphStack = false;
1107
1108                 if ( !$linestart ) {
1109                         $output .= array_shift( $textLines );
1110                 }
1111                 foreach ( $textLines as $oLine ) {
1112                         $lastPrefixLength = strlen( $lastPrefix );
1113                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1114                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1115                         if (!$this->mInPre) {
1116                                 $this->mInPre = !empty($preOpenMatch);
1117                         }
1118                         if ( !$this->mInPre ) {
1119                                 # Multiple prefixes may abut each other for nested lists.
1120                                 $prefixLength = strspn( $oLine, "*#:;" );
1121                                 $pref = substr( $oLine, 0, $prefixLength );
1122
1123                                 # eh?
1124                                 $pref2 = str_replace( ";", ":", $pref );
1125                                 $t = substr( $oLine, $prefixLength );
1126                         } else {
1127                                 # Don't interpret any other prefixes in preformatted text
1128                                 $prefixLength = 0;
1129                                 $pref = $pref2 = '';
1130                                 $t = $oLine;
1131                         }
1132
1133                         # List generation
1134                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1135                                 # Same as the last item, so no need to deal with nesting or opening stuff
1136                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1137                                 $paragraphStack = false;
1138
1139                                 if ( ";" == substr( $pref, -1 ) ) {
1140                                         # The one nasty exception: definition lists work like this:
1141                                         # ; title : definition text
1142                                         # So we check for : in the remainder text to split up the
1143                                         # title and definition, without b0rking links.
1144                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1145                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1146                                                 $term = $match[1];
1147                                                 $output .= $term . $this->nextItem( ":" );
1148                                                 $t = $match[2];
1149                                         }
1150                                 }
1151                         } elseif( $prefixLength || $lastPrefixLength ) {
1152                                 # Either open or close a level...
1153                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1154                                 $paragraphStack = false;
1155
1156                                 while( $commonPrefixLength < $lastPrefixLength ) {
1157                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1158                                         --$lastPrefixLength;
1159                                 }
1160                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1161                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1162                                 }
1163                                 while ( $prefixLength > $commonPrefixLength ) {
1164                                         $char = substr( $pref, $commonPrefixLength, 1 );
1165                                         $output .= $this->openList( $char );
1166
1167                                         if ( ";" == $char ) {
1168                                                 # FIXME: This is dupe of code above
1169                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1170                                                         $term = $match[1];
1171                                                         $output .= $term . $this->nextItem( ":" );
1172                                                         $t = $match[2];
1173                                                 }
1174                                         }
1175                                         ++$commonPrefixLength;
1176                                 }
1177                                 $lastPrefix = $pref2;
1178                         }
1179                         if( 0 == $prefixLength ) {
1180                                 # No prefix (not in list)--go to paragraph mode
1181                                 $uniq_prefix = UNIQ_PREFIX;
1182                                 // XXX: use a stack for nestable elements like span, table and div
1183                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1184                                 $closematch = preg_match(
1185                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1186                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1187                                 if ( $openmatch or $closematch ) {
1188                                         $paragraphStack = false;
1189                                         $output .= $this->closeParagraph();
1190                                         if($preOpenMatch and !$preCloseMatch) {
1191                                                 $this->mInPre = true;
1192                                         }
1193                                         if ( $closematch  ) {
1194                                                 $inBlockElem = false;
1195                                         } else {
1196                                                 $inBlockElem = true;
1197                                         }
1198                                 } else if ( !$inBlockElem ) {
1199                                         if ( " " == $t{0} ) {
1200                                                 // pre
1201                                                 if ($this->mLastSection != 'pre') {
1202                                                         $paragraphStack = false;
1203                                                         $output .= $this->closeParagraph().'<pre>';
1204                                                         $this->mLastSection = 'pre';
1205                                                 }
1206                                         } else {
1207                                                 // paragraph
1208                                                 if ( '' == trim($t) ) {
1209                                                         if ( $paragraphStack ) {
1210                                                                 $output .= $paragraphStack.'<br/>';
1211                                                                 $paragraphStack = false;
1212                                                                 $this->mLastSection = 'p';
1213                                                         } else {
1214                                                                 if ($this->mLastSection != 'p' ) {
1215                                                                         $output .= $this->closeParagraph();
1216                                                                         $this->mLastSection = '';
1217                                                                         $paragraphStack = "<p>";
1218                                                                 } else {
1219                                                                         $paragraphStack = '</p><p>';
1220                                                                 }
1221                                                         }
1222                                                 } else {
1223                                                         if ( $paragraphStack ) {
1224                                                                 $output .= $paragraphStack;
1225                                                                 $paragraphStack = false;
1226                                                                 $this->mLastSection = 'p';
1227                                                         } else if ($this->mLastSection != 'p') {
1228                                                                 $output .= $this->closeParagraph().'<p>';
1229                                                                 $this->mLastSection = 'p';
1230                                                         }
1231                                                 }
1232                                         }
1233                                 }
1234                         }
1235                         if ($paragraphStack === false) {
1236                                 $output .= $t."\n";
1237                         }
1238                 }
1239                 while ( $prefixLength ) {
1240                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1241                         --$prefixLength;
1242                 }
1243                 if ( "" != $this->mLastSection ) {
1244                         $output .= "</" . $this->mLastSection . ">";
1245                         $this->mLastSection = "";
1246                 }
1247
1248                 wfProfileOut( $fname );
1249                 return $output;
1250         }
1251
# Look up the current value of one of the built-in magic variables.
#
# $index  a MAG_* magic word ID
#
# Returns the value for the IDs handled below, or NULL for any other ID.
# The IDs are tested in a fixed order with loose (==) comparison.
function getVariableValue( $index ) {
        global $wgLang, $wgSitename, $wgServer;

        if ( $index == MAG_CURRENTMONTH ) {
                # Two-digit month number
                return date( "m" );
        }
        if ( $index == MAG_CURRENTMONTHNAME ) {
                $monthNumber = date( "n" );
                return $wgLang->getMonthName( $monthNumber );
        }
        if ( $index == MAG_CURRENTMONTHNAMEGEN ) {
                $monthNumber = date( "n" );
                return $wgLang->getMonthNameGen( $monthNumber );
        }
        if ( $index == MAG_CURRENTDAY ) {
                # Day of the month without leading zero
                return date( "j" );
        }
        if ( $index == MAG_PAGENAME ) {
                return $this->mTitle->getText();
        }
        if ( $index == MAG_NAMESPACE ) {
                # Namespace text as provided by the language object
                $namespaceIndex = $this->mTitle->getNamespace();
                return $wgLang->getNsText( $namespaceIndex );
        }
        if ( $index == MAG_CURRENTDAYNAME ) {
                # date("w") counts from 0 (Sunday); the name lookup is given
                # a value shifted up by one
                $weekday = date( "w" ) + 1;
                return $wgLang->getWeekdayName( $weekday );
        }
        if ( $index == MAG_CURRENTYEAR ) {
                return date( "Y" );
        }
        if ( $index == MAG_CURRENTTIME ) {
                $now = wfTimestampNow();
                return $wgLang->time( $now, false );
        }
        if ( $index == MAG_NUMBEROFARTICLES ) {
                return wfNumberOfArticles();
        }
        if ( $index == MAG_SITENAME ) {
                return $wgSitename;
        }
        if ( $index == MAG_SERVER ) {
                return $wgServer;
        }

        # Not a variable known to this function
        return NULL;
}
1285
# Build $this->mVariables from scratch.
#
# For every ID in $wgVariableIDs the matching magic word object is asked to
# add the variable's current value (from getVariableValue()) to the table.
function initialiseVariables()
{
        global $wgVariableIDs;

        $variables = array();
        foreach ( $wgVariableIDs as $variableId ) {
                $magicWord =& MagicWord::get( $variableId );
                $currentValue = $this->getVariableValue( $variableId );
                $magicWord->addToArray( $variables, $currentValue );
        }
        $this->mVariables = $variables;
}
1295
# Expand {{...}} constructs in $text.
#
# $text  the text to process
# $args  arguments made available to the text being processed; they are kept
#        on $this->mArgStack for the duration of the call
#
# Every match of the brace pattern is passed to the wfBraceSubstitution
# callback, which reaches this parser through $GLOBALS['wgCurParser'].
# Returns the text with the callback's replacements applied.
/* private */ function replaceVariables( $text, $args = array() )
{
        $fname = "Parser::replaceVariables";
        wfProfileIn( $fname );

        # The variable table is built lazily on first use
        if ( !$this->mVariables ) {
                $this->initialiseVariables();
        }

        # Groups: (1) optional leading newline, (2) the part before the first
        # "|", restricted to legal title characters, (3) "|args" or nothing
        $titleChars = Title::legalChars();
        $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";

        # This function is called recursively. To keep track of arguments we need a stack:
        array_push( $this->mArgStack, $args );

        # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
        $GLOBALS['wgCurParser'] =& $this;
        $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

        array_pop( $this->mArgStack );

        # Balance the wfProfileIn() above
        wfProfileOut( $fname );
        return $text;
}
1321
# Work out the replacement for a single {{...}} construct.
#
# $matches  regex match array: [0] the whole match, [1] an optional newline
#           before the braces, [2] the text before the first "|", [3] the
#           argument string (starting with "|") or ""
#
# The following are tried in order until one of them succeeds: SUBST handling,
# the MSGNW:/MSG:/INT: prefixes, NS:, LOCALURL:/LOCALURLE:, internal variables,
# arguments supplied by the caller, and finally loading a page from the
# database (titles default to the template namespace).
#
# Returns the replacement text, or the unmodified match if nothing applied.
function braceSubstitution( $matches )
{
        global $wgLinkCache, $wgLang;
        $found = false;
        $nowiki = false;
        $title = NULL;

        # $newline is an optional newline character before the braces
        # $part1 is the bit before the first |, and must contain only title characters
        # $args is a list of arguments, starting from index 0, not including $part1

        $newline = $matches[1];
        $part1 = $matches[2];
        # If the third subpattern matched anything, it will start with |
        if ( $matches[3] !== "" ) {
                $args = explode( "|", substr( $matches[3], 1 ) );
        } else {
                $args = array();
        }
        $argc = count( $args );

        # SUBST
        $mwSubst =& MagicWord::get( MAG_SUBST );
        if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
                if ( $this->mOutputType != OT_WIKI ) {
                        # Invalid SUBST not replaced at PST time
                        # Return without further processing
                        $text = $matches[0];
                        $found = true;
                }
        } elseif ( $this->mOutputType == OT_WIKI ) {
                # SUBST not found in PST pass, do nothing
                $text = $matches[0];
                $found = true;
        }

        # MSG, MSGNW and INT
        if ( !$found ) {
                # Check for MSGNW:
                $mwMsgnw =& MagicWord::get( MAG_MSGNW );
                if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
                        $nowiki = true;
                } else {
                        # Remove obsolete MSG:
                        $mwMsg =& MagicWord::get( MAG_MSG );
                        $mwMsg->matchStartAndRemove( $part1 );
                }

                # Check if it is an internal message
                $mwInt =& MagicWord::get( MAG_INT );
                if ( $mwInt->matchStartAndRemove( $part1 ) ) {
                        if ( $this->incrementIncludeCount( "int:$part1" ) ) {
                                $text = wfMsgReal( $part1, $args, true );
                                $found = true;
                        }
                }
        }

        # NS
        if ( !$found ) {
                # Check for NS: (namespace expansion)
                $mwNs =& MagicWord::get( MAG_NS );
                if ( $mwNs->matchStartAndRemove( $part1 ) ) {
                        # Numeric index. intval() alone is false for "0", which
                        # would make {{ns:0}} fall through to the later checks,
                        # so a literal zero is accepted explicitly.
                        if ( intval( $part1 ) || trim( $part1 ) === "0" ) {
                                $text = $wgLang->getNsText( intval( $part1 ) );
                                $found = true;
                        } else {
                                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                                if ( !is_null( $index ) ) {
                                        $text = $wgLang->getNsText( $index );
                                        $found = true;
                                }
                        }
                }
        }

        # LOCALURL and LOCALURLE
        if ( !$found ) {
                $mwLocal =& MagicWord::get( MAG_LOCALURL );
                $mwLocalE =& MagicWord::get( MAG_LOCALURLE );

                if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
                        $func = 'getLocalURL';
                } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
                        $func = 'escapeLocalURL';
                } else {
                        $func = '';
                }

                if ( $func !== '' ) {
                        $title = Title::newFromText( $part1 );
                        if ( !is_null( $title ) ) {
                                # The first argument, if any, is handed to the URL method
                                if ( $argc > 0 ) {
                                        $text = $title->$func( $args[0] );
                                } else {
                                        $text = $title->$func();
                                }
                                $found = true;
                        }
                }
        }

        # Internal variables
        if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
                $text = $this->mVariables[$part1];
                $found = true;
                $this->mOutput->mContainsOldMagic = true;
        }

        # Arguments input from the caller
        if ( !$found ) {
                # end() yields false when the stack is empty, hence the is_array() guard
                $inputArgs = end( $this->mArgStack );
                if ( is_array( $inputArgs ) && array_key_exists( $part1, $inputArgs ) ) {
                        $text = $inputArgs[$part1];
                        $found = true;
                }
        }

        # Load from database
        if ( !$found ) {
                $title = Title::newFromText( $part1, NS_TEMPLATE );
                if ( !is_null( $title ) && !$title->isExternal() ) {
                        # Check for excessive inclusion
                        $dbk = $title->getPrefixedDBkey();
                        if ( $this->incrementIncludeCount( $dbk ) ) {
                                $article = new Article( $title );
                                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                                if ( $articleContent !== false ) {
                                        $found = true;
                                        $text = $articleContent;
                                }
                        }

                        # If the title is valid but undisplayable, make a link to it
                        if ( $this->mOutputType == OT_HTML && !$found ) {
                                $text = "[[" . $title->getPrefixedText() . "]]";
                                $found = true;
                        }
                }
        }

        # Recursive parsing, escaping and link table handling
        # Only for HTML output
        if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
                $text = wfEscapeWikiText( $text );
        } elseif ( $this->mOutputType == OT_HTML && $found ) {
                # Clean up argument array: "name=value" arguments are stored
                # under their name, all others under 1, 2, 3, ...
                $assocArgs = array();
                $index = 1;
                foreach( $args as $arg ) {
                        $eqpos = strpos( $arg, "=" );
                        if ( $eqpos === false ) {
                                $assocArgs[$index++] = $arg;
                        } else {
                                # trim() always returns a string, so no checks
                                # against false are needed here
                                $name = trim( substr( $arg, 0, $eqpos ) );
                                $value = trim( substr( $arg, $eqpos+1 ) );
                                $assocArgs[$name] = $value;
                        }
                }

                # Do not enter included links in link table
                if ( !is_null( $title ) ) {
                        $wgLinkCache->suspend();
                }

                # Run full parser on the included text
                $text = $this->strip( $text, $this->mStripState );
                $text = $this->internalParse( $text, (bool)$newline, $assocArgs );

                # Add the result to the strip state for re-inclusion after
                # the rest of the processing
                $text = $this->insertStripItem( $text, $this->mStripState );

                # Resume the link cache and register the inclusion as a link
                if ( !is_null( $title ) ) {
                        $wgLinkCache->resume();
                        $wgLinkCache->addLinkObj( $title );
                }
        }

        if ( !$found ) {
                return $matches[0];
        } else {
                return $text;
        }
}
1513
# Count one more inclusion of the entity identified by $dbk.
#
# Returns true while the new count is still within MAX_INCLUDE_REPEAT,
# false once that limit has been exceeded.
function incrementIncludeCount( $dbk )
{
        $seenBefore = array_key_exists( $dbk, $this->mIncludeCount );
        $count = $seenBefore ? $this->mIncludeCount[$dbk] + 1 : 1;
        $this->mIncludeCount[$dbk] = $count;

        return $count <= MAX_INCLUDE_REPEAT;
}
1526
1527
# Cleans up HTML, removes dangerous tags and attributes
#
# $text  the text to clean
#
# HTML comments are removed. The text is then split on "<": pieces that start
# with a whitelisted tag are kept, with their attributes run through
# fixTagAttributes(); every other piece gets its "<" escaped as &lt;.
# Unless $wgUseTidy is set, tag nesting is tracked as well: badly nested tags
# are escaped and tags left open at the end are closed.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
        global $wgUseTidy;
        $fname = "Parser::removeHTMLtags";
        wfProfileIn( $fname );
        $htmlpairs = array( # Tags that must be closed
                "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
                "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
                "strike", "strong", "tt", "var", "div", "center",
                "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
                "ruby", "rt" , "rb" , "rp", "p"
        );
        $htmlsingle = array(
                "br", "hr", "li", "dt", "dd"
        );
        $htmlnest = array( # Tags that can be nested--??
                "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
                "dl", "font", "big", "small", "sub", "sup"
        );
        $tabletags = array( # Can only appear inside table
                "td", "th", "tr"
        );

        $htmlsingle = array_merge( $tabletags, $htmlsingle );
        $htmlelements = array_merge( $htmlsingle, $htmlpairs );

        # NOTE(review): the result is not used in this function; the call is
        # kept in case getHTMLattrs() does more than return a list -- confirm
        $htmlattrs = $this->getHTMLattrs () ;

        # Remove HTML comments
        $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

        # Splits one "<"-delimited piece into: optional slash, tag name,
        # attributes, closing bracket (optionally "/>") and trailing text
        $tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

        $bits = explode( "<", $text );
        $text = array_shift( $bits );
        if(!$wgUseTidy) {
                $tagstack = array(); $tablestack = array();
                foreach ( $bits as $x ) {
                        # $regs is empty if the piece does not look like a tag;
                        # the @ hides the notices list() raises in that case
                        preg_match( $tagPattern, $x, $regs );
                        @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;

                        $badtag = 0 ;
                        if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
                                # Check our stack
                                if ( $slash ) {
                                        # Closing a tag...
                                        if ( ! in_array( $t, $htmlsingle ) ) {
                                                # It must match the innermost open tag.
                                                # The popped value is compared directly
                                                # (not via a boolean) and is reset on
                                                # every pass so nothing stale is reused.
                                                $ot = count( $tagstack ) ? array_pop( $tagstack ) : NULL;
                                                if ( $ot !== $t ) {
                                                        # Mismatch: restore the stack, reject the tag
                                                        if ( !is_null( $ot ) ) {
                                                                array_push( $tagstack, $ot );
                                                        }
                                                        $badtag = 1;
                                                }
                                        }
                                        if ( ! $badtag ) {
                                                if ( $t == "table" ) {
                                                        # Back to the stack saved when the table was opened
                                                        $tagstack = array_pop( $tablestack );
                                                }
                                                $newparams = "";
                                        }
                                } else {
                                        # Keep track for later
                                        if ( in_array( $t, $tabletags ) &&
                                        ! in_array( "table", $tagstack ) ) {
                                                $badtag = 1;
                                        } else if ( in_array( $t, $tagstack ) &&
                                        ! in_array ( $t , $htmlnest ) ) {
                                                $badtag = 1 ;
                                        } else if ( ! in_array( $t, $htmlsingle ) ) {
                                                if ( $t == "table" ) {
                                                        # A table starts with a fresh stack;
                                                        # the current one is saved
                                                        array_push( $tablestack, $tagstack );
                                                        $tagstack = array();
                                                }
                                                array_push( $tagstack, $t );
                                        }
                                        # Strip non-approved attributes from the tag
                                        $newparams = $this->fixTagAttributes($params);
                                }
                                if ( ! $badtag ) {
                                        $rest = str_replace( ">", "&gt;", $rest );
                                        $text .= "<$slash$t $newparams$brace$rest";
                                        continue;
                                }
                        }
                        # Not an accepted tag: show it as literal text
                        $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                }
                # Close off any remaining tags
                while ( $t = array_pop( $tagstack ) ) {
                        $text .= "</$t>\n";
                        if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
                }
        } else {
                # this might be possible using tidy itself
                foreach ( $bits as $x ) {
                        preg_match( $tagPattern, $x, $regs );
                        @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
                        if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
                                $newparams = $this->fixTagAttributes($params);
                                $rest = str_replace( ">", "&gt;", $rest );
                                $text .= "<$slash$t $newparams$brace$rest";
                        } else {
                                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                        }
                }
        }
        wfProfileOut( $fname );
        return $text;
}
1636
1637
1638 /*
1639  *
1640  * This function accomplishes several tasks:
1641  * 1) Auto-number headings if that option is enabled
1642  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1643  * 3) Add a Table of contents on the top for users who have enabled the option
1644  * 4) Auto-anchor headings
1645  *
1646  * It loops through all headlines, collects the necessary data, then splits up the
1647  * string and re-inserts the newly formatted headlines.
1648  *
1649  */
1650
1651         /* private */ function formatHeadings( $text )
1652         {
1653                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1654                 $doShowToc = $this->mOptions->getShowToc();
1655                 if( !$this->mTitle->userCanEdit() ) {
1656                         $showEditLink = 0;
1657                         $rightClickHack = 0;
1658                 } else {
1659                         $showEditLink = $this->mOptions->getEditSection();
1660                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1661                 }
1662
1663                 # Inhibit editsection links if requested in the page
1664                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1665                 if( $esw->matchAndRemove( $text ) ) {
1666                         $showEditLink = 0;
1667                 }
1668                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1669                 # do not add TOC
1670                 $mw =& MagicWord::get( MAG_NOTOC );
1671                 if( $mw->matchAndRemove( $text ) ) {
1672                         $doShowToc = 0;
1673                 }
1674
1675                 # never add the TOC to the Main Page. This is an entry page that should not
1676                 # be more than 1-2 screens large anyway
1677                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1678                         $doShowToc = 0;
1679                 }
1680
1681                 # Get all headlines for numbering them and adding funky stuff like [edit]
1682                 # links - this is for later, but we need the number of headlines right now
1683                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1684
1685                 # if there are fewer than 4 headlines in the article, do not show TOC
1686                 if( $numMatches < 4 ) {
1687                         $doShowToc = 0;
1688                 }
1689
1690                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1691                 # override above conditions and always show TOC
1692                 $mw =& MagicWord::get( MAG_FORCETOC );
1693                 if ($mw->matchAndRemove( $text ) ) {
1694                         $doShowToc = 1;
1695                 }
1696
1697
1698                 # We need this to perform operations on the HTML
1699                 $sk =& $this->mOptions->getSkin();
1700
1701                 # headline counter
1702                 $headlineCount = 0;
1703
1704                 # Ugh .. the TOC should have neat indentation levels which can be
1705                 # passed to the skin functions. These are determined here
1706                 $toclevel = 0;
1707                 $toc = "";
1708                 $full = "";
1709                 $head = array();
1710                 $sublevelCount = array();
1711                 $level = 0;
1712                 $prevlevel = 0;
1713                 foreach( $matches[3] as $headline ) {
1714                         $numbering = "";
1715                         if( $level ) {
1716                                 $prevlevel = $level;
1717                         }
1718                         $level = $matches[1][$headlineCount];
1719                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1720                                 # reset when we enter a new level
1721                                 $sublevelCount[$level] = 0;
1722                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1723                                 $toclevel += $level - $prevlevel;
1724                         }
1725                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1726                                 # reset when we step back a level
1727                                 $sublevelCount[$level+1]=0;
1728                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1729                                 $toclevel -= $prevlevel - $level;
1730                         }
1731                         # count number of headlines for each level
1732                         @$sublevelCount[$level]++;
1733                         if( $doNumberHeadings || $doShowToc ) {
1734                                 $dot = 0;
1735                                 for( $i = 1; $i <= $level; $i++ ) {
1736                                         if( !empty( $sublevelCount[$i] ) ) {
1737                                                 if( $dot ) {
1738                                                         $numbering .= ".";
1739                                                 }
1740                                                 $numbering .= $sublevelCount[$i];
1741                                                 $dot = 1;
1742                                         }
1743                                 }
1744                         }
1745
1746                         # The canonized header is a version of the header text safe to use for links
1747                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1748                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1749
1750                         # strip out HTML
1751                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1752                         $tocline = trim( $canonized_headline );
1753                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
1754                         $refer[$headlineCount] = $canonized_headline;
1755
1756                         # count how many in assoc. array so we can track dupes in anchors
1757                         @$refers[$canonized_headline]++;
1758                         $refcount[$headlineCount]=$refers[$canonized_headline];
1759
1760                         # Prepend the number to the heading text
1761
1762                         if( $doNumberHeadings || $doShowToc ) {
1763                                 $tocline = $numbering . " " . $tocline;
1764
1765                                 # Don't number the heading if it is the only one (looks silly)
1766                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1767                                         # the two are different if the line contains a link
1768                                         $headline=$numbering . " " . $headline;
1769                                 }
1770                         }
1771
1772                         # Create the anchor for linking from the TOC to the section
1773                         $anchor = $canonized_headline;
1774                         if($refcount[$headlineCount] > 1 ) {
1775                                 $anchor .= "_" . $refcount[$headlineCount];
1776                         }
1777                         if( $doShowToc ) {
1778                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1779                         }
1780                         if( $showEditLink ) {
1781                                 if ( empty( $head[$headlineCount] ) ) {
1782                                         $head[$headlineCount] = "";
1783                                 }
1784                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1785                         }
1786
1787                         # Add the edit section span
1788                         if( $rightClickHack ) {
1789                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1790                         }
1791
1792                         # give headline the correct <h#> tag
1793                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1794
1795                         $headlineCount++;
1796                 }
1797
1798                 if( $doShowToc ) {
1799                         $toclines = $headlineCount;
1800                         $toc .= $sk->tocUnindent( $toclevel );
1801                         $toc = $sk->tocTable( $toc );
1802                 }
1803
1804                 # split up and insert constructed headlines
1805
1806                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1807                 $i = 0;
1808
1809                 foreach( $blocks as $block ) {
1810                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1811                             # This is the [edit] link that appears for the top block of text when
1812                                 # section editing is enabled
1813
1814                                 # Disabled because it broke block formatting
1815                                 # For example, a bullet point in the top line
1816                                 # $full .= $sk->editSectionLink(0);
1817                         }
1818                         $full .= $block;
1819                         if( $doShowToc && !$i) {
1820                         # Top anchor now in skin
1821                                 $full = $full.$toc;
1822                         }
1823
1824                         if( !empty( $head[$i] ) ) {
1825                                 $full .= $head[$i];
1826                         }
1827                         $i++;
1828                 }
1829
1830                 return $full;
1831         }
1832
# Build the replacement text for an "ISBN" magic word.
#
# Tokens with an empty type at the head of the stream are consumed and
# discarded. If the next token is a text token it is consumed too: leading
# spaces are skipped and the longest following run of digits, hyphens and
# upper-case ASCII letters is taken as the ISBN. A token of any other type is
# left in the stream.
#
# $tokenizer: object providing previewToken() and nextToken(); both are
#             expected to return an array with a "type" key, plus a "text"
#             key for text tokens.
#
# Returns "ISBN " if no text token follows, the original text unchanged
# (prefixed with "ISBN ") if no ISBN characters other than hyphens were found,
# or otherwise a link to Special:Booksources followed by the remaining text.
/* private */ function doMagicISBN( &$tokenizer )
{
        # Discard tokens with an empty type
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" ) {
                return "ISBN ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        # strspn() measures the leading run and stops cleanly at the end of
        # the string, so no character offset is ever read past the end.
        # The (string) casts cover substr() returning false on older PHP.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        $isbnLen = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $isbnLen );
        $x = (string)substr( $x, $isbnLen );

        # The number passed to Booksources has the hyphens removed
        $num = str_replace( "-", "", $isbn );

        if ( $num === "" ) {
                # Nothing usable as an ISBN. $isbn may still hold hyphens that
                # were consumed above, so put them back to avoid losing text.
                return "ISBN $blank$isbn$x";
        }

        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
        $text = "<a href=\"" .
                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>";
        return $text . $x;
}
# Build the replacement text for an "RFC" magic word.
#
# Tokens with an empty type at the head of the stream are consumed and
# discarded. If the next token is a text token it is consumed too: leading
# spaces are skipped and the following run of decimal digits is taken as the
# RFC number. A token of any other type is left in the stream.
#
# $tokenizer: object providing previewToken() and nextToken(); both are
#             expected to return an array with a "type" key, plus a "text"
#             key for text tokens.
#
# Returns "RFC " if no text token follows, "RFC " plus the original text if
# no digits were found, or otherwise an external link built from the "rfcurl"
# message (with "$1" replaced by the number) followed by the remaining text.
/* private */ function doMagicRFC( &$tokenizer )
{
        # Discard tokens with an empty type
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" ) {
                return "RFC ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];

        # strspn() measures the leading run and stops cleanly at the end of
        # the string, so no character offset is ever read past the end.
        # The (string) casts cover substr() returning false on older PHP.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        $rfcLen = strspn( $x, "0123456789" );
        $rfc = (string)substr( $x, 0, $rfcLen );
        $x = (string)substr( $x, $rfcLen );

        if ( $rfc === "" ) {
                # No number found: emit the text unchanged
                return "RFC $blank$x";
        }

        $url = str_replace( "$1", $rfc, wfmsg( "rfcurl" ) );
        $sk =& $this->mOptions->getSkin();
        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
        return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1921
# Transform wiki markup before it is saved, returning altered wiki markup.
#
# $text:       wiki markup to transform
# $title:      stored by reference in mTitle
# $user:       passed on to pstPass2()
# $options:    stored in mOptions
# $clearState: when true (the default), clearState() is called first
#
# Steps: CRLF line endings become LF, <br> variants are normalised, then the
# text is run through strip(), pstPass2() and unstrip() using a local strip
# state.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOptions = $options;
        $this->mTitle =& $title;
        $this->mOutputType = OT_WIKI;

        if ( $clearState ) {
                $this->clearState();
        }

        $stripState = false;

        # Normalise line endings
        $text = str_replace( "\r\n", "\n", $text );

        # Normalise <br> tags. The attribute part is matched with [^>]+ so the
        # match cannot run past the end of the tag: a greedy ".+" would span
        # from an earlier <br> on the same line up to a later
        # <br clear=all>, deleting everything in between.
        $pairs = array(
                "/<br[^>]+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
                "/<br *?>/i" => "<br/>",
        );
        $text = preg_replace( array_keys( $pairs ), array_values( $pairs ), $text );

        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $text;
}
1948
# Second pass of the pre-save transform.
#
# Performs, in order: variable replacement, signature expansion
# (~~~, ~~~~, ~~~~~), expansion of "pipe trick" context links, and trimming
# of trailing whitespace followed by removal of the MAG_END magic word.
#
# $text: wiki markup
# $user: object providing getName() and getOption( "nickname" )
#
# Returns the transformed wiki markup.
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) {
                $k = $n;
        }

        # Temporarily switch the TZ environment variable while formatting the
        # timestamp, then put back the value that was read beforehand.
        $useLocalTz = isset( $wgLocaltimezone );
        if ( $useLocalTz ) {
                $oldtz = getenv( "TZ" );
                putenv( "TZ=$wgLocaltimezone" );
        }
        /* Note: this is an ugly timezone hack for the European wikis */
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
                " (" . date( "T" ) . ")";
        if ( $useLocalTz ) {
                putenv( "TZ=$oldtz" );
        }

        # The tilde sequences are literal strings, so use str_replace():
        # with preg_replace() a "$1" or "\1" in the user name or nickname
        # would be interpreted as a backreference and corrupt the signature.
        # Longest sequence first, so shorter ones do not eat part of it.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # If the current title has the form "name (context)", remember the
        # context so that [[|page]] can be expanded to "page (context)".
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
2019
2020         # Set up some variables which are usually set up in parse()
2021         # so that an external function can call some class members with confidence
# Parameters:
#   $title       stored by reference in mTitle
#   $options     stored in mOptions
#   $outputType  stored in mOutputType
#   $clearState  when true (the default), clearState() is called afterwards
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
2031
# Run variable replacement over a message text with mOutputType set to OT_MSG.
#
# The title is taken from the global $wgTitle and parser state is cleared
# before replacement. A static flag makes a nested call (one made while an
# outer call is still running) return its input unchanged, which guards
# against infinite recursion.
#
# $text:    message text
# $options: stored in mOptions
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $busy = false;

        if ( !$busy ) {
                $busy = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $busy = false;
        }

        return $text;
}
2051 }
2052
# Container for the result of a parse: the output text plus the language
# links, category links and "contains old magic" flag collected with it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # All arguments are optional and are stored as given.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the link lists and old-magic flag of another ParserOutput into
        # this one. mText is not touched.
        #
        # $other: the ParserOutput whose data is appended
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links; appending
                # our own language links here would mix the two lists and
                # drop $other's categories.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2082
# Bundle of settings read by the parser. Values are filled in from site-wide
# globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # --- mUseTeX ---
        function getUseTeX()
        {
                return $this->mUseTeX;
        }
        function setUseTeX( $x )
        {
                return wfSetVar( $this->mUseTeX, $x );
        }

        # --- mUseCategoryMagic ---
        function getUseCategoryMagic()
        {
                return $this->mUseCategoryMagic;
        }
        function setUseCategoryMagic( $x )
        {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        # --- mUseDynamicDates ---
        function getUseDynamicDates()
        {
                return $this->mUseDynamicDates;
        }
        function setUseDynamicDates( $x )
        {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        # --- mInterwikiMagic ---
        function getInterwikiMagic()
        {
                return $this->mInterwikiMagic;
        }
        function setInterwikiMagic( $x )
        {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        # --- mAllowExternalImages ---
        function getAllowExternalImages()
        {
                return $this->mAllowExternalImages;
        }
        function setAllowExternalImages( $x )
        {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        # --- mSkin (set through wfSetRef rather than wfSetVar) ---
        function getSkin()
        {
                return $this->mSkin;
        }
        function setSkin( $x )
        {
                return wfSetRef( $this->mSkin, $x );
        }

        # --- mDateFormat ---
        function getDateFormat()
        {
                return $this->mDateFormat;
        }
        function setDateFormat( $x )
        {
                return wfSetVar( $this->mDateFormat, $x );
        }

        # --- mEditSection ---
        function getEditSection()
        {
                return $this->mEditSection;
        }
        function setEditSection( $x )
        {
                return wfSetVar( $this->mEditSection, $x );
        }

        # --- mEditSectionOnRightClick ---
        function getEditSectionOnRightClick()
        {
                return $this->mEditSectionOnRightClick;
        }
        function setEditSectionOnRightClick( $x )
        {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        # --- mNumberHeadings ---
        function getNumberHeadings()
        {
                return $this->mNumberHeadings;
        }
        function setNumberHeadings( $x )
        {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        # --- mShowToc ---
        function getShowToc()
        {
                return $this->mShowToc;
        }
        function setShowToc( $x )
        {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Create a ParserOptions object and initialise it from $user.
        /* static */ function newFromUser( &$user )
        {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Populate every option: the site-wide switches are copied from the
        # corresponding $wg* globals, the rest from the user's skin and
        # preferences. If $userInput is empty, a fresh User object marked as
        # loaded is used instead.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $who =& $userInput;
                } else {
                        $who = new User;
                        $who->setLoaded( true );
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user settings
                $this->mSkin =& $who->getSkin();
                $this->mDateFormat = $who->getOption( "date" );
                $this->mEditSection = $who->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $who->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $who->getOption( "numberheadings" );
                $this->mShowToc = $who->getOption( "showtoc" );
        }

}
2155
2156 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches )
{
        # Plain-function wrapper so that the method of the parser held in the
        # global $wgCurParser can be used as a callback.
        global $wgCurParser;

        $replacement = $wgCurParser->braceSubstitution( $matches );
        return $replacement;
}
2162
2163 ?>