includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         include_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  40 define( "MAX_INCLUDE_REPEAT", 5 );
  41
  42 # Allowed values for $mOutputType
  43 define( "OT_HTML", 1 );
  44 define( "OT_WIKI", 2 );
  45 define( "OT_MSG", 3 );
  46
  47 # string parameter for extractTags which will cause it
  48 # to strip HTML comments in addition to regular
  49 # <XML>-style tags. This should not be anything we
  50 # may want to use in wikisyntax
  51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
  52
  53 # prefix for escaping, used in two functions at least
  54 define( "UNIQ_PREFIX", "NaodW29");
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  65         function Parser()
  66         {
  67                 $this->clearState();
  68         }
  69
  70         function clearState()
  71         {
  72                 $this->mOutput = new ParserOutput;
  73                 $this->mAutonumber = 0;
  74                 $this->mLastSection = "";
  75                 $this->mDTopen = false;
  76                 $this->mVariables = false;
  77                 $this->mIncludeCount = array();
  78                 $this->mStripState = array();
  79                 $this->mArgStack = array();
  80         }
  81
  82         # First pass--just handle <nowiki> sections, pass the rest off
  83         # to internalParse() which does all the real work.
  84         #
  85         # Returns a ParserOutput
  86         #
  87         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  88         {
  89                 $fname = "Parser::parse";
  90                 wfProfileIn( $fname );
  91
  92                 if ( $clearState ) {
  93                         $this->clearState();
  94                 }
  95
  96                 $this->mOptions = $options;
  97                 $this->mTitle =& $title;
  98                 $this->mOutputType = OT_HTML;
  99
 100                 $stripState = NULL;
 101                 $text = $this->strip( $text, $this->mStripState );
 102                 $text = $this->internalParse( $text, $linestart );
 103                 $text = $this->unstrip( $text, $this->mStripState );
 104                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 105                 $fixtags = array(
 106                         "/<hr *>/i" => '<hr/>',
 107                         "/<br *>/i" => '<br/>',
 108                         "/<center *>/i"=>'<div class="center">',
 109                         "/<\\/center *>/i" => '</div>',
 110                         # Clean up spare ampersands; note that we probably ought to be
 111                         # more careful about named entities.
 112                         '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 113                 );
 114                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 115
 116                 # only once and last
 117                 $text = $this->doBlockLevels( $text, $linestart );
 118
 119                 $this->mOutput->setText( $text );
 120                 wfProfileOut( $fname );
 121                 return $this->mOutput;
 122         }
 123
 124         /* static */ function getRandomString()
 125         {
 126                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 127         }
 128
 129         # Replaces all occurrences of <$tag>content</$tag> in the text
 130         # with a random marker and returns the new text. the output parameter
 131         # $content will be an associative array filled with data on the form
 132         # $unique_marker => content.
 133
 134         # If $content is already set, the additional entries will be appended
 135
 136         # If $tag is set to STRIP_COMMENTS, the function will extract
 137         # <!-- HTML comments -->
 138
 139         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 140                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 141                 if ( !$content ) {
 142                         $content = array( );
 143                 }
 144                 $n = 1;
 145                 $stripped = "";
 146
 147                 while ( "" != $text ) {
 148                         if($tag==STRIP_COMMENTS) {
 149                                 $p = preg_split( "/<!--/i", $text, 2 );
 150                         } else {
 151                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 152                         }
 153                         $stripped .= $p[0];
 154                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 155                                 $text = "";
 156                         } else {
 157                                 if($tag==STRIP_COMMENTS) {
 158                                         $q = preg_split( "/-->/i", $p[1], 2 );
 159                                 } else {
 160                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 161                                 }
 162                                 $marker = $rnd . sprintf("%08X", $n++);
 163                                 $content[$marker] = $q[0];
 164                                 $stripped .= $marker;
 165                                 $text = $q[1];
 166                         }
 167                 }
 168                 return $stripped;
 169         }
 170
 171         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 172         # If $render is set, performs necessary rendering operations on plugins
 173         # Returns the text, and fills an array with data needed in unstrip()
 174         # If the $state is already a valid strip state, it adds to the state
 175
 176         # When $stripcomments is set, HTML comments <!-- like this -->
 177         # will be stripped in addition to other tags. This is important
 178         # for section editing, where these comments cause confusion when
 179         # counting the sections in the wikisource
 180         function strip( $text, &$state, $stripcomments = false )
 181         {
 182                 $render = ($this->mOutputType == OT_HTML);
 183                 $nowiki_content = array();
 184                 $hiero_content = array();
 185                 $math_content = array();
 186                 $pre_content = array();
 187                 $comment_content = array();
 188
 189                 # Replace any instances of the placeholders
 190                 $uniq_prefix = UNIQ_PREFIX;
 191                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 192
 193                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 194                 foreach( $nowiki_content as $marker => $content ){
 195                         if( $render ){
 196                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 197                         } else {
 198                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 199                         }
 200                 }
 201
 202                 $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 203                 foreach( $hiero_content as $marker => $content ){
 204                         if( $render && $GLOBALS['wgUseWikiHiero']){
 205                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 206                         } else {
 207                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
 208                         }
 209                 }
 210
 211                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 212                 foreach( $math_content as $marker => $content ){
 213                         if( $render && $this->mOptions->getUseTeX() ){
 214                                 $math_content[$marker] = renderMath( $content );
 215                         } else {
 216                                 $math_content[$marker] = "<math>$content</math>";
 217                         }
 218                 }
 219
 220                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 221                 foreach( $pre_content as $marker => $content ){
 222                         if( $render ){
 223                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 224                         } else {
 225                                 $pre_content[$marker] = "<pre>$content</pre>";
 226                         }
 227                 }
 228                 if($stripcomments) {
 229                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 230                         foreach( $comment_content as $marker => $content ){
 231                                 $comment_content[$marker] = "<!--$content-->";
 232                         }
 233                 }
 234
 235                 # Merge state with the pre-existing state, if there is one
 236                 if ( $state ) {
 237                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 238                         $state['hiero'] = $state['hiero'] + $hiero_content;
 239                         $state['math'] = $state['math'] + $math_content;
 240                         $state['pre'] = $state['pre'] + $pre_content;
 241                         $state['comment'] = $state['comment'] + $comment_content;
 242                 } else {
 243                         $state = array(
 244                           'nowiki' => $nowiki_content,
 245                           'hiero' => $hiero_content,
 246                           'math' => $math_content,
 247                           'pre' => $pre_content,
 248                           'comment' => $comment_content
 249                         );
 250                 }
 251                 return $text;
 252         }
 253
 254         function unstrip( $text, &$state )
 255         {
 256                 # Must expand in reverse order, otherwise nested tags will be corrupted
 257                 $contentDict = end( $state );
 258                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 259                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 260                                 $text = str_replace( key( $contentDict ), $content, $text );
 261                         }
 262                 }
 263
 264                 return $text;
 265         }
 266
 267         # Add an item to the strip state
 268         # Returns the unique tag which must be inserted into the stripped text
 269         # The tag will be replaced with the original text in unstrip()
 270
 271         function insertStripItem( $text, &$state )
 272         {
 273                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 274                 if ( !$state ) {
 275                         $state = array(
 276                           'nowiki' => array(),
 277                           'hiero' => array(),
 278                           'math' => array(),
 279                           'pre' => array()
 280                         );
 281                 }
 282                 $state['item'][$rnd] = $text;
 283                 return $rnd;
 284         }
 285
 286         # This method generates the list of subcategories and pages for a category
 287         function categoryMagic ()
 288         {
 289                 global $wgLang , $wgUser ;
 290                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 291
 292                 $cns = Namespace::getCategory() ;
 293                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 294
 295                 $r = "<br style=\"clear:both;\"/>\n";
 296
 297
 298                 $sk =& $wgUser->getSkin() ;
 299
 300                 $articles = array() ;
 301                 $children = array() ;
 302                 $data = array () ;
 303                 $id = $this->mTitle->getArticleID() ;
 304
 305                 # For existing categories
 306                 if( $id ) {
 307                         $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 308                         $res = wfQuery ( $sql, DB_READ ) ;
 309                         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 310                 } else {
 311                         # For non-existing categories
 312                         $t = wfStrencode( $this->mTitle->getPrefixedDBKey() );
 313                         $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ;
 314                         $res = wfQuery ( $sql, DB_READ ) ;
 315                         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 316                 }
 317
 318                 # For all pages that link to this category
 319                 foreach ( $data AS $x )
 320                 {
 321                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 322                         if ( $t != "" ) $t .= ":" ;
 323                         $t .= $x->cur_title ;
 324
 325                         if ( $x->cur_namespace == $cns ) {
 326                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 327                         } else {
 328                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 329                         }
 330                 }
 331                 wfFreeResult ( $res ) ;
 332
 333                 # Showing subcategories
 334                 if ( count ( $children ) > 0 )
 335                 {
 336                         asort ( $children ) ;
 337                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 338                         $r .= implode ( ", " , $children ) ;
 339                 }
 340
 341                 # Showing pages in this category
 342                 if ( count ( $articles ) > 0 )
 343                 {
 344                         $ti = $this->mTitle->getText() ;
 345                         asort ( $articles ) ;
 346                         $h =  wfMsg( "category_header", $ti );
 347                         $r .= "<h2>{$h}</h2>\n" ;
 348                         $r .= implode ( ", " , $articles ) ;
 349                 }
 350
 351
 352                 return $r ;
 353         }
 354
 355         function getHTMLattrs ()
 356         {
 357                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 358                                 "title", "align", "lang", "dir", "width", "height",
 359                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 360                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 361                                 /* FONT */ "type", "start", "value", "compact",
 362                                 /* For various lists, mostly deprecated but safe */
 363                                 "summary", "width", "border", "frame", "rules",
 364                                 "cellspacing", "cellpadding", "valign", "char",
 365                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 366                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 367                                 "id", "class", "name", "style" /* For CSS */
 368                                 );
 369                 return $htmlattrs ;
 370         }
 371
 372         function fixTagAttributes ( $t )
 373         {
 374                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 375                 $htmlattrs = $this->getHTMLattrs() ;
 376
 377                 # Strip non-approved attributes from the tag
 378                 $t = preg_replace(
 379                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 380                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 381                         $t);
 382                 # Strip javascript "expression" from stylesheets. Brute force approach:
 383                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 384
 385                 if( preg_match(
 386                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 387                         wfMungeToUtf8( $t ) ) )
 388                 {
 389                         $t="";
 390                 }
 391
 392                 return trim ( $t ) ;
 393         }
 394
 395         function doTableStuff ( $t )
 396         {
 397                 $t = explode ( "\n" , $t ) ;
 398                 $td = array () ; # Is currently a td tag open?
 399                         $ltd = array () ; # Was it TD or TH?
 400                         $tr = array () ; # Is currently a tr tag open?
 401                         $ltr = array () ; # tr attributes
 402                         foreach ( $t AS $k => $x )
 403                         {
 404                                 $x = trim ( $x ) ;
 405                                 $fc = substr ( $x , 0 , 1 ) ;
 406                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 407                                 {
 408                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 409                                         array_push ( $td , false ) ;
 410                                         array_push ( $ltd , "" ) ;
 411                                         array_push ( $tr , false ) ;
 412                                         array_push ( $ltr , "" ) ;
 413                                 }
 414                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 415                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 416                                 {
 417                                         $z = "</table>\n" ;
 418                                         $l = array_pop ( $ltd ) ;
 419                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 420                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 421                                         array_pop ( $ltr ) ;
 422                                         $t[$k] = $z ;
 423                                 }
 424                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 425                                                 {
 426                                                 $z = trim ( substr ( $x , 2 ) ) ;
 427                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 428                                                 }*/
 429                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 430                                 {
 431                                         $x = substr ( $x , 1 ) ;
 432                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 433                                         $z = "" ;
 434                                         $l = array_pop ( $ltd ) ;
 435                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 436                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 437                                         array_pop ( $ltr ) ;
 438                                         $t[$k] = $z ;
 439                                         array_push ( $tr , false ) ;
 440                                         array_push ( $td , false ) ;
 441                                         array_push ( $ltd , "" ) ;
 442                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 443                                 }
 444                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 445                                 {
 446                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 447                                         {
 448                                                 $fc = "+" ;
 449                                                 $x = substr ( $x , 1 ) ;
 450                                         }
 451                                         $after = substr ( $x , 1 ) ;
 452                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 453                                         $after = explode ( "||" , $after ) ;
 454                                         $t[$k] = "" ;
 455                                         foreach ( $after AS $theline )
 456                                         {
 457                                                 $z = "" ;
 458                                                 if ( $fc != "+" )
 459                                                 {
 460                                                         $tra = array_pop ( $ltr ) ;
 461                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 462                                                         array_push ( $tr , true ) ;
 463                                                         array_push ( $ltr , "" ) ;
 464                                                 }
 465
 466                                                 $l = array_pop ( $ltd ) ;
 467                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 468                                                 if ( $fc == "|" ) $l = "td" ;
 469                                                 else if ( $fc == "!" ) $l = "th" ;
 470                                                 else if ( $fc == "+" ) $l = "caption" ;
 471                                                 else $l = "" ;
 472                                                 array_push ( $ltd , $l ) ;
 473                                                 $y = explode ( "|" , $theline , 2 ) ;
 474                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 475                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 476                                                 $t[$k] .= $y ;
 477                                                 array_push ( $td , true ) ;
 478                                         }
 479                                 }
 480                         }
 481
 482                 # Closing open td, tr && table
 483                 while ( count ( $td ) > 0 )
 484                 {
 485                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 486                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 487                         $t[] = "</table>" ;
 488                 }
 489
 490                 $t = implode ( "\n" , $t ) ;
 491                 #               $t = $this->removeHTMLtags( $t );
 492                 return $t ;
 493         }
 494
 495         function internalParse( $text, $linestart, $args = array() )
 496         {
 497                 $fname = "Parser::internalParse";
 498                 wfProfileIn( $fname );
 499
 500                 $text = $this->removeHTMLtags( $text );
 501                 $text = $this->replaceVariables( $text, $args );
 502
 503                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 504
 505                 $text = $this->doHeadings( $text );
 506                 if($this->mOptions->getUseDynamicDates()) {
 507                         global $wgDateFormatter;
 508                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 509                 }
 510                 $text = $this->replaceExternalLinks( $text );
 511                 $text = $this->doTokenizedParser ( $text );
 512                 $text = $this->doTableStuff ( $text ) ;
 513                 $text = $this->formatHeadings( $text );
 514                 $sk =& $this->mOptions->getSkin();
 515                 $text = $sk->transformContent( $text );
 516
 517                 if ( !isset ( $this->categoryMagicDone ) ) {
 518                    $text .= $this->categoryMagic () ;
 519                    $this->categoryMagicDone = true ;
 520                    }
 521
 522                 wfProfileOut( $fname );
 523                 return $text;
 524         }
 525
 526
 527         /* private */ function doHeadings( $text )
 528         {
 529                 for ( $i = 6; $i >= 1; --$i ) {
 530                         $h = substr( "======", 0, $i );
 531                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 532                           "<h{$i}>\\1</h{$i}>\\2", $text );
 533                 }
 534                 return $text;
 535         }
 536
 537         # Note: we have to do external links before the internal ones,
 538         # and otherwise take great care in the order of things here, so
 539         # that we don't end up interpreting some URLs twice.
 540
 541         /* private */ function replaceExternalLinks( $text )
 542         {
 543                 $fname = "Parser::replaceExternalLinks";
 544                 wfProfileIn( $fname );
 545                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 546                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 547                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 548                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 549                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 550                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 551                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 552                 wfProfileOut( $fname );
 553                 return $text;
 554         }
 555
 556         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 557         {
 558                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 559                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 560
 561                 # this is  the list of separators that should be ignored if they
 562                 # are the last character of an URL but that should be included
 563                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 564                 # in this case, the last comma should not become part of the URL,
 565                 # but in "www.foo.com/123,2342,32.htm" it should.
 566                 $sep = ",;\.:";
 567                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 568                 $images = "gif|png|jpg|jpeg";
 569
 570                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 571                 # they are interpreted as part of the string (used to tell PHP
 572                 # that the content of the string should be inserted there).
 573                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 574                   "((?i){$images})([^{$uc}]|$)/";
 575
 576                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 577                 $sk =& $this->mOptions->getSkin();
 578
 579                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 580                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 581                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 582                 }
 583                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 584                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 585                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 586                   "</a>\\5", $s );
 587                 $s = str_replace( $unique, $protocol, $s );
 588
 589                 $a = explode( "[{$protocol}:", " " . $s );
 590                 $s = array_shift( $a );
 591                 $s = substr( $s, 1 );
 592
 593                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 594                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 595
 596                 foreach ( $a as $line ) {
 597                         if ( preg_match( $e1, $line, $m ) ) {
 598                                 $link = "{$protocol}:{$m[1]}";
 599                                 $trail = $m[2];
 600                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 601                                 else { $text = wfEscapeHTML( $link ); }
 602                         } else if ( preg_match( $e2, $line, $m ) ) {
 603                                 $link = "{$protocol}:{$m[1]}";
 604                                 $text = $m[2];
 605                                 $trail = $m[3];
 606                         } else {
 607                                 $s .= "[{$protocol}:" . $line;
 608                                 continue;
 609                         }
 610                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 611                                 $paren = "";
 612                         } else {
 613                                 # Expand the URL for printable version
 614                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 615                         }
 616                         $la = $sk->getExternalLinkAttributes( $link, $text );
 617                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 618
 619                 }
 620                 return $s;
 621         }
 622
 623         /* private */ function handle3Quotes( &$state, $token )
 624         {
 625                 if ( $state["strong"] !== false ) {
 626                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 627                         {
 628                                 # ''' lala ''lala '''
 629                                 $s = "</em></strong><em>";
 630                         } else {
 631                                 $s = "</strong>";
 632                         }
 633                         $state["strong"] = FALSE;
 634                 } else {
 635                         $s = "<strong>";
 636                         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
 637                 }
 638                 return $s;
 639         }
 640
 641         /* private */ function handle2Quotes( &$state, $token )
 642         {
 643                 if ( $state["em"] !== false ) {
 644                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 645                         {
 646                                 # ''lala'''lala'' ....'''
 647                                 $s = "</strong></em><strong>";
 648                         } else {
 649                                 $s = "</em>";
 650                         }
 651                         $state["em"] = FALSE;
 652                 } else {
 653                         $s = "<em>";
 654                         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 655
 656                 }
 657                 return $s;
 658         }
 659
 660         /* private */ function handle5Quotes( &$state, $token )
 661         {
 662                 $s = "";
 663                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 664                         if ( $state["em"] < $state["strong"] ) {
 665                                 $s .= "</strong></em>";
 666                         } else {
 667                                 $s .= "</em></strong>";
 668                         }
 669                         $state["strong"] = $state["em"] = FALSE;
 670                 } elseif ( $state["em"] !== false ) {
 671                         $s .= "</em><strong>";
 672                         $state["em"] = FALSE;
 673                         $state["strong"] = $token["pos"];
 674                 } elseif ( $state["strong"] !== false ) {
 675                         $s .= "</strong><em>";
 676                         $state["strong"] = FALSE;
 677                         $state["em"] = $token["pos"];
 678                 } else { # not $em and not $strong
 679                         $s .= "<strong><em>";
 680                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 681                 }
 682                 return $s;
 683         }
 684
 685         /* private */ function doTokenizedParser( $str )
 686         {
 687                 global $wgLang; # for language specific parser hook
 688                 global $wgUploadDirectory, $wgUseTimeline;
 689
 690                 $tokenizer=Tokenizer::newFromString( $str );
 691                 $tokenStack = array();
 692
 693                 $s="";
 694                 $state["em"]      = FALSE;
 695                 $state["strong"]  = FALSE;
 696                 $tagIsOpen = FALSE;
 697                 $threeopen = false;
 698
 699                 # The tokenizer splits the text into tokens and returns them one by one.
 700                 # Every call to the tokenizer returns a new token.
 701                 while ( $token = $tokenizer->nextToken() )
 702                 {
 703                         switch ( $token["type"] )
 704                         {
 705                                 case "text":
 706                                         # simple text with no further markup
 707                                         $txt = $token["text"];
 708                                         break;
 709                                 case "blank":
 710                                         # Text that contains blanks that have to be converted to
 711                                         # non-breakable spaces for French.
 712                                         # U+202F NARROW NO-BREAK SPACE might be a better choice, but
 713                                         # browser support for Unicode spacing is poor.
 714                                         $txt = str_replace( " ", "&nbsp;", $token["text"] );
 715                                         break;
 716                                 case "[[[":
 717                                         # remember the tag opened with 3 [
 718                                         $threeopen = true;
 719                                 case "[[":
 720                                         # link opening tag.
 721                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 722                                         $tagIsOpen = TRUE;
 723                                         array_push( $tokenStack, $token );
 724                                         $txt="";
 725                                         break;
 726
 727                                 case "]]]":
 728                                 case "]]":
 729                                         # link close tag.
 730                                         # get text from stack, glue it together, and call the code to handle a
 731                                         # link
 732
 733                                         if ( count( $tokenStack ) == 0 )
 734                                         {
 735                                                 # stack empty. Found a ]] without an opening [[
 736                                                 $txt = "]]";
 737                                         } else {
 738                                                 $linkText = "";
 739                                                 $lastToken = array_pop( $tokenStack );
 740                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 741                                                 {
 742                                                         if( !empty( $lastToken["text"] ) ) {
 743                                                                 $linkText = $lastToken["text"] . $linkText;
 744                                                         }
 745                                                         $lastToken = array_pop( $tokenStack );
 746                                                 }
 747
 748                                                 $txt = $linkText ."]]";
 749
 750                                                 if( isset( $lastToken["text"] ) ) {
 751                                                         $prefix = $lastToken["text"];
 752                                                 } else {
 753                                                         $prefix = "";
 754                                                 }
 755                                                 $nextToken = $tokenizer->previewToken();
 756                                                 if ( $nextToken["type"] == "text" )
 757                                                 {
 758                                                         # Preview just looks at it. Now we have to fetch it.
 759                                                         $nextToken = $tokenizer->nextToken();
 760                                                         $txt .= $nextToken["text"];
 761                                                 }
 762                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 763
 764                                                 # did the tag start with 3 [ ?
 765                                                 if($threeopen) {
 766                                                         # show the first as text
 767                                                         $txt = "[".$txt;
 768                                                         $threeopen=false;
 769                                                 }
 770
 771                                         }
 772                                         $tagIsOpen = (count( $tokenStack ) != 0);
 773                                         break;
 774                                 case "----":
 775                                         $txt = "\n<hr />\n";
 776                                         break;
 777                                 case "'''":
 778                                         # This and the three next ones handle quotes
 779                                         $txt = $this->handle3Quotes( $state, $token );
 780                                         break;
 781                                 case "''":
 782                                         $txt = $this->handle2Quotes( $state, $token );
 783                                         break;
 784                                 case "'''''":
 785                                         $txt = $this->handle5Quotes( $state, $token );
 786                                         break;
 787                                 case "":
 788                                         # empty token
 789                                         $txt="";
 790                                         break;
 791                                 case "RFC ":
 792                                         if ( $tagIsOpen ) {
 793                                                 $txt = "RFC ";
 794                                         } else {
 795                                                 $txt = $this->doMagicRFC( $tokenizer );
 796                                         }
 797                                         break;
 798                                 case "ISBN ":
 799                                         if ( $tagIsOpen ) {
 800                                                 $txt = "ISBN ";
 801                                         } else {
 802                                                 $txt = $this->doMagicISBN( $tokenizer );
 803                                         }
 804                                         break;
 805                                 case "<timeline>":
 806                                         if ( $wgUseTimeline &&
 807                                              "" != ( $timelinesrc = $tokenizer->readAllUntil("&lt;/timeline&gt;") ) )
 808                                         {
 809                                                 $txt = renderTimeline( $timelinesrc );
 810                                         } else {
 811                                                 $txt=$token["text"];
 812                                         }
 813                                         break;
 814                                 default:
 815                                         # Call language specific Hook.
 816                                         $txt = $wgLang->processToken( $token, $tokenStack );
 817                                         if ( NULL == $txt ) {
 818                                                 # An unkown token. Highlight.
 819                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 820                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 821                                         }
 822                                         break;
 823                         }
 824                         # If we're parsing the interior of a link, don't append the interior to $s,
 825                         # but push it to the stack so it can be processed when a ]] token is found.
 826                         if ( $tagIsOpen  && $txt != "" ) {
 827                                 $token["type"] = "text";
 828                                 $token["text"] = $txt;
 829                                 array_push( $tokenStack, $token );
 830                         } else {
 831                                 $s .= $txt;
 832                         }
 833                 } #end while
 834                 if ( count( $tokenStack ) != 0 )
 835                 {
 836                         # still objects on stack. opened [[ tag without closing ]] tag.
 837                         $txt = "";
 838                         while ( $lastToken = array_pop( $tokenStack ) )
 839                         {
 840                                 if ( $lastToken["type"] == "text" )
 841                                 {
 842                                         $txt = $lastToken["text"] . $txt;
 843                                 } else {
 844                                         $txt = $lastToken["type"] . $txt;
 845                                 }
 846                         }
 847                         $s .= $txt;
 848                 }
 849                 return $s;
 850         }
 851
 852         /* private */ function handleInternalLink( $line, $prefix )
 853         {
 854                 global $wgLang, $wgLinkCache;
 855                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 856                 static $fname = "Parser::handleInternalLink" ;
 857                 wfProfileIn( $fname );
 858
 859                 wfProfileIn( "$fname-setup" );
 860                 static $tc = FALSE;
 861                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 862                 $sk =& $this->mOptions->getSkin();
 863
 864                 # Match a link having the form [[namespace:link|alternate]]trail
 865                 static $e1 = FALSE;
 866                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 867                 # Match the end of a line for a word that's not followed by whitespace,
 868                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 869                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 870                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 871                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 872
 873
 874                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 875                 static $image = FALSE;
 876                 static $special = FALSE;
 877                 static $media = FALSE;
 878                 static $category = FALSE;
 879                 if ( !$image ) { $image = Namespace::getImage(); }
 880                 if ( !$special ) { $special = Namespace::getSpecial(); }
 881                 if ( !$media ) { $media = Namespace::getMedia(); }
 882                 if ( !$category ) { $category = Namespace::getCategory(); }
 883
 884                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 885
 886                 wfProfileOut( "$fname-setup" );
 887                 $s = "";
 888
 889                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 890                         $text = $m[2];
 891                         $trail = $m[3];
 892                 } else { # Invalid form; output directly
 893                         $s .= $prefix . "[[" . $line ;
 894                         return $s;
 895                 }
 896
 897                 /* Valid link forms:
 898                 Foobar -- normal
 899                 :Foobar -- override special treatment of prefix (images, language links)
 900                 /Foobar -- convert to CurrentPage/Foobar
 901                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 902                 */
 903                 $c = substr($m[1],0,1);
 904                 $noforce = ($c != ":");
 905                 if( $c == "/" ) { # subpage
 906                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 907                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 908                                 $noslash=$m[1];
 909                         } else {
 910                                 $noslash=substr($m[1],1);
 911                         }
 912                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 913                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 914                                 if( "" == $text ) {
 915                                         $text= $m[1];
 916                                 } # this might be changed for ugliness reasons
 917                         } else {
 918                                 $link = $noslash; # no subpage allowed, use standard link
 919                         }
 920                 } elseif( $noforce ) { # no subpage
 921                         $link = $m[1];
 922                 } else {
 923                         $link = substr( $m[1], 1 );
 924                 }
 925                 if( "" == $text )
 926                         $text = $link;
 927
 928                 $nt = Title::newFromText( $link );
 929                 if( !$nt ) {
 930                         $s .= $prefix . "[[" . $line;
 931                         return $s;
 932                 }
 933                 $ns = $nt->getNamespace();
 934                 $iw = $nt->getInterWiki();
 935                 if( $noforce ) {
 936                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 937                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 938                                 $s .= $prefix . $trail ;
 939                                 return (trim($s) == '')? '': $s;
 940                         }
 941                         if( $ns == $image ) {
 942                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 943                                 $wgLinkCache->addImageLinkObj( $nt );
 944                                 return $s;
 945                         }
 946                         if ( $ns == $category ) {
 947                                 $t = $nt->getText() ;
 948                                 $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 949                                 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 950                                 $this->mOutput->mCategoryLinks[] = $t ;
 951                                 $s .= $prefix . $trail ;
 952                                 return $s ;
 953                         }
 954                 }
 955                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 956                     ( strpos( $link, "#" ) == FALSE ) ) {
 957                         # Self-links are handled specially; generally de-link and change to bold.
 958                         $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 959                         return $s;
 960                 }
 961
 962                 if( $ns == $media ) {
 963                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 964                         $wgLinkCache->addImageLinkObj( $nt );
 965                         return $s;
 966                 } elseif( $ns == $special ) {
 967                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 968                         return $s;
 969                 }
 970                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 971
 972                 wfProfileOut( $fname );
 973                 return $s;
 974         }
 975
 976         # Some functions here used by doBlockLevels()
 977         #
 978         /* private */ function closeParagraph()
 979         {
 980                 $result = "";
 981                 if ( '' != $this->mLastSection ) {
 982                         $result = "</" . $this->mLastSection  . ">\n";
 983                 }
 984                 $this->mInPre = false;
 985                 $this->mLastSection = "";
 986                 return $result;
 987         }
        # getCommon() returns the length of the longest common prefix
        # of both arguments, i.e. the number of leading characters they share.
 990         #
 991         /* private */ function getCommon( $st1, $st2 )
 992         {
 993                 $fl = strlen( $st1 );
 994                 $shorter = strlen( $st2 );
 995                 if ( $fl < $shorter ) { $shorter = $fl; }
 996
 997                 for ( $i = 0; $i < $shorter; ++$i ) {
 998                         if ( $st1{$i} != $st2{$i} ) { break; }
 999                 }
1000                 return $i;
1001         }
1002         # These next three functions open, continue, and close the list
1003         # element appropriate to the prefix character passed into them.
1004         #
1005         /* private */ function openList( $char )
1006     {
1007                 $result = $this->closeParagraph();
1008
1009                 if ( "*" == $char ) { $result .= "<ul><li>"; }
1010                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
1011                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
1012                 else if ( ";" == $char ) {
1013                         $result .= "<dl><dt>";
1014                         $this->mDTopen = true;
1015                 }
1016                 else { $result = "<!-- ERR 1 -->"; }
1017
1018                 return $result;
1019         }
1020
1021         /* private */ function nextItem( $char )
1022         {
1023                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
1024                 else if ( ":" == $char || ";" == $char ) {
1025                         $close = "</dd>";
1026                         if ( $this->mDTopen ) { $close = "</dt>"; }
1027                         if ( ";" == $char ) {
1028                                 $this->mDTopen = true;
1029                                 return $close . "<dt>";
1030                         } else {
1031                                 $this->mDTopen = false;
1032                                 return $close . "<dd>";
1033                         }
1034                 }
1035                 return "<!-- ERR 2 -->";
1036         }
1037
1038         /* private */function closeList( $char )
1039         {
1040                 if ( "*" == $char ) { $text = "</li></ul>"; }
1041                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1042                 else if ( ":" == $char ) {
1043                         if ( $this->mDTopen ) {
1044                                 $this->mDTopen = false;
1045                                 $text = "</dt></dl>";
1046                         } else {
1047                                 $text = "</dd></dl>";
1048                         }
1049                 }
1050                 else {  return "<!-- ERR 3 -->"; }
1051                 return $text."\n";
1052         }
1053
1054         /* private */ function doBlockLevels( $text, $linestart ) {
1055                 $fname = "Parser::doBlockLevels";
1056                 wfProfileIn( $fname );
1057
1058                 # Parsing through the text line by line.  The main thing
1059                 # happening here is handling of block-level elements p, pre,
1060                 # and making lists from lines starting with * # : etc.
1061                 #
1062                 $textLines = explode( "\n", $text );
1063
1064                 $lastPrefix = $output = $lastLine = '';
1065                 $this->mDTopen = $inBlockElem = false;
1066                 $prefixLength = 0;
1067                 $paragraphStack = false;
1068
1069                 if ( !$linestart ) {
1070                         $output .= array_shift( $textLines );
1071                 }
1072                 foreach ( $textLines as $oLine ) {
1073                         $lastPrefixLength = strlen( $lastPrefix );
1074                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1075                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1076                         if (!$this->mInPre) {
1077                                 $this->mInPre = !empty($preOpenMatch);
1078                         }
1079                         if ( !$this->mInPre ) {
1080                                 # Multiple prefixes may abut each other for nested lists.
1081                                 $prefixLength = strspn( $oLine, "*#:;" );
1082                                 $pref = substr( $oLine, 0, $prefixLength );
1083
1084                                 # eh?
1085                                 $pref2 = str_replace( ";", ":", $pref );
1086                                 $t = substr( $oLine, $prefixLength );
1087                         } else {
1088                                 # Don't interpret any other prefixes in preformatted text
1089                                 $prefixLength = 0;
1090                                 $pref = $pref2 = '';
1091                                 $t = $oLine;
1092                         }
1093
1094                         # List generation
1095                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1096                                 # Same as the last item, so no need to deal with nesting or opening stuff
1097                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1098                                 $paragraphStack = false;
1099
1100                                 if ( ";" == substr( $pref, -1 ) ) {
1101                                         # The one nasty exception: definition lists work like this:
1102                                         # ; title : definition text
1103                                         # So we check for : in the remainder text to split up the
1104                                         # title and definition, without b0rking links.
1105                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1106                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1107                                                 $term = $match[1];
1108                                                 $output .= $term . $this->nextItem( ":" );
1109                                                 $t = $match[2];
1110                                         }
1111                                 }
1112                         } elseif( $prefixLength || $lastPrefixLength ) {
1113                                 # Either open or close a level...
1114                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1115                                 $paragraphStack = false;
1116
1117                                 while( $commonPrefixLength < $lastPrefixLength ) {
1118                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1119                                         --$lastPrefixLength;
1120                                 }
1121                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1122                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1123                                 }
1124                                 while ( $prefixLength > $commonPrefixLength ) {
1125                                         $char = substr( $pref, $commonPrefixLength, 1 );
1126                                         $output .= $this->openList( $char );
1127
1128                                         if ( ";" == $char ) {
1129                                                 # FIXME: This is dupe of code above
1130                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1131                                                         $term = $match[1];
1132                                                         $output .= $term . $this->nextItem( ":" );
1133                                                         $t = $match[2];
1134                                                 }
1135                                         }
1136                                         ++$commonPrefixLength;
1137                                 }
1138                                 $lastPrefix = $pref2;
1139                         }
1140                         if( 0 == $prefixLength ) {
1141                                 # No prefix (not in list)--go to paragraph mode
1142                                 $uniq_prefix = UNIQ_PREFIX;
1143                                 // XXX: use a stack for nestable elements like span, table and div
1144                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1145                                 $closematch = preg_match(
1146                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1147                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1148                                 if ( $openmatch or $closematch ) {
1149                                         $paragraphStack = false;
1150                                         $output .= $this->closeParagraph();
1151                                         if($preOpenMatch and !$preCloseMatch) {
1152                                                 $this->mInPre = true;
1153                                         }
1154                                         if ( $closematch  ) {
1155                                                 $inBlockElem = false;
1156                                         } else {
1157                                                 $inBlockElem = true;
1158                                         }
1159                                 } else if ( !$inBlockElem ) {
1160                                         if ( " " == $t{0} ) {
1161                                                 // pre
1162                                                 if ($this->mLastSection != 'pre') {
1163                                                         $paragraphStack = false;
1164                                                         $output .= $this->closeParagraph().'<pre>';
1165                                                         $this->mLastSection = 'pre';
1166                                                 }
1167                                         } else {
1168                                                 // paragraph
1169                                                 if ( '' == trim($t) ) {
1170                                                         if ( $paragraphStack ) {
1171                                                                 $output .= $paragraphStack.'<br/>';
1172                                                                 $paragraphStack = false;
1173                                                                 $this->mLastSection = 'p';
1174                                                         } else {
1175                                                                 if ($this->mLastSection != 'p' ) {
1176                                                                         $output .= $this->closeParagraph();
1177                                                                         $this->mLastSection = '';
1178                                                                         $paragraphStack = "<p>";
1179                                                                 } else {
1180                                                                         $paragraphStack = '</p><p>';
1181                                                                 }
1182                                                         }
1183                                                 } else {
1184                                                         if ( $paragraphStack ) {
1185                                                                 $output .= $paragraphStack;
1186                                                                 $paragraphStack = false;
1187                                                                 $this->mLastSection = 'p';
1188                                                         } else if ($this->mLastSection != 'p') {
1189                                                                 $output .= $this->closeParagraph().'<p>';
1190                                                                 $this->mLastSection = 'p';
1191                                                         }
1192                                                 }
1193                                         }
1194                                 }
1195                         }
1196                         if ($paragraphStack === false) {
1197                                 $output .= $t."\n";
1198                         }
1199                 }
1200                 while ( $prefixLength ) {
1201                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1202                         --$prefixLength;
1203                 }
1204                 if ( "" != $this->mLastSection ) {
1205                         $output .= "</" . $this->mLastSection . ">";
1206                         $this->mLastSection = "";
1207                 }
1208
1209                 wfProfileOut( $fname );
1210                 return $output;
1211         }
1212
# Compute the current value of one built-in variable.
#
# $index is matched against the MAG_* identifiers handled below and the
# corresponding value is computed on demand: date parts come from date(),
# page-related values from $this->mTitle, site-related values from globals.
# Any identifier not handled here yields NULL.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( "m" );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date("n") );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date("n") );
			break;
		case MAG_CURRENTDAY:
			$value = date("j");
			break;
		case MAG_CURRENTDAYNAME:
			# date("w") counts from 0; the language object is handed the
			# number shifted up by one (presumably it expects 1..7)
			$value = $wgLang->getWeekdayName( date("w")+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( "Y" );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_NAMESPACE:
			# Localised namespace name rather than the canonical one
			# (Namespace::getCanonicalName) -- patch by Dori
			$value = $wgLang->getNsText( $this->mTitle->getNamespace() );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
1246
# (Re)build $this->mVariables from scratch.
#
# For every identifier in $wgVariableIDs the matching MagicWord object is
# asked to add itself to the table together with the value computed by
# getVariableValue(). The keys that end up in the table are decided by
# MagicWord::addToArray().
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$currentValue = $this->getVariableValue( $variableId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
1256
# Expand every {{...}} construct found in $text.
#
# $text  wiki text to scan
# $args  arguments supplied by the including page; they are pushed on
#        $this->mArgStack for the duration of the call so that
#        braceSubstitution() can look them up
#
# Returns the text with each match replaced by whatever the
# "wfBraceSubstitution" callback returned for it (presumably that callback
# forwards to $wgCurParser->braceSubstitution() -- it is defined elsewhere).
/* private */ function replaceVariables( $text, $args = array() )
{
	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	# The variable table is built lazily on first use
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	# Optional leading newline, a name made of legal title characters,
	# then an optional pipe-introduced argument list
	$titleChars = Title::legalChars();
	$regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;
	$text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

	array_pop( $this->mArgStack );

	# Close the profiling section opened above; it used to be left open
	wfProfileOut( $fname );
	return $text;
}
1282
# Expand a single {{...}} construct matched by the regex in replaceVariables().
#
# $matches[0]  the whole match
# $matches[1]  an optional newline that preceded the braces
# $matches[2]  the text before the first pipe (title characters only)
# $matches[3]  the rest: empty, or a string starting with "|"
#
# The possibilities are tried in this order and the first one that applies
# wins: SUBST handling, MSGNW/MSG/INT prefixes, NS:, LOCALURL:/LOCALURLE:,
# built-in variables, arguments passed in by the including page, and finally
# a page loaded from the database (template namespace by default).
#
# For HTML output the replacement is parsed recursively and parked in the
# strip state; with MSGNW it is escaped instead.
#
# Returns the replacement text, or $matches[0] unchanged if nothing applied.
function braceSubstitution( $matches )
{
	global $wgLinkCache, $wgLang;
	$found = false;
	$nowiki = false;
	$title = NULL;

	# $newline is an optional newline character before the braces
	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |
	if ( $matches[3] !== "" ) {
		$args = explode( "|", substr( $matches[3], 1 ) );
	} else {
		$args = array();
	}
	$argc = count( $args );

	# SUBST
	$mwSubst =& MagicWord::get( MAG_SUBST );
	if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
		if ( $this->mOutputType != OT_WIKI ) {
			# Invalid SUBST not replaced at PST time.
			# Really return without further processing: falling through
			# would hand the very same markup to the recursive parse
			# below when producing HTML.
			return $matches[0];
		}
	} elseif ( $this->mOutputType == OT_WIKI ) {
		# SUBST not found in PST pass, do nothing
		return $matches[0];
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( "int:$part1" ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			if ( intval( $part1 ) ) {
				# Non-zero number: use it as the namespace index
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				# Otherwise try it as a canonical namespace name
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				# The first argument, if any, is passed on to the URL builder
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}

	# Arguments input from the caller
	# (end() yields false on an empty stack, hence the is_array() guard)
	$inputArgs = end( $this->mArgStack );
	if ( !$found && is_array( $inputArgs ) && array_key_exists( $part1, $inputArgs ) ) {
		$text = $inputArgs[$part1];
		$found = true;
	}

	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found ) {
		# Clean up argument array: "name=value" becomes a named entry,
		# anything else gets the next positional number, counting from 1
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, "=" );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				# trim() always yields a string, so no further checks
				# are needed before storing the pair
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				$assocArgs[$name] = $value;
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Run full parser on the included text
		$text = $this->strip( $text, $this->mStripState );
		$text = $this->internalParse( $text, (bool)$newline, $assocArgs );

		# Add the result to the strip state for re-inclusion after
		# the rest of the processing
		$text = $this->insertStripItem( $text, $this->mStripState );

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	if ( !$found ) {
		return $matches[0];
	} else {
		return $text;
	}
}
1474
# Record one more inclusion of the entity identified by $dbk and report
# whether it is still allowed: true for the first MAX_INCLUDE_REPEAT
# inclusions of a given key, false afterwards. The counter keeps growing
# even once the limit has been passed.
function incrementIncludeCount( $dbk )
{
	$seenSoFar = 0;
	if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$seenSoFar = $this->mIncludeCount[$dbk];
	}
	$this->mIncludeCount[$dbk] = $seenSoFar + 1;

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1487
1488
# Cleans up HTML, removes dangerous tags and attributes.
#
# The text is cut at every "<". A piece that starts with a whitelisted,
# properly nested tag is emitted as a tag again, with its attributes run
# through fixTagAttributes(); every other piece is emitted with its "<"
# turned into "&lt;". Any ">" in the text following a tag becomes "&gt;".
# HTML comments are removed first, and tags still open at the end of the
# text are closed.
/* private */ function removeHTMLtags( $text )
{
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );

	# Can only appear inside table
	$tabletags = array(
		"td", "th", "tr"
	);
	# Tags that need no closing counterpart; table parts count as such too
	$htmlsingle = array_merge( $tabletags, array(
		"br", "hr", "li", "dt", "dd"
	) );
	# Tags that must be closed
	$htmlpairs = array(
		"b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
		"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
		"strike", "strong", "tt", "var", "div", "center",
		"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
		"ruby", "rt" , "rb" , "rp", "p"
	);
	# Tags that can be nested--??
	$htmlnest = array(
		"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
		"dl", "font", "big", "small", "sub", "sup"
	);
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	$text = preg_replace( "/(\n *<!--.*--> *(?=\n)|<!--.*-->)/sU", "$2", $text );

	# Everything before the first "<" is plain text and is kept as it is
	$pieces = explode( "<", $text );
	$out = array_shift( $pieces );

	# $openTags holds the paired tags currently open. Each <table> starts
	# with a fresh list; the outer one waits on $outerTagLists meanwhile.
	$openTags = array();
	$outerTagLists = array();

	foreach ( $pieces as $piece ) {
		# A piece that is not shaped like a tag leaves $regs short, so
		# the notices from list() are silenced for these two statements
		$oldReporting = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
		preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
		  $piece, $regs );
		list( $qbar, $slash, $tag, $params, $brace, $rest ) = $regs;
		error_reporting( $oldReporting );

		$accepted = true;
		$tag = strtolower( $tag );
		if ( in_array( $tag, $htmlelements ) ) {
			if ( $slash ) {
				# Closing tag: a paired tag must match the innermost open one
				$closesProperly = true;
				if ( !in_array( $tag, $htmlsingle ) ) {
					$innermost = array_pop( $openTags );
					if ( $innermost != $tag ) {
						# Mismatch: put it back and reject the closing tag
						array_push( $openTags, $innermost );
						$closesProperly = false;
					}
				}
				if ( $closesProperly ) {
					if ( $tag == "table" ) {
						# Back to the tags that were open around the table
						$openTags = array_pop( $outerTagLists );
					}
					$newparams = "";
				} else {
					$accepted = false;
				}
			} else {
				# Opening tag
				if ( ( in_array( $tag, $tabletags ) && !in_array( "table", $openTags ) )
				  || ( in_array( $tag, $openTags ) && !in_array( $tag, $htmlnest ) ) ) {
					# A table part outside a table, or a tag nested in
					# itself that is not allowed to nest
					$accepted = false;
				} else if ( !in_array( $tag, $htmlsingle ) ) {
					# Keep track for later
					if ( $tag == "table" ) {
						array_push( $outerTagLists, $openTags );
						$openTags = array();
					}
					array_push( $openTags, $tag );
				}
				# Strip non-approved attributes from the tag
				$newparams = $this->fixTagAttributes($params);
			}

			if ( $accepted ) {
				$rest = str_replace( ">", "&gt;", $rest );
				$out .= "<$slash$tag $newparams$brace$rest";
				continue;
			}
		}

		# Not an acceptable tag: show the piece literally
		$out .= "&lt;" . str_replace( ">", "&gt;", $piece);
	}

	# Close off any remaining tags
	while ( $tag = array_pop( $openTags ) ) {
		$out .= "</$tag>\n";
		if ( $tag == "table" ) {
			$openTags = array_pop( $outerTagLists );
		}
	}

	wfProfileOut( $fname );
	return $out;
}
1581
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for users who may edit the page and
 *    have enabled the option
 * 3) Add a table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up
 * the string and re-inserts the newly formatted headlines.
 *
 * $text is the already rendered HTML of the page; the return value is that
 * HTML with the headlines rebuilt and, if requested, the TOC inserted after
 * the text that precedes the first headline.
 *
 */

/* private */ function formatHeadings( $text )
{
	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1] = level digit, $matches[2] = rest of the opening tag,
	# $matches[3] = headline content
	$numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mw =& MagicWord::get( MAG_FORCETOC );
	if ($mw->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();           # rebuilt headline HTML, by headline index
	$sublevelCount = array();  # headlines seen so far, by heading level
	$anchorUses = array();     # how often each canonized headline occurred
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = "";
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}

		# count number of headlines for each level
		if( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;

		if( $doNumberHeadings || $doShowToc ) {
			# Join the counters of all levels in use, e.g. "2.1.3"
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));

		# count how many in assoc. array so we can track dupes in anchors
		if( !isset( $anchorUses[$canonized_headline] ) ) {
			$anchorUses[$canonized_headline] = 0;
		}
		$anchorUses[$canonized_headline]++;
		$occurrence = $anchorUses[$canonized_headline];

		# Prepend the number to the heading text
		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . " " . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . " " . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section;
		# repeated headlines get "_2", "_3", ... appended
		$anchor = $canonized_headline;
		if( $occurrence > 1 ) {
			$anchor .= "_" . $occurrence;
		}
		if( $doShowToc ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}

		$sectionHtml = "";
		if( $showEditLink ) {
			$sectionHtml .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		# A double quote survives the canonizing regex above, so it has to
		# be escaped before it goes into the quoted name attribute
		$anchorAttr = str_replace( '"', '&quot;', $anchor );
		$sectionHtml .= "<a name=\"$anchorAttr\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
		$head[$headlineCount] = $sectionHtml;

		$headlineCount++;
	}

	if( $doShowToc ) {
		# Close whatever indentation is still open and wrap up the TOC
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach( $blocks as $block ) {
		# No [edit] link is emitted for the top block of text: it broke
		# block formatting, for example a bullet point in the top line
		$full .= $block;
		if( $doShowToc && !$i) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		# Block $i is followed by headline $i, if there is one
		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1776
# Build the replacement for an "ISBN" keyword.
#
# $tokenizer  token source positioned just after the keyword; tokens with
#             an empty type are consumed and skipped
#
# If the next real token is a text token it is consumed. Leading blanks are
# skipped, then the longest run of digits, dashes and capital letters is
# taken as the ISBN and turned into a link to the Booksources special page
# (dashes removed in the query string); the remainder of the token is
# appended unchanged. If no such run exists the token text is returned
# behind "ISBN ". If the next token is not text, plain "ISBN " is returned
# and the token is left in the tokenizer.
/* private */ function doMagicISBN( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# Special::BookSources link
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" )
	{
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" )
	{
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = $token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# Measure the prefixes with strspn() instead of peeling off one
	# character at a time: that never reads offset 0 of an exhausted
	# string, so it also terminates when the token ends with the ISBN.
	# The casts cover substr() returning false on older PHP versions.
	$blankLength = strspn( $x, " " );
	$blank = str_repeat( " ", $blankLength );
	$x = (string)substr( $x, $blankLength );

	$isbnLength = strspn( $x, $valid );
	$isbn = (string)substr( $x, 0, $isbnLength );
	$x = (string)substr( $x, $isbnLength );

	# $isbn cannot contain blanks, so only the dashes need removing
	$num = str_replace( "-", "", $isbn );

	if ( "" == $num ) {
		$text = "ISBN $blank$x";
	} else {
		$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
		$text = "<a href=\"" .
			$titleObj->escapeLocalUrl( "isbn={$num}" ) .
			"\" class=\"internal\">ISBN $isbn</a>";
		$text .= $x;
	}
	return $text;
}
1822         /* private */ function doMagicRFC( &$tokenizer )
1823         {
1824                 global $wgLang;
1825
1826                 # Check whether next token is a text token
1827                 # If yes, fetch it and convert the text into a
1828                 # link to an RFC source
1829                 $token = $tokenizer->previewToken();
1830                 while ( $token["type"] == "" )
1831                 {
1832                         $tokenizer->nextToken();
1833                         $token = $tokenizer->previewToken();
1834                 }
1835                 if ( $token["type"] == "text" )
1836                 {
1837                         $token = $tokenizer->nextToken();
1838                         $x = $token["text"];
1839                         $valid = "0123456789";
1840
1841                         $rfc = $blank = "" ;
1842                         while ( " " == $x{0} ) {
1843                                 $blank .= " ";
1844                                 $x = substr( $x, 1 );
1845                         }
1846                         while ( strstr( $valid, $x{0} ) != false ) {
1847                                 $rfc .= $x{0};
1848                                 $x = substr( $x, 1 );
1849                         }
1850
1851                         if ( "" == $rfc ) {
1852                                 $text .= "RFC $blank$x";
1853                         } else {
1854                                 $url = wfmsg( "rfcurl" );
1855                                 $url = str_replace( "$1", $rfc, $url);
1856                                 $sk =& $this->mOptions->getSkin();
1857                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1858                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1859                         }
1860                 } else {
1861                         $text = "RFC ";
1862                 }
1863                 return $text;
1864         }
1865
1866         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1867         {
1868                 $this->mOptions = $options;
1869                 $this->mTitle =& $title;
1870                 $this->mOutputType = OT_WIKI;
1871
1872                 if ( $clearState ) {
1873                         $this->clearState();
1874                 }
1875
1876                 $stripState = false;
1877                 $pairs = array(
1878                         "\r\n" => "\n",
1879                         );
1880                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
1881                 // now with regexes
1882                 $pairs = array(
1883                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1884                         "/<br *?>/i" => "<br/>",
1885                 );
1886                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1887                 $text = $this->strip( $text, $stripState, false );
1888                 $text = $this->pstPass2( $text, $user );
1889                 $text = $this->unstrip( $text, $stripState );
1890                 return $text;
1891         }
1892
1893         /* private */ function pstPass2( $text, &$user )
1894         {
1895                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1896
1897                 # Variable replacement
1898                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1899                 $text = $this->replaceVariables( $text );
1900
1901                 # Signatures
1902                 #
1903                 $n = $user->getName();
1904                 $k = $user->getOption( "nickname" );
1905                 if ( "" == $k ) { $k = $n; }
1906                 if(isset($wgLocaltimezone)) {
1907                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1908                 }
1909                 /* Note: this is an ugly timezone hack for the European wikis */
1910                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1911                   " (" . date( "T" ) . ")";
1912                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1913
1914                 $text = preg_replace( "/~~~~~/", $d, $text );
1915                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1916                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1917                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1918                   Namespace::getUser() ) . ":$n|$k]]", $text );
1919
1920                 # Context links: [[|name]] and [[name (context)|]]
1921                 #
1922                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1923                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1924                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1925                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1926
1927                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1928                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1929                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1930                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1931                                                                                                                 # [[ns:page (cont)|]]
1932                 $context = "";
1933                 $t = $this->mTitle->getText();
1934                 if ( preg_match( $conpat, $t, $m ) ) {
1935                         $context = $m[2];
1936                 }
1937                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1938                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1939                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1940
1941                 if ( "" == $context ) {
1942                         $text = preg_replace( $p2, "[[\\1]]", $text );
1943                 } else {
1944                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1945                 }
1946
1947                 /*
1948                 $mw =& MagicWord::get( MAG_SUBST );
1949                 $wgCurParser = $this->fork();
1950                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1951                 $this->merge( $wgCurParser );
1952                 */
1953
1954                 # Trim trailing whitespace
1955                 # MAG_END (__END__) tag allows for trailing
1956                 # whitespace to be deliberately included
1957                 $text = rtrim( $text );
1958                 $mw =& MagicWord::get( MAG_END );
1959                 $mw->matchAndRemove( $text );
1960
1961                 return $text;
1962         }
1963
1964         # Set up some variables which are usually set up in parse()
1965         # so that an external function can call some class members with confidence
1966         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1967         {
1968                 $this->mTitle =& $title;
1969                 $this->mOptions = $options;
1970                 $this->mOutputType = $outputType;
1971                 if ( $clearState ) {
1972                         $this->clearState();
1973                 }
1974         }
1975
1976         function transformMsg( $text, $options ) {
1977                 global $wgTitle;
1978                 static $executing = false;
1979
1980                 # Guard against infinite recursion
1981                 if ( $executing ) {
1982                         return $text;
1983                 }
1984                 $executing = true;
1985
1986                 $this->mTitle = $wgTitle;
1987                 $this->mOptions = $options;
1988                 $this->mOutputType = OT_MSG;
1989                 $this->clearState();
1990                 $text = $this->replaceVariables( $text );
1991
1992                 $executing = false;
1993                 return $text;
1994         }
1995 }
1996
# Holds the result of a parse: the output text, the language links and
# category links that go with it, and a flag recording whether the text
# contains "old magic".
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor; every argument is optional and is stored as given.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each hands the member and the new value to wfSetVar() and
        # returns whatever wfSetVar() returns.
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Folds another ParserOutput into this one: the other object's language
        # links and category links are appended to this object's lists, and the
        # "contains old magic" flags are OR-ed together. The text is left alone.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links, not from the
                # language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2026
# Bundle of settings that control a parse. Each setting has a getter, and a
# setter that hands the member and the new value to wfSetVar() (wfSetRef()
# for the skin) and returns that function's result.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # --- texvc / <math> ---
        function getUseTeX()
        {
                return $this->mUseTeX;
        }
        function setUseTeX( $x )
        {
                return wfSetVar( $this->mUseTeX, $x );
        }

        # --- category magic ---
        function getUseCategoryMagic()
        {
                return $this->mUseCategoryMagic;
        }
        function setUseCategoryMagic( $x )
        {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        # --- dynamic dates ---
        function getUseDynamicDates()
        {
                return $this->mUseDynamicDates;
        }
        function setUseDynamicDates( $x )
        {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        # --- interwiki magic ---
        function getInterwikiMagic()
        {
                return $this->mInterwikiMagic;
        }
        function setInterwikiMagic( $x )
        {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        # --- external images ---
        function getAllowExternalImages()
        {
                return $this->mAllowExternalImages;
        }
        function setAllowExternalImages( $x )
        {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        # --- skin (set through wfSetRef) ---
        function getSkin()
        {
                return $this->mSkin;
        }
        function setSkin( $x )
        {
                return wfSetRef( $this->mSkin, $x );
        }

        # --- date format ---
        function getDateFormat()
        {
                return $this->mDateFormat;
        }
        function setDateFormat( $x )
        {
                return wfSetVar( $this->mDateFormat, $x );
        }

        # --- section edit links ---
        function getEditSection()
        {
                return $this->mEditSection;
        }
        function setEditSection( $x )
        {
                return wfSetVar( $this->mEditSection, $x );
        }

        # --- section editing on right click ---
        function getEditSectionOnRightClick()
        {
                return $this->mEditSectionOnRightClick;
        }
        function setEditSectionOnRightClick( $x )
        {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        # --- heading numbering ---
        function getNumberHeadings()
        {
                return $this->mNumberHeadings;
        }
        function setNumberHeadings( $x )
        {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        # --- table of contents ---
        function getShowToc()
        {
                return $this->mShowToc;
        }
        function setShowToc( $x )
        {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Creates a ParserOptions object and fills it in with
        # initialiseFromUser( $user ).
        /* static */ function newFromUser( &$user )
        {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fills in every setting: the skin and the per-user preferences come
        # from $userInput, the rest from the site-wide globals. When $userInput
        # is empty, a new User object marked as loaded is used in its place.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Taken from the user
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );

                # Taken from the site configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
        }


}
2099
# Regex callback used in Parser::replaceVariables.
# Passes the match array on to braceSubstitution() of the parser held in the
# global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        global $wgCurParser;

        $replacement = $wgCurParser->braceSubstitution( $matches );
        return $replacement;
}
2106
2107 ?>