includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         include_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  40 define( "MAX_INCLUDE_REPEAT", 5 );
  41
  42 # Allowed values for $mOutputType
  43 define( "OT_HTML", 1 );
  44 define( "OT_WIKI", 2 );
  45 define( "OT_MSG", 3 );
  46
  47 # string parameter for extractTags which will cause it
  48 # to strip HTML comments in addition to regular
  49 # <XML>-style tags. This should not be anything we
  50 # may want to use in wikisyntax
  51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
  52
  53 # prefix for escaping, used in two functions at least
  54 define( "UNIQ_PREFIX", "NaodW29");
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  65         function Parser()
  66         {
  67                 $this->clearState();
  68         }
  69
  70         function clearState()
  71         {
  72                 $this->mOutput = new ParserOutput;
  73                 $this->mAutonumber = 0;
  74                 $this->mLastSection = "";
  75                 $this->mDTopen = false;
  76                 $this->mVariables = false;
  77                 $this->mIncludeCount = array();
  78                 $this->mStripState = array();
  79                 $this->mArgStack = array();
  80         }
  81
  82         # First pass--just handle <nowiki> sections, pass the rest off
  83         # to internalParse() which does all the real work.
  84         #
  85         # Returns a ParserOutput
  86         #
  87         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  88         {
  89                 $fname = "Parser::parse";
  90                 wfProfileIn( $fname );
  91
  92                 if ( $clearState ) {
  93                         $this->clearState();
  94                 }
  95
  96                 $this->mOptions = $options;
  97                 $this->mTitle =& $title;
  98                 $this->mOutputType = OT_HTML;
  99
 100                 $stripState = NULL;
 101                 $text = $this->strip( $text, $this->mStripState );
 102                 $text = $this->internalParse( $text, $linestart );
 103                 $text = $this->unstrip( $text, $this->mStripState );
 104                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 105                 $fixtags = array(
 106                         "/<hr *>/i" => '<hr/>',
 107                         "/<br *>/i" => '<br/>',
 108                         "/<center *>/i"=>'<div class="center">',
 109                         "/<\\/center *>/i" => '</div>',
 110                         # Clean up spare ampersands; note that we probably ought to be
 111                         # more careful about named entities.
 112                         '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 113                 );
 114                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 115
 116                 # only once and last
 117                 $text = $this->doBlockLevels( $text, $linestart );
 118
 119                 $this->mOutput->setText( $text );
 120                 wfProfileOut( $fname );
 121                 return $this->mOutput;
 122         }
 123
 124         /* static */ function getRandomString()
 125         {
 126                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 127         }
 128
        # Replaces all occurrences of <$tag>content</$tag> in the text
        # with a random marker and returns the new text. The output parameter
        # $content will be an associative array filled with entries of the form
        # $unique_marker => content.
 133
 134         # If $content is already set, the additional entries will be appended
 135
 136         # If $tag is set to STRIP_COMMENTS, the function will extract
 137         # <!-- HTML comments -->
 138
 139         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 140                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 141                 if ( !$content ) {
 142                         $content = array( );
 143                 }
 144                 $n = 1;
 145                 $stripped = "";
 146
 147                 while ( "" != $text ) {
 148                         if($tag==STRIP_COMMENTS) {
 149                                 $p = preg_split( "/<!--/i", $text, 2 );
 150                         } else {
 151                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 152                         }
 153                         $stripped .= $p[0];
 154                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 155                                 $text = "";
 156                         } else {
 157                                 if($tag==STRIP_COMMENTS) {
 158                                         $q = preg_split( "/-->/i", $p[1], 2 );
 159                                 } else {
 160                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 161                                 }
 162                                 $marker = $rnd . sprintf("%08X", $n++);
 163                                 $content[$marker] = $q[0];
 164                                 $stripped .= $marker;
 165                                 $text = $q[1];
 166                         }
 167                 }
 168                 return $stripped;
 169         }
 170
        # Strips and renders <nowiki>, <pre>, <math>, <hiero>
        # When the output type is OT_HTML, performs the necessary rendering of the
        # stripped sections; otherwise they are kept as wiki markup
        # Returns the text, and fills an array with data needed in unstrip()
        # If $state is already a valid strip state, the new data is added to it
 175
 176         # When $stripcomments is set, HTML comments <!-- like this -->
 177         # will be stripped in addition to other tags. This is important
 178         # for section editing, where these comments cause confusion when
 179         # counting the sections in the wikisource
 180         function strip( $text, &$state, $stripcomments = false )
 181         {
 182                 $render = ($this->mOutputType == OT_HTML);
 183                 $nowiki_content = array();
 184                 $hiero_content = array();
 185                 $math_content = array();
 186                 $pre_content = array();
 187                 $comment_content = array();
 188
 189                 # Replace any instances of the placeholders
 190                 $uniq_prefix = UNIQ_PREFIX;
 191                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 192
 193                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 194                 foreach( $nowiki_content as $marker => $content ){
 195                         if( $render ){
 196                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 197                         } else {
 198                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 199                         }
 200                 }
 201
 202                 $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 203                 foreach( $hiero_content as $marker => $content ){
 204                         if( $render && $GLOBALS['wgUseWikiHiero']){
 205                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 206                         } else {
 207                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
 208                         }
 209                 }
 210
 211                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 212                 foreach( $math_content as $marker => $content ){
 213                         if( $render && $this->mOptions->getUseTeX() ){
 214                                 $math_content[$marker] = renderMath( $content );
 215                         } else {
 216                                 $math_content[$marker] = "<math>$content</math>";
 217                         }
 218                 }
 219
 220                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 221                 foreach( $pre_content as $marker => $content ){
 222                         if( $render ){
 223                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 224                         } else {
 225                                 $pre_content[$marker] = "<pre>$content</pre>";
 226                         }
 227                 }
 228                 if($stripcomments) {
 229                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 230                         foreach( $comment_content as $marker => $content ){
 231                                 $comment_content[$marker] = "<!--$content-->";
 232                         }
 233                 }
 234
 235                 # Merge state with the pre-existing state, if there is one
 236                 if ( $state ) {
 237                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 238                         $state['hiero'] = $state['hiero'] + $hiero_content;
 239                         $state['math'] = $state['math'] + $math_content;
 240                         $state['pre'] = $state['pre'] + $pre_content;
 241                         $state['comment'] = $state['comment'] + $comment_content;
 242                 } else {
 243                         $state = array(
 244                           'nowiki' => $nowiki_content,
 245                           'hiero' => $hiero_content,
 246                           'math' => $math_content,
 247                           'pre' => $pre_content,
 248                           'comment' => $comment_content
 249                         );
 250                 }
 251                 return $text;
 252         }
 253
 254         function unstrip( $text, &$state )
 255         {
 256                 # Must expand in reverse order, otherwise nested tags will be corrupted
 257                 $contentDict = end( $state );
 258                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 259                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 260                                 $text = str_replace( key( $contentDict ), $content, $text );
 261                         }
 262                 }
 263
 264                 return $text;
 265         }
 266
 267         # Add an item to the strip state
 268         # Returns the unique tag which must be inserted into the stripped text
 269         # The tag will be replaced with the original text in unstrip()
 270
 271         function insertStripItem( $text, &$state )
 272         {
 273                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 274                 if ( !$state ) {
 275                         $state = array(
 276                           'nowiki' => array(),
 277                           'hiero' => array(),
 278                           'math' => array(),
 279                           'pre' => array()
 280                         );
 281                 }
 282                 $state['item'][$rnd] = $text;
 283                 return $rnd;
 284         }
 285
 286         # This method generates the list of subcategories and pages for a category
 287         function categoryMagic ()
 288         {
 289                 global $wgLang , $wgUser ;
 290                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 291
 292                 $cns = Namespace::getCategory() ;
 293                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 294
 295                 $r = "<br style=\"clear:both;\"/>\n";
 296
 297
 298                 $sk =& $wgUser->getSkin() ;
 299
 300                 $articles = array() ;
 301                 $children = array() ;
 302                 $data = array () ;
 303                 $id = $this->mTitle->getArticleID() ;
 304
 305                 # For existing categories
 306                 if( $id ) {
 307                         $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 308                         $res = wfQuery ( $sql, DB_READ ) ;
 309                         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 310                 } else {
 311                         # For non-existing categories
 312                         $t = wfStrencode( $this->mTitle->getPrefixedDBKey() );
 313                         $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ;
 314                         $res = wfQuery ( $sql, DB_READ ) ;
 315                         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 316                 }
 317
 318                 # For all pages that link to this category
 319                 foreach ( $data AS $x )
 320                 {
 321                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 322                         if ( $t != "" ) $t .= ":" ;
 323                         $t .= $x->cur_title ;
 324
 325                         if ( $x->cur_namespace == $cns ) {
 326                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 327                         } else {
 328                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 329                         }
 330                 }
 331                 wfFreeResult ( $res ) ;
 332
 333                 # Showing subcategories
 334                 if ( count ( $children ) > 0 )
 335                 {
 336                         asort ( $children ) ;
 337                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 338                         $r .= implode ( ", " , $children ) ;
 339                 }
 340
 341                 # Showing pages in this category
 342                 if ( count ( $articles ) > 0 )
 343                 {
 344                         $ti = $this->mTitle->getText() ;
 345                         asort ( $articles ) ;
 346                         $h =  wfMsg( "category_header", $ti );
 347                         $r .= "<h2>{$h}</h2>\n" ;
 348                         $r .= implode ( ", " , $articles ) ;
 349                 }
 350
 351
 352                 return $r ;
 353         }
 354
 355         function getHTMLattrs ()
 356         {
 357                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 358                                 "title", "align", "lang", "dir", "width", "height",
 359                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 360                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 361                                 /* FONT */ "type", "start", "value", "compact",
 362                                 /* For various lists, mostly deprecated but safe */
 363                                 "summary", "width", "border", "frame", "rules",
 364                                 "cellspacing", "cellpadding", "valign", "char",
 365                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 366                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 367                                 "id", "class", "name", "style" /* For CSS */
 368                                 );
 369                 return $htmlattrs ;
 370         }
 371
 372         function fixTagAttributes ( $t )
 373         {
 374                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 375                 $htmlattrs = $this->getHTMLattrs() ;
 376
 377                 # Strip non-approved attributes from the tag
 378                 $t = preg_replace(
 379                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 380                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 381                         $t);
 382                 # Strip javascript "expression" from stylesheets. Brute force approach:
 383                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 384
 385                 if( preg_match(
 386                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 387                         wfMungeToUtf8( $t ) ) )
 388                 {
 389                         $t="";
 390                 }
 391
 392                 return trim ( $t ) ;
 393         }
 394
 395         function doTableStuff ( $t )
 396         {
 397                 $t = explode ( "\n" , $t ) ;
 398                 $td = array () ; # Is currently a td tag open?
 399                         $ltd = array () ; # Was it TD or TH?
 400                         $tr = array () ; # Is currently a tr tag open?
 401                         $ltr = array () ; # tr attributes
 402                         foreach ( $t AS $k => $x )
 403                         {
 404                                 $x = trim ( $x ) ;
 405                                 $fc = substr ( $x , 0 , 1 ) ;
 406                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 407                                 {
 408                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 409                                         array_push ( $td , false ) ;
 410                                         array_push ( $ltd , "" ) ;
 411                                         array_push ( $tr , false ) ;
 412                                         array_push ( $ltr , "" ) ;
 413                                 }
 414                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 415                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 416                                 {
 417                                         $z = "</table>\n" ;
 418                                         $l = array_pop ( $ltd ) ;
 419                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 420                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 421                                         array_pop ( $ltr ) ;
 422                                         $t[$k] = $z ;
 423                                 }
 424                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 425                                                 {
 426                                                 $z = trim ( substr ( $x , 2 ) ) ;
 427                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 428                                                 }*/
 429                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 430                                 {
 431                                         $x = substr ( $x , 1 ) ;
 432                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 433                                         $z = "" ;
 434                                         $l = array_pop ( $ltd ) ;
 435                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 436                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 437                                         array_pop ( $ltr ) ;
 438                                         $t[$k] = $z ;
 439                                         array_push ( $tr , false ) ;
 440                                         array_push ( $td , false ) ;
 441                                         array_push ( $ltd , "" ) ;
 442                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 443                                 }
 444                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 445                                 {
 446                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 447                                         {
 448                                                 $fc = "+" ;
 449                                                 $x = substr ( $x , 1 ) ;
 450                                         }
 451                                         $after = substr ( $x , 1 ) ;
 452                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 453                                         $after = explode ( "||" , $after ) ;
 454                                         $t[$k] = "" ;
 455                                         foreach ( $after AS $theline )
 456                                         {
 457                                                 $z = "" ;
 458                                                 if ( $fc != "+" )
 459                                                 {
 460                                                         $tra = array_pop ( $ltr ) ;
 461                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 462                                                         array_push ( $tr , true ) ;
 463                                                         array_push ( $ltr , "" ) ;
 464                                                 }
 465
 466                                                 $l = array_pop ( $ltd ) ;
 467                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 468                                                 if ( $fc == "|" ) $l = "td" ;
 469                                                 else if ( $fc == "!" ) $l = "th" ;
 470                                                 else if ( $fc == "+" ) $l = "caption" ;
 471                                                 else $l = "" ;
 472                                                 array_push ( $ltd , $l ) ;
 473                                                 $y = explode ( "|" , $theline , 2 ) ;
 474                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 475                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 476                                                 $t[$k] .= $y ;
 477                                                 array_push ( $td , true ) ;
 478                                         }
 479                                 }
 480                         }
 481
 482                 # Closing open td, tr && table
 483                 while ( count ( $td ) > 0 )
 484                 {
 485                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 486                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 487                         $t[] = "</table>" ;
 488                 }
 489
 490                 $t = implode ( "\n" , $t ) ;
 491                 #               $t = $this->removeHTMLtags( $t );
 492                 return $t ;
 493         }
 494
 495         function internalParse( $text, $linestart, $args = array() )
 496         {
 497                 $fname = "Parser::internalParse";
 498                 wfProfileIn( $fname );
 499
 500                 $text = $this->removeHTMLtags( $text );
 501                 $text = $this->replaceVariables( $text, $args );
 502
 503                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 504
 505                 $text = $this->doHeadings( $text );
 506                 if($this->mOptions->getUseDynamicDates()) {
 507                         global $wgDateFormatter;
 508                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 509                 }
 510                 $text = $this->replaceExternalLinks( $text );
 511                 $text = $this->doTokenizedParser ( $text );
 512                 $text = $this->doTableStuff ( $text ) ;
 513                 $text = $this->formatHeadings( $text );
 514                 $sk =& $this->mOptions->getSkin();
 515                 $text = $sk->transformContent( $text );
 516
 517                 if ( !isset ( $this->categoryMagicDone ) ) {
 518                    $text .= $this->categoryMagic () ;
 519                    $this->categoryMagicDone = true ;
 520                    }
 521
 522                 wfProfileOut( $fname );
 523                 return $text;
 524         }
 525
 526
 527         /* private */ function doHeadings( $text )
 528         {
 529                 for ( $i = 6; $i >= 1; --$i ) {
 530                         $h = substr( "======", 0, $i );
 531                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 532                           "<h{$i}>\\1</h{$i}>\\2", $text );
 533                 }
 534                 return $text;
 535         }
 536
 537         # Note: we have to do external links before the internal ones,
 538         # and otherwise take great care in the order of things here, so
 539         # that we don't end up interpreting some URLs twice.
 540
 541         /* private */ function replaceExternalLinks( $text )
 542         {
 543                 $fname = "Parser::replaceExternalLinks";
 544                 wfProfileIn( $fname );
 545                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 546                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 547                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 548                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 549                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 550                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 551                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 552                 wfProfileOut( $fname );
 553                 return $text;
 554         }
 555
 556         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 557         {
 558                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 559                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 560
 561                 # this is  the list of separators that should be ignored if they
 562                 # are the last character of an URL but that should be included
 563                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 564                 # in this case, the last comma should not become part of the URL,
 565                 # but in "www.foo.com/123,2342,32.htm" it should.
 566                 $sep = ",;\.:";
 567                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 568                 $images = "gif|png|jpg|jpeg";
 569
 570                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 571                 # they are interpreted as part of the string (used to tell PHP
 572                 # that the content of the string should be inserted there).
 573                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 574                   "((?i){$images})([^{$uc}]|$)/";
 575
 576                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 577                 $sk =& $this->mOptions->getSkin();
 578
 579                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 580                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 581                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 582                 }
 583                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 584                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 585                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 586                   "</a>\\5", $s );
 587                 $s = str_replace( $unique, $protocol, $s );
 588
 589                 $a = explode( "[{$protocol}:", " " . $s );
 590                 $s = array_shift( $a );
 591                 $s = substr( $s, 1 );
 592
 593                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 594                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 595
 596                 foreach ( $a as $line ) {
 597                         if ( preg_match( $e1, $line, $m ) ) {
 598                                 $link = "{$protocol}:{$m[1]}";
 599                                 $trail = $m[2];
 600                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 601                                 else { $text = wfEscapeHTML( $link ); }
 602                         } else if ( preg_match( $e2, $line, $m ) ) {
 603                                 $link = "{$protocol}:{$m[1]}";
 604                                 $text = $m[2];
 605                                 $trail = $m[3];
 606                         } else {
 607                                 $s .= "[{$protocol}:" . $line;
 608                                 continue;
 609                         }
 610                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 611                                 $paren = "";
 612                         } else {
 613                                 # Expand the URL for printable version
 614                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 615                         }
 616                         $la = $sk->getExternalLinkAttributes( $link, $text );
 617                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 618
 619                 }
 620                 return $s;
 621         }
 622
 623         /* private */ function handle3Quotes( &$state, $token )
 624         {
 625                 if ( $state["strong"] !== false ) {
 626                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 627                         {
 628                                 # ''' lala ''lala '''
 629                                 $s = "</em></strong><em>";
 630                         } else {
 631                                 $s = "</strong>";
 632                         }
 633                         $state["strong"] = FALSE;
 634                 } else {
 635                         $s = "<strong>";
 636                         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
 637                 }
 638                 return $s;
 639         }
 640
 641         /* private */ function handle2Quotes( &$state, $token )
 642         {
 643                 if ( $state["em"] !== false ) {
 644                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 645                         {
 646                                 # ''lala'''lala'' ....'''
 647                                 $s = "</strong></em><strong>";
 648                         } else {
 649                                 $s = "</em>";
 650                         }
 651                         $state["em"] = FALSE;
 652                 } else {
 653                         $s = "<em>";
 654                         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 655
 656                 }
 657                 return $s;
 658         }
 659
 660         /* private */ function handle5Quotes( &$state, $token )
 661         {
 662                 $s = "";
 663                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 664                         if ( $state["em"] < $state["strong"] ) {
 665                                 $s .= "</strong></em>";
 666                         } else {
 667                                 $s .= "</em></strong>";
 668                         }
 669                         $state["strong"] = $state["em"] = FALSE;
 670                 } elseif ( $state["em"] !== false ) {
 671                         $s .= "</em><strong>";
 672                         $state["em"] = FALSE;
 673                         $state["strong"] = $token["pos"];
 674                 } elseif ( $state["strong"] !== false ) {
 675                         $s .= "</strong><em>";
 676                         $state["strong"] = FALSE;
 677                         $state["em"] = $token["pos"];
 678                 } else { # not $em and not $strong
 679                         $s .= "<strong><em>";
 680                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 681                 }
 682                 return $s;
 683         }
 684
 685         /* private */ function doTokenizedParser( $str )
 686         {
 687                 global $wgLang; # for language specific parser hook
 688                 global $wgUploadDirectory, $wgUseTimeline;
 689
 690                 $tokenizer=Tokenizer::newFromString( $str );
 691                 $tokenStack = array();
 692
 693                 $s="";
 694                 $state["em"]      = FALSE;
 695                 $state["strong"]  = FALSE;
 696                 $tagIsOpen = FALSE;
 697                 $threeopen = false;
 698
 699                 # The tokenizer splits the text into tokens and returns them one by one.
 700                 # Every call to the tokenizer returns a new token.
 701                 while ( $token = $tokenizer->nextToken() )
 702                 {
 703                         switch ( $token["type"] )
 704                         {
 705                                 case "text":
 706                                         # simple text with no further markup
 707                                         $txt = $token["text"];
 708                                         break;
 709                                 case "blank":
 710                                         # Text that contains blanks that have to be converted to
 711                                         # non-breakable spaces for French.
 712                                         # U+202F NARROW NO-BREAK SPACE might be a better choice, but
 713                                         # browser support for Unicode spacing is poor.
 714                                         $txt = str_replace( " ", "&nbsp;", $token["text"] );
 715                                         break;
 716                                 case "[[[":
 717                                         # remember the tag opened with 3 [
 718                                         $threeopen = true;
 719                                 case "[[":
 720                                         # link opening tag.
 721                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 722                                         $tagIsOpen = TRUE;
 723                                         array_push( $tokenStack, $token );
 724                                         $txt="";
 725                                         break;
 726
 727                                 case "]]]":
 728                                 case "]]":
 729                                         # link close tag.
 730                                         # get text from stack, glue it together, and call the code to handle a
 731                                         # link
 732
 733                                         if ( count( $tokenStack ) == 0 )
 734                                         {
 735                                                 # stack empty. Found a ]] without an opening [[
 736                                                 $txt = "]]";
 737                                         } else {
 738                                                 $linkText = "";
 739                                                 $lastToken = array_pop( $tokenStack );
 740                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 741                                                 {
 742                                                         if( !empty( $lastToken["text"] ) ) {
 743                                                                 $linkText = $lastToken["text"] . $linkText;
 744                                                         }
 745                                                         $lastToken = array_pop( $tokenStack );
 746                                                 }
 747
 748                                                 $txt = $linkText ."]]";
 749
 750                                                 if( isset( $lastToken["text"] ) ) {
 751                                                         $prefix = $lastToken["text"];
 752                                                 } else {
 753                                                         $prefix = "";
 754                                                 }
 755                                                 $nextToken = $tokenizer->previewToken();
 756                                                 if ( $nextToken["type"] == "text" )
 757                                                 {
 758                                                         # Preview just looks at it. Now we have to fetch it.
 759                                                         $nextToken = $tokenizer->nextToken();
 760                                                         $txt .= $nextToken["text"];
 761                                                 }
 762                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 763
 764                                                 # did the tag start with 3 [ ?
 765                                                 if($threeopen) {
 766                                                         # show the first as text
 767                                                         $txt = "[".$txt;
 768                                                         $threeopen=false;
 769                                                 }
 770
 771                                         }
 772                                         $tagIsOpen = (count( $tokenStack ) != 0);
 773                                         break;
 774                                 case "----":
 775                                         $txt = "\n<hr />\n";
 776                                         break;
 777                                 case "'''":
 778                                         # This and the three next ones handle quotes
 779                                         $txt = $this->handle3Quotes( $state, $token );
 780                                         break;
 781                                 case "''":
 782                                         $txt = $this->handle2Quotes( $state, $token );
 783                                         break;
 784                                 case "'''''":
 785                                         $txt = $this->handle5Quotes( $state, $token );
 786                                         break;
 787                                 case "":
 788                                         # empty token
 789                                         $txt="";
 790                                         break;
 791                                 case "RFC ":
 792                                         if ( $tagIsOpen ) {
 793                                                 $txt = "RFC ";
 794                                         } else {
 795                                                 $txt = $this->doMagicRFC( $tokenizer );
 796                                         }
 797                                         break;
 798                                 case "ISBN ":
 799                                         if ( $tagIsOpen ) {
 800                                                 $txt = "ISBN ";
 801                                         } else {
 802                                                 $txt = $this->doMagicISBN( $tokenizer );
 803                                         }
 804                                         break;
 805                                 case "<timeline>":
 806                                         if ( $wgUseTimeline &&
 807                                              "" != ( $timelinesrc = $tokenizer->readAllUntil("&lt;/timeline&gt;") ) )
 808                                         {
 809                                                 $txt = renderTimeline( $timelinesrc );
 810                                         } else {
 811                                                 $txt=$token["text"];
 812                                         }
 813                                         break;
 814                                 default:
 815                                         # Call language specific Hook.
 816                                         $txt = $wgLang->processToken( $token, $tokenStack );
 817                                         if ( NULL == $txt ) {
 818                                                 # An unkown token. Highlight.
 819                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 820                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 821                                         }
 822                                         break;
 823                         }
 824                         # If we're parsing the interior of a link, don't append the interior to $s,
 825                         # but push it to the stack so it can be processed when a ]] token is found.
 826                         if ( $tagIsOpen  && $txt != "" ) {
 827                                 $token["type"] = "text";
 828                                 $token["text"] = $txt;
 829                                 array_push( $tokenStack, $token );
 830                         } else {
 831                                 $s .= $txt;
 832                         }
 833                 } #end while
 834                 if ( count( $tokenStack ) != 0 )
 835                 {
 836                         # still objects on stack. opened [[ tag without closing ]] tag.
 837                         $txt = "";
 838                         while ( $lastToken = array_pop( $tokenStack ) )
 839                         {
 840                                 if ( $lastToken["type"] == "text" )
 841                                 {
 842                                         $txt = $lastToken["text"] . $txt;
 843                                 } else {
 844                                         $txt = $lastToken["type"] . $txt;
 845                                 }
 846                         }
 847                         $s .= $txt;
 848                 }
 849                 return $s;
 850         }
 851
 852         /* private */ function handleInternalLink( $line, $prefix )
 853         {
 854                 global $wgLang, $wgLinkCache;
 855                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 856                 static $fname = "Parser::handleInternalLink" ;
 857                 wfProfileIn( $fname );
 858
 859                 wfProfileIn( "$fname-setup" );
 860                 static $tc = FALSE;
 861                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 862                 $sk =& $this->mOptions->getSkin();
 863
 864                 # Match a link having the form [[namespace:link|alternate]]trail
 865                 static $e1 = FALSE;
 866                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 867                 # Match the end of a line for a word that's not followed by whitespace,
 868                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 869                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 870                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 871                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 872
 873
 874                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 875                 static $image = FALSE;
 876                 static $special = FALSE;
 877                 static $media = FALSE;
 878                 static $category = FALSE;
 879                 if ( !$image ) { $image = Namespace::getImage(); }
 880                 if ( !$special ) { $special = Namespace::getSpecial(); }
 881                 if ( !$media ) { $media = Namespace::getMedia(); }
 882                 if ( !$category ) { $category = Namespace::getCategory(); ; }
 883
 884                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 885
 886                 wfProfileOut( "$fname-setup" );
 887                 $s = "";
 888
 889                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 890                         $text = $m[2];
 891                         $trail = $m[3];
 892                 } else { # Invalid form; output directly
 893                         $s .= $prefix . "[[" . $line ;
 894                         return $s;
 895                 }
 896
 897                 /* Valid link forms:
 898                 Foobar -- normal
 899                 :Foobar -- override special treatment of prefix (images, language links)
 900                 /Foobar -- convert to CurrentPage/Foobar
 901                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 902                 */
 903                 $c = substr($m[1],0,1);
 904                 $noforce = ($c != ":");
 905                 if( $c == "/" ) { # subpage
 906                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 907                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 908                                 $noslash=$m[1];
 909                         } else {
 910                                 $noslash=substr($m[1],1);
 911                         }
 912                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 913                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 914                                 if( "" == $text ) {
 915                                         $text= $m[1];
 916                                 } # this might be changed for ugliness reasons
 917                         } else {
 918                                 $link = $noslash; # no subpage allowed, use standard link
 919                         }
 920                 } elseif( $noforce ) { # no subpage
 921                         $link = $m[1];
 922                 } else {
 923                         $link = substr( $m[1], 1 );
 924                 }
 925                 if( "" == $text )
 926                         $text = $link;
 927
 928                 $nt = Title::newFromText( $link );
 929                 if( !$nt ) {
 930                         $s .= $prefix . "[[" . $line;
 931                         return $s;
 932                 }
 933                 $ns = $nt->getNamespace();
 934                 $iw = $nt->getInterWiki();
 935                 if( $noforce ) {
 936                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 937                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 938                                 return (trim($s) == '')? '': $s;
 939                         }
 940                         if( $ns == $image ) {
 941                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 942                                 $wgLinkCache->addImageLinkObj( $nt );
 943                                 return $s;
 944                         }
 945                         if ( $ns == $category ) {
 946                                 $t = $nt->getText() ;
 947                                 $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 948                                 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 949                                 $this->mOutput->mCategoryLinks[] = $t ;
 950                                 $s .= $prefix . $trail ;
 951                                 return $s ;
 952                         }
 953                 }
 954                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 955                     ( strpos( $link, "#" ) == FALSE ) ) {
 956                         # Self-links are handled specially; generally de-link and change to bold.
 957                         $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 958                         return $s;
 959                 }
 960
 961                 if( $ns == $media ) {
 962                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 963                         $wgLinkCache->addImageLinkObj( $nt );
 964                         return $s;
 965                 } elseif( $ns == $special ) {
 966                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 967                         return $s;
 968                 }
 969                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 970
 971                 wfProfileOut( $fname );
 972                 return $s;
 973         }
 974
 975         # Some functions here used by doBlockLevels()
 976         #
 977         /* private */ function closeParagraph()
 978         {
 979                 $result = "";
 980                 if ( '' != $this->mLastSection ) {
 981                         $result = "</" . $this->mLastSection  . ">\n";
 982                 }
 983                 $this->mInPre = false;
 984                 $this->mLastSection = "";
 985                 return $result;
 986         }
 987         # getCommon() returns the length of the longest common substring
 988         # of both arguments, starting at the beginning of both.
 989         #
 990         /* private */ function getCommon( $st1, $st2 )
 991         {
 992                 $fl = strlen( $st1 );
 993                 $shorter = strlen( $st2 );
 994                 if ( $fl < $shorter ) { $shorter = $fl; }
 995
 996                 for ( $i = 0; $i < $shorter; ++$i ) {
 997                         if ( $st1{$i} != $st2{$i} ) { break; }
 998                 }
 999                 return $i;
1000         }
1001         # These next three functions open, continue, and close the list
1002         # element appropriate to the prefix character passed into them.
1003         #
1004         /* private */ function openList( $char )
1005     {
1006                 $result = $this->closeParagraph();
1007
1008                 if ( "*" == $char ) { $result .= "<ul><li>"; }
1009                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
1010                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
1011                 else if ( ";" == $char ) {
1012                         $result .= "<dl><dt>";
1013                         $this->mDTopen = true;
1014                 }
1015                 else { $result = "<!-- ERR 1 -->"; }
1016
1017                 return $result;
1018         }
1019
1020         /* private */ function nextItem( $char )
1021         {
1022                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
1023                 else if ( ":" == $char || ";" == $char ) {
1024                         $close = "</dd>";
1025                         if ( $this->mDTopen ) { $close = "</dt>"; }
1026                         if ( ";" == $char ) {
1027                                 $this->mDTopen = true;
1028                                 return $close . "<dt>";
1029                         } else {
1030                                 $this->mDTopen = false;
1031                                 return $close . "<dd>";
1032                         }
1033                 }
1034                 return "<!-- ERR 2 -->";
1035         }
1036
1037         /* private */function closeList( $char )
1038         {
1039                 if ( "*" == $char ) { $text = "</li></ul>"; }
1040                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1041                 else if ( ":" == $char ) {
1042                         if ( $this->mDTopen ) {
1043                                 $this->mDTopen = false;
1044                                 $text = "</dt></dl>";
1045                         } else {
1046                                 $text = "</dd></dl>";
1047                         }
1048                 }
1049                 else {  return "<!-- ERR 3 -->"; }
1050                 return $text."\n";
1051         }
1052
1053         /* private */ function doBlockLevels( $text, $linestart ) {
1054                 $fname = "Parser::doBlockLevels";
1055                 wfProfileIn( $fname );
1056
1057                 # Parsing through the text line by line.  The main thing
1058                 # happening here is handling of block-level elements p, pre,
1059                 # and making lists from lines starting with * # : etc.
1060                 #
1061                 $textLines = explode( "\n", $text );
1062
1063                 $lastPrefix = $output = $lastLine = '';
1064                 $this->mDTopen = $inBlockElem = false;
1065                 $prefixLength = 0;
1066                 $paragraphStack = false;
1067
1068                 if ( !$linestart ) {
1069                         $output .= array_shift( $textLines );
1070                 }
1071                 foreach ( $textLines as $oLine ) {
1072                         $lastPrefixLength = strlen( $lastPrefix );
1073                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1074                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1075                         if (!$this->mInPre) {
1076                                 $this->mInPre = !empty($preOpenMatch);
1077                         }
1078                         if ( !$this->mInPre ) {
1079                                 # Multiple prefixes may abut each other for nested lists.
1080                                 $prefixLength = strspn( $oLine, "*#:;" );
1081                                 $pref = substr( $oLine, 0, $prefixLength );
1082
1083                                 # eh?
1084                                 $pref2 = str_replace( ";", ":", $pref );
1085                                 $t = substr( $oLine, $prefixLength );
1086                         } else {
1087                                 # Don't interpret any other prefixes in preformatted text
1088                                 $prefixLength = 0;
1089                                 $pref = $pref2 = '';
1090                                 $t = $oLine;
1091                         }
1092
1093                         # List generation
1094                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1095                                 # Same as the last item, so no need to deal with nesting or opening stuff
1096                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1097                                 $paragraphStack = false;
1098
1099                                 if ( ";" == substr( $pref, -1 ) ) {
1100                                         # The one nasty exception: definition lists work like this:
1101                                         # ; title : definition text
1102                                         # So we check for : in the remainder text to split up the
1103                                         # title and definition, without b0rking links.
1104                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1105                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1106                                                 $term = $match[1];
1107                                                 $output .= $term . $this->nextItem( ":" );
1108                                                 $t = $match[2];
1109                                         }
1110                                 }
1111                         } elseif( $prefixLength || $lastPrefixLength ) {
1112                                 # Either open or close a level...
1113                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1114                                 $paragraphStack = false;
1115
1116                                 while( $commonPrefixLength < $lastPrefixLength ) {
1117                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1118                                         --$lastPrefixLength;
1119                                 }
1120                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1121                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1122                                 }
1123                                 while ( $prefixLength > $commonPrefixLength ) {
1124                                         $char = substr( $pref, $commonPrefixLength, 1 );
1125                                         $output .= $this->openList( $char );
1126
1127                                         if ( ";" == $char ) {
1128                                                 # FIXME: This is dupe of code above
1129                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1130                                                         $term = $match[1];
1131                                                         $output .= $term . $this->nextItem( ":" );
1132                                                         $t = $match[2];
1133                                                 }
1134                                         }
1135                                         ++$commonPrefixLength;
1136                                 }
1137                                 $lastPrefix = $pref2;
1138                         }
1139                         if( 0 == $prefixLength ) {
1140                                 # No prefix (not in list)--go to paragraph mode
1141                                 $uniq_prefix = UNIQ_PREFIX;
1142                                 // XXX: use a stack for nestable elements like span, table and div
1143                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1144                                 $closematch = preg_match(
1145                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1146                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1147                                 if ( $openmatch or $closematch ) {
1148                                         $paragraphStack = false;
1149                                         $output .= $this->closeParagraph();
1150                                         if($preOpenMatch and !$preCloseMatch) {
1151                                                 $this->mInPre = true;
1152                                         }
1153                                         if ( $closematch  ) {
1154                                                 $inBlockElem = false;
1155                                         } else {
1156                                                 $inBlockElem = true;
1157                                         }
1158                                 } else if ( !$inBlockElem ) {
1159                                         if ( " " == $t{0} ) {
1160                                                 // pre
1161                                                 if ($this->mLastSection != 'pre') {
1162                                                         $paragraphStack = false;
1163                                                         $output .= $this->closeParagraph().'<pre>';
1164                                                         $this->mLastSection = 'pre';
1165                                                 }
1166                                         } else {
1167                                                 // paragraph
1168                                                 if ( '' == trim($t) ) {
1169                                                         if ( $paragraphStack ) {
1170                                                                 $output .= $paragraphStack.'<br/>';
1171                                                                 $paragraphStack = false;
1172                                                                 $this->mLastSection = 'p';
1173                                                         } else {
1174                                                                 if ($this->mLastSection != 'p' ) {
1175                                                                         $output .= $this->closeParagraph();
1176                                                                         $this->mLastSection = '';
1177                                                                         $paragraphStack = "<p>";
1178                                                                 } else {
1179                                                                         $paragraphStack = '</p><p>';
1180                                                                 }
1181                                                         }
1182                                                 } else {
1183                                                         if ( $paragraphStack ) {
1184                                                                 $output .= $paragraphStack;
1185                                                                 $paragraphStack = false;
1186                                                                 $this->mLastSection = 'p';
1187                                                         } else if ($this->mLastSection != 'p') {
1188                                                                 $output .= $this->closeParagraph().'<p>';
1189                                                                 $this->mLastSection = 'p';
1190                                                         }
1191                                                 }
1192                                         }
1193                                 }
1194                         }
1195                         if ($paragraphStack === false) {
1196                                 $output .= $t."\n";
1197                         }
1198                 }
1199                 while ( $prefixLength ) {
1200                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1201                         --$prefixLength;
1202                 }
1203                 if ( "" != $this->mLastSection ) {
1204                         $output .= "</" . $this->mLastSection . ">";
1205                         $this->mLastSection = "";
1206                 }
1207
1208                 wfProfileOut( $fname );
1209                 return $output;
1210         }
1211
# Compute the current value of one built-in magic variable.
#
# $index is a MAG_* variable identifier. Depending on the identifier the
# value comes from the current date/time, from the title being parsed
# ($this->mTitle), from the article counter or from site-wide globals.
#
# Returns the value for that identifier, or NULL when the identifier is
# not one of the variables handled here.
function getVariableValue( $index ) {
    global $wgLang, $wgSitename, $wgServer;

    $value = NULL;
    switch ( $index ) {
        case MAG_CURRENTMONTH:
            # Two-digit month number
            $value = date( "m" );
            break;
        case MAG_CURRENTMONTHNAME:
            $value = $wgLang->getMonthName( date("n") );
            break;
        case MAG_CURRENTMONTHNAMEGEN:
            $value = $wgLang->getMonthNameGen( date("n") );
            break;
        case MAG_CURRENTDAY:
            # Day of the month without leading zero
            $value = date("j");
            break;
        case MAG_PAGENAME:
            $value = $this->mTitle->getText();
            break;
        case MAG_NAMESPACE:
            # Namespace text as reported by the content language object
            # (patch by Dori), rather than the canonical namespace name
            $value = $wgLang->getNsText($this->mTitle->getNamespace());
            break;
        case MAG_CURRENTDAYNAME:
            # date("w") counts from 0; getWeekdayName() is called with 1..7
            $value = $wgLang->getWeekdayName( date("w")+1 );
            break;
        case MAG_CURRENTYEAR:
            $value = date( "Y" );
            break;
        case MAG_CURRENTTIME:
            $value = $wgLang->time( wfTimestampNow(), false );
            break;
        case MAG_NUMBEROFARTICLES:
            $value = wfNumberOfArticles();
            break;
        case MAG_SITENAME:
            $value = $wgSitename;
            break;
        case MAG_SERVER:
            $value = $wgServer;
            break;
    }
    return $value;
}
1245
# (Re)build $this->mVariables, the lookup table used when expanding
# built-in variables.
#
# For every identifier listed in $wgVariableIDs the matching MagicWord
# object adds its entries to the table, paired with the value computed
# by getVariableValue() for that identifier.
function initialiseVariables()
{
    global $wgVariableIDs;

    # Always start from an empty table
    $this->mVariables = array();

    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $currentValue = $this->getVariableValue( $variableId );
        $magicWord->addToArray( $this->mVariables, $currentValue );
    }
}
1255
# Expand every {{...}} construct in $text.
#
# $text: the text to process.
# $args: arguments made available to the constructs in $text; they are
#        pushed on $this->mArgStack for the duration of the call so that
#        braceSubstitution() can read them with end().
#
# Each match of the brace regex is handed to the global callback
# wfBraceSubstitution; $GLOBALS['wgCurParser'] is pointed at this parser
# first (presumably so the callback can forward to braceSubstitution() --
# the callback itself is defined elsewhere).
#
# Returns the text with all expandable constructs replaced.
/* private */ function replaceVariables( $text, $args = array() )
{
    $fname = "Parser::replaceVariables";
    wfProfileIn( $fname );

    # The variable table is built lazily on first use
    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }

    # Subpatterns: (1) optional newline before the braces, (2) the part
    # before the first |, title characters only, (3) the rest starting
    # with |, or the empty string
    $titleChars = Title::legalChars();
    $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";

    # This function is called recursively. To keep track of arguments we need a stack:
    array_push( $this->mArgStack, $args );

    # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
    $GLOBALS['wgCurParser'] =& $this;
    $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

    array_pop( $this->mArgStack );

    # Balance the wfProfileIn() above; without this every call leaves an
    # unclosed entry on the profiling stack
    wfProfileOut( $fname );
    return $text;
}
1281
# Expand a single {{...}} construct matched by the regex built in
# replaceVariables().
#
# $matches is the regex match array:
#   [0] the complete matched text
#   [1] an optional newline that preceded the braces
#   [2] the text before the first "|" (title characters only)
#   [3] the remainder starting with "|", or "" when there are no arguments
#
# Candidates are tried in this order and the first hit wins: SUBST
# handling, MSGNW / MSG / INT, NS, LOCALURL / LOCALURLE, built-in
# variables, arguments supplied by the including page, and finally the
# text of the named page (Template namespace by default).
#
# For HTML output the result is parsed recursively and parked in the
# strip state; with MSGNW it is escaped instead.
#
# Returns the replacement text with the leading newline re-attached, or
# the original matched text when nothing applied.
function braceSubstitution( $matches )
{
    global $wgLinkCache, $wgLang;
    $found = false;
    $nowiki = false;
    $title = NULL;

    # $newline is an optional newline character before the braces
    # $part1 is the bit before the first |, and must contain only title characters
    # $args is a list of arguments, starting from index 0, not including $part1
    $newline = $matches[1];
    $part1 = $matches[2];
    # If the third subpattern matched anything, it will start with |
    if ( $matches[3] !== "" ) {
        $args = explode( "|", substr( $matches[3], 1 ) );
    } else {
        $args = array();
    }
    $argc = count( $args );

    # SUBST
    $mwSubst =& MagicWord::get( MAG_SUBST );
    if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
        if ( $this->mOutputType != OT_WIKI ) {
            # Invalid SUBST not replaced at PST time
            # Return without further processing
            $text = $matches[0];
            $found = true;
        }
    } elseif ( $this->mOutputType == OT_WIKI ) {
        # SUBST not found in PST pass, do nothing
        $text = $matches[0];
        $found = true;
    }

    # MSG, MSGNW and INT
    if ( !$found ) {
        # Check for MSGNW:
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $nowiki = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg =& MagicWord::get( MAG_MSG );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
            if ( $this->incrementIncludeCount( "int:$part1" ) ) {
                $text = wfMsgReal( $part1, $args, true );
                $found = true;
            }
        }
    }

    # NS
    if ( !$found ) {
        # Check for NS: (namespace expansion)
        $mwNs =& MagicWord::get( MAG_NS );
        if ( $mwNs->matchStartAndRemove( $part1 ) ) {
            # intval() is 0 both for "0" and for non-numeric names, so
            # namespace 0 has to be recognised explicitly; otherwise
            # {{ns:0}} would fall through to the template lookup below
            if ( intval( $part1 ) || trim( $part1 ) === "0" ) {
                $text = $wgLang->getNsText( intval( $part1 ) );
                $found = true;
            } else {
                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                if ( !is_null( $index ) ) {
                    $text = $wgLang->getNsText( $index );
                    $found = true;
                }
            }
        }
    }

    # LOCALURL and LOCALURLE
    if ( !$found ) {
        $mwLocal =& MagicWord::get( MAG_LOCALURL );
        $mwLocalE =& MagicWord::get( MAG_LOCALURLE );

        # Pick the Title method that produces the requested flavour
        if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
            $func = 'getLocalURL';
        } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
            $func = 'escapeLocalURL';
        } else {
            $func = '';
        }

        if ( $func !== '' ) {
            $title = Title::newFromText( $part1 );
            if ( !is_null( $title ) ) {
                # The first argument, if any, is passed on to the URL method
                if ( $argc > 0 ) {
                    $text = $title->$func( $args[0] );
                } else {
                    $text = $title->$func();
                }
                $found = true;
            }
        }
    }

    # Internal variables
    if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
        $text = $this->mVariables[$part1];
        $found = true;
        $this->mOutput->mContainsOldMagic = true;
    }

    # Arguments input from the caller
    # (the top of the stack pushed by replaceVariables())
    $inputArgs = end( $this->mArgStack );
    if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
        $text = $inputArgs[$part1];
        $found = true;
    }

    # Load from database
    if ( !$found ) {
        $title = Title::newFromText( $part1, NS_TEMPLATE );
        if ( !is_null( $title ) && !$title->isExternal() ) {
            # Check for excessive inclusion
            $dbk = $title->getPrefixedDBkey();
            if ( $this->incrementIncludeCount( $dbk ) ) {
                $article = new Article( $title );
                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                if ( $articleContent !== false ) {
                    $found = true;
                    $text = $articleContent;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( $this->mOutputType == OT_HTML && !$found ) {
                $text = "[[" . $title->getPrefixedText() . "]]";
                $found = true;
            }
        }
    }

    # Recursive parsing, escaping and link table handling
    # Only for HTML output
    if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
        $text = wfEscapeWikiText( $text );
    } elseif ( $this->mOutputType == OT_HTML && $found ) {
        # Clean up argument array: "name=value" becomes a named entry,
        # anything else is numbered from 1 in order of appearance.
        # trim() always returns a string, so no further checks are needed.
        $assocArgs = array();
        $index = 1;
        foreach( $args as $arg ) {
            $eqpos = strpos( $arg, "=" );
            if ( $eqpos === false ) {
                $assocArgs[$index++] = $arg;
            } else {
                $name = trim( substr( $arg, 0, $eqpos ) );
                $value = trim( substr( $arg, $eqpos+1 ) );
                $assocArgs[$name] = $value;
            }
        }

        # Do not enter included links in link table
        if ( !is_null( $title ) ) {
            $wgLinkCache->suspend();
        }

        # Run full parser on the included text
        $text = $this->strip( $text, $this->mStripState );
        $text = $this->internalParse( $text, (bool)$newline, $assocArgs );

        # Add the result to the strip state for re-inclusion after
        # the rest of the processing
        $text = $this->insertStripItem( $text, $this->mStripState );

        # Resume the link cache and register the inclusion as a link
        if ( !is_null( $title ) ) {
            $wgLinkCache->resume();
            $wgLinkCache->addLinkObj( $title );
        }
    }

    if ( !$found ) {
        return $matches[0];
    } else {
        return $newline . $text;
    }
}
1473
# Count one more inclusion of the entity identified by $dbk and report
# whether that inclusion is still allowed.
#
# $dbk: key identifying the included entity (callers pass a prefixed
#       DB key, or "int:..." for internal messages).
#
# Returns true while the running count for $dbk, including this call,
# does not exceed MAX_INCLUDE_REPEAT; false afterwards. The counter is
# incremented either way.
function incrementIncludeCount( $dbk )
{
    # First time this entity is seen: start counting from zero
    if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
        $this->mIncludeCount[$dbk] = 0;
    }

    $timesIncluded = $this->mIncludeCount[$dbk] + 1;
    $this->mIncludeCount[$dbk] = $timesIncluded;

    return $timesIncluded <= MAX_INCLUDE_REPEAT;
}
1486
1487
# Cleans up HTML, removes dangerous tags and attributes.
#
# $text: text that may contain HTML tags.
#
# HTML comments are removed. The text is then split at every "<" and
# each piece is examined:
#  - a whitelisted tag that is properly nested is kept, with its
#    attributes passed through fixTagAttributes();
#  - anything else has its "<" and ">" turned into &lt; / &gt;.
# Tags still open at the end of the text are closed.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );
    $htmlpairs = array( # Tags that must be closed
        "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
        "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
        "strike", "strong", "tt", "var", "div", "center",
        "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
        "ruby", "rt" , "rb" , "rp", "p"
    );
    $htmlsingle = array(
        "br", "hr", "li", "dt", "dd"
    );
    $htmlnest = array( # Tags that can be nested--??
        "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
        "dl", "font", "big", "small", "sub", "sup"
    );
    $tabletags = array( # Can only appear inside table
        "td", "th", "tr"
    );

    # Table cell/row tags are treated like single tags (no closing needed)
    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # NOTE(review): the result is not used in this function; the call is
    # kept in case getHTMLattrs() has side effects -- confirm and remove.
    $htmlattrs = $this->getHTMLattrs () ;

    # Remove HTML comments
    $text = preg_replace( "/<!--.*-->/sU", "", $text );

    $bits = explode( "<", $text );
    $text = array_shift( $bits );
    # $tagstack holds the open tags of the current table scope;
    # $tablestack holds the saved stacks of the enclosing scopes
    $tagstack = array(); $tablestack = array();

    foreach ( $bits as $x ) {
        # Pieces: (1) optional "/", (2) tag name, (3) attributes,
        # (4) closing bracket, (5) text up to the next "<".
        # The match result is checked explicitly instead of silencing
        # notices through error_reporting().
        $isTag = preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
          $x, $regs );

        $badtag = 0 ;
        if ( $isTag ) {
            $slash = $regs[1];
            $t = strtolower( $regs[2] );
            $params = $regs[3];
            $brace = $regs[4];
            $rest = $regs[5];
        }
        if ( $isTag && in_array( $t, $htmlelements ) ) {
            # Check our stack
            if ( $slash ) {
                # Closing a tag...
                if ( ! in_array( $t, $htmlsingle ) &&
                  ( $ot = array_pop( $tagstack ) ) != $t ) {
                    # Does not close the innermost open tag: undo the pop
                    array_push( $tagstack, $ot );
                    $badtag = 1;
                } else {
                    if ( $t == "table" ) {
                        # Leaving a table: restore the enclosing scope
                        $tagstack = array_pop( $tablestack );
                    }
                    $newparams = "";
                }
            } else {
                # Keep track for later
                if ( in_array( $t, $tabletags ) &&
                  ! in_array( "table", $tagstack ) ) {
                    $badtag = 1;
                } else if ( in_array( $t, $tagstack ) &&
                  ! in_array ( $t , $htmlnest ) ) {
                    $badtag = 1 ;
                } else if ( ! in_array( $t, $htmlsingle ) ) {
                    if ( $t == "table" ) {
                        # Entering a table starts a fresh scope
                        array_push( $tablestack, $tagstack );
                        $tagstack = array();
                    }
                    array_push( $tagstack, $t );
                }
                # Strip non-approved attributes from the tag
                $newparams = $this->fixTagAttributes($params);
            }
            if ( ! $badtag ) {
                $rest = str_replace( ">", "&gt;", $rest );
                $text .= "<$slash$t $newparams$brace$rest";
                continue;
            }
        }
        # Not an accepted tag: show it as literal text
        $text .= "&lt;" . str_replace( ">", "&gt;", $x);
    }
    # Close off any remaining tags
    while ( $t = array_pop( $tagstack ) ) {
        $text .= "</$t>\n";
        if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
    }
    wfProfileOut( $fname );
    return $text;
}
1580
/*
 * Post-process the headings (<h1> to <h6>) found in the rendered HTML.
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections when the title is editable by the
 *    user and the option is enabled (unless the NOEDITSECTION magic word
 *    is present)
 * 3) Add a table of contents after the first block of text. The TOC is
 *    dropped for the NOTOC magic word, for the main page and for pages
 *    with fewer than 4 headlines; the FORCETOC magic word overrides all
 *    of these.
 * 4) Auto-anchor headings; repeated heading texts get "_2", "_3", ...
 *    appended to their anchor
 *
 * It loops through all headlines, collects the necessary data, then splits
 * up the string and re-inserts the newly formatted headlines.
 *
 * $text is the HTML to process; the processed HTML is returned.
 */
/* private */ function formatHeadings( $text )
{
    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    $doShowToc = $this->mOptions->getShowToc();
    if( !$this->mTitle->userCanEdit() ) {
        $showEditLink = 0;
        $rightClickHack = 0;
    } else {
        $showEditLink = $this->mOptions->getEditSection();
        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
    }

    # Inhibit editsection links if requested in the page
    $esw =& MagicWord::get( MAG_NOEDITSECTION );
    if( $esw->matchAndRemove( $text ) ) {
        $showEditLink = 0;
    }
    # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
    # do not add TOC
    $mw =& MagicWord::get( MAG_NOTOC );
    if( $mw->matchAndRemove( $text ) ) {
        $doShowToc = 0;
    }

    # never add the TOC to the Main Page. This is an entry page that should not
    # be more than 1-2 screens large anyway
    if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
        $doShowToc = 0;
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now
    # $matches[1] = level digit, [2] = rest of the opening tag, [3] = content
    $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

    # if there are fewer than 4 headlines in the article, do not show TOC
    if( $numMatches < 4 ) {
        $doShowToc = 0;
    }

    # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
    # override above conditions and always show TOC
    $mw =& MagicWord::get( MAG_FORCETOC );
    if ($mw->matchAndRemove( $text ) ) {
        $doShowToc = 1;
    }

    # We need this to perform operations on the HTML
    $sk =& $this->mOptions->getSkin();

    # headline counter
    $headlineCount = 0;

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    $toclevel = 0;
    $toc = "";
    $full = "";
    $head = array();           # replacement HTML per headline
    $sublevelCount = array();  # headlines seen so far per level
    $refers = array();         # occurrences per anchor text
    $level = 0;
    $prevlevel = 0;

    # Neither of these changes while the headlines are processed
    $needNumbering = ( $doNumberHeadings || $doShowToc );
    $severalHeadlines = ( count( $matches[3] ) > 1 );

    foreach( $matches[3] as $headline ) {
        $numbering = "";
        if( $level ) {
            $prevlevel = $level;
        }
        $level = $matches[1][$headlineCount];
        if( $needNumbering && $prevlevel && $level > $prevlevel ) {
            # reset when we enter a new level
            $sublevelCount[$level] = 0;
            $toc .= $sk->tocIndent( $level - $prevlevel );
            $toclevel += $level - $prevlevel;
        }
        if( $needNumbering && $level < $prevlevel ) {
            # reset when we step back a level
            $sublevelCount[$level+1]=0;
            $toc .= $sk->tocUnindent( $prevlevel - $level );
            $toclevel -= $prevlevel - $level;
        }
        # count number of headlines for each level
        if( !isset( $sublevelCount[$level] ) ) {
            $sublevelCount[$level] = 0;
        }
        $sublevelCount[$level]++;
        if( $needNumbering ) {
            # Join the non-zero counters of all levels up to this one: "2.1.3"
            $dot = 0;
            for( $i = 1; $i <= $level; $i++ ) {
                if( !empty( $sublevelCount[$i] ) ) {
                    if( $dot ) {
                        $numbering .= ".";
                    }
                    $numbering .= $sublevelCount[$i];
                    $dot = 1;
                }
            }
        }

        # The canonized header is a version of the header text safe to use for links
        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

        # strip out HTML
        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
        $tocline = trim( $canonized_headline );
        # NOTE(review): double quotes are not replaced here, so a heading
        # containing one would end the name="..." attribute built below
        # early -- confirm whether earlier processing rules that out.
        $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));

        # count how many in assoc. array so we can track dupes in anchors
        if( !isset( $refers[$canonized_headline] ) ) {
            $refers[$canonized_headline] = 0;
        }
        $refers[$canonized_headline]++;
        $occurrence = $refers[$canonized_headline];

        # Prepend the number to the heading text
        if( $needNumbering ) {
            $tocline = $numbering . " " . $tocline;

            # Don't number the heading if it is the only one (looks silly)
            if( $doNumberHeadings && $severalHeadlines ) {
                # the two are different if the line contains a link
                $headline=$numbering . " " . $headline;
            }
        }

        # Create the anchor for linking from the TOC to the section
        $anchor = $canonized_headline;
        if( $occurrence > 1 ) {
            $anchor .= "_" . $occurrence;
        }
        if( $doShowToc ) {
            $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
        }

        # Build the replacement for this headline: optional edit link,
        # then the anchor, then the heading itself
        $head[$headlineCount] = "";
        if( $showEditLink ) {
            $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
        }

        # Add the edit section span
        if( $rightClickHack ) {
            $headline = $sk->editSectionScript($headlineCount+1,$headline);
        }

        # give headline the correct <h#> tag
        $head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

        $headlineCount++;
    }

    if( $doShowToc ) {
        # Close the indentation levels still open and wrap the lines
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    # split up and insert constructed headlines
    $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
    $i = 0;

    foreach( $blocks as $block ) {
        # An [edit] link for the top block of text used to be added here;
        # it stays disabled because it broke block formatting (for
        # example, a bullet point in the top line).
        $full .= $block;
        if( $doShowToc && !$i) {
            # Top anchor now in skin
            $full = $full.$toc;
        }

        # Block $i is followed by headline $i, if there is one
        if( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }

    return $full;
}
1775
# Handle the text following the magic word "ISBN".
#
# $tokenizer: the tokenizer positioned just after "ISBN". Tokens with an
#             empty type are consumed; if the next token is a text token
#             it is consumed as well.
#
# From the text token, leading spaces are skipped and the longest run of
# digits, hyphens and upper-case letters is taken as the ISBN. If that
# run holds anything besides hyphens, it is turned into a link to
# Special:Booksources (hyphens removed in the query); the rest of the
# token text is appended unchanged.
#
# Returns the HTML/text to emit in place of the magic word; this is
# "ISBN " alone when no text token follows.
/* private */ function doMagicISBN( &$tokenizer )
{
    # Check whether next token is a text token
    # If yes, fetch it and convert the text into a
    # Special::BookSources link
    $token = $tokenizer->previewToken();
    while ( $token["type"] == "" )
    {
        $tokenizer->nextToken();
        $token = $tokenizer->previewToken();
    }
    if ( $token["type"] != "text" )
    {
        return "ISBN ";
    }

    $token = $tokenizer->nextToken();
    $x = $token["text"];
    $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    # Split the text into leading blanks, ISBN candidate and remainder.
    # ltrim()/strspn() cope with an exhausted string, where the former
    # character-by-character loops indexed past its end.
    $afterBlank = ltrim( $x, " " );
    $blank = str_repeat( " ", strlen( $x ) - strlen( $afterBlank ) );
    $isbnLength = strspn( $afterBlank, $valid );
    $isbn = (string)substr( $afterBlank, 0, $isbnLength );
    $x = (string)substr( $afterBlank, $isbnLength );

    # The number used in the query has no hyphens
    $num = str_replace( "-", "", $isbn );

    if ( "" == $num ) {
        # Nothing usable as an ISBN. $isbn can only hold hyphens here;
        # keep them so that no input text is lost.
        $text = "ISBN $blank$isbn$x";
    } else {
        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
        $text = "<a href=\"" .
            $titleObj->escapeLocalUrl( "isbn={$num}" ) .
            "\" class=\"internal\">ISBN $isbn</a>";
        $text .= $x;
    }
    return $text;
}
# Build the replacement text for an RFC keyword from the token that follows it.
#
# Tokens whose type is the empty string are skipped. If the next token is a
# text token it is consumed: leading spaces are set aside and the following
# run of decimal digits is taken as the RFC number. With a number, the result
# is an external link whose URL comes from the "rfcurl" message with "$1"
# replaced by the number, followed by the rest of the token text. Without
# one, the token text is returned unchanged behind the "RFC " prefix.
#
# @param  $tokenizer  object providing previewToken() and nextToken(); it is
#                     advanced past every token consumed here
# @return string      "RFC " alone when no text token follows, otherwise the
#                     plain or linked text described above
/* private */ function doMagicRFC( &$tokenizer )
{
        # Skip tokens with an empty type
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" )
        {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }

        if ( $token["type"] != "text" ) {
                return "RFC ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];
        $valid = "0123456789";

        # strspn() measures the leading runs without ever indexing past the
        # end of the string, so an empty or fully consumed token is safe.
        # The (string) casts cover substr() returning false on older PHP.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        $rfcLen = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $rfcLen );
        $x = (string)substr( $x, $rfcLen );

        if ( "" == $rfc ) {
                # No number found: plain assignment (the result variable has
                # no earlier value to append to).
                return "RFC $blank$x";
        }

        $url = wfmsg( "rfcurl" );
        $url = str_replace( "$1", $rfc, $url);
        $sk =& $this->mOptions->getSkin();
        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
        return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1864
# Transform wiki markup before it is saved; the result is still wiki markup.
#
# Sets up the parser for OT_WIKI output, normalises line endings and <br>
# variants, then runs pstPass2() between strip() and unstrip().
# NOTE(review): strip()/unstrip() are defined outside this view; presumably
# they shield sections that pstPass2() must not touch - confirm.
#
# @param  $text        wiki markup as submitted
# @param  $title       title of the page being saved (stored by reference)
# @param  $user        user performing the save, passed on to pstPass2()
# @param  $options     parser options to use
# @param  $clearState  when true, clearState() is called first
# @return string       transformed wiki markup
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOptions = $options;
        $this->mTitle =& $title;
        $this->mOutputType = OT_WIKI;

        if ( $clearState ) {
                $this->clearState();
        }

        $stripState = false;

        # Normalise line endings
        $text = str_replace( "\r\n", "\n", $text );

        # Normalise <br> variants. The first pattern is confined to a single
        # tag with [^>\n]+ : a greedy ".+" could run from one <br> to a later
        # <br clear=all> on the same line and delete everything in between.
        $pairs = array(
                "/<br[^>\\n]+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
                "/<br *?>/i" => "<br/>",
        );
        $text = preg_replace( array_keys( $pairs ), array_values( $pairs ), $text );

        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $text;
}
1891
# Second pass of the pre-save transform: expands the constructs that are
# resolved at save time.
#
#  - variables, via replaceVariables()
#  - signatures: ~~~~~ (date), ~~~~ (user link + date), ~~~ (user link)
#  - context links with an empty label or target, e.g. [[page (context)|]]
#  - trailing whitespace, unless protected by the MAG_END magic word
#
# @param  $text  wiki markup
# @param  $user  user whose name and "nickname" option form the signature
# @return string transformed wiki markup
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone, $wgCurParser;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }
        if(isset($wgLocaltimezone)) {
                $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
        }
        /* Note: this is an ugly timezone hack for the European wikis */
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

        # The tilde markers are literal strings, so str_replace() is used:
        # preg_replace() would treat "$1"- or "\1"-style sequences inside the
        # user name, nickname or date as backreferences and corrupt them.
        # Longest marker first, so ~~~~~ is not consumed as ~~~~ or ~~~.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # The context is the parenthesised suffix of the current page title, if any
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        /*
        $mw =& MagicWord::get( MAG_SUBST );
        $wgCurParser = $this->fork();
        $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
        $this->merge( $wgCurParser );
        */

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1962
# Prime the members that parse() would normally initialise, so that code
# outside the class can call individual parser methods directly.
#
# @param  $title       title to parse against (stored by reference)
# @param  $options     parser options to use
# @param  $outputType  one of the OT_* constants
# @param  $clearState  when true, clearState() is called after the members are set
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1974
# Run variable replacement over a message text in OT_MSG mode, using
# $wgTitle as the title.
#
# A static flag makes the call non-reentrant: when transformMsg() is reached
# again while a transformation is still running, the text is returned
# untouched instead of recursing.
#
# @param  $text     message text
# @param  $options  parser options to use
# @return string    text after replaceVariables(), or the input on re-entry
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $executing = false;

        if ( !$executing ) {
                $executing = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $executing = false;
        }

        return $text;
}
1994 }
1995
# Holds the result of a parse: the output text together with the language
# links, the category links and a "contains old magic" flag.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # @param  $text              output text
        # @param  $languageLinks     array of language links
        # @param  $categoryLinks     array of category links
        # @param  $containsOldMagic  flag stored as given
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the link lists and the old-magic flag of another ParserOutput
        # into this one. The text is left alone.
        #
        # @param  $other  ParserOutput whose links and flag are merged in
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links, not from
                # this object's own language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2025
# Bundle of settings a Parser run is configured with. Five values are copied
# from site-wide globals, the rest from a user's skin and preferences; see
# initialiseFromUser().
class ParserOptions
{
        # All members are private; go through the accessors below
        var $mUseTeX;                    # <math> tags are expanded with texvc
        var $mUseCategoryMagic;          # [[Category:xxxx]] tags get special treatment
        var $mUseDynamicDates;           # dates are formatted with $wgDateFormatter
        var $mInterwikiMagic;            # interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # external images may appear inline
        var $mSkin;                      # reference to the preferred skin
        var $mDateFormat;                # index of the date format
        var $mEditSection;               # "edit section" links are created
        var $mEditSectionOnRightClick;   # JavaScript for section editing on right click is generated
        var $mNumberHeadings;            # headings are numbered automatically
        var $mShowToc;                   # a table of contents is shown

        # Getter/setter pairs, one pair per member. Setters return whatever
        # wfSetVar() (wfSetRef() for the skin) returns.

        function getUseTeX() { return $this->mUseTeX; }
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

        function getSkin() { return $this->mSkin; }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }

        function getDateFormat() { return $this->mDateFormat; }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

        function getEditSection() { return $this->mEditSection; }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

        function getNumberHeadings() { return $this->mNumberHeadings; }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

        function getShowToc() { return $this->mShowToc; }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Factory: create a ParserOptions and initialise it from $user.
        #
        # @param  $user  user object, or a false value for a fresh User
        # @return ParserOptions
        /* static */ function newFromUser( &$user )
        {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fill every member: the site-wide switches from their globals, the
        # skin and the remaining options from the user.
        #
        # @param  $userInput  user object; when it evaluates to false a new
        #                     User marked as loaded is used instead
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user settings
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }

}
2098
# Regex callback used in Parser::replaceVariables.
# Forwards the match array to braceSubstitution() on the parser held in the
# $wgCurParser global and returns its result.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2105
2106 ?>