includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36
  # Upper bound on how many times any single page may be included
  define( 'MAX_INCLUDE_REPEAT', 5 );

  # Output modes stored in Parser::$mOutputType
  define( 'OT_HTML', 1 );
  define( 'OT_WIKI', 2 );
  define( 'OT_MSG', 3 );

  # Common prefix of the placeholder markers generated by the strip functions
  define( 'UNIQ_PREFIX', 'NaodW29' );
  46
  47 class Parser
  48 {
  49         # Cleared with clearState():
  50         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  51         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  52
  53         # Temporary:
  54         var $mOptions, $mTitle, $mOutputType;
  55
  # Constructor: puts the parser into a clean state by calling clearState().
  # Declared as __construct() (and before Parser()) so that PHP versions which
  # no longer treat a method named after the class as the constructor still
  # initialise the object.
  function __construct()
  {
      $this->clearState();
  }

  # PHP 4-style constructor, kept so that PHP 4 and any explicit calls to
  # Parser() keep working. It only delegates to __construct().
  function Parser()
  {
      $this->__construct();
  }
  60
  # Resets every member that is documented as "cleared with clearState()":
  # a fresh ParserOutput, the external-link autonumber counter, block-level
  # bookkeeping, the variable cache, per-page inclusion counts, the strip
  # state and the argument stack.
  function clearState()
  {
      $this->mOutput = new ParserOutput;
      $this->mAutonumber = 0;
      $this->mLastSection = "";
      $this->mDTopen = false;
      # $mInPre is declared alongside the other cleared members but used to be
      # left untouched here, so it could carry over from a previous parse.
      $this->mInPre = false;
      $this->mVariables = false;
      $this->mIncludeCount = array();
      $this->mStripState = array();
      $this->mArgStack = array();
  }
  72
  73         # First pass--just handle <nowiki> sections, pass the rest off
  74         # to internalParse() which does all the real work.
  75         #
  76         # Returns a ParserOutput
  77         #
  # Converts wiki markup to HTML.
  #
  # $text       - wiki markup to convert
  # $title      - title object, stored by reference in $this->mTitle
  # $options    - parser options, stored in $this->mOptions
  # $linestart  - passed through to internalParse() and doBlockLevels()
  # $clearState - when true, clearState() is called before parsing
  #
  # Returns $this->mOutput after its text has been set to the generated HTML.
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
      $fname = "Parser::parse";
      wfProfileIn( $fname );

      if ( $clearState ) {
          $this->clearState();
      }

      $this->mOptions = $options;
      $this->mTitle =& $title;
      $this->mOutputType = OT_HTML;

      # Hide stripped sections behind markers, parse the rest, then put the
      # stripped sections back.
      $text = $this->strip( $text, $this->mStripState );
      $text = $this->internalParse( $text, $linestart );
      $text = $this->unstrip( $text, $this->mStripState );

      # Clean up special characters, only run once, next-to-last before doBlockLevels
      $fixtags = array(
          "/<hr *>/i" => '<hr/>',
          "/<br *>/i" => '<br/>',
          "/<center *>/i"=>'<div class="center">',
          "/<\\/center *>/i" => '</div>',
          # Clean up spare ampersands; note that we probably ought to be
          # more careful about named entities.
          # An ampersand is left alone only when it starts something that
          # looks like an entity: a name, a decimal reference or a hex
          # reference. The hex class is A-F/a-f (the former "A-f" range also
          # accepted punctuation such as "_" and "^").
          '/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
      );
      $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

      # only once and last
      $text = $this->doBlockLevels( $text, $linestart );

      $this->mOutput->setText( $text );
      wfProfileOut( $fname );
      return $this->mOutput;
  }
 114
 # Returns a string of lowercase hexadecimal digits built from two
 # mt_rand() values in the range 0..0x7fffffff.
 /* static */ function getRandomString()
 {
     $first = mt_rand( 0, 0x7fffffff );
     $second = mt_rand( 0, 0x7fffffff );

     return dechex( $first ) . dechex( $second );
 }
 119
 120         # Replaces all occurrences of <$tag>content</$tag> in the text
 121         # with a random marker and returns the new text. The output parameter
 122         # $content will be an associative array filled with data of the form
 123         # $unique_marker => content.
 124
 125         # If $content is already set, the additional entries will be appended
 126
 # $tag         - tag name to look for (matched case-insensitively)
 # $text        - text to scan
 # $content     - in/out: receives marker => inner text for every tag found;
 #                existing entries are kept
 # $uniq_prefix - string put in front of every generated marker
 #
 # Returns $text with each <$tag>...</$tag> section replaced by its marker.
 # A section that is opened but never closed extends to the end of the text.
 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
     $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
     if ( !$content ) {
         $content = array( );
     }
     $n = 1;
     $stripped = "";

     # Quote the tag name so that it is always matched literally
     $quotedTag = preg_quote( $tag, "/" );
     $openPattern = "/<\\s*$quotedTag\\s*>/i";
     $closePattern = "/<\\/\\s*$quotedTag\\s*>/i";

     while ( "" != $text ) {
         $p = preg_split( $openPattern, $text, 2 );
         $stripped .= $p[0];
         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
             # No further opening tag, or nothing follows it
             $text = "";
         } else {
             $q = preg_split( $closePattern, $p[1], 2 );
             $marker = $rnd . sprintf("%08X", $n++);
             $content[$marker] = $q[0];
             $stripped .= $marker;
             # Without a closing tag there is no remainder; reading $q[1]
             # unconditionally would access an undefined offset.
             $text = isset( $q[1] ) ? $q[1] : "";
         }
     }
     return $stripped;
 }
 150
 151         # Strips <nowiki> and <pre>, plus <hiero> and <math> when those are enabled
 152         # Returns the text, and fills an array with data needed in unstrip()
 153         # If the $state is already a valid strip state, it adds to the state
 154         #
 # $text  - text to strip
 # $state - in/out strip state; an empty value is replaced by a fresh state,
 #          otherwise the newly found sections are added to it
 #
 # For HTML output the stored replacement is the rendered form of each
 # section; for any other output type it is the original tag and content.
 function strip( $text, &$state )
 {
     $render = ($this->mOutputType == OT_HTML);
     $uniq_prefix = UNIQ_PREFIX;

     $nowiki_content = array();
     $hiero_content = array();
     $math_content = array();
     $pre_content = array();
     $item_content = array();

     # <nowiki>
     $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
     foreach( array_keys( $nowiki_content ) as $marker ){
         $raw = $nowiki_content[$marker];
         $nowiki_content[$marker] = $render
             ? wfEscapeHTMLTagsOnly( $raw )
             : "<nowiki>$raw</nowiki>";
     }

     # <hiero>, only when WikiHiero is switched on
     if( $GLOBALS['wgUseWikiHiero'] ){
         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
         foreach( array_keys( $hiero_content ) as $marker ){
             $raw = $hiero_content[$marker];
             $hiero_content[$marker] = $render
                 ? WikiHiero( $raw, WH_MODE_HTML)
                 : "<hiero>$raw</hiero>";
         }
     }

     # <math>, only when TeX is switched on in the parser options
     if( $this->mOptions->getUseTeX() ){
         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
         foreach( array_keys( $math_content ) as $marker ){
             $raw = $math_content[$marker];
             $math_content[$marker] = $render
                 ? renderMath( $raw )
                 : "<math>$raw</math>";
         }
     }

     # <pre>
     $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
     foreach( array_keys( $pre_content ) as $marker ){
         $raw = $pre_content[$marker];
         $pre_content[$marker] = $render
             ? "<pre>" . wfEscapeHTMLTagsOnly( $raw ) . "</pre>"
             : "<pre>$raw</pre>";
     }

     $found = array(
         'nowiki' => $nowiki_content,
         'hiero' => $hiero_content,
         'math' => $math_content,
         'pre' => $pre_content
     );

     if ( $state ) {
         # Add to the existing state; entries already present win
         foreach ( $found as $kind => $entries ) {
             $state[$kind] = $state[$kind] + $entries;
         }
     } else {
         $found['item'] = $item_content;
         $state = $found;
     }
     return $text;
 }
 225
 # Puts stripped content back into the text.
 #
 # $text  - text containing strip markers
 # $state - strip state: an array of marker => content arrays
 #
 # Returns $text with every marker replaced by its stored content.
 function unstrip( $text, &$state )
 {
     if ( !is_array( $state ) ) {
         return $text;
     }

     # Must expand in reverse order, otherwise nested tags will be corrupted.
     # Iterating over reversed copies (instead of end()/prev() with a
     # "!== false" test) means a stored value of false cannot cut the walk short.
     foreach ( array_reverse( $state, true ) as $contentDict ) {
         if ( !is_array( $contentDict ) ) {
             continue;
         }
         foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
             $text = str_replace( $marker, $content, $text );
         }
     }

     return $text;
 }
 238
 239         # Add an item to the strip state
 240         # Returns the unique tag which must be inserted into the stripped text
 241         # The tag will be replaced with the original text in unstrip()
 242
 # $text  - content to store
 # $state - in/out strip state; an empty value is replaced by a fresh state
 #          with empty 'nowiki', 'hiero', 'math', 'pre' and 'item' lists
 #
 # Returns the newly generated marker under which $text was stored in
 # $state['item'].
 function insertStripItem( $text, &$state )
 {
     if ( !$state ) {
         $state = array(
             'nowiki' => array(),
             'hiero' => array(),
             'math' => array(),
             'pre' => array(),
             'item' => array()
         );
     }

     $marker = UNIQ_PREFIX . '-item' . Parser::getRandomString();
     $state['item'][$marker] = $text;

     return $marker;
 }
 258
 # Builds the HTML listing that is appended to a category page.
 #
 # Returns "" when category magic is disabled in the parser options or when
 # the part of the title before the first ":" is not the localised category
 # name. Otherwise returns HTML with a "subcategories" section and an
 # articles section, each a comma-separated list of links, built from the
 # pages found in the links and brokenlinks tables for this article id.
 function categoryMagic ()
 {
     global $wgLang , $wgUser ;
     if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;

     # The id is interpolated into SQL below, so force it to an integer
     $id = intval( $this->mTitle->getArticleID() ) ;
     $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
     $ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
     if ( $cat != $ti[0] ) return "" ;
     # A title consisting of the category name alone has no second part
     $categoryName = isset( $ti[1] ) ? $ti[1] : "" ;

     $r = "<br style=\"clear:both;\"/>\n";

     $articles = array() ;
     $children = array() ;

     # NOTE(review): the file header says the parser should stay away from
     # $wgUser; $this->mOptions->getSkin() is used elsewhere in this class.
     # Left unchanged here - confirm before switching.
     $sk =& $wgUser->getSkin() ;

     $queries = array(
         "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id",
         "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id"
     ) ;

     # Collect the rows of both queries, freeing each result as soon as it
     # has been read (previously only the second result was freed).
     $data = array () ;
     foreach ( $queries AS $sql )
     {
         $res = wfQuery ( $sql, DB_READ ) ;
         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
         wfFreeResult ( $res ) ;
     }

     foreach ( $data AS $x )
     {
         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
         if ( $t != "" ) $t .= ":" ;
         $t .= $x->cur_title ;

         # Pages whose prefix is the category name are listed as subcategories
         $y = explode ( ":" , $t , 2 ) ;
         if ( count ( $y ) == 2 && $y[0] == $cat ) {
             array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
         } else {
             array_push ( $articles , $sk->makeLink ( $t ) ) ;
         }
     }

     # Children
     if ( count ( $children ) > 0 )
     {
         asort ( $children ) ;
         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
         $r .= implode ( ", " , $children ) ;
     }

     # Articles
     if ( count ( $articles ) > 0 )
     {
         asort ( $articles ) ;
         $h =  wfMsg( "category_header", $categoryName );
         $r .= "<h2>{$h}</h2>\n" ;
         $r .= implode ( ", " , $articles ) ;
     }

     return $r ;
 }
 324
 # Returns the list of attribute names (lowercase) that are allowed on HTML
 # tags--no scripting, etc. Each name appears exactly once.
 function getHTMLattrs ()
 {
     $htmlattrs = array(
             "title", "align", "lang", "dir", "width", "height",
             "bgcolor", "clear", /* BR */ "noshade", /* HR */
             "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
             /* FONT */ "type", "start", "value", "compact",
             /* For various lists, mostly deprecated but safe */
             "summary", "border", "frame", "rules",
             "cellspacing", "cellpadding", "valign", "char",
             "charoff", "colgroup", "col", "span", "abbr", "axis",
             "headers", "scope", "rowspan", "colspan", /* Tables */
             "id", "class", "name", "style" /* For CSS */
             );
     return $htmlattrs ;
 }
 341
 # Filters the attribute part of an HTML tag.
 #
 # $t - attribute text, e.g. 'border="1" onclick="..."'
 #
 # Returns the trimmed text with every attribute whose name is not in
 # getHTMLattrs() removed, or "" when the remaining text has a style
 # attribute containing "expression", a "tp://"-style URL or "url(".
 function fixTagAttributes ( $t )
 {
     if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

     # Strip non-approved attributes from the tag. A callback is used instead
     # of the /e modifier: /e evaluated the matched (user-supplied) text as
     # PHP code and no longer exists in current PHP versions.
     $t = preg_replace_callback(
         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
         array( $this, 'fixTagAttributesCallback' ),
         $t);

     # Strip javascript "expression" from stylesheets. Brute force approach:
     # If anything offensive is found, all attributes of the HTML tag are dropped
     if( preg_match(
         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
         wfMungeToUtf8( $t ) ) )
     {
         $t="";
     }

     return trim ( $t ) ;
 }

 # preg_replace_callback() helper for fixTagAttributes().
 # $m[1] is the attribute name and $m[3], when present, its value (including
 # any surrounding double quotes). Returns "name=value" or "name" for an
 # approved attribute and "" for anything else.
 /* private */ function fixTagAttributesCallback ( $m )
 {
     if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() ) ) return "" ;
     if ( isset ( $m[3] ) && $m[3] != "" ) return $m[1] . "=" . $m[3] ;
     return $m[1] ;
 }
 364
 # Converts wiki table markup to HTML, line by line.
 #
 # Recognised line starts (after trimming): "{|" opens a table, "|}" closes
 # it, "|-" starts a new row, "|" a data cell, "!" a header cell and "|+" a
 # caption. Several cells may share a line, separated by "||" ("!!" for
 # header cells); text before a single "|" inside a cell is taken as the
 # cell's attributes. Lines outside any table are left untouched.
 #
 # $t - text to convert
 # Returns the converted text; anything still open at the end is closed.
 function doTableStuff ( $t )
 {
     $t = explode ( "\n" , $t ) ;
     # One entry per currently open (possibly nested) table:
     $td = array () ; # Is currently a cell tag open?
     $ltd = array () ; # Name of that cell tag: td, th or caption
     $tr = array () ; # Is currently a tr tag open?
     $ltr = array () ; # tr attributes
     foreach ( $t AS $k => $x )
     {
         $x = trim ( $x ) ;
         $fc = substr ( $x , 0 , 1 ) ;
         if ( "{|" == substr ( $x , 0 , 2 ) )
         {
             # Everything after "{|" is attribute text. Starting at offset 2
             # (not 3) keeps the first character when no space follows "{|";
             # fixTagAttributes() trims leading whitespace anyway.
             $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $tr , false ) ;
             array_push ( $ltr , "" ) ;
         }
         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
         else if ( "|}" == substr ( $x , 0 , 2 ) )
         {
             $z = "</table>\n" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
         }
         else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
         {
             $x = substr ( $x , 1 ) ;
             while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
             $z = "" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
             if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
             # The <tr> itself is emitted with the first cell of the row;
             # only its attributes are remembered here.
             array_push ( $tr , false ) ;
             array_push ( $td , false ) ;
             array_push ( $ltd , "" ) ;
             array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
         }
         else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
         {
             if ( "|+" == substr ( $x , 0 , 2 ) )
             {
                 $fc = "+" ;
                 $x = substr ( $x , 1 ) ;
             }
             $after = substr ( $x , 1 ) ;
             if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
             $after = explode ( "||" , $after ) ;
             $t[$k] = "" ;
             foreach ( $after AS $theline )
             {
                 $z = "" ;
                 if ( $fc != "+" )
                 {
                     # Open the row if this is its first cell
                     $tra = array_pop ( $ltr ) ;
                     if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                     array_push ( $tr , true ) ;
                     array_push ( $ltr , "" ) ;
                 }

                 # Close the previous cell, if any, before opening the new one
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                 if ( $fc == "|" ) $l = "td" ;
                 else if ( $fc == "!" ) $l = "th" ;
                 else if ( $fc == "+" ) $l = "caption" ;
                 else $l = "" ;
                 array_push ( $ltd , $l ) ;
                 $y = explode ( "|" , $theline , 2 ) ;
                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                 $t[$k] .= $y ;
                 array_push ( $td , true ) ;
             }
         }
     }

     # Closing open cell, tr && table. The cell is closed with the tag that
     # opened it, which may be th or caption rather than td.
     while ( count ( $td ) > 0 )
     {
         $l = array_pop ( $ltd ) ;
         if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
         $t[] = "</table>" ;
     }

     $t = implode ( "\n" , $t ) ;
     return $t ;
 }
 464
 # Runs the individual conversion passes over the text in a fixed order and
 # appends the category listing.
 #
 # $text      - text to convert (already stripped by the caller)
 # $linestart - not used by this method itself
 # $args      - handed to replaceVariables()
 #
 # Returns the converted text.
 function internalParse( $text, $linestart, $args = array() )
 {
     $profileName = "Parser::internalParse";
     wfProfileIn( $profileName );

     # HTML tag handling and variable replacement come first
     $text = $this->removeHTMLtags( $text );
     $text = $this->replaceVariables( $text, $args );

     $text = $this->doHeadings( $text );

     # Optional date reformatting, controlled by the parser options
     if ( $this->mOptions->getUseDynamicDates() ) {
         global $wgDateFormatter;
         $dateFormat = $this->mOptions->getDateFormat();
         $text = $wgDateFormatter->reformat( $dateFormat, $text );
     }

     # External links are handled before the tokenized pass
     $text = $this->replaceExternalLinks( $text );
     $text = $this->doTokenizedParser( $text );
     $text = $this->doTableStuff( $text );
     $text = $this->formatHeadings( $text );

     $sk =& $this->mOptions->getSkin();
     $text = $sk->transformContent( $text );

     $text .= $this->categoryMagic();

     wfProfileOut( $profileName );
     return $text;
 }
 492
 493
 # Turns lines of the form "==Heading==" into <hN> elements, where N is the
 # number of "=" signs on each side (1 to 6). The longest runs are handled
 # first so that "===x===" is not taken for a level 2 heading.
 /* private */ function doHeadings( $text )
 {
     $level = 6;
     while ( $level >= 1 ) {
         $marks = str_repeat( "=", $level );
         $pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
         $replacement = "<h{$level}>\\1</h{$level}>\\2";
         $text = preg_replace( $pattern, $replacement, $text );
         --$level;
     }
     return $text;
 }
 503
 504         # Note: we have to do external links before the internal ones,
 505         # and otherwise take great care in the order of things here, so
 506         # that we don't end up interpreting some URLs twice.
 507
 # Applies subReplaceExternalLinks() once per supported protocol, in a fixed
 # order. The flag stored with each protocol is passed on as $autonumber;
 # it is set for http and https only.
 /* private */ function replaceExternalLinks( $text )
 {
     $profileName = "Parser::replaceExternalLinks";
     wfProfileIn( $profileName );

     $protocols = array(
         "http" => true,
         "https" => true,
         "ftp" => false,
         "irc" => false,
         "gopher" => false,
         "news" => false,
         "mailto" => false
     );
     foreach ( $protocols as $protocol => $autonumber ) {
         $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
     }

     wfProfileOut( $profileName );
     return $text;
 }
 522
 # Converts the external links of one protocol into HTML.
 #
 # $s          - text to convert
 # $protocol   - protocol name without the colon, e.g. "http"
 # $autonumber - when true, bracketed links without a label are shown as
 #               "[n]" using the running $this->mAutonumber counter, and
 #               (if the options allow external images) image URLs are
 #               turned into images
 #
 # Free URLs are handled first, then links in square brackets.
 # Returns the converted text.
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
     # Stand-in for the protocol name while free links are rewritten, so that
     # the generated HTML is not matched again as a link
     $placeholder = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
     $urlChars = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

     # Separators that are not part of a URL when they are its last
     # character but are part of it when they occur inside, e.g. in
     # "go to www.foo.com, where .." the comma is excluded while in
     # "www.foo.com/123,2342,32.htm" the commas are included.
     $sepChars = ",;\.:";
     $fileChars = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
     $imageExtensions = "gif|png|jpg|jpeg";

     # The curly braces below are PHP string interpolation, not regex syntax
     $imagePattern = "/(^|[^\\[])({$protocol}:)([{$urlChars}{$sepChars}]+)\\/([{$fileChars}]+)\\." .
       "((?i){$imageExtensions})([^{$urlChars}]|$)/";
     $freeLinkPattern = "/(^|[^\\[])({$protocol}:)(([{$urlChars}]|[{$sepChars}][{$urlChars}])+)" .
       "([^{$urlChars}{$sepChars}]|[{$sepChars}]|$)/";

     $sk =& $this->mOptions->getSkin();

     if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
         $imageHtml = $sk->makeImage( "{$placeholder}:\\3" . "/\\4.\\5", "\\4.\\5" );
         $s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
     }

     $target = "{$placeholder}:\\3";
     $linkAttribs = $sk->getExternalLinkAttributes( $target, wfEscapeHTML( $target ) );
     $freeLinkHtml = "\\1" . "<a href=\"{$target}\"" . $linkAttribs . ">" .
       wfEscapeHTML( $target ) . "</a>\\5";
     $s = preg_replace( $freeLinkPattern, $freeLinkHtml, $s );
     $s = str_replace( $placeholder, $protocol, $s );

     # Bracketed links: split at every "[protocol:"; the first piece holds no
     # link (the leading space guards against a link at offset 0)
     $pieces = explode( "[{$protocol}:", " " . $s );
     $s = substr( array_shift( $pieces ), 1 );

     $bareLink = "/^([{$urlChars}{$sepChars}]+)](.*)\$/sD";
     $labelledLink = "/^([{$urlChars}{$sepChars}]+)\\s+([^\\]]+)](.*)\$/sD";

     foreach ( $pieces as $piece ) {
         if ( preg_match( $bareLink, $piece, $m ) ) {
             # [url]
             $link = "{$protocol}:{$m[1]}";
             $trail = $m[2];
             if ( $autonumber ) {
                 $text = "[" . ++$this->mAutonumber . "]";
             } else {
                 $text = wfEscapeHTML( $link );
             }
         } else if ( preg_match( $labelledLink, $piece, $m ) ) {
             # [url label]
             $link = "{$protocol}:{$m[1]}";
             $text = $m[2];
             $trail = $m[3];
         } else {
             # Not a link after all: put the piece back unchanged
             $s .= "[{$protocol}:" . $piece;
             continue;
         }

         $textShowsLink = ( $link == $text ||
           preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) );
         if ( $textShowsLink ) {
             $paren = "";
         } else {
             # Expand the URL for printable version
             $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
         }

         $la = $sk->getExternalLinkAttributes( $link, $text );
         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
     }
     return $s;
 }
 589
 # Handles a ''' (bold) token.
 #
 # $state - in/out array with the keys "em" and "strong"; each is false when
 #          the tag is closed, otherwise the position at which it was opened
 #          (or true when the token carries no "pos")
 # $token - the token; its "pos" entry is used if present
 #
 # Returns the HTML to emit and updates $state["strong"].
 /* private */ function handle3Quotes( &$state, $token )
 {
     if ( $state["strong"] === false ) {
         # Not bold yet: open it
         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
         return "<strong>";
     }

     # Bold is open. If <em> was opened after it (''' lala ''lala '''), the
     # <em> has to be closed and reopened around the closing </strong>.
     $emOpenedInside = ( $state["em"] !== false && $state["em"] > $state["strong"] );
     $state["strong"] = FALSE;

     return $emOpenedInside ? "</em></strong><em>" : "</strong>";
 }
 607
 # Handles a '' (italic) token.
 #
 # $state - in/out array with the keys "em" and "strong"; each is false when
 #          the tag is closed, otherwise the position at which it was opened
 #          (or true when the token carries no "pos")
 # $token - the token; its "pos" entry is used if present
 #
 # Returns the HTML to emit and updates $state["em"].
 /* private */ function handle2Quotes( &$state, $token )
 {
     if ( $state["em"] === false ) {
         # Not italic yet: open it
         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
         return "<em>";
     }

     # Italic is open. If <strong> was opened after it (''lala'''lala'' ....'''),
     # the <strong> has to be closed and reopened around the closing </em>.
     $strongOpenedInside = ( $state["strong"] !== false && $state["strong"] > $state["em"] );
     $state["em"] = FALSE;

     return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
 }
 626
 # Handles a ''''' (bold and italic) token.
 #
 # $state - in/out array with the keys "em" and "strong"; each is false when
 #          the tag is closed, otherwise the position at which it was opened
 #          (or true when the token carries no "pos")
 # $token - the token; its "pos" entry is used if present
 #
 # Returns the HTML to emit and updates both entries of $state.
 /* private */ function handle5Quotes( &$state, $token )
 {
     # Same fallback in every branch: a token without "pos" marks the tag as
     # open with true. Reading $token["pos"] unguarded stored null instead.
     $pos = isset($token["pos"]) ? $token["pos"] : true;

     $s = "";
     if ( $state["em"] !== false && $state["strong"] !== false ) {
         # Both open: close the one that was opened last first
         if ( $state["em"] < $state["strong"] ) {
             $s .= "</strong></em>";
         } else {
             $s .= "</em></strong>";
         }
         $state["strong"] = $state["em"] = FALSE;
     } elseif ( $state["em"] !== false ) {
         # Only italic open: two quotes close it, three open bold
         $s .= "</em><strong>";
         $state["em"] = FALSE;
         $state["strong"] = $pos;
     } elseif ( $state["strong"] !== false ) {
         # Only bold open: three quotes close it, two open italic
         $s .= "</strong><em>";
         $state["strong"] = FALSE;
         $state["em"] = $pos;
     } else { # not $em and not $strong
         $s .= "<strong><em>";
         $state["strong"] = $state["em"] = $pos;
     }
     return $s;
 }
 651
 652         /* private */ function doTokenizedParser( $str )
 653         {
 654                 global $wgLang; # for language specific parser hook
 655
 656                 $tokenizer=Tokenizer::newFromString( $str );
 657                 $tokenStack = array();
 658
 659                 $s="";
 660                 $state["em"]      = FALSE;
 661                 $state["strong"]  = FALSE;
 662                 $tagIsOpen = FALSE;
 663                 $threeopen = false;
 664
 665                 # The tokenizer splits the text into tokens and returns them one by one.
 666                 # Every call to the tokenizer returns a new token.
 667                 while ( $token = $tokenizer->nextToken() )
 668                 {
 669                         switch ( $token["type"] )
 670                         {
 671                                 case "text":
 672                                         # simple text with no further markup
 673                                         $txt = $token["text"];
 674                                         break;
 675                                 case "[[[":
 676                                         # remember the tag opened with 3 [
 677                                         $threeopen = true;
 678                                 case "[[":
 679                                         # link opening tag.
 680                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 681                                         $tagIsOpen = TRUE;
 682                                         array_push( $tokenStack, $token );
 683                                         $txt="";
 684                                         break;
 685
 686                                 case "]]]":
 687                                 case "]]":
 688                                         # link close tag.
 689                                         # get text from stack, glue it together, and call the code to handle a
 690                                         # link
 691
 692                                         if ( count( $tokenStack ) == 0 )
 693                                         {
 694                                                 # stack empty. Found a ]] without an opening [[
 695                                                 $txt = "]]";
 696                                         } else {
 697                                                 $linkText = "";
 698                                                 $lastToken = array_pop( $tokenStack );
 699                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 700                                                 {
 701                                                         if( !empty( $lastToken["text"] ) ) {
 702                                                                 $linkText = $lastToken["text"] . $linkText;
 703                                                         }
 704                                                         $lastToken = array_pop( $tokenStack );
 705                                                 }
 706
 707                                                 $txt = $linkText ."]]";
 708
 709                                                 if( isset( $lastToken["text"] ) ) {
 710                                                         $prefix = $lastToken["text"];
 711                                                 } else {
 712                                                         $prefix = "";
 713                                                 }
 714                                                 $nextToken = $tokenizer->previewToken();
 715                                                 if ( $nextToken["type"] == "text" )
 716                                                 {
 717                                                         # Preview just looks at it. Now we have to fetch it.
 718                                                         $nextToken = $tokenizer->nextToken();
 719                                                         $txt .= $nextToken["text"];
 720                                                 }
 721                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 722
 723                                                 # did the tag start with 3 [ ?
 724                                                 if($threeopen) {
 725                                                         # show the first as text
 726                                                         $txt = "[".$txt;
 727                                                         $threeopen=false;
 728                                                 }
 729
 730                                         }
 731                                         $tagIsOpen = (count( $tokenStack ) != 0);
 732                                         break;
 733                                 case "----":
 734                                         $txt = "\n<hr />\n";
 735                                         break;
 736                                 case "'''":
 737                                         # This and the three next ones handle quotes
 738                                         $txt = $this->handle3Quotes( $state, $token );
 739                                         break;
 740                                 case "''":
 741                                         $txt = $this->handle2Quotes( $state, $token );
 742                                         break;
 743                                 case "'''''":
 744                                         $txt = $this->handle5Quotes( $state, $token );
 745                                         break;
 746                                 case "":
 747                                         # empty token
 748                                         $txt="";
 749                                         break;
 750                                 case "RFC ":
 751                                         if ( $tagIsOpen ) {
 752                                                 $txt = "RFC ";
 753                                         } else {
 754                                                 $txt = $this->doMagicRFC( $tokenizer );
 755                                         }
 756                                         break;
 757                                 case "ISBN ":
 758                                         if ( $tagIsOpen ) {
 759                                                 $txt = "ISBN ";
 760                                         } else {
 761                                                 $txt = $this->doMagicISBN( $tokenizer );
 762                                         }
 763                                         break;
 764                                 default:
 765                                         # Call language specific Hook.
 766                                         $txt = $wgLang->processToken( $token, $tokenStack );
 767                                         if ( NULL == $txt ) {
 768                                                 # An unkown token. Highlight.
 769                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 770                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 771                                         }
 772                                         break;
 773                         }
 774                         # If we're parsing the interior of a link, don't append the interior to $s,
 775                         # but push it to the stack so it can be processed when a ]] token is found.
 776                         if ( $tagIsOpen  && $txt != "" ) {
 777                                 $token["type"] = "text";
 778                                 $token["text"] = $txt;
 779                                 array_push( $tokenStack, $token );
 780                         } else {
 781                                 $s .= $txt;
 782                         }
 783                 } #end while
 784                 if ( count( $tokenStack ) != 0 )
 785                 {
 786                         # still objects on stack. opened [[ tag without closing ]] tag.
 787                         $txt = "";
 788                         while ( $lastToken = array_pop( $tokenStack ) )
 789                         {
 790                                 if ( $lastToken["type"] == "text" )
 791                                 {
 792                                         $txt = $lastToken["text"] . $txt;
 793                                 } else {
 794                                         $txt = $lastToken["type"] . $txt;
 795                                 }
 796                         }
 797                         $s .= $txt;
 798                 }
 799                 return $s;
 800         }
 801
 802         /* private */ function handleInternalLink( $line, $prefix )
 803         {
 804                 global $wgLang, $wgLinkCache;
 805                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 806                 static $fname = "Parser::handleInternalLink" ;
 807                 wfProfileIn( $fname );
 808
 809                 wfProfileIn( "$fname-setup" );
 810                 static $tc = FALSE;
 811                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 812                 $sk =& $this->mOptions->getSkin();
 813
 814                 # Match a link having the form [[namespace:link|alternate]]trail
 815                 static $e1 = FALSE;
 816                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 817                 # Match the end of a line for a word that's not followed by whitespace,
 818                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 819                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 820                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 821                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 822
 823
 824                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 825                 static $image = FALSE;
 826                 static $special = FALSE;
 827                 static $media = FALSE;
 828                 static $category = FALSE;
 829                 if ( !$image ) { $image = Namespace::getImage(); }
 830                 if ( !$special ) { $special = Namespace::getSpecial(); }
 831                 if ( !$media ) { $media = Namespace::getMedia(); }
 832                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 833
 834                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 835
 836                 wfProfileOut( "$fname-setup" );
 837                 $s = "";
 838
 839                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 840                         $text = $m[2];
 841                         $trail = $m[3];
 842                 } else { # Invalid form; output directly
 843                         $s .= $prefix . "[[" . $line ;
 844                         return $s;
 845                 }
 846
 847                 /* Valid link forms:
 848                 Foobar -- normal
 849                 :Foobar -- override special treatment of prefix (images, language links)
 850                 /Foobar -- convert to CurrentPage/Foobar
 851                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 852                 */
 853                 $c = substr($m[1],0,1);
 854                 $noforce = ($c != ":");
 855                 if( $c == "/" ) { # subpage
 856                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 857                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 858                                 $noslash=$m[1];
 859                         } else {
 860                                 $noslash=substr($m[1],1);
 861                         }
 862                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 863                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 864                                 if( "" == $text ) {
 865                                         $text= $m[1];
 866                                 } # this might be changed for ugliness reasons
 867                         } else {
 868                                 $link = $noslash; # no subpage allowed, use standard link
 869                         }
 870                 } elseif( $noforce ) { # no subpage
 871                         $link = $m[1];
 872                 } else {
 873                         $link = substr( $m[1], 1 );
 874                 }
 875                 if( "" == $text )
 876                         $text = $link;
 877
 878                 $nt = Title::newFromText( $link );
 879                 if( !$nt ) {
 880                         $s .= $prefix . "[[" . $line;
 881                         return $s;
 882                 }
 883                 $ns = $nt->getNamespace();
 884                 $iw = $nt->getInterWiki();
 885                 if( $noforce ) {
 886                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 887                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 888                                 return (trim($s) == '')? '': $s;
 889                         }
 890                         if( $ns == $image ) {
 891                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 892                                 $wgLinkCache->addImageLinkObj( $nt );
 893                                 return $s;
 894                         }
 895                 }
 896                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 897                     ( strpos( $link, "#" ) == FALSE ) ) {
 898                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 899                         return $s;
 900                 }
 901
 902                 # Category feature
 903                 $catns = strtoupper ( $nt->getDBkey () ) ;
 904                 $catns = explode ( ":" , $catns ) ;
 905                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 906                 else $catns = "" ;
 907                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 908                         $t = explode ( ":" , $nt->getText() ) ;
 909                         array_shift ( $t ) ;
 910                         $t = implode ( ":" , $t ) ;
 911                         $t = $wgLang->ucFirst ( $t ) ;
 912                         $nnt = Title::newFromText ( $category.":".$t ) ;
 913                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 914                         $this->mOutput->mCategoryLinks[] = $t ;
 915                         $s .= $prefix . $trail ;
 916                         return $s ;
 917                 }
 918
 919                 if( $ns == $media ) {
 920                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 921                         $wgLinkCache->addImageLinkObj( $nt );
 922                         return $s;
 923                 } elseif( $ns == $special ) {
 924                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 925                         return $s;
 926                 }
 927                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 928
 929                 wfProfileOut( $fname );
 930                 return $s;
 931         }
 932
 933         # Some functions here used by doBlockLevels()
 934         #
 935         /* private */ function closeParagraph()
 936         {
 937                 $result = "";
 938                 if ( '' != $this->mLastSection ) {
 939                         $result = "</" . $this->mLastSection  . ">\n";
 940                 }
 941                 $this->mInPre = false;
 942                 $this->mLastSection = "";
 943                 return $result;
 944         }
 945         # getCommon() returns the length of the longest common substring
 946         # of both arguments, starting at the beginning of both.
 947         #
 948         /* private */ function getCommon( $st1, $st2 )
 949         {
 950                 $fl = strlen( $st1 );
 951                 $shorter = strlen( $st2 );
 952                 if ( $fl < $shorter ) { $shorter = $fl; }
 953
 954                 for ( $i = 0; $i < $shorter; ++$i ) {
 955                         if ( $st1{$i} != $st2{$i} ) { break; }
 956                 }
 957                 return $i;
 958         }
 959         # These next three functions open, continue, and close the list
 960         # element appropriate to the prefix character passed into them.
 961         #
 962         /* private */ function openList( $char )
 963     {
 964                 $result = $this->closeParagraph();
 965
 966                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 967                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 968                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 969                 else if ( ";" == $char ) {
 970                         $result .= "<dl><dt>";
 971                         $this->mDTopen = true;
 972                 }
 973                 else { $result = "<!-- ERR 1 -->"; }
 974
 975                 return $result;
 976         }
 977
 978         /* private */ function nextItem( $char )
 979         {
 980                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 981                 else if ( ":" == $char || ";" == $char ) {
 982                         $close = "</dd>";
 983                         if ( $this->mDTopen ) { $close = "</dt>"; }
 984                         if ( ";" == $char ) {
 985                                 $this->mDTopen = true;
 986                                 return $close . "<dt>";
 987                         } else {
 988                                 $this->mDTopen = false;
 989                                 return $close . "<dd>";
 990                         }
 991                 }
 992                 return "<!-- ERR 2 -->";
 993         }
 994
 995         /* private */function closeList( $char )
 996         {
 997                 if ( "*" == $char ) { $text = "</li></ul>"; }
 998                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 999                 else if ( ":" == $char ) {
1000                         if ( $this->mDTopen ) {
1001                                 $this->mDTopen = false;
1002                                 $text = "</dt></dl>";
1003                         } else {
1004                                 $text = "</dd></dl>";
1005                         }
1006                 }
1007                 else {  return "<!-- ERR 3 -->"; }
1008                 return $text."\n";
1009         }
1010
1011         /* private */ function doBlockLevels( $text, $linestart )
1012         {
1013                 $fname = "Parser::doBlockLevels";
1014                 wfProfileIn( $fname );
1015                 # Parsing through the text line by line.  The main thing
1016                 # happening here is handling of block-level elements p, pre,
1017                 # and making lists from lines starting with * # : etc.
1018                 #
1019                 $a = explode( "\n", $text );
1020
1021                 $lastPref = $text = $lastLine = '';
1022                 $this->mDTopen = $inBlockElem = false;
1023                 $npl = 0;
1024                 $pstack = false;
1025
1026                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1027                 foreach ( $a as $t ) {
1028                         $oLine = $t;
1029                         $opl = strlen( $lastPref );
1030                         $preCloseMatch = preg_match("/<\\/pre/i", $t );
1031                         $preOpenMatch = preg_match("/<pre/i", $t );
1032                         if (!$this->mInPre) {
1033                                 $this->mInPre = !empty($preOpenMatch);
1034                         }
1035                         if ( !$this->mInPre ) {
1036                                 $npl = strspn( $t, "*#:;" );
1037                                 $pref = substr( $t, 0, $npl );
1038                                 $pref2 = str_replace( ";", ":", $pref );
1039                                 $t = substr( $t, $npl );
1040                         } else {
1041                                 $npl = 0;
1042                                 $pref = $pref2 = '';
1043                         }
1044
1045                         // list generation
1046                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1047                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1048                                 if ( $pstack ) { $pstack = false; }
1049
1050                                 if ( ";" == substr( $pref, -1 ) ) {
1051                                         $cpos = strpos( $t, ":" );
1052                                         if ( false !== $cpos ) {
1053                                                 $term = substr( $t, 0, $cpos );
1054                                                 $text .= $term . $this->nextItem( ":" );
1055                                                 $t = substr( $t, $cpos + 1 );
1056                                         }
1057                                 }
1058                         } else if (0 != $npl || 0 != $opl) {
1059                                 $cpl = $this->getCommon( $pref, $lastPref );
1060                                 if ( $pstack ) { $pstack = false; }
1061
1062                                 while ( $cpl < $opl ) {
1063                                         $text .= $this->closeList( $lastPref{$opl-1} );
1064                                         --$opl;
1065                                 }
1066                                 if ( $npl <= $cpl && $cpl > 0 ) {
1067                                         $text .= $this->nextItem( $pref{$cpl-1} );
1068                                 }
1069                                 while ( $npl > $cpl ) {
1070                                         $char = substr( $pref, $cpl, 1 );
1071                                         $text .= $this->openList( $char );
1072
1073                                         if ( ";" == $char ) {
1074                                                 $cpos = strpos( $t, ":" );
1075                                                 if ( ! ( false === $cpos ) ) {
1076                                                         $term = substr( $t, 0, $cpos );
1077                                                         $text .= $term . $this->nextItem( ":" );
1078                                                         $t = substr( $t, $cpos + 1 );
1079                                                 }
1080                                         }
1081                                         ++$cpl;
1082                                 }
1083                                 $lastPref = $pref2;
1084                         }
1085                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1086                                 $uniq_prefix = UNIQ_PREFIX;
1087                                 // XXX: use a stack for nestable elements like span, table and div
1088                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1089                                 $closematch = preg_match(
1090                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1091                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1092                                 if ( $openmatch or $closematch ) {
1093                                         if ( $pstack ) { $pstack = false; }
1094                                         $text .= $this->closeParagraph();
1095                                         if($preOpenMatch and !$preCloseMatch) {
1096                                                 $this->mInPre = true;
1097                                         }
1098                                         if ( $closematch  ) {
1099                                                 $inBlockElem = false;
1100                                         } else {
1101                                                 $inBlockElem = true;
1102                                         }
1103                                 } else if ( !$inBlockElem ) {
1104                                         if ( " " == $t{0} ) {
1105                                                 // pre
1106                                                 if ($this->mLastSection != 'pre') {
1107                                                         $pstack = false;
1108                                                         $text .= $this->closeParagraph().'<pre>';
1109                                                         $this->mLastSection = 'pre';
1110                                                 }
1111                                         } else {
1112                                                 // paragraph
1113                                                 if ( '' == trim($t) ) {
1114                                                         if ( $pstack ) {
1115                                                                 $text .= $pstack.'<br/>';
1116                                                                 $pstack = false;
1117                                                                 $this->mLastSection = 'p';
1118                                                         } else {
1119                                                                 if ($this->mLastSection != 'p' ) {
1120                                                                         $text .= $this->closeParagraph();
1121                                                                         $this->mLastSection = '';
1122                                                                         $pstack = "<p>";
1123                                                                 } else {
1124                                                                         $pstack = '</p><p>';
1125                                                                 }
1126                                                         }
1127                                                 } else {
1128                                                         if ( $pstack ) {
1129                                                                 $text .= $pstack;
1130                                                                 $pstack = false;
1131                                                                 $this->mLastSection = 'p';
1132                                                         } else if ($this->mLastSection != 'p') {
1133                                                                 $text .= $this->closeParagraph().'<p>';
1134                                                                 $this->mLastSection = 'p';
1135                                                         }
1136                                                 }
1137                                         }
1138                                 }
1139                         }
1140                         if ($pstack === false) {
1141                                 $text .= $t."\n";
1142                         }
1143                 }
1144                 while ( $npl ) {
1145                         $text .= $this->closeList( $pref2{$npl-1} );
1146                         --$npl;
1147                 }
1148                 if ( "" != $this->mLastSection ) {
1149                         $text .= "</" . $this->mLastSection . ">";
1150                         $this->mLastSection = "";
1151                 }
1152
1153                 wfProfileOut( $fname );
1154                 return $text;
1155         }
1156
1157         function getVariableValue( $index ) {
1158                 global $wgLang, $wgSitename, $wgServer;
1159
1160                 switch ( $index ) {
1161                         case MAG_CURRENTMONTH:
1162                                 return date( "m" );
1163                         case MAG_CURRENTMONTHNAME:
1164                                 return $wgLang->getMonthName( date("n") );
1165                         case MAG_CURRENTMONTHNAMEGEN:
1166                                 return $wgLang->getMonthNameGen( date("n") );
1167                         case MAG_CURRENTDAY:
1168                                 return date("j");
1169                         case MAG_CURRENTDAYNAME:
1170                                 return $wgLang->getWeekdayName( date("w")+1 );
1171                         case MAG_CURRENTYEAR:
1172                                 return date( "Y" );
1173                         case MAG_CURRENTTIME:
1174                                 return $wgLang->time( wfTimestampNow(), false );
1175                         case MAG_NUMBEROFARTICLES:
1176                                 return wfNumberOfArticles();
1177                         case MAG_SITENAME:
1178                                 return $wgSitename;
1179                         case MAG_SERVER:
1180                                 return $wgServer;
1181                         default:
1182                                 return NULL;
1183                 }
1184         }
1185
1186         function initialiseVariables()
1187         {
1188                 global $wgVariableIDs;
1189                 $this->mVariables = array();
1190                 foreach ( $wgVariableIDs as $id ) {
1191                         $mw =& MagicWord::get( $id );
1192                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1193                 }
1194         }
1195
1196         /* private */ function replaceVariables( $text, $args = array() )
1197         {
1198                 global $wgLang, $wgScript, $wgArticlePath;
1199
1200                 $fname = "Parser::replaceVariables";
1201                 wfProfileIn( $fname );
1202
1203                 $bail = false;
1204                 if ( !$this->mVariables ) {
1205                         $this->initialiseVariables();
1206                 }
1207                 $titleChars = Title::legalChars();
1208                 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1209
1210                 # This function is called recursively. To keep track of arguments we need a stack:
1211                 array_push( $this->mArgStack, $args );
1212
1213                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1214                 $GLOBALS['wgCurParser'] =& $this;
1215                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1216
1217                 array_pop( $this->mArgStack );
1218
1219                 return $text;
1220         }
1221
1222         function braceSubstitution( $matches )
1223         {
1224                 global $wgLinkCache, $wgLang;
1225                 $fname = "Parser::braceSubstitution";
1226                 $found = false;
1227                 $nowiki = false;
1228                 $title = NULL;
1229
1230                 # $newline is an optional newline character before the braces
1231                 # $part1 is the bit before the first |, and must contain only title characters
1232                 # $args is a list of arguments, starting from index 0, not including $part1
1233
1234                 $newline = $matches[1];
1235                 $part1 = $matches[2];
1236                 # If the third subpattern matched anything, it will start with |
1237                 if ( $matches[3] !== "" ) {
1238                         $args = explode( "|", substr( $matches[3], 1 ) );
1239                 } else {
1240                         $args = array();
1241                 }
1242                 $argc = count( $args );
1243
1244                 # SUBST
1245                 $mwSubst =& MagicWord::get( MAG_SUBST );
1246                 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1247                         if ( $this->mOutputType != OT_WIKI ) {
1248                                 # Invalid SUBST not replaced at PST time
1249                                 # Return without further processing
1250                                 $text = $matches[0];
1251                                 $found = true;
1252                         }
1253                 } elseif ( $this->mOutputType == OT_WIKI ) {
1254                         # SUBST not found in PST pass, do nothing
1255                         $text = $matches[0];
1256                         $found = true;
1257                 }
1258
1259                 # MSG, MSGNW and INT
1260                 if ( !$found ) {
1261                         # Check for MSGNW:
1262                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1263                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1264                                 $nowiki = true;
1265                         } else {
1266                                 # Remove obsolete MSG:
1267                                 $mwMsg =& MagicWord::get( MAG_MSG );
1268                                 $mwMsg->matchStartAndRemove( $part1 );
1269                         }
1270
1271                         # Check if it is an internal message
1272                         $mwInt =& MagicWord::get( MAG_INT );
1273                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1274                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1275                                         $text = wfMsgReal( $part1, $args, true );
1276                                         $found = true;
1277                                 }
1278                         }
1279                 }
1280
1281                 # NS
1282                 if ( !$found ) {
1283                         # Check for NS: (namespace expansion)
1284                         $mwNs = MagicWord::get( MAG_NS );
1285                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1286                                 if ( intval( $part1 ) ) {
1287                                         $text = $wgLang->getNsText( intval( $part1 ) );
1288                                         $found = true;
1289                                 } else {
1290                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1291                                         if ( !is_null( $index ) ) {
1292                                                 $text = $wgLang->getNsText( $index );
1293                                                 $found = true;
1294                                         }
1295                                 }
1296                         }
1297                 }
1298
1299                 # LOCALURL and LOCALURLE
1300                 if ( !$found ) {
1301                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1302                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1303
1304                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1305                                 $func = 'getLocalURL';
1306                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1307                                 $func = 'escapeLocalURL';
1308                         } else {
1309                                 $func = '';
1310                         }
1311
1312                         if ( $func !== '' ) {
1313                                 $title = Title::newFromText( $part1 );
1314                                 if ( !is_null( $title ) ) {
1315                                         if ( $argc > 0 ) {
1316                                                 $text = $title->$func( $args[0] );
1317                                         } else {
1318                                                 $text = $title->$func();
1319                                         }
1320                                         $found = true;
1321                                 }
1322                         }
1323                 }
1324
1325                 # Internal variables
1326                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1327                         $text = $this->mVariables[$part1];
1328                         $found = true;
1329                         $this->mOutput->mContainsOldMagic = true;
1330                 }
1331
1332                 # Arguments input from the caller
1333                 $inputArgs = end( $this->mArgStack );
1334                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1335                         $text = $inputArgs[$part1];
1336                         $found = true;
1337                 }
1338
1339                 # Load from database
1340                 if ( !$found ) {
1341                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1342                         if ( !is_null( $title ) && !$title->isExternal() ) {
1343                                 # Check for excessive inclusion
1344                                 $dbk = $title->getPrefixedDBkey();
1345                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1346                                         $article = new Article( $title );
1347                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1348                                         if ( $articleContent !== false ) {
1349                                                 $found = true;
1350                                                 $text = $articleContent;
1351
1352                                         }
1353                                 }
1354
1355                                 # If the title is valid but undisplayable, make a link to it
1356                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1357                                         $text = "[[" . $title->getPrefixedText() . "]]";
1358                                         $found = true;
1359                                 }
1360                         }
1361                 }
1362
1363                 # Recursive parsing, escaping and link table handling
1364                 # Only for HTML output
1365                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1366                         $text = wfEscapeWikiText( $text );
1367                 } elseif ( $this->mOutputType == OT_HTML && $found ) {
1368                         # Clean up argument array
1369                         $assocArgs = array();
1370                         $index = 1;
1371                         foreach( $args as $arg ) {
1372                                 $eqpos = strpos( $arg, "=" );
1373                                 if ( $eqpos === false ) {
1374                                         $assocArgs[$index++] = $arg;
1375                                 } else {
1376                                         $name = trim( substr( $arg, 0, $eqpos ) );
1377                                         $value = trim( substr( $arg, $eqpos+1 ) );
1378                                         if ( $value === false ) {
1379                                                 $value = "";
1380                                         }
1381                                         if ( $name !== false ) {
1382                                                 $assocArgs[$name] = $value;
1383                                         }
1384                                 }
1385                         }
1386
1387                         # Do not enter included links in link table
1388                         if ( !is_null( $title ) ) {
1389                                 $wgLinkCache->suspend();
1390                         }
1391
1392                         # Run full parser on the included text
1393                         $text = $this->strip( $text, $this->mStripState );
1394                         $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
1395
1396                         # Add the result to the strip state for re-inclusion after
1397                         # the rest of the processing
1398                         $text = $this->insertStripItem( $text, $this->mStripState );
1399
1400                         # Resume the link cache and register the inclusion as a link
1401                         if ( !is_null( $title ) ) {
1402                                 $wgLinkCache->resume();
1403                                 $wgLinkCache->addLinkObj( $title );
1404                         }
1405                 }
1406
1407                 if ( !$found ) {
1408                         return $matches[0];
1409                 } else {
1410                         return $newline . $text;
1411                 }
1412         }
1413
1414         # Returns true if the function is allowed to include this entity
1415         function incrementIncludeCount( $dbk )
1416         {
1417                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1418                         $this->mIncludeCount[$dbk] = 0;
1419                 }
1420                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1421                         return true;
1422                 } else {
1423                         return false;
1424                 }
1425         }
1426
1427
1428         # Cleans up HTML, removes dangerous tags and attributes
1429         /* private */ function removeHTMLtags( $text )
1430         {
1431                 $fname = "Parser::removeHTMLtags";
1432                 wfProfileIn( $fname );
1433                 $htmlpairs = array( # Tags that must be closed
1434                         "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1435                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1436                         "strike", "strong", "tt", "var", "div", "center",
1437                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1438                         "ruby", "rt" , "rb" , "rp", "p"
1439                 );
1440                 $htmlsingle = array(
1441                         "br", "hr", "li", "dt", "dd"
1442                 );
1443                 $htmlnest = array( # Tags that can be nested--??
1444                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1445                         "dl", "font", "big", "small", "sub", "sup"
1446                 );
1447                 $tabletags = array( # Can only appear inside table
1448                         "td", "th", "tr"
1449                 );
1450
1451                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1452                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1453
1454                 $htmlattrs = $this->getHTMLattrs () ;
1455
1456                 # Remove HTML comments
1457                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1458
1459                 $bits = explode( "<", $text );
1460                 $text = array_shift( $bits );
1461                 $tagstack = array(); $tablestack = array();
1462
1463                 foreach ( $bits as $x ) {
1464                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1465                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1466                           $x, $regs );
1467                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1468                         error_reporting( $prev );
1469
1470                         $badtag = 0 ;
1471                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1472                                 # Check our stack
1473                                 if ( $slash ) {
1474                                         # Closing a tag...
1475                                         if ( ! in_array( $t, $htmlsingle ) &&
1476                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1477                                                 array_push( $tagstack, $ot );
1478                                                 $badtag = 1;
1479                                         } else {
1480                                                 if ( $t == "table" ) {
1481                                                         $tagstack = array_pop( $tablestack );
1482                                                 }
1483                                                 $newparams = "";
1484                                         }
1485                                 } else {
1486                                         # Keep track for later
1487                                         if ( in_array( $t, $tabletags ) &&
1488                                           ! in_array( "table", $tagstack ) ) {
1489                                                 $badtag = 1;
1490                                         } else if ( in_array( $t, $tagstack ) &&
1491                                           ! in_array ( $t , $htmlnest ) ) {
1492                                                 $badtag = 1 ;
1493                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1494                                                 if ( $t == "table" ) {
1495                                                         array_push( $tablestack, $tagstack );
1496                                                         $tagstack = array();
1497                                                 }
1498                                                 array_push( $tagstack, $t );
1499                                         }
1500                                         # Strip non-approved attributes from the tag
1501                                         $newparams = $this->fixTagAttributes($params);
1502
1503                                 }
1504                                 if ( ! $badtag ) {
1505                                         $rest = str_replace( ">", "&gt;", $rest );
1506                                         $text .= "<$slash$t $newparams$brace$rest";
1507                                         continue;
1508                                 }
1509                         }
1510                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1511                 }
1512                 # Close off any remaining tags
1513                 while ( $t = array_pop( $tagstack ) ) {
1514                         $text .= "</$t>\n";
1515                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1516                 }
1517                 wfProfileOut( $fname );
1518                 return $text;
1519         }
1520
1521 /*
1522  *
1523  * This function accomplishes several tasks:
1524  * 1) Auto-number headings if that option is enabled
1525  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1526  * 3) Add a Table of contents on the top for users who have enabled the option
1527  * 4) Auto-anchor headings
1528  *
1529  * It loops through all headlines, collects the necessary data, then splits up the
1530  * string and re-inserts the newly formatted headlines.
1531  *
1532  */
1533
1534         /* private */ function formatHeadings( $text )
1535         {
1536                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1537                 $doShowToc = $this->mOptions->getShowToc();
1538                 if( !$this->mTitle->userCanEdit() ) {
1539                         $showEditLink = 0;
1540                         $rightClickHack = 0;
1541                 } else {
1542                         $showEditLink = $this->mOptions->getEditSection();
1543                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1544                 }
1545
1546                 # Inhibit editsection links if requested in the page
1547                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1548                 if( $esw->matchAndRemove( $text ) ) {
1549                         $showEditLink = 0;
1550                 }
1551                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1552                 # do not add TOC
1553                 $mw =& MagicWord::get( MAG_NOTOC );
1554                 if( $mw->matchAndRemove( $text ) ) {
1555                         $doShowToc = 0;
1556                 }
1557
1558                 # never add the TOC to the Main Page. This is an entry page that should not
1559                 # be more than 1-2 screens large anyway
1560                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1561                         $doShowToc = 0;
1562                 }
1563
1564                 # Get all headlines for numbering them and adding funky stuff like [edit]
1565                 # links - this is for later, but we need the number of headlines right now
1566                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1567
1568                 # if there are fewer than 4 headlines in the article, do not show TOC
1569                 if( $numMatches < 4 ) {
1570                         $doShowToc = 0;
1571                 }
1572
1573                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1574                 # override above conditions and always show TOC
1575                 $mw =& MagicWord::get( MAG_FORCETOC );
1576                 if ($mw->matchAndRemove( $text ) ) {
1577                         $doShowToc = 1;
1578                 }
1579
1580
1581                 # We need this to perform operations on the HTML
1582                 $sk =& $this->mOptions->getSkin();
1583
1584                 # headline counter
1585                 $headlineCount = 0;
1586
1587                 # Ugh .. the TOC should have neat indentation levels which can be
1588                 # passed to the skin functions. These are determined here
1589                 $toclevel = 0;
1590                 $toc = "";
1591                 $full = "";
1592                 $head = array();
1593                 $sublevelCount = array();
1594                 $level = 0;
1595                 $prevlevel = 0;
1596                 foreach( $matches[3] as $headline ) {
1597                         $numbering = "";
1598                         if( $level ) {
1599                                 $prevlevel = $level;
1600                         }
1601                         $level = $matches[1][$headlineCount];
1602                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1603                                 # reset when we enter a new level
1604                                 $sublevelCount[$level] = 0;
1605                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1606                                 $toclevel += $level - $prevlevel;
1607                         }
1608                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1609                                 # reset when we step back a level
1610                                 $sublevelCount[$level+1]=0;
1611                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1612                                 $toclevel -= $prevlevel - $level;
1613                         }
1614                         # count number of headlines for each level
1615                         @$sublevelCount[$level]++;
1616                         if( $doNumberHeadings || $doShowToc ) {
1617                                 $dot = 0;
1618                                 for( $i = 1; $i <= $level; $i++ ) {
1619                                         if( !empty( $sublevelCount[$i] ) ) {
1620                                                 if( $dot ) {
1621                                                         $numbering .= ".";
1622                                                 }
1623                                                 $numbering .= $sublevelCount[$i];
1624                                                 $dot = 1;
1625                                         }
1626                                 }
1627                         }
1628
1629                         # The canonized header is a version of the header text safe to use for links
1630                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1631                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1632
1633                         # strip out HTML
1634                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1635                         $tocline = trim( $canonized_headline );
1636                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
1637                         $refer[$headlineCount] = $canonized_headline;
1638
1639                         # count how many in assoc. array so we can track dupes in anchors
1640                         @$refers[$canonized_headline]++;
1641                         $refcount[$headlineCount]=$refers[$canonized_headline];
1642
1643                         # Prepend the number to the heading text
1644
1645                         if( $doNumberHeadings || $doShowToc ) {
1646                                 $tocline = $numbering . " " . $tocline;
1647
1648                                 # Don't number the heading if it is the only one (looks silly)
1649                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1650                                         # the two are different if the line contains a link
1651                                         $headline=$numbering . " " . $headline;
1652                                 }
1653                         }
1654
1655                         # Create the anchor for linking from the TOC to the section
1656                         $anchor = $canonized_headline;
1657                         if($refcount[$headlineCount] > 1 ) {
1658                                 $anchor .= "_" . $refcount[$headlineCount];
1659                         }
1660                         if( $doShowToc ) {
1661                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1662                         }
1663                         if( $showEditLink ) {
1664                                 if ( empty( $head[$headlineCount] ) ) {
1665                                         $head[$headlineCount] = "";
1666                                 }
1667                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1668                         }
1669
1670                         # Add the edit section span
1671                         if( $rightClickHack ) {
1672                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1673                         }
1674
1675                         # give headline the correct <h#> tag
1676                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1677
1678                         $headlineCount++;
1679                 }
1680
1681                 if( $doShowToc ) {
1682                         $toclines = $headlineCount;
1683                         $toc .= $sk->tocUnindent( $toclevel );
1684                         $toc = $sk->tocTable( $toc );
1685                 }
1686
1687                 # split up and insert constructed headlines
1688
1689                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1690                 $i = 0;
1691
1692                 foreach( $blocks as $block ) {
1693                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1694                             # This is the [edit] link that appears for the top block of text when
1695                                 # section editing is enabled
1696
1697                                 # Disabled because it broke block formatting
1698                                 # For example, a bullet point in the top line
1699                                 # $full .= $sk->editSectionLink(0);
1700                         }
1701                         $full .= $block;
1702                         if( $doShowToc && !$i) {
1703                         # Top anchor now in skin
1704                                 $full = $full.$toc;
1705                         }
1706
1707                         if( !empty( $head[$i] ) ) {
1708                                 $full .= $head[$i];
1709                         }
1710                         $i++;
1711                 }
1712
1713                 return $full;
1714         }
1715
1716         /* private */ function doMagicISBN( &$tokenizer )
1717         {
1718                 global $wgLang;
1719
1720                 # Check whether next token is a text token
1721                 # If yes, fetch it and convert the text into a
1722                 # Special::BookSources link
1723                 $token = $tokenizer->previewToken();
1724                 while ( $token["type"] == "" )
1725                 {
1726                         $tokenizer->nextToken();
1727                         $token = $tokenizer->previewToken();
1728                 }
1729                 if ( $token["type"] == "text" )
1730                 {
1731                         $token = $tokenizer->nextToken();
1732                         $x = $token["text"];
1733                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1734
1735                         $isbn = $blank = "" ;
1736                         while ( " " == $x{0} ) {
1737                                 $blank .= " ";
1738                                 $x = substr( $x, 1 );
1739                         }
1740                         while ( strstr( $valid, $x{0} ) != false ) {
1741                                 $isbn .= $x{0};
1742                                 $x = substr( $x, 1 );
1743                         }
1744                         $num = str_replace( "-", "", $isbn );
1745                         $num = str_replace( " ", "", $num );
1746
1747                         if ( "" == $num ) {
1748                                 $text = "ISBN $blank$x";
1749                         } else {
1750                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1751                                 $text = "<a href=\"" .
1752                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1753                                         "\" class=\"internal\">ISBN $isbn</a>";
1754                                 $text .= $x;
1755                         }
1756                 } else {
1757                         $text = "ISBN ";
1758                 }
1759                 return $text;
1760         }
1761         /* private */ function doMagicRFC( &$tokenizer )
1762         {
1763                 global $wgLang;
1764
1765                 # Check whether next token is a text token
1766                 # If yes, fetch it and convert the text into a
1767                 # link to an RFC source
1768                 $token = $tokenizer->previewToken();
1769                 while ( $token["type"] == "" )
1770                 {
1771                         $tokenizer->nextToken();
1772                         $token = $tokenizer->previewToken();
1773                 }
1774                 if ( $token["type"] == "text" )
1775                 {
1776                         $token = $tokenizer->nextToken();
1777                         $x = $token["text"];
1778                         $valid = "0123456789";
1779
1780                         $rfc = $blank = "" ;
1781                         while ( " " == $x{0} ) {
1782                                 $blank .= " ";
1783                                 $x = substr( $x, 1 );
1784                         }
1785                         while ( strstr( $valid, $x{0} ) != false ) {
1786                                 $rfc .= $x{0};
1787                                 $x = substr( $x, 1 );
1788                         }
1789
1790                         if ( "" == $rfc ) {
1791                                 $text .= "RFC $blank$x";
1792                         } else {
1793                                 $url = wfmsg( "rfcurl" );
1794                                 $url = str_replace( "$1", $rfc, $url);
1795                                 $sk =& $this->mOptions->getSkin();
1796                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1797                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1798                         }
1799                 } else {
1800                         $text = "RFC ";
1801                 }
1802                 return $text;
1803         }
1804
# Transform wiki markup before it is saved. Produces altered wiki markup, not HTML:
# line endings and <br> variants are normalised, then pstPass2() is applied to
# the stripped text.
#
# $text       - wikitext to transform
# $title      - title object, stored by reference in mTitle
# $user       - user object, handed on to pstPass2()
# $options    - parser options, stored in mOptions
# $clearState - when true (the default) clearState() is called before transforming
#
# Returns the transformed wikitext.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOptions = $options;
        $this->mTitle =& $title;
        $this->mOutputType = OT_WIKI;

        if ( $clearState ) {
                $this->clearState();
        }

        $stripState = false;

        # Normalise Windows line endings
        $text = str_replace( "\r\n", "\n", $text );

        # Normalise <br> variants.
        # The attribute part is matched with [^>]+ instead of a greedy .+ so that a match
        # can never run past the closing '>' of the tag it started in. With .+ the input
        # "<br>text<br clear=all>" was treated as ONE tag and "text" was thrown away.
        $pairs = array(
                "/<br[^>]+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
                "/<br *?>/i" => "<br/>",
        );
        $text = preg_replace( array_keys( $pairs ), array_values( $pairs ), $text );

        # pstPass2() runs between strip() and unstrip(), which share $stripState
        # (presumably so protected sections are not touched - confirm against strip())
        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $text;
}
1831
# Second pass of the pre-save transform: expands {{subst:}} tags, signatures
# (~~~, ~~~~, ~~~~~) and the "pipe trick" context links, then trims trailing
# whitespace.
#
# $text - wikitext to transform
# $user - user object; getName() and getOption( "nickname" ) supply the signature
#
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone, $wgCurParser;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }
        if(isset($wgLocaltimezone)) {
                $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
        }
        /* Note: this is an ugly timezone hack for the European wikis */
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

        # The tilde markers are literal text, so plain str_replace() is used. With
        # preg_replace() the replacement string is scanned for backreferences, and a
        # user name or nickname containing "$1" or "\1" lost those characters.
        # Longest marker first, so ~~~~~ is not consumed by the shorter ones.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # If the current title has the form "Name (context)", remember the context
        # so that [[|page]] can be expanded to [[page (context)|page]]
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }

        # Most specific pattern first: $p4 must run before $p3, which would
        # otherwise also match the "ns:page (cont)" form
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        /*
        $mw =& MagicWord::get( MAG_SUBST );
        $wgCurParser = $this->fork();
        $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
        $this->merge( $wgCurParser );
        */

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1902
1903         # Set up some variables which are usually set up in parse()
1904         # so that an external function can call some class members with confidence
# $title      - title object, kept by reference in mTitle
# $options    - parser options, kept in mOptions
# $outputType - one of the OT_* constants, kept in mOutputType
# $clearState - when true (the default) clearState() is called after the
#               members above have been set
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1914
# Expand variables in a message text (output type OT_MSG).
#
# $text    - message text to transform
# $options - parser options, kept in mOptions
#
# Returns the expanded text. If a transformMsg() call is already running,
# $text is returned unchanged.
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $inProgress = false;

        # The static flag guards against infinite recursion: only the
        # outermost call does any work
        if ( !$inProgress ) {
                $inProgress = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $inProgress = false;
        }

        return $text;
}
1934 }
1935
# Holds the result of a parse: the output text, the language links and
# category links, and a flag recording whether old magic was encountered.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # $text             - output text
        # $languageLinks    - array of language links
        # $categoryLinks    - array of category links
        # $containsOldMagic - boolean flag
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each hands the member and the new value to wfSetVar() and
        # returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the links and the old-magic flag of another ParserOutput into this one.
        # The text is left untouched.
        #
        # $other - ParserOutput whose links are appended to this object's links
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links must come from $other; appending this object's own
                # language links here mixed the two lists and lost $other's categories
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1965
# Bundle of settings that control a parse. The values are copied from the
# global configuration and from a user's preferences by initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        # Returned by reference: mSkin is stored by reference (setSkin(),
        # initialiseFromUser()) and callers bind the result with =&, so a by-value
        # return would hand out a copy of the skin instead of the skin itself
        function &getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators; each hands the member and the new value to wfSetVar()
        # (wfSetRef() for the skin) and returns whatever that helper returns
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Create a ParserOptions object initialised from the given user.
        #
        # $user - user object, passed on to initialiseFromUser()
        #
        # Returns the new ParserOptions object.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill in all options: site-wide switches come from the $wg* globals,
        # the skin and the remaining options from the user's preferences.
        #
        # $userInput - user object; when it evaluates to false a fresh User marked
        #              as loaded is used instead
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2038
2039 # Regex callbacks, used in Parser::replaceVariables
# Plain-function trampoline: forwards the regex match array to the
# braceSubstitution() method of the parser held in the global $wgCurParser
# and returns that method's result.
function wfBraceSubstitution( $matches )
{
        # Bind by reference so the global parser itself is used, not a copy
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->braceSubstitution( $matches );
}
2045
2046 ?>