includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36
  37 define( "MAX_INCLUDE_REPEAT", 5 );
  38
  39 # Allowed values for $mOutputType
  40 define( "OT_HTML", 1 );
  41 define( "OT_WIKI", 2 );
  42 define( "OT_MSG", 3 );
  43
  44 # prefix for escaping, used in two functions at least
  45 define( "UNIQ_PREFIX", "NaodW29");
  46
  47 class Parser
  48 {
  49         # Cleared with clearState():
  50         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  51         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  52
  53         # Temporary:
  54         var $mOptions, $mTitle, $mOutputType;
  55
             # Constructor (PHP 4 style: the method is named after the class).
             # Puts the new instance into its initial state via clearState().
  56         function Parser()
  57         {
  58                 $this->clearState();
  59         }
  60
  61         function clearState()
  62         {
  63                 $this->mOutput = new ParserOutput;
  64                 $this->mAutonumber = 0;
  65                 $this->mLastSection = "";
  66                 $this->mDTopen = false;
  67                 $this->mVariables = false;
  68                 $this->mIncludeCount = array();
  69                 $this->mStripState = array();
  70                 $this->mArgStack = array();
  71         }
  72
  73         # First pass--just handle <nowiki> sections, pass the rest off
  74         # to internalParse() which does all the real work.
  75         #
  76         # Returns a ParserOutput
  77         #
  78         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  79         {
  80                 $fname = "Parser::parse";
  81                 wfProfileIn( $fname );
  82
  83                 if ( $clearState ) {
  84                         $this->clearState();
  85                 }
  86
  87                 $this->mOptions = $options;
  88                 $this->mTitle =& $title;
  89                 $this->mOutputType = OT_HTML;
  90
  91                 $stripState = NULL;
  92                 $text = $this->strip( $text, $this->mStripState );
  93                 $text = $this->internalParse( $text, $linestart );
  94                 $text = $this->unstrip( $text, $this->mStripState );
  95                 # Clean up special characters, only run once, next-to-last before doBlockLevels
  96                 $fixtags = array(
  97                         "/<hr *>/i" => '<hr/>',
  98                         "/<br *>/i" => '<br/>',
  99                         "/<center *>/i"=>'<div style="text-align:center;">',
 100                         "/<\\/center *>/i" => '</div>',
 101                         # Clean up spare ampersands; note that we probably ought to be
 102                         # more careful about named entities.
 103                         '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 104                 );
 105                 # only once and last
 106                 $text = $this->doBlockLevels( $text, $linestart );
 107                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 108
 109                 $this->mOutput->setText( $text );
 110                 wfProfileOut( $fname );
 111                 return $this->mOutput;
 112         }
 113
 114         /* static */ function getRandomString()
 115         {
 116                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 117         }
 118
 119         # Replaces all occurrences of <$tag>content</$tag> in the text
 120         # with a random marker and returns the new text. the output parameter
 121         # $content will be an associative array filled with data on the form
 122         # $unique_marker => content.
 123
 124         # If $content is already set, the additional entries will be appended
 125
 126         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 127                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 128                 if ( !$content ) {
 129                         $content = array( );
 130                 }
 131                 $n = 1;
 132                 $stripped = "";
 133
 134                 while ( "" != $text ) {
 135                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 136                         $stripped .= $p[0];
 137                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 138                                 $text = "";
 139                         } else {
 140                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 141                                 $marker = $rnd . sprintf("%08X", $n++);
 142                                 $content[$marker] = $q[0];
 143                                 $stripped .= $marker;
 144                                 $text = $q[1];
 145                         }
 146                 }
 147                 return $stripped;
 148         }
 149
 150         # Strips <nowiki>, <pre> and <math>
 151         # Returns the text, and fills an array with data needed in unstrip()
 152         # If the $state is already a valid strip state, it adds to the state
 153         #
 154         function strip( $text, &$state )
 155         {
 156                 $render = ($this->mOutputType == OT_HTML);
 157                 $nowiki_content = array();
 158                 $hiero_content = array();
 159                 $math_content = array();
 160                 $pre_content = array();
 161                 $item_content = array();
 162
 163                 # Replace any instances of the placeholders
 164                 $uniq_prefix = UNIQ_PREFIX;
 165                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 166
 167                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 168                 foreach( $nowiki_content as $marker => $content ){
 169                         if( $render ){
 170                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 171                         } else {
 172                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 173                         }
 174                 }
 175
 176                 if( $GLOBALS['wgUseWikiHiero'] ){
 177                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 178                         foreach( $hiero_content as $marker => $content ){
 179                                 if( $render ){
 180                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 181                                 } else {
 182                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 183                                 }
 184                         }
 185                 }
 186
 187                 if( $this->mOptions->getUseTeX() ){
 188                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 189                         foreach( $math_content as $marker => $content ){
 190                                 if( $render ){
 191                                         $math_content[$marker] = renderMath( $content );
 192                                 } else {
 193                                         $math_content[$marker] = "<math>$content</math>";
 194                                 }
 195                         }
 196                 }
 197
 198                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 199                 foreach( $pre_content as $marker => $content ){
 200                         if( $render ){
 201                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 202                         } else {
 203                                 $pre_content[$marker] = "<pre>$content</pre>";
 204                         }
 205                 }
 206
 207                 # Merge state with the pre-existing state, if there is one
 208                 if ( $state ) {
 209                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 210                         $state['hiero'] = $state['hiero'] + $hiero_content;
 211                         $state['math'] = $state['math'] + $math_content;
 212                         $state['pre'] = $state['pre'] + $pre_content;
 213                 } else {
 214                         $state = array(
 215                           'nowiki' => $nowiki_content,
 216                           'hiero' => $hiero_content,
 217                           'math' => $math_content,
 218                           'pre' => $pre_content,
 219                           'item' => $item_content
 220                         );
 221                 }
 222                 return $text;
 223         }
 224
 225         function unstrip( $text, &$state )
 226         {
 227                 # Must expand in reverse order, otherwise nested tags will be corrupted
 228                 $contentDict = end( $state );
 229                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 230                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 231                                 $text = str_replace( key( $contentDict ), $content, $text );
 232                         }
 233                 }
 234
 235                 return $text;
 236         }
 237
 238         # Add an item to the strip state
 239         # Returns the unique tag which must be inserted into the stripped text
 240         # The tag will be replaced with the original text in unstrip()
 241
 242         function insertStripItem( $text, &$state )
 243         {
 244                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 245                 if ( !$state ) {
 246                         $state = array(
 247                           'nowiki' => array(),
 248                           'hiero' => array(),
 249                           'math' => array(),
 250                           'pre' => array(),
 251                           'item' => array()
 252                         );
 253                 }
 254                 $state['item'][$rnd] = $text;
 255                 return $rnd;
 256         }
 257
 258         function categoryMagic ()
 259         {
 260                 global $wgLang , $wgUser ;
 261                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 262                 $id = $this->mTitle->getArticleID() ;
 263                 $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
 264                 $ti = $this->mTitle->getText() ;
 265                 $ti = explode ( ":" , $ti , 2 ) ;
 266                 if ( $cat != $ti[0] ) return "" ;
 267                 $r = '<br style="clear:both;"/>\n';
 268
 269                 $articles = array() ;
 270                 $parents = array () ;
 271                 $children = array() ;
 272
 273
 274 #               $sk =& $this->mGetSkin();
 275                 $sk =& $wgUser->getSkin() ;
 276
 277                 $data = array () ;
 278                 $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 279                 $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 280
 281                 $res = wfQuery ( $sql1, DB_READ ) ;
 282                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 283
 284                 $res = wfQuery ( $sql2, DB_READ ) ;
 285                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 286
 287
 288                 foreach ( $data AS $x )
 289                 {
 290                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 291                         if ( $t != "" ) $t .= ":" ;
 292                         $t .= $x->cur_title ;
 293
 294                         $y = explode ( ":" , $t , 2 ) ;
 295                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 296                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 297                         } else {
 298                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 299                         }
 300                 }
 301                 wfFreeResult ( $res ) ;
 302
 303                 # Children
 304                 if ( count ( $children ) > 0 )
 305                 {
 306                         asort ( $children ) ;
 307                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 308                         $r .= implode ( ", " , $children ) ;
 309                 }
 310
 311                 # Articles
 312                 if ( count ( $articles ) > 0 )
 313                 {
 314                         asort ( $articles ) ;
 315                         $h =  wfMsg( "category_header", $ti[1] );
 316                         $r .= "<h2>{$h}</h2>\n" ;
 317                         $r .= implode ( ", " , $articles ) ;
 318                 }
 319
 320
 321                 return $r ;
 322         }
 323
 324         function getHTMLattrs ()
 325         {
 326                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 327                                 "title", "align", "lang", "dir", "width", "height",
 328                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 329                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 330                                 /* FONT */ "type", "start", "value", "compact",
 331                                 /* For various lists, mostly deprecated but safe */
 332                                 "summary", "width", "border", "frame", "rules",
 333                                 "cellspacing", "cellpadding", "valign", "char",
 334                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 335                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 336                                 "id", "class", "name", "style" /* For CSS */
 337                                 );
 338                 return $htmlattrs ;
 339         }
 340
 341         function fixTagAttributes ( $t )
 342         {
 343                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 344                 $htmlattrs = $this->getHTMLattrs() ;
 345
 346                 # Strip non-approved attributes from the tag
 347                 $t = preg_replace(
 348                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 349                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 350                         $t);
 351                 # Strip javascript "expression" from stylesheets. Brute force approach:
 352                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 353
 354                 if( preg_match(
 355                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 356                         wfMungeToUtf8( $t ) ) )
 357                 {
 358                         $t="";
 359                 }
 360
 361                 return trim ( $t ) ;
 362         }
 363
 364         function doTableStuff ( $t )
 365         {
 366                 $t = explode ( "\n" , $t ) ;
 367                 $td = array () ; # Is currently a td tag open?
 368                         $ltd = array () ; # Was it TD or TH?
 369                         $tr = array () ; # Is currently a tr tag open?
 370                         $ltr = array () ; # tr attributes
 371                         foreach ( $t AS $k => $x )
 372                         {
 373                                 $x = rtrim ( $x ) ;
 374                                 $fc = substr ( $x , 0 , 1 ) ;
 375                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 376                                 {
 377                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 378                                         array_push ( $td , false ) ;
 379                                         array_push ( $ltd , "" ) ;
 380                                         array_push ( $tr , false ) ;
 381                                         array_push ( $ltr , "" ) ;
 382                                 }
 383                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 384                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 385                                 {
 386                                         $z = "</table>\n" ;
 387                                         $l = array_pop ( $ltd ) ;
 388                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 389                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 390                                         array_pop ( $ltr ) ;
 391                                         $t[$k] = $z ;
 392                                 }
 393                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 394                                                 {
 395                                                 $z = trim ( substr ( $x , 2 ) ) ;
 396                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 397                                                 }*/
 398                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 399                                 {
 400                                         $x = substr ( $x , 1 ) ;
 401                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 402                                         $z = "" ;
 403                                         $l = array_pop ( $ltd ) ;
 404                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 405                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 406                                         array_pop ( $ltr ) ;
 407                                         $t[$k] = $z ;
 408                                         array_push ( $tr , false ) ;
 409                                         array_push ( $td , false ) ;
 410                                         array_push ( $ltd , "" ) ;
 411                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 412                                 }
 413                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 414                                 {
 415                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 416                                         {
 417                                                 $fc = "+" ;
 418                                                 $x = substr ( $x , 1 ) ;
 419                                         }
 420                                         $after = substr ( $x , 1 ) ;
 421                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 422                                         $after = explode ( "||" , $after ) ;
 423                                         $t[$k] = "" ;
 424                                         foreach ( $after AS $theline )
 425                                         {
 426                                                 $z = "" ;
 427                                                 if ( $fc != "+" )
 428                                                 {
 429                                                         $tra = array_pop ( $ltr ) ;
 430                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 431                                                         array_push ( $tr , true ) ;
 432                                                         array_push ( $ltr , "" ) ;
 433                                                 }
 434
 435                                                 $l = array_pop ( $ltd ) ;
 436                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 437                                                 if ( $fc == "|" ) $l = "td" ;
 438                                                 else if ( $fc == "!" ) $l = "th" ;
 439                                                 else if ( $fc == "+" ) $l = "caption" ;
 440                                                 else $l = "" ;
 441                                                 array_push ( $ltd , $l ) ;
 442                                                 $y = explode ( "|" , $theline , 2 ) ;
 443                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 444                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 445                                                 $t[$k] .= $y ;
 446                                                 array_push ( $td , true ) ;
 447                                         }
 448                                 }
 449                         }
 450
 451                 # Closing open td, tr && table
 452                 while ( count ( $td ) > 0 )
 453                 {
 454                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 455                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 456                         $t[] = "</table>" ;
 457                 }
 458
 459                 $t = implode ( "\n" , $t ) ;
 460                 #               $t = $this->removeHTMLtags( $t );
 461                 return $t ;
 462         }
 463
             # The wiki-to-HTML pipeline that parse() runs between strip() and
             # unstrip().
             #
             # $text      - text to transform
             # $linestart - not read anywhere in this method
             # $args      - handed to replaceVariables()
             #
             # Returns the transformed text with the output of categoryMagic()
             # appended. The steps run in a fixed order; see the note above
             # replaceExternalLinks() about why the order needs care.
 464         function internalParse( $text, $linestart, $args = array() )
 465         {
 466                 $fname = "Parser::internalParse";
 467                 wfProfileIn( $fname );
 468
 469                 $text = $this->removeHTMLtags( $text );
 470                 $text = $this->replaceVariables( $text, $args );
 471
 472                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 473
 474                 $text = $this->doHeadings( $text );
                     # Date reformatting is optional and uses the format from the options
 475                 if($this->mOptions->getUseDynamicDates()) {
 476                         global $wgDateFormatter;
 477                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 478                 }
 479                 $text = $this->replaceExternalLinks( $text );
 480                 $text = $this->doTokenizedParser ( $text );
 481                 $text = $this->doTableStuff ( $text ) ;
 482                 $text = $this->formatHeadings( $text );
                     # The skin comes from the parser options, not from a global
 483                 $sk =& $this->mOptions->getSkin();
 484                 $text = $sk->transformContent( $text );
 485
                     # categoryMagic() returns an empty value when it does not apply
 486                 $text .= $this->categoryMagic () ;
 487
 488                 wfProfileOut( $fname );
 489                 return $text;
 490         }
 491
 492
 493         /* private */ function doHeadings( $text )
 494         {
 495                 for ( $i = 6; $i >= 1; --$i ) {
 496                         $h = substr( "======", 0, $i );
 497                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 498                           "<h{$i}>\\1</h{$i}>\\2", $text );
 499                 }
 500                 return $text;
 501         }
 502
 503         # Note: we have to do external links before the internal ones,
 504         # and otherwise take great care in the order of things here, so
 505         # that we don't end up interpreting some URLs twice.
 506
 507         /* private */ function replaceExternalLinks( $text )
 508         {
 509                 $fname = "Parser::replaceExternalLinks";
 510                 wfProfileIn( $fname );
 511                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 512                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 513                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 514                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 515                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 516                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 517                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 518                 wfProfileOut( $fname );
 519                 return $text;
 520         }
 521
             # Convert the external links of ONE protocol in $s.
             #
             # Two passes are made:
             #  1. free URLs (not preceded by "[") become <a> elements, or images
             #     via the skin's makeImage() when $autonumber is set, the options
             #     allow external images and the URL ends in an image extension;
             #  2. bracketed links, "[url]" and "[url text]", become <a> elements.
             #
             # $s          - text to convert
             # $protocol   - protocol name without the colon, e.g. "http"
             # $autonumber - when true, "[url]" links without text are labelled
             #               "[1]", "[2]", ... using $this->mAutonumber; it also
             #               gates the image handling of pass 1
             #
             # Returns the converted text.
 522         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 523         {
                     # Stand-in written instead of the protocol name into the output
                     # of pass 1 and swapped back afterwards; presumably this keeps
                     # the second regex from matching URLs the first already handled.
 524                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
                     # Characters allowed in a URL (contents of a regex character class)
 525                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 526
 527                 # this is  the list of separators that should be ignored if they
 528                 # are the last character of an URL but that should be included
 529                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 530                 # in this case, the last comma should not become part of the URL,
 531                 # but in "www.foo.com/123,2342,32.htm" it should.
 532                 $sep = ",;\.:";
                     # Characters allowed in the file name part of an image URL
 533                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 534                 $images = "gif|png|jpg|jpeg";
 535
 536                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 537                 # they are interpreted as part of the string (used to tell PHP
 538                 # that the content of the string should be inserted there).
                     # $e1: free URL ending in an image file name
 539                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 540                   "((?i){$images})([^{$uc}]|$)/";
 541
                     # $e2: any other free URL; a separator only counts as part of
                     # the URL when a URL character follows it
 542                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 543                 $sk =& $this->mOptions->getSkin();
 544
 545                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 546                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 547                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 548                 }
                     # The skin and escaping helpers are called ONCE, on text that
                     # still contains the \\3 back-reference; preg_replace() fills
                     # in the matched URL afterwards.
 549                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 550                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 551                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 552                   "</a>\\5", $s );
 553                 $s = str_replace( $unique, $protocol, $s );
 554
                     # Pass 2: split at every "[protocol:". The leading space ensures
                     # the first piece exists even when $s starts with a link; it is
                     # removed again by the substr() below.
 555                 $a = explode( "[{$protocol}:", " " . $s );
 556                 $s = array_shift( $a );
 557                 $s = substr( $s, 1 );
 558
                     # $e1: "url]trail"        $e2: "url text]trail"
 559                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 560                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 561
 562                 foreach ( $a as $line ) {
 563                         if ( preg_match( $e1, $line, $m ) ) {
 564                                 $link = "{$protocol}:{$m[1]}";
 565                                 $trail = $m[2];
 566                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 567                                 else { $text = wfEscapeHTML( $link ); }
 568                         } else if ( preg_match( $e2, $line, $m ) ) {
 569                                 $link = "{$protocol}:{$m[1]}";
 570                                 $text = $m[2];
 571                                 $trail = $m[3];
 572                         } else {
                                     # Not a well-formed bracketed link: restore the text as it was
 573                                 $s .= "[{$protocol}:" . $line;
 574                                 continue;
 575                         }
                             # No expansion is needed when the link text already is the URL
 576                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 577                                 $paren = "";
 578                         } else {
 579                                 # Expand the URL for printable version
 580                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 581                         }
 582                         $la = $sk->getExternalLinkAttributes( $link, $text );
 583                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 584
 585                 }
 586                 return $s;
 587         }
 588
 589         /* private */ function handle3Quotes( &$state, $token )
 590         {
 591                 if ( $state["strong"] !== false ) {
 592                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 593                         {
 594                                 # ''' lala ''lala '''
 595                                 $s = "</em></strong><em>";
 596                         } else {
 597                                 $s = "</strong>";
 598                         }
 599                         $state["strong"] = FALSE;
 600                 } else {
 601                         $s = "<strong>";
 602                         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
 603                 }
 604                 return $s;
 605         }
 606
 607         /* private */ function handle2Quotes( &$state, $token )
 608         {
 609                 if ( $state["em"] !== false ) {
 610                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 611                         {
 612                                 # ''lala'''lala'' ....'''
 613                                 $s = "</strong></em><strong>";
 614                         } else {
 615                                 $s = "</em>";
 616                         }
 617                         $state["em"] = FALSE;
 618                 } else {
 619                         $s = "<em>";
 620                         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 621
 622                 }
 623                 return $s;
 624         }
 625
 626         /* private */ function handle5Quotes( &$state, $token )
 627         {
 628                 $s = "";
 629                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 630                         if ( $state["em"] < $state["strong"] ) {
 631                                 $s .= "</strong></em>";
 632                         } else {
 633                                 $s .= "</em></strong>";
 634                         }
 635                         $state["strong"] = $state["em"] = FALSE;
 636                 } elseif ( $state["em"] !== false ) {
 637                         $s .= "</em><strong>";
 638                         $state["em"] = FALSE;
 639                         $state["strong"] = $token["pos"];
 640                 } elseif ( $state["strong"] !== false ) {
 641                         $s .= "</strong><em>";
 642                         $state["strong"] = FALSE;
 643                         $state["em"] = $token["pos"];
 644                 } else { # not $em and not $strong
 645                         $s .= "<strong><em>";
 646                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 647                 }
 648                 return $s;
 649         }
 650
 651         /* private */ function doTokenizedParser( $str )
 652         {
 653                 global $wgLang; # for language specific parser hook
 654
 655                 $tokenizer=Tokenizer::newFromString( $str );
 656                 $tokenStack = array();
 657
 658                 $s="";
 659                 $state["em"]      = FALSE;
 660                 $state["strong"]  = FALSE;
 661                 $tagIsOpen = FALSE;
 662                 $threeopen = false;
 663
 664                 # The tokenizer splits the text into tokens and returns them one by one.
 665                 # Every call to the tokenizer returns a new token.
 666                 while ( $token = $tokenizer->nextToken() )
 667                 {
 668                         switch ( $token["type"] )
 669                         {
 670                                 case "text":
 671                                         # simple text with no further markup
 672                                         $txt = $token["text"];
 673                                         break;
 674                                 case "[[[":
 675                                         # remember the tag opened with 3 [
 676                                         $threeopen = true;
 677                                 case "[[":
 678                                         # link opening tag.
 679                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 680                                         $tagIsOpen = TRUE;
 681                                         array_push( $tokenStack, $token );
 682                                         $txt="";
 683                                         break;
 684
 685                                 case "]]]":
 686                                 case "]]":
 687                                         # link close tag.
 688                                         # get text from stack, glue it together, and call the code to handle a
 689                                         # link
 690
 691                                         if ( count( $tokenStack ) == 0 )
 692                                         {
 693                                                 # stack empty. Found a ]] without an opening [[
 694                                                 $txt = "]]";
 695                                         } else {
 696                                                 $linkText = "";
 697                                                 $lastToken = array_pop( $tokenStack );
 698                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 699                                                 {
 700                                                         if( !empty( $lastToken["text"] ) ) {
 701                                                                 $linkText = $lastToken["text"] . $linkText;
 702                                                         }
 703                                                         $lastToken = array_pop( $tokenStack );
 704                                                 }
 705
 706                                                 $txt = $linkText ."]]";
 707
 708                                                 if( isset( $lastToken["text"] ) ) {
 709                                                         $prefix = $lastToken["text"];
 710                                                 } else {
 711                                                         $prefix = "";
 712                                                 }
 713                                                 $nextToken = $tokenizer->previewToken();
 714                                                 if ( $nextToken["type"] == "text" )
 715                                                 {
 716                                                         # Preview just looks at it. Now we have to fetch it.
 717                                                         $nextToken = $tokenizer->nextToken();
 718                                                         $txt .= $nextToken["text"];
 719                                                 }
 720                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 721
 722                                                 # did the tag start with 3 [ ?
 723                                                 if($threeopen) {
 724                                                         # show the first as text
 725                                                         $txt = "[".$txt;
 726                                                         $threeopen=false;
 727                                                 }
 728
 729                                         }
 730                                         $tagIsOpen = (count( $tokenStack ) != 0);
 731                                         break;
 732                                 case "----":
 733                                         $txt = "\n<hr />\n";
 734                                         break;
 735                                 case "'''":
 736                                         # This and the three next ones handle quotes
 737                                         $txt = $this->handle3Quotes( $state, $token );
 738                                         break;
 739                                 case "''":
 740                                         $txt = $this->handle2Quotes( $state, $token );
 741                                         break;
 742                                 case "'''''":
 743                                         $txt = $this->handle5Quotes( $state, $token );
 744                                         break;
 745                                 case "":
 746                                         # empty token
 747                                         $txt="";
 748                                         break;
 749                                 case "RFC ":
 750                                         if ( $tagIsOpen ) {
 751                                                 $txt = "RFC ";
 752                                         } else {
 753                                                 $txt = $this->doMagicRFC( $tokenizer );
 754                                         }
 755                                         break;
 756                                 case "ISBN ":
 757                                         if ( $tagIsOpen ) {
 758                                                 $txt = "ISBN ";
 759                                         } else {
 760                                                 $txt = $this->doMagicISBN( $tokenizer );
 761                                         }
 762                                         break;
 763                                 default:
 764                                         # Call language specific Hook.
 765                                         $txt = $wgLang->processToken( $token, $tokenStack );
 766                                         if ( NULL == $txt ) {
 767                                                 # An unkown token. Highlight.
 768                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 769                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 770                                         }
 771                                         break;
 772                         }
 773                         # If we're parsing the interior of a link, don't append the interior to $s,
 774                         # but push it to the stack so it can be processed when a ]] token is found.
 775                         if ( $tagIsOpen  && $txt != "" ) {
 776                                 $token["type"] = "text";
 777                                 $token["text"] = $txt;
 778                                 array_push( $tokenStack, $token );
 779                         } else {
 780                                 $s .= $txt;
 781                         }
 782                 } #end while
 783                 if ( count( $tokenStack ) != 0 )
 784                 {
 785                         # still objects on stack. opened [[ tag without closing ]] tag.
 786                         $txt = "";
 787                         while ( $lastToken = array_pop( $tokenStack ) )
 788                         {
 789                                 if ( $lastToken["type"] == "text" )
 790                                 {
 791                                         $txt = $lastToken["text"] . $txt;
 792                                 } else {
 793                                         $txt = $lastToken["type"] . $txt;
 794                                 }
 795                         }
 796                         $s .= $txt;
 797                 }
 798                 return $s;
 799         }
 800
 801         /* private */ function handleInternalLink( $line, $prefix )
 802         {
 803                 global $wgLang, $wgLinkCache;
 804                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 805                 static $fname = "Parser::handleInternalLink" ;
 806                 wfProfileIn( $fname );
 807
 808                 wfProfileIn( "$fname-setup" );
 809                 static $tc = FALSE;
 810                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 811                 $sk =& $this->mOptions->getSkin();
 812
 813                 # Match a link having the form [[namespace:link|alternate]]trail
 814                 static $e1 = FALSE;
 815                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 816                 # Match the end of a line for a word that's not followed by whitespace,
 817                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 818                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 819                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 820                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 821
 822
 823                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 824                 static $image = FALSE;
 825                 static $special = FALSE;
 826                 static $media = FALSE;
 827                 static $category = FALSE;
 828                 if ( !$image ) { $image = Namespace::getImage(); }
 829                 if ( !$special ) { $special = Namespace::getSpecial(); }
 830                 if ( !$media ) { $media = Namespace::getMedia(); }
 831                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 832
 833                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 834
 835                 wfProfileOut( "$fname-setup" );
 836                 $s = "";
 837
 838                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 839                         $text = $m[2];
 840                         $trail = $m[3];
 841                 } else { # Invalid form; output directly
 842                         $s .= $prefix . "[[" . $line ;
 843                         return $s;
 844                 }
 845
 846                 /* Valid link forms:
 847                 Foobar -- normal
 848                 :Foobar -- override special treatment of prefix (images, language links)
 849                 /Foobar -- convert to CurrentPage/Foobar
 850                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 851                 */
 852                 $c = substr($m[1],0,1);
 853                 $noforce = ($c != ":");
 854                 if( $c == "/" ) { # subpage
 855                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 856                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 857                                 $noslash=$m[1];
 858                         } else {
 859                                 $noslash=substr($m[1],1);
 860                         }
 861                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 862                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 863                                 if( "" == $text ) {
 864                                         $text= $m[1];
 865                                 } # this might be changed for ugliness reasons
 866                         } else {
 867                                 $link = $noslash; # no subpage allowed, use standard link
 868                         }
 869                 } elseif( $noforce ) { # no subpage
 870                         $link = $m[1];
 871                 } else {
 872                         $link = substr( $m[1], 1 );
 873                 }
 874                 if( "" == $text )
 875                         $text = $link;
 876
 877                 $nt = Title::newFromText( $link );
 878                 if( !$nt ) {
 879                         $s .= $prefix . "[[" . $line;
 880                         return $s;
 881                 }
 882                 $ns = $nt->getNamespace();
 883                 $iw = $nt->getInterWiki();
 884                 if( $noforce ) {
 885                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 886                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 887                                 return (trim($s) == '')? '': $s;
 888                         }
 889                         if( $ns == $image ) {
 890                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 891                                 $wgLinkCache->addImageLinkObj( $nt );
 892                                 return $s;
 893                         }
 894                 }
 895                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 896                     ( strpos( $link, "#" ) == FALSE ) ) {
 897                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 898                         return $s;
 899                 }
 900
 901                 # Category feature
 902                 $catns = strtoupper ( $nt->getDBkey () ) ;
 903                 $catns = explode ( ":" , $catns ) ;
 904                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 905                 else $catns = "" ;
 906                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 907                         $t = explode ( ":" , $nt->getText() ) ;
 908                         array_shift ( $t ) ;
 909                         $t = implode ( ":" , $t ) ;
 910                         $t = $wgLang->ucFirst ( $t ) ;
 911                         $nnt = Title::newFromText ( $category.":".$t ) ;
 912                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 913                         $this->mOutput->mCategoryLinks[] = $t ;
 914                         $s .= $prefix . $trail ;
 915                         return $s ;
 916                 }
 917
 918                 if( $ns == $media ) {
 919                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 920                         $wgLinkCache->addImageLinkObj( $nt );
 921                         return $s;
 922                 } elseif( $ns == $special ) {
 923                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 924                         return $s;
 925                 }
 926                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 927
 928                 wfProfileOut( $fname );
 929                 return $s;
 930         }
 931
 932         # Some functions here used by doBlockLevels()
 933         #
 934         /* private */ function closeParagraph()
 935         {
 936                 $result = "";
 937                 if ( '' != $this->mLastSection ) {
 938                         $result = "</" . $this->mLastSection  . ">\n";
 939                 }
 940                 $this->mInPre = false;
 941                 $this->mLastSection = "";
 942                 return $result;
 943         }
 944         # getCommon() returns the length of the longest common substring
 945         # of both arguments, starting at the beginning of both.
 946         #
 947         /* private */ function getCommon( $st1, $st2 )
 948         {
 949                 $fl = strlen( $st1 );
 950                 $shorter = strlen( $st2 );
 951                 if ( $fl < $shorter ) { $shorter = $fl; }
 952
 953                 for ( $i = 0; $i < $shorter; ++$i ) {
 954                         if ( $st1{$i} != $st2{$i} ) { break; }
 955                 }
 956                 return $i;
 957         }
 958         # These next three functions open, continue, and close the list
 959         # element appropriate to the prefix character passed into them.
 960         #
 961         /* private */ function openList( $char )
 962     {
 963                 $result = $this->closeParagraph();
 964
 965                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 966                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 967                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 968                 else if ( ";" == $char ) {
 969                         $result .= "<dl><dt>";
 970                         $this->mDTopen = true;
 971                 }
 972                 else { $result = "<!-- ERR 1 -->"; }
 973
 974                 return $result;
 975         }
 976
 977         /* private */ function nextItem( $char )
 978         {
 979                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 980                 else if ( ":" == $char || ";" == $char ) {
 981                         $close = "</dd>";
 982                         if ( $this->mDTopen ) { $close = "</dt>"; }
 983                         if ( ";" == $char ) {
 984                                 $this->mDTopen = true;
 985                                 return $close . "<dt>";
 986                         } else {
 987                                 $this->mDTopen = false;
 988                                 return $close . "<dd>";
 989                         }
 990                 }
 991                 return "<!-- ERR 2 -->";
 992         }
 993
 994         /* private */function closeList( $char )
 995         {
 996                 if ( "*" == $char ) { $text = "</li></ul>"; }
 997                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 998                 else if ( ":" == $char ) {
 999                         if ( $this->mDTopen ) {
1000                                 $this->mDTopen = false;
1001                                 $text = "</dt></dl>";
1002                         } else {
1003                                 $text = "</dd></dl>";
1004                         }
1005                 }
1006                 else {  return "<!-- ERR 3 -->"; }
1007                 return $text."\n";
1008         }
1009
1010         /* private */ function doBlockLevels( $text, $linestart )
1011         {
1012                 $fname = "Parser::doBlockLevels";
1013                 wfProfileIn( $fname );
1014                 # Parsing through the text line by line.  The main thing
1015                 # happening here is handling of block-level elements p, pre,
1016                 # and making lists from lines starting with * # : etc.
1017                 #
1018                 $a = explode( "\n", $text );
1019
1020                 $lastPref = $text = $lastLine = '';
1021                 $this->mDTopen = $inBlockElem = false;
1022                 $npl = 0;
1023                 $pstack = false;
1024
1025                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1026                 foreach ( $a as $t ) {
1027                         $oLine = $t;
1028                         $preCloseMatch = preg_match("/<\\/pre/i", $t );
1029                         $preOpenMatch = preg_match("/<pre/i", $t );
1030                         if (!$this->mInPre) {
1031                                 $this->mInPre = ($preOpenMatch)? true : false;
1032                         }
1033                         if ( !$this->mInPre ) {
1034                                 $opl = strlen( $lastPref );
1035                                 $npl = strspn( $t, "*#:;" );
1036                                 $pref = substr( $t, 0, $npl );
1037                                 $pref2 = str_replace( ";", ":", $pref );
1038                                 $t = substr( $t, $npl );
1039                                 // list generation
1040                                 if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1041                                         $text .= $this->nextItem( substr( $pref, -1 ) );
1042                                         if ( $pstack ) { $pstack = false; }
1043
1044                                         if ( ";" == substr( $pref, -1 ) ) {
1045                                                 $cpos = strpos( $t, ":" );
1046                                                 if ( false !== $cpos ) {
1047                                                         $term = substr( $t, 0, $cpos );
1048                                                         $text .= $term . $this->nextItem( ":" );
1049                                                         $t = substr( $t, $cpos + 1 );
1050                                                 }
1051                                         }
1052                                 } else if (0 != $npl || 0 != $opl) {
1053                                         $cpl = $this->getCommon( $pref, $lastPref );
1054                                         if ( $pstack ) { $pstack = false; }
1055
1056                                         while ( $cpl < $opl ) {
1057                                                 $text .= $this->closeList( $lastPref{$opl-1} );
1058                                                 --$opl;
1059                                         }
1060                                         if ( $npl <= $cpl && $cpl > 0 ) {
1061                                                 $text .= $this->nextItem( $pref{$cpl-1} );
1062                                         }
1063                                         while ( $npl > $cpl ) {
1064                                                 $char = substr( $pref, $cpl, 1 );
1065                                                 $text .= $this->openList( $char );
1066
1067                                                 if ( ";" == $char ) {
1068                                                         $cpos = strpos( $t, ":" );
1069                                                         if ( ! ( false === $cpos ) ) {
1070                                                                 $term = substr( $t, 0, $cpos );
1071                                                                 $text .= $term . $this->nextItem( ":" );
1072                                                                 $t = substr( $t, $cpos + 1 );
1073                                                         }
1074                                                 }
1075                                                 ++$cpl;
1076                                         }
1077                                         $lastPref = $pref2;
1078                                 }
1079                         }
1080                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1081                                 $uniq_prefix = UNIQ_PREFIX;
1082                                 // XXX: use a stack for nestable elements like span, table and div
1083                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1084                                 $closematch = preg_match(
1085                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1086                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1087                                 if ( $openmatch or $closematch ) {
1088                                         if ( $pstack ) { $pstack = false; }
1089                                         $text .= $this->closeParagraph();
1090                                         if($preOpenMatch and !$preCloseMatch) {
1091                                                 $this->mInPre = true;
1092                                         }
1093                                         if ( $closematch  ) {
1094                                                 $inBlockElem = false;
1095                                         } else {
1096                                                 $inBlockElem = true;
1097                                         }
1098                                 } else if ( !$inBlockElem ) {
1099                                         if ( " " == $t{0} ) {
1100                                                 // pre
1101                                                 if ($this->mLastSection != 'pre') {
1102                                                         $pstack = false;
1103                                                         $text .= $this->closeParagraph().'<pre>';
1104                                                         $this->mLastSection = 'pre';
1105                                                 }
1106                                         } else {
1107                                                 // paragraph
1108                                                 if ( '' == trim($t) ) {
1109                                                         if ( $pstack ) {
1110                                                                 $text .= $pstack.'<br/>';
1111                                                                 $pstack = false;
1112                                                                 $this->mLastSection = 'p';
1113                                                         } else {
1114                                                                 if ($this->mLastSection != 'p' ) {
1115                                                                         $text .= $this->closeParagraph();
1116                                                                         $this->mLastSection = '';
1117                                                                         $pstack = "<p>";
1118                                                                 } else {
1119                                                                         $pstack = '</p><p>';
1120                                                                 }
1121                                                         }
1122                                                 } else {
1123                                                         if ( $pstack ) {
1124                                                                 $text .= $pstack;
1125                                                                 $pstack = false;
1126                                                                 $this->mLastSection = 'p';
1127                                                         } else if ($this->mLastSection != 'p') {
1128                                                                 $text .= $this->closeParagraph().'<p>';
1129                                                                 $this->mLastSection = 'p';
1130                                                         }
1131                                                 }
1132                                         }
1133                                 }
1134                         }
1135                         if ($pstack === false) {
1136                                 $text .= $t."\n";
1137                         }
1138                 }
1139                 while ( $npl ) {
1140                         $text .= $this->closeList( $pref2{$npl-1} );
1141                         --$npl;
1142                 }
1143                 if ( "" != $this->mLastSection ) {
1144                         $text .= "</" . $this->mLastSection . ">";
1145                         $this->mLastSection = "";
1146                 }
1147
1148                 wfProfileOut( $fname );
1149                 return $text;
1150         }
1151
1152         function getVariableValue( $index ) {
1153                 global $wgLang, $wgSitename, $wgServer;
1154
1155                 switch ( $index ) {
1156                         case MAG_CURRENTMONTH:
1157                                 return date( "m" );
1158                         case MAG_CURRENTMONTHNAME:
1159                                 return $wgLang->getMonthName( date("n") );
1160                         case MAG_CURRENTMONTHNAMEGEN:
1161                                 return $wgLang->getMonthNameGen( date("n") );
1162                         case MAG_CURRENTDAY:
1163                                 return date("j");
1164                         case MAG_CURRENTDAYNAME:
1165                                 return $wgLang->getWeekdayName( date("w")+1 );
1166                         case MAG_CURRENTYEAR:
1167                                 return date( "Y" );
1168                         case MAG_CURRENTTIME:
1169                                 return $wgLang->time( wfTimestampNow(), false );
1170                         case MAG_NUMBEROFARTICLES:
1171                                 return wfNumberOfArticles();
1172                         case MAG_SITENAME:
1173                                 return $wgSitename;
1174                         case MAG_SERVER:
1175                                 return $wgServer;
1176                         default:
1177                                 return NULL;
1178                 }
1179         }
1180
1181         function initialiseVariables()
1182         {
1183                 global $wgVariableIDs;
1184                 $this->mVariables = array();
1185                 foreach ( $wgVariableIDs as $id ) {
1186                         $mw =& MagicWord::get( $id );
1187                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1188                 }
1189         }
1190
1191         /* private */ function replaceVariables( $text, $args = array() )
1192         {
1193                 global $wgLang, $wgScript, $wgArticlePath;
1194
1195                 $fname = "Parser::replaceVariables";
1196                 wfProfileIn( $fname );
1197
1198                 $bail = false;
1199                 if ( !$this->mVariables ) {
1200                         $this->initialiseVariables();
1201                 }
1202                 $titleChars = Title::legalChars();
1203                 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1204
1205                 # This function is called recursively. To keep track of arguments we need a stack:
1206                 array_push( $this->mArgStack, $args );
1207
1208                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1209                 $GLOBALS['wgCurParser'] =& $this;
1210                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1211
1212                 array_pop( $this->mArgStack );
1213
1214                 return $text;
1215         }
1216
1217         function braceSubstitution( $matches )
1218         {
1219                 global $wgLinkCache, $wgLang;
1220                 $fname = "Parser::braceSubstitution";
1221                 $found = false;
1222                 $nowiki = false;
1223                 $title = NULL;
1224
1225                 # $newline is an optional newline character before the braces
1226                 # $part1 is the bit before the first |, and must contain only title characters
1227                 # $args is a list of arguments, starting from index 0, not including $part1
1228
1229                 $newline = $matches[1];
1230                 $part1 = $matches[2];
1231                 # If the third subpattern matched anything, it will start with |
1232                 if ( $matches[3] !== "" ) {
1233                         $args = explode( "|", substr( $matches[3], 1 ) );
1234                 } else {
1235                         $args = array();
1236                 }
1237                 $argc = count( $args );
1238
1239                 # SUBST
1240                 $mwSubst =& MagicWord::get( MAG_SUBST );
1241                 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1242                         if ( $this->mOutputType != OT_WIKI ) {
1243                                 # Invalid SUBST not replaced at PST time
1244                                 # Return without further processing
1245                                 $text = $matches[0];
1246                                 $found = true;
1247                         }
1248                 } elseif ( $this->mOutputType == OT_WIKI ) {
1249                         # SUBST not found in PST pass, do nothing
1250                         $text = $matches[0];
1251                         $found = true;
1252                 }
1253
1254                 # MSG, MSGNW and INT
1255                 if ( !$found ) {
1256                         # Check for MSGNW:
1257                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1258                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1259                                 $nowiki = true;
1260                         } else {
1261                                 # Remove obsolete MSG:
1262                                 $mwMsg =& MagicWord::get( MAG_MSG );
1263                                 $mwMsg->matchStartAndRemove( $part1 );
1264                         }
1265
1266                         # Check if it is an internal message
1267                         $mwInt =& MagicWord::get( MAG_INT );
1268                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1269                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1270                                         $text = wfMsgReal( $part1, $args, true );
1271                                         $found = true;
1272                                 }
1273                         }
1274                 }
1275
1276                 # NS
1277                 if ( !$found ) {
1278                         # Check for NS: (namespace expansion)
1279                         $mwNs = MagicWord::get( MAG_NS );
1280                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1281                                 if ( intval( $part1 ) ) {
1282                                         $text = $wgLang->getNsText( intval( $part1 ) );
1283                                         $found = true;
1284                                 } else {
1285                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1286                                         if ( !is_null( $index ) ) {
1287                                                 $text = $wgLang->getNsText( $index );
1288                                                 $found = true;
1289                                         }
1290                                 }
1291                         }
1292                 }
1293
1294                 # LOCALURL and LOCALURLE
1295                 if ( !$found ) {
1296                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1297                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1298
1299                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1300                                 $func = 'getLocalURL';
1301                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1302                                 $func = 'escapeLocalURL';
1303                         } else {
1304                                 $func = '';
1305                         }
1306
1307                         if ( $func !== '' ) {
1308                                 $title = Title::newFromText( $part1 );
1309                                 if ( !is_null( $title ) ) {
1310                                         if ( $argc > 0 ) {
1311                                                 $text = $title->$func( $args[0] );
1312                                         } else {
1313                                                 $text = $title->$func();
1314                                         }
1315                                         $found = true;
1316                                 }
1317                         }
1318                 }
1319
1320                 # Internal variables
1321                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1322                         $text = $this->mVariables[$part1];
1323                         $found = true;
1324                         $this->mOutput->mContainsOldMagic = true;
1325                 }
1326
1327                 # Arguments input from the caller
1328                 $inputArgs = end( $this->mArgStack );
1329                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1330                         $text = $inputArgs[$part1];
1331                         $found = true;
1332                 }
1333
1334                 # Load from database
1335                 if ( !$found ) {
1336                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1337                         if ( !is_null( $title ) && !$title->isExternal() ) {
1338                                 # Check for excessive inclusion
1339                                 $dbk = $title->getPrefixedDBkey();
1340                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1341                                         $article = new Article( $title );
1342                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1343                                         if ( $articleContent !== false ) {
1344                                                 $found = true;
1345                                                 $text = $articleContent;
1346
1347                                         }
1348                                 }
1349
1350                                 # If the title is valid but undisplayable, make a link to it
1351                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1352                                         $text = "[[" . $title->getPrefixedText() . "]]";
1353                                         $found = true;
1354                                 }
1355                         }
1356                 }
1357
1358                 # Recursive parsing, escaping and link table handling
1359                 # Only for HTML output
1360                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1361                         $text = wfEscapeWikiText( $text );
1362                 } elseif ( $this->mOutputType == OT_HTML && $found ) {
1363                         # Clean up argument array
1364                         $assocArgs = array();
1365                         $index = 1;
1366                         foreach( $args as $arg ) {
1367                                 $eqpos = strpos( $arg, "=" );
1368                                 if ( $eqpos === false ) {
1369                                         $assocArgs[$index++] = $arg;
1370                                 } else {
1371                                         $name = trim( substr( $arg, 0, $eqpos ) );
1372                                         $value = trim( substr( $arg, $eqpos+1 ) );
1373                                         if ( $value === false ) {
1374                                                 $value = "";
1375                                         }
1376                                         if ( $name !== false ) {
1377                                                 $assocArgs[$name] = $value;
1378                                         }
1379                                 }
1380                         }
1381
1382                         # Do not enter included links in link table
1383                         if ( !is_null( $title ) ) {
1384                                 $wgLinkCache->suspend();
1385                         }
1386
1387                         # Run full parser on the included text
1388                         $text = $this->strip( $text, $this->mStripState );
1389                         $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
1390
1391                         # Add the result to the strip state for re-inclusion after
1392                         # the rest of the processing
1393                         $text = $this->insertStripItem( $text, $this->mStripState );
1394
1395                         # Resume the link cache and register the inclusion as a link
1396                         if ( !is_null( $title ) ) {
1397                                 $wgLinkCache->resume();
1398                                 $wgLinkCache->addLinkObj( $title );
1399                         }
1400                 }
1401
1402                 if ( !$found ) {
1403                         return $matches[0];
1404                 } else {
1405                         return $newline . $text;
1406                 }
1407         }
1408
1409         # Returns true if the function is allowed to include this entity
1410         function incrementIncludeCount( $dbk )
1411         {
1412                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1413                         $this->mIncludeCount[$dbk] = 0;
1414                 }
1415                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1416                         return true;
1417                 } else {
1418                         return false;
1419                 }
1420         }
1421
1422
1423         # Cleans up HTML, removes dangerous tags and attributes
1424         /* private */ function removeHTMLtags( $text )
1425         {
1426                 $fname = "Parser::removeHTMLtags";
1427                 wfProfileIn( $fname );
1428                 $htmlpairs = array( # Tags that must be closed
1429                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1430                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1431                         "strike", "strong", "tt", "var", "div", "center",
1432                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1433                         "ruby", "rt" , "rb" , "rp", "p"
1434                 );
1435                 $htmlsingle = array(
1436                         "br", "hr", "li", "dt", "dd"
1437                 );
1438                 $htmlnest = array( # Tags that can be nested--??
1439                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1440                         "dl", "font", "big", "small", "sub", "sup"
1441                 );
1442                 $tabletags = array( # Can only appear inside table
1443                         "td", "th", "tr"
1444                 );
1445
1446                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1447                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1448
1449                 $htmlattrs = $this->getHTMLattrs () ;
1450
1451                 # Remove HTML comments
1452                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1453
1454                 $bits = explode( "<", $text );
1455                 $text = array_shift( $bits );
1456                 $tagstack = array(); $tablestack = array();
1457
1458                 foreach ( $bits as $x ) {
1459                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1460                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1461                           $x, $regs );
1462                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1463                         error_reporting( $prev );
1464
1465                         $badtag = 0 ;
1466                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1467                                 # Check our stack
1468                                 if ( $slash ) {
1469                                         # Closing a tag...
1470                                         if ( ! in_array( $t, $htmlsingle ) &&
1471                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1472                                                 array_push( $tagstack, $ot );
1473                                                 $badtag = 1;
1474                                         } else {
1475                                                 if ( $t == "table" ) {
1476                                                         $tagstack = array_pop( $tablestack );
1477                                                 }
1478                                                 $newparams = "";
1479                                         }
1480                                 } else {
1481                                         # Keep track for later
1482                                         if ( in_array( $t, $tabletags ) &&
1483                                           ! in_array( "table", $tagstack ) ) {
1484                                                 $badtag = 1;
1485                                         } else if ( in_array( $t, $tagstack ) &&
1486                                           ! in_array ( $t , $htmlnest ) ) {
1487                                                 $badtag = 1 ;
1488                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1489                                                 if ( $t == "table" ) {
1490                                                         array_push( $tablestack, $tagstack );
1491                                                         $tagstack = array();
1492                                                 }
1493                                                 array_push( $tagstack, $t );
1494                                         }
1495                                         # Strip non-approved attributes from the tag
1496                                         $newparams = $this->fixTagAttributes($params);
1497
1498                                 }
1499                                 if ( ! $badtag ) {
1500                                         $rest = str_replace( ">", "&gt;", $rest );
1501                                         $text .= "<$slash$t $newparams$brace$rest";
1502                                         continue;
1503                                 }
1504                         }
1505                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1506                 }
1507                 # Close off any remaining tags
1508                 while ( $t = array_pop( $tagstack ) ) {
1509                         $text .= "</$t>\n";
1510                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1511                 }
1512                 wfProfileOut( $fname );
1513                 return $text;
1514         }
1515
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text is HTML containing <h1>..<h6> elements; the return value is the same
 * HTML with the headings rebuilt and, when enabled, the TOC appended to the
 * block of text that precedes the first heading.
 *
 */

        /* private */ function formatHeadings( $text )
        {
                $doNumberHeadings = $this->mOptions->getNumberHeadings();
                $doShowToc = $this->mOptions->getShowToc();
                if( !$this->mTitle->userCanEdit() ) {
                        $showEditLink = 0;
                        $rightClickHack = 0;
                } else {
                        $showEditLink = $this->mOptions->getEditSection();
                        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
                }

                # Inhibit editsection links if requested in the page
                $esw =& MagicWord::get( MAG_NOEDITSECTION );
                if( $esw->matchAndRemove( $text ) ) {
                        $showEditLink = 0;
                }
                # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
                # do not add TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if( $mw->matchAndRemove( $text ) ) {
                        $doShowToc = 0;
                }

                # never add the TOC to the Main Page. This is an entry page that should not
                # be more than 1-2 screens large anyway
                if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
                        $doShowToc = 0;
                }

                # Get all headlines for numbering them and adding funky stuff like [edit]
                # links - this is for later, but we need the number of headlines right now
                # $matches[1] = level digit, [2] = rest of the opening tag up to and
                # including ">", [3] = heading contents
                $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

                # if there are fewer than 4 headlines in the article, do not show TOC
                if( $numMatches < 4 ) {
                        $doShowToc = 0;
                }

                # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
                # override above conditions and always show TOC
                $mw =& MagicWord::get( MAG_FORCETOC );
                if ($mw->matchAndRemove( $text ) ) {
                        $doShowToc = 1;
                }

                # We need this to perform operations on the HTML
                $sk =& $this->mOptions->getSkin();

                # Numbering is computed whenever either consumer needs it
                $wantNumbers = ( $doNumberHeadings || $doShowToc );

                # headline counter
                $headlineCount = 0;

                # Ugh .. the TOC should have neat indentation levels which can be
                # passed to the skin functions. These are determined here
                $toclevel = 0;
                $toc = "";
                $full = "";
                $head = array();           # rebuilt heading HTML, by headline index
                $sublevelCount = array();  # headings seen so far, by level
                $refers = array();         # occurrences so far, by anchor text
                $level = 0;
                $prevlevel = 0;
                foreach( $matches[3] as $headline ) {
                        $numbering = "";
                        if( $level ) {
                                $prevlevel = $level;
                        }
                        $level = $matches[1][$headlineCount];
                        if( $wantNumbers && $prevlevel && $level > $prevlevel ) {
                                # reset when we enter a new level
                                $sublevelCount[$level] = 0;
                                $toc .= $sk->tocIndent( $level - $prevlevel );
                                $toclevel += $level - $prevlevel;
                        }
                        if( $wantNumbers && $level < $prevlevel ) {
                                # reset when we step back a level
                                $sublevelCount[$level+1]=0;
                                $toc .= $sk->tocUnindent( $prevlevel - $level );
                                $toclevel -= $prevlevel - $level;
                        }
                        # count number of headlines for each level
                        if( isset( $sublevelCount[$level] ) ) {
                                $sublevelCount[$level]++;
                        } else {
                                $sublevelCount[$level] = 1;
                        }
                        if( $wantNumbers ) {
                                # Join the non-zero counters of all levels up to this one
                                $dot = 0;
                                for( $i = 1; $i <= $level; $i++ ) {
                                        if( !empty( $sublevelCount[$i] ) ) {
                                                if( $dot ) {
                                                        $numbering .= ".";
                                                }
                                                $numbering .= $sublevelCount[$i];
                                                $dot = 1;
                                        }
                                }
                        }

                        # The canonized header is a version of the header text safe to use for links
                        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

                        # strip out HTML
                        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                        $tocline = trim( $canonized_headline );
                        $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));

                        # count how many in assoc. array so we can track dupes in anchors
                        if( isset( $refers[$canonized_headline] ) ) {
                                $refers[$canonized_headline]++;
                        } else {
                                $refers[$canonized_headline] = 1;
                        }
                        $dupeCount = $refers[$canonized_headline];

                        # Prepend the number to the heading text
                        if( $wantNumbers ) {
                                $tocline = $numbering . " " . $tocline;

                                # Don't number the heading if it is the only one (looks silly)
                                if( $doNumberHeadings && count( $matches[3] ) > 1) {
                                        # the two are different if the line contains a link
                                        $headline=$numbering . " " . $headline;
                                }
                        }

                        # Create the anchor for linking from the TOC to the section
                        # Repeated headings get "_2", "_3", ... appended
                        $anchor = $canonized_headline;
                        if( $dupeCount > 1 ) {
                                $anchor .= "_" . $dupeCount;
                        }
                        if( $doShowToc ) {
                                $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                        }

                        $section = "";
                        if( $showEditLink ) {
                                $section .= $sk->editSectionLink($headlineCount+1);
                        }

                        # Add the edit section span
                        if( $rightClickHack ) {
                                $headline = $sk->editSectionScript($headlineCount+1,$headline);
                        }

                        # give headline the correct <h#> tag
                        # NOTE(review): $anchor goes into the name attribute without HTML
                        # escaping, and the replacement above does not cover a double
                        # quote - confirm this cannot break the attribute.
                        $section .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
                        $head[$headlineCount] = $section;

                        $headlineCount++;
                }

                if( $doShowToc ) {
                        # Close whatever indentation is still open, then wrap the lines
                        $toc .= $sk->tocUnindent( $toclevel );
                        $toc = $sk->tocTable( $toc );
                }

                # split up and insert constructed headlines
                # $blocks[$i] is the text preceding headline $i
                $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );

                foreach( $blocks as $i => $block ) {
                        # No [edit] link is emitted for the top block of text: it was
                        # disabled because it broke block formatting (for example, a
                        # bullet point in the top line).
                        $full .= $block;
                        if( $doShowToc && !$i) {
                                # Top anchor now in skin
                                $full = $full.$toc;
                        }

                        if( !empty( $head[$i] ) ) {
                                $full .= $head[$i];
                        }
                }

                return $full;
        }
1710
1711         /* private */ function doMagicISBN( &$tokenizer )
1712         {
1713                 global $wgLang;
1714
1715                 # Check whether next token is a text token
1716                 # If yes, fetch it and convert the text into a
1717                 # Special::BookSources link
1718                 $token = $tokenizer->previewToken();
1719                 while ( $token["type"] == "" )
1720                 {
1721                         $tokenizer->nextToken();
1722                         $token = $tokenizer->previewToken();
1723                 }
1724                 if ( $token["type"] == "text" )
1725                 {
1726                         $token = $tokenizer->nextToken();
1727                         $x = $token["text"];
1728                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1729
1730                         $isbn = $blank = "" ;
1731                         while ( " " == $x{0} ) {
1732                                 $blank .= " ";
1733                                 $x = substr( $x, 1 );
1734                         }
1735                         while ( strstr( $valid, $x{0} ) != false ) {
1736                                 $isbn .= $x{0};
1737                                 $x = substr( $x, 1 );
1738                         }
1739                         $num = str_replace( "-", "", $isbn );
1740                         $num = str_replace( " ", "", $num );
1741
1742                         if ( "" == $num ) {
1743                                 $text = "ISBN $blank$x";
1744                         } else {
1745                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1746                                 $text = "<a href=\"" .
1747                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1748                                         "\" class=\"internal\">ISBN $isbn</a>";
1749                                 $text .= $x;
1750                         }
1751                 } else {
1752                         $text = "ISBN ";
1753                 }
1754                 return $text;
1755         }
1756         /* private */ function doMagicRFC( &$tokenizer )
1757         {
1758                 global $wgLang;
1759
1760                 # Check whether next token is a text token
1761                 # If yes, fetch it and convert the text into a
1762                 # link to an RFC source
1763                 $token = $tokenizer->previewToken();
1764                 while ( $token["type"] == "" )
1765                 {
1766                         $tokenizer->nextToken();
1767                         $token = $tokenizer->previewToken();
1768                 }
1769                 if ( $token["type"] == "text" )
1770                 {
1771                         $token = $tokenizer->nextToken();
1772                         $x = $token["text"];
1773                         $valid = "0123456789";
1774
1775                         $rfc = $blank = "" ;
1776                         while ( " " == $x{0} ) {
1777                                 $blank .= " ";
1778                                 $x = substr( $x, 1 );
1779                         }
1780                         while ( strstr( $valid, $x{0} ) != false ) {
1781                                 $rfc .= $x{0};
1782                                 $x = substr( $x, 1 );
1783                         }
1784
1785                         if ( "" == $rfc ) {
1786                                 $text .= "RFC $blank$x";
1787                         } else {
1788                                 $url = wfmsg( "rfcurl" );
1789                                 $url = str_replace( "$1", $rfc, $url);
1790                                 $sk =& $this->mOptions->getSkin();
1791                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1792                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1793                         }
1794                 } else {
1795                         $text = "RFC ";
1796                 }
1797                 return $text;
1798         }
1799
# Transform wikitext at save time. Produces altered wiki markup, not HTML
# (mOutputType is set to OT_WIKI).
#
# Steps: normalise "\r\n" line endings, normalise <br> variants, then run
# pstPass2() on the text between a strip()/unstrip() pair.
# NOTE(review): strip()/unstrip() are defined elsewhere in this class;
# presumably they protect sections that must not be transformed - confirm.
#
# $text       - wikitext as submitted
# $title      - title of the page being saved; bound by reference to $this->mTitle
# $user       - user performing the save; handed on to pstPass2()
# $options    - stored in $this->mOptions
# $clearState - when true (the default) clearState() is called first
#
# Returns the transformed wikitext.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOptions = $options;
        $this->mTitle =& $title;
        $this->mOutputType = OT_WIKI;

        if ( $clearState ) {
                $this->clearState();
        }

        # Normalise Windows line endings
        $text = str_replace( "\r\n", "\n", $text );

        # Normalise <br> variants.
        # The first pattern must stay inside a single tag: "[^>]+?" cannot run
        # past the closing ">" of the <br> it started in. A greedy ".+" here
        # would match from the first "<br" on a line up to a later
        # clear=all/both attribute and delete everything in between.
        $pairs = array(
                "/<br[^>]+?(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
                "/<br *?>/i" => "<br/>",
        );
        $text = preg_replace( array_keys( $pairs ), array_values( $pairs ), $text );

        # $stripState is filled in by strip() and consumed by unstrip()
        $stripState = false;
        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $text;
}
1826
# Second pass of the pre-save transform, called from preSaveTransform().
#
# In order:
#   1. replaceVariables() - with mOutputType == OT_WIKI this only processes
#      {{subst:xxx}} type tags
#   2. signatures: ~~~~~ -> date, ~~~~ -> user link + date, ~~~ -> user link
#   3. context links: [[|name]], [[name (context)|]], [[ns:name|]] and
#      [[ns:name (context)|]] get their link text filled in
#   4. trailing whitespace is trimmed and the MAG_END magic word removed
#
# $text - wikitext to transform
# $user - user whose name and "nickname" option are used for signatures
#
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }

        /* Note: this is an ugly timezone hack for the European wikis */
        if ( isset( $wgLocaltimezone ) ) {
                $oldtz = getenv( "TZ" );
                putenv( "TZ=$wgLocaltimezone" );
        }
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if ( isset( $wgLocaltimezone ) ) {
                # getenv() returns false when TZ was not set at all. Restoring
                # with "TZ=" would leave TZ set to an empty string rather than
                # unset, so drop the variable again in that case.
                if ( false === $oldtz ) {
                        putenv( "TZ" );
                } else {
                        putenv( "TZ=$oldtz" );
                }
        }

        # The tilde markers are literal strings, so use str_replace(): the
        # nickname is user-supplied and must not be interpreted as a regex
        # replacement ("$1", "\\1", ... would be eaten as backreferences).
        # The array form replaces in order, longest marker first.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace(
                array( "~~~~~", "~~~~", "~~~" ),
                array( $d, "$userLink $d", $userLink ),
                $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # If the current page title is "Foo (bar)", "bar" is the context
        # applied to [[|name]] links below
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }

        # Most specific pattern first, so that p3 does not see p4's input
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1897
# Prime the member variables that are usually set up in parse(), so that
# an external function can call individual class members with confidence.
#
# $title      - bound by reference to $this->mTitle
# $options    - stored in $this->mOptions
# $outputType - stored in $this->mOutputType
# $clearState - unless false, clearState() is called once the members are set
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1909
# Run variable replacement over a message text with mOutputType set to
# OT_MSG, using the global $wgTitle as the title.
#
# A static flag guards against infinite recursion: if this method is
# entered again while a call is still in progress, the text is returned
# unchanged.
#
# $text    - message text
# $options - stored in $this->mOptions
#
# Returns the text after replaceVariables(), or the input untouched on a
# re-entrant call.
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $busy = false;

        if ( !$busy ) {
                $busy = true;

                $this->mOutputType = OT_MSG;
                $this->mOptions = $options;
                $this->mTitle = $wgTitle;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $busy = false;
        }

        return $text;
}
1929 }
1930
# Container for the result of a parse: the output text plus the language
# links, category links and "contains old magic" flag that go with it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor.
        # $text             - output text
        # $languageLinks    - array of language links
        # $categoryLinks    - array of category links
        # $containsOldMagic - boolean flag, see containsOldMagic()
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns the result of wfSetVar() (defined elsewhere)
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the links and flags of another ParserOutput into this one.
        # The text is left alone. Language links and category links are each
        # appended from the matching list of $other; the old-magic flag is
        # the logical OR of both objects' flags.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links, not from
                # this object's language links
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1960
# Bundle of settings for a parser run. Values are copied from the site
# globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators; each returns the result of wfSetVar() / wfSetRef()
        # (both defined elsewhere)
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Create a ParserOptions object and fill it in from $user via
        # initialiseFromUser(). Returns the new object.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() already declares its parameter as a
                # reference, so no "&" is needed (or allowed, in later PHP
                # versions) at the call site.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill in every option.
        # Site-wide switches are copied from the $wg* globals; the skin and
        # the remaining preferences are read from the user object.
        # $userInput - user to read preferences from; when it evaluates to
        #              false a fresh User object is created and marked as
        #              loaded instead.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2033
# Regex callback, used in Parser::replaceVariables.
# Forwards the match array to braceSubstitution() on the parser held in
# the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2040
2041 ?>