includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36
  37 define( "MAX_INCLUDE_REPEAT", 5 );
  38
  39 # Allowed values for $mOutputType
  40 define( "OT_HTML", 1 );
  41 define( "OT_WIKI", 2 );
  42 define( "OT_MSG", 3 );
  43
  44 # prefix for escaping, used in two functions at least
  45 define( "UNIQ_PREFIX", "NaodW29");
  46
  47 class Parser
  48 {
  49         # Cleared with clearState():
  50         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  51         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  52
  53         # Temporary:
  54         var $mOptions, $mTitle, $mOutputType;
  55
  56         function Parser()
  57         {
  58                 $this->clearState();
  59         }
  60
  61         function clearState()
  62         {
  63                 $this->mOutput = new ParserOutput;
  64                 $this->mAutonumber = 0;
  65                 $this->mLastSection = "";
  66                 $this->mDTopen = false;
  67                 $this->mVariables = false;
  68                 $this->mIncludeCount = array();
  69                 $this->mStripState = array();
  70                 $this->mArgStack = array();
  71         }
  72
  73         # First pass--just handle <nowiki> sections, pass the rest off
  74         # to internalParse() which does all the real work.
  75         #
  76         # Returns a ParserOutput
  77         #
  78         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  79         {
  80                 $fname = "Parser::parse";
  81                 wfProfileIn( $fname );
  82
  83                 if ( $clearState ) {
  84                         $this->clearState();
  85                 }
  86
  87                 $this->mOptions = $options;
  88                 $this->mTitle =& $title;
  89                 $this->mOutputType = OT_HTML;
  90
  91                 $stripState = NULL;
  92                 $text = $this->strip( $text, $this->mStripState );
  93                 $text = $this->internalParse( $text, $linestart );
  94                 $text = $this->unstrip( $text, $this->mStripState );
  95                 # Clean up special characters, only run once, next-to-last before doBlockLevels
  96                 $fixtags = array(
  97                         "/<hr *>/i" => '<hr/>',
  98                         "/<br *>/i" => '<br/>',
  99                         "/<center *>/i"=>'<div class="center">',
 100                         "/<\\/center *>/i" => '</div>',
 101                         # Clean up spare ampersands; note that we probably ought to be
 102                         # more careful about named entities.
 103                         '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 104                 );
 105                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 106
 107                 # only once and last
 108                 $text = $this->doBlockLevels( $text, $linestart );
 109
 110                 $this->mOutput->setText( $text );
 111                 wfProfileOut( $fname );
 112                 return $this->mOutput;
 113         }
 114
 115         /* static */ function getRandomString()
 116         {
 117                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 118         }
 119
 120         # Replaces all occurrences of <$tag>content</$tag> in the text
 121         # with a random marker and returns the new text. the output parameter
 122         # $content will be an associative array filled with data on the form
 123         # $unique_marker => content.
 124
 125         # If $content is already set, the additional entries will be appended
 126
 127         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 128                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 129                 if ( !$content ) {
 130                         $content = array( );
 131                 }
 132                 $n = 1;
 133                 $stripped = "";
 134
 135                 while ( "" != $text ) {
 136                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 137                         $stripped .= $p[0];
 138                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 139                                 $text = "";
 140                         } else {
 141                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 142                                 $marker = $rnd . sprintf("%08X", $n++);
 143                                 $content[$marker] = $q[0];
 144                                 $stripped .= $marker;
 145                                 $text = $q[1];
 146                         }
 147                 }
 148                 return $stripped;
 149         }
 150
 151         # Strips <nowiki>, <pre> and <math>
 152         # Returns the text, and fills an array with data needed in unstrip()
 153         # If the $state is already a valid strip state, it adds to the state
 154         #
 155         function strip( $text, &$state )
 156         {
 157                 $render = ($this->mOutputType == OT_HTML);
 158                 $nowiki_content = array();
 159                 $hiero_content = array();
 160                 $math_content = array();
 161                 $pre_content = array();
 162                 $item_content = array();
 163
 164                 # Replace any instances of the placeholders
 165                 $uniq_prefix = UNIQ_PREFIX;
 166                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 167
 168                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 169                 foreach( $nowiki_content as $marker => $content ){
 170                         if( $render ){
 171                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 172                         } else {
 173                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 174                         }
 175                 }
 176
 177                 if( $GLOBALS['wgUseWikiHiero'] ){
 178                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 179                         foreach( $hiero_content as $marker => $content ){
 180                                 if( $render ){
 181                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 182                                 } else {
 183                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 184                                 }
 185                         }
 186                 }
 187
 188                 if( $this->mOptions->getUseTeX() ){
 189                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 190                         foreach( $math_content as $marker => $content ){
 191                                 if( $render ){
 192                                         $math_content[$marker] = renderMath( $content );
 193                                 } else {
 194                                         $math_content[$marker] = "<math>$content</math>";
 195                                 }
 196                         }
 197                 }
 198
 199                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 200                 foreach( $pre_content as $marker => $content ){
 201                         if( $render ){
 202                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 203                         } else {
 204                                 $pre_content[$marker] = "<pre>$content</pre>";
 205                         }
 206                 }
 207
 208                 # Merge state with the pre-existing state, if there is one
 209                 if ( $state ) {
 210                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 211                         $state['hiero'] = $state['hiero'] + $hiero_content;
 212                         $state['math'] = $state['math'] + $math_content;
 213                         $state['pre'] = $state['pre'] + $pre_content;
 214                 } else {
 215                         $state = array(
 216                           'nowiki' => $nowiki_content,
 217                           'hiero' => $hiero_content,
 218                           'math' => $math_content,
 219                           'pre' => $pre_content,
 220                           'item' => $item_content
 221                         );
 222                 }
 223                 return $text;
 224         }
 225
 226         function unstrip( $text, &$state )
 227         {
 228                 # Must expand in reverse order, otherwise nested tags will be corrupted
 229                 $contentDict = end( $state );
 230                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 231                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 232                                 $text = str_replace( key( $contentDict ), $content, $text );
 233                         }
 234                 }
 235
 236                 return $text;
 237         }
 238
 239         # Add an item to the strip state
 240         # Returns the unique tag which must be inserted into the stripped text
 241         # The tag will be replaced with the original text in unstrip()
 242
 243         function insertStripItem( $text, &$state )
 244         {
 245                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 246                 if ( !$state ) {
 247                         $state = array(
 248                           'nowiki' => array(),
 249                           'hiero' => array(),
 250                           'math' => array(),
 251                           'pre' => array(),
 252                           'item' => array()
 253                         );
 254                 }
 255                 $state['item'][$rnd] = $text;
 256                 return $rnd;
 257         }
 258
 259         # This method generates the list of subcategories and pages for a category
 260         function categoryMagic ()
 261         {
 262                 global $wgLang , $wgUser ;
 263                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 264
 265                 $cns = Namespace::getCategory() ;
 266                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 267
 268                 $r = "<br style=\"clear:both;\"/>\n";
 269
 270
 271                 $sk =& $wgUser->getSkin() ;
 272
 273                 $articles = array() ;
 274                 $children = array() ;
 275                 $data = array () ;
 276                 $id = $this->mTitle->getArticleID() ;
 277
 278                 # For existing categories
 279                 if( $id ) {
 280                         $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 281                         $res = wfQuery ( $sql, DB_READ ) ;
 282                         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 283                 } else {
 284                         # For non-existing categories
 285                         $t = wfStrencode( $this->mTitle->getPrefixedDBKey() );
 286                         $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ;
 287                         $res = wfQuery ( $sql, DB_READ ) ;
 288                         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 289                 }
 290
 291                 # For all pages that link to this category
 292                 foreach ( $data AS $x )
 293                 {
 294                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 295                         if ( $t != "" ) $t .= ":" ;
 296                         $t .= $x->cur_title ;
 297
 298                         if ( $x->cur_namespace == $cns ) {
 299                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 300                         } else {
 301                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 302                         }
 303                 }
 304                 wfFreeResult ( $res ) ;
 305
 306                 # Showing subcategories
 307                 if ( count ( $children ) > 0 )
 308                 {
 309                         asort ( $children ) ;
 310                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 311                         $r .= implode ( ", " , $children ) ;
 312                 }
 313
 314                 # Showing pages in this category
 315                 if ( count ( $articles ) > 0 )
 316                 {
 317                         $ti = $this->mTitle->getText() ;
 318                         asort ( $articles ) ;
 319                         $h =  wfMsg( "category_header", $ti );
 320                         $r .= "<h2>{$h}</h2>\n" ;
 321                         $r .= implode ( ", " , $articles ) ;
 322                 }
 323
 324
 325                 return $r ;
 326         }
 327
 328         function getHTMLattrs ()
 329         {
 330                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 331                                 "title", "align", "lang", "dir", "width", "height",
 332                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 333                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 334                                 /* FONT */ "type", "start", "value", "compact",
 335                                 /* For various lists, mostly deprecated but safe */
 336                                 "summary", "width", "border", "frame", "rules",
 337                                 "cellspacing", "cellpadding", "valign", "char",
 338                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 339                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 340                                 "id", "class", "name", "style" /* For CSS */
 341                                 );
 342                 return $htmlattrs ;
 343         }
 344
 345         function fixTagAttributes ( $t )
 346         {
 347                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 348                 $htmlattrs = $this->getHTMLattrs() ;
 349
 350                 # Strip non-approved attributes from the tag
 351                 $t = preg_replace(
 352                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 353                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 354                         $t);
 355                 # Strip javascript "expression" from stylesheets. Brute force approach:
 356                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 357
 358                 if( preg_match(
 359                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 360                         wfMungeToUtf8( $t ) ) )
 361                 {
 362                         $t="";
 363                 }
 364
 365                 return trim ( $t ) ;
 366         }
 367
 368         function doTableStuff ( $t )
 369         {
 370                 $t = explode ( "\n" , $t ) ;
 371                 $td = array () ; # Is currently a td tag open?
 372                         $ltd = array () ; # Was it TD or TH?
 373                         $tr = array () ; # Is currently a tr tag open?
 374                         $ltr = array () ; # tr attributes
 375                         foreach ( $t AS $k => $x )
 376                         {
 377                                 $x = trim ( $x ) ;
 378                                 $fc = substr ( $x , 0 , 1 ) ;
 379                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 380                                 {
 381                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 382                                         array_push ( $td , false ) ;
 383                                         array_push ( $ltd , "" ) ;
 384                                         array_push ( $tr , false ) ;
 385                                         array_push ( $ltr , "" ) ;
 386                                 }
 387                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 388                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 389                                 {
 390                                         $z = "</table>\n" ;
 391                                         $l = array_pop ( $ltd ) ;
 392                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 393                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 394                                         array_pop ( $ltr ) ;
 395                                         $t[$k] = $z ;
 396                                 }
 397                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 398                                                 {
 399                                                 $z = trim ( substr ( $x , 2 ) ) ;
 400                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 401                                                 }*/
 402                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 403                                 {
 404                                         $x = substr ( $x , 1 ) ;
 405                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 406                                         $z = "" ;
 407                                         $l = array_pop ( $ltd ) ;
 408                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 409                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 410                                         array_pop ( $ltr ) ;
 411                                         $t[$k] = $z ;
 412                                         array_push ( $tr , false ) ;
 413                                         array_push ( $td , false ) ;
 414                                         array_push ( $ltd , "" ) ;
 415                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 416                                 }
 417                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 418                                 {
 419                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 420                                         {
 421                                                 $fc = "+" ;
 422                                                 $x = substr ( $x , 1 ) ;
 423                                         }
 424                                         $after = substr ( $x , 1 ) ;
 425                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 426                                         $after = explode ( "||" , $after ) ;
 427                                         $t[$k] = "" ;
 428                                         foreach ( $after AS $theline )
 429                                         {
 430                                                 $z = "" ;
 431                                                 if ( $fc != "+" )
 432                                                 {
 433                                                         $tra = array_pop ( $ltr ) ;
 434                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 435                                                         array_push ( $tr , true ) ;
 436                                                         array_push ( $ltr , "" ) ;
 437                                                 }
 438
 439                                                 $l = array_pop ( $ltd ) ;
 440                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 441                                                 if ( $fc == "|" ) $l = "td" ;
 442                                                 else if ( $fc == "!" ) $l = "th" ;
 443                                                 else if ( $fc == "+" ) $l = "caption" ;
 444                                                 else $l = "" ;
 445                                                 array_push ( $ltd , $l ) ;
 446                                                 $y = explode ( "|" , $theline , 2 ) ;
 447                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 448                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 449                                                 $t[$k] .= $y ;
 450                                                 array_push ( $td , true ) ;
 451                                         }
 452                                 }
 453                         }
 454
 455                 # Closing open td, tr && table
 456                 while ( count ( $td ) > 0 )
 457                 {
 458                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 459                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 460                         $t[] = "</table>" ;
 461                 }
 462
 463                 $t = implode ( "\n" , $t ) ;
 464                 #               $t = $this->removeHTMLtags( $t );
 465                 return $t ;
 466         }
 467
 468         function internalParse( $text, $linestart, $args = array() )
 469         {
 470                 $fname = "Parser::internalParse";
 471                 wfProfileIn( $fname );
 472
 473                 $text = $this->removeHTMLtags( $text );
 474                 $text = $this->replaceVariables( $text, $args );
 475
 476                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 477
 478                 $text = $this->doHeadings( $text );
 479                 if($this->mOptions->getUseDynamicDates()) {
 480                         global $wgDateFormatter;
 481                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 482                 }
 483                 $text = $this->replaceExternalLinks( $text );
 484                 $text = $this->doTokenizedParser ( $text );
 485                 $text = $this->doTableStuff ( $text ) ;
 486                 $text = $this->formatHeadings( $text );
 487                 $sk =& $this->mOptions->getSkin();
 488                 $text = $sk->transformContent( $text );
 489
 490                 if ( !isset ( $this->categoryMagicDone ) ) {
 491                    $text .= $this->categoryMagic () ;
 492                    $this->categoryMagicDone = true ;
 493                    }
 494
 495                 wfProfileOut( $fname );
 496                 return $text;
 497         }
 498
 499
 500         /* private */ function doHeadings( $text )
 501         {
 502                 for ( $i = 6; $i >= 1; --$i ) {
 503                         $h = substr( "======", 0, $i );
 504                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 505                           "<h{$i}>\\1</h{$i}>\\2", $text );
 506                 }
 507                 return $text;
 508         }
 509
 510         # Note: we have to do external links before the internal ones,
 511         # and otherwise take great care in the order of things here, so
 512         # that we don't end up interpreting some URLs twice.
 513
 514         /* private */ function replaceExternalLinks( $text )
 515         {
 516                 $fname = "Parser::replaceExternalLinks";
 517                 wfProfileIn( $fname );
 518                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 519                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 520                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 521                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 522                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 523                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 524                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 525                 wfProfileOut( $fname );
 526                 return $text;
 527         }
 528
 529         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 530         {
 531                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 532                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 533
 534                 # this is  the list of separators that should be ignored if they
 535                 # are the last character of an URL but that should be included
 536                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 537                 # in this case, the last comma should not become part of the URL,
 538                 # but in "www.foo.com/123,2342,32.htm" it should.
 539                 $sep = ",;\.:";
 540                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 541                 $images = "gif|png|jpg|jpeg";
 542
 543                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 544                 # they are interpreted as part of the string (used to tell PHP
 545                 # that the content of the string should be inserted there).
 546                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 547                   "((?i){$images})([^{$uc}]|$)/";
 548
 549                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 550                 $sk =& $this->mOptions->getSkin();
 551
 552                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 553                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 554                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 555                 }
 556                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 557                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 558                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 559                   "</a>\\5", $s );
 560                 $s = str_replace( $unique, $protocol, $s );
 561
 562                 $a = explode( "[{$protocol}:", " " . $s );
 563                 $s = array_shift( $a );
 564                 $s = substr( $s, 1 );
 565
 566                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 567                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 568
 569                 foreach ( $a as $line ) {
 570                         if ( preg_match( $e1, $line, $m ) ) {
 571                                 $link = "{$protocol}:{$m[1]}";
 572                                 $trail = $m[2];
 573                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 574                                 else { $text = wfEscapeHTML( $link ); }
 575                         } else if ( preg_match( $e2, $line, $m ) ) {
 576                                 $link = "{$protocol}:{$m[1]}";
 577                                 $text = $m[2];
 578                                 $trail = $m[3];
 579                         } else {
 580                                 $s .= "[{$protocol}:" . $line;
 581                                 continue;
 582                         }
 583                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 584                                 $paren = "";
 585                         } else {
 586                                 # Expand the URL for printable version
 587                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 588                         }
 589                         $la = $sk->getExternalLinkAttributes( $link, $text );
 590                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 591
 592                 }
 593                 return $s;
 594         }
 595
 596         /* private */ function handle3Quotes( &$state, $token )
 597         {
 598                 if ( $state["strong"] !== false ) {
 599                         if ( $state["em"] !== false && $state["em"] > $state["strong"] )
 600                         {
 601                                 # ''' lala ''lala '''
 602                                 $s = "</em></strong><em>";
 603                         } else {
 604                                 $s = "</strong>";
 605                         }
 606                         $state["strong"] = FALSE;
 607                 } else {
 608                         $s = "<strong>";
 609                         $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
 610                 }
 611                 return $s;
 612         }
 613
 614         /* private */ function handle2Quotes( &$state, $token )
 615         {
 616                 if ( $state["em"] !== false ) {
 617                         if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
 618                         {
 619                                 # ''lala'''lala'' ....'''
 620                                 $s = "</strong></em><strong>";
 621                         } else {
 622                                 $s = "</em>";
 623                         }
 624                         $state["em"] = FALSE;
 625                 } else {
 626                         $s = "<em>";
 627                         $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 628
 629                 }
 630                 return $s;
 631         }
 632
 633         /* private */ function handle5Quotes( &$state, $token )
 634         {
 635                 $s = "";
 636                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 637                         if ( $state["em"] < $state["strong"] ) {
 638                                 $s .= "</strong></em>";
 639                         } else {
 640                                 $s .= "</em></strong>";
 641                         }
 642                         $state["strong"] = $state["em"] = FALSE;
 643                 } elseif ( $state["em"] !== false ) {
 644                         $s .= "</em><strong>";
 645                         $state["em"] = FALSE;
 646                         $state["strong"] = $token["pos"];
 647                 } elseif ( $state["strong"] !== false ) {
 648                         $s .= "</strong><em>";
 649                         $state["strong"] = FALSE;
 650                         $state["em"] = $token["pos"];
 651                 } else { # not $em and not $strong
 652                         $s .= "<strong><em>";
 653                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 654                 }
 655                 return $s;
 656         }
 657
 658         /* private */ function doTokenizedParser( $str )
 659         {
 660                 global $wgLang; # for language specific parser hook
 661
 662                 $tokenizer=Tokenizer::newFromString( $str );
 663                 $tokenStack = array();
 664
 665                 $s="";
 666                 $state["em"]      = FALSE;
 667                 $state["strong"]  = FALSE;
 668                 $tagIsOpen = FALSE;
 669                 $threeopen = false;
 670
 671                 # The tokenizer splits the text into tokens and returns them one by one.
 672                 # Every call to the tokenizer returns a new token.
 673                 while ( $token = $tokenizer->nextToken() )
 674                 {
 675                         switch ( $token["type"] )
 676                         {
 677                                 case "text":
 678                                         # simple text with no further markup
 679                                         $txt = $token["text"];
 680                                         break;
 681                                 case "blank":
 682                                         # Text that contains blanks that have to be converted to
 683                                         # non-breakable spaces for French.
 684                                         # U+202F NARROW NO-BREAK SPACE might be a better choice, but
 685                                         # browser support for Unicode spacing is poor.
 686                                         $txt = str_replace( " ", "&nbsp;", $token["text"] );
 687                                         break;
 688                                 case "[[[":
 689                                         # remember the tag opened with 3 [
 690                                         $threeopen = true;
 691                                 case "[[":
 692                                         # link opening tag.
 693                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 694                                         $tagIsOpen = TRUE;
 695                                         array_push( $tokenStack, $token );
 696                                         $txt="";
 697                                         break;
 698
 699                                 case "]]]":
 700                                 case "]]":
 701                                         # link close tag.
 702                                         # get text from stack, glue it together, and call the code to handle a
 703                                         # link
 704
 705                                         if ( count( $tokenStack ) == 0 )
 706                                         {
 707                                                 # stack empty. Found a ]] without an opening [[
 708                                                 $txt = "]]";
 709                                         } else {
 710                                                 $linkText = "";
 711                                                 $lastToken = array_pop( $tokenStack );
 712                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 713                                                 {
 714                                                         if( !empty( $lastToken["text"] ) ) {
 715                                                                 $linkText = $lastToken["text"] . $linkText;
 716                                                         }
 717                                                         $lastToken = array_pop( $tokenStack );
 718                                                 }
 719
 720                                                 $txt = $linkText ."]]";
 721
 722                                                 if( isset( $lastToken["text"] ) ) {
 723                                                         $prefix = $lastToken["text"];
 724                                                 } else {
 725                                                         $prefix = "";
 726                                                 }
 727                                                 $nextToken = $tokenizer->previewToken();
 728                                                 if ( $nextToken["type"] == "text" )
 729                                                 {
 730                                                         # Preview just looks at it. Now we have to fetch it.
 731                                                         $nextToken = $tokenizer->nextToken();
 732                                                         $txt .= $nextToken["text"];
 733                                                 }
 734                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 735
 736                                                 # did the tag start with 3 [ ?
 737                                                 if($threeopen) {
 738                                                         # show the first as text
 739                                                         $txt = "[".$txt;
 740                                                         $threeopen=false;
 741                                                 }
 742
 743                                         }
 744                                         $tagIsOpen = (count( $tokenStack ) != 0);
 745                                         break;
 746                                 case "----":
 747                                         $txt = "\n<hr />\n";
 748                                         break;
 749                                 case "'''":
 750                                         # This and the three next ones handle quotes
 751                                         $txt = $this->handle3Quotes( $state, $token );
 752                                         break;
 753                                 case "''":
 754                                         $txt = $this->handle2Quotes( $state, $token );
 755                                         break;
 756                                 case "'''''":
 757                                         $txt = $this->handle5Quotes( $state, $token );
 758                                         break;
 759                                 case "":
 760                                         # empty token
 761                                         $txt="";
 762                                         break;
 763                                 case "RFC ":
 764                                         if ( $tagIsOpen ) {
 765                                                 $txt = "RFC ";
 766                                         } else {
 767                                                 $txt = $this->doMagicRFC( $tokenizer );
 768                                         }
 769                                         break;
 770                                 case "ISBN ":
 771                                         if ( $tagIsOpen ) {
 772                                                 $txt = "ISBN ";
 773                                         } else {
 774                                                 $txt = $this->doMagicISBN( $tokenizer );
 775                                         }
 776                                         break;
 777                                 default:
 778                                         # Call language specific Hook.
 779                                         $txt = $wgLang->processToken( $token, $tokenStack );
 780                                         if ( NULL == $txt ) {
 781                                                 # An unkown token. Highlight.
 782                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 783                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 784                                         }
 785                                         break;
 786                         }
 787                         # If we're parsing the interior of a link, don't append the interior to $s,
 788                         # but push it to the stack so it can be processed when a ]] token is found.
 789                         if ( $tagIsOpen  && $txt != "" ) {
 790                                 $token["type"] = "text";
 791                                 $token["text"] = $txt;
 792                                 array_push( $tokenStack, $token );
 793                         } else {
 794                                 $s .= $txt;
 795                         }
 796                 } #end while
 797                 if ( count( $tokenStack ) != 0 )
 798                 {
 799                         # still objects on stack. opened [[ tag without closing ]] tag.
 800                         $txt = "";
 801                         while ( $lastToken = array_pop( $tokenStack ) )
 802                         {
 803                                 if ( $lastToken["type"] == "text" )
 804                                 {
 805                                         $txt = $lastToken["text"] . $txt;
 806                                 } else {
 807                                         $txt = $lastToken["type"] . $txt;
 808                                 }
 809                         }
 810                         $s .= $txt;
 811                 }
 812                 return $s;
 813         }
 814
 815         /* private */ function handleInternalLink( $line, $prefix )
 816         {
 817                 global $wgLang, $wgLinkCache;
 818                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 819                 static $fname = "Parser::handleInternalLink" ;
 820                 wfProfileIn( $fname );
 821
 822                 wfProfileIn( "$fname-setup" );
 823                 static $tc = FALSE;
 824                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 825                 $sk =& $this->mOptions->getSkin();
 826
 827                 # Match a link having the form [[namespace:link|alternate]]trail
 828                 static $e1 = FALSE;
 829                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 830                 # Match the end of a line for a word that's not followed by whitespace,
 831                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 832                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 833                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 834                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 835
 836
 837                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 838                 static $image = FALSE;
 839                 static $special = FALSE;
 840                 static $media = FALSE;
 841                 static $category = FALSE;
 842                 if ( !$image ) { $image = Namespace::getImage(); }
 843                 if ( !$special ) { $special = Namespace::getSpecial(); }
 844                 if ( !$media ) { $media = Namespace::getMedia(); }
 845                 if ( !$category ) { $category = Namespace::getCategory(); ; }
 846
 847                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 848
 849                 wfProfileOut( "$fname-setup" );
 850                 $s = "";
 851
 852                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 853                         $text = $m[2];
 854                         $trail = $m[3];
 855                 } else { # Invalid form; output directly
 856                         $s .= $prefix . "[[" . $line ;
 857                         return $s;
 858                 }
 859
 860                 /* Valid link forms:
 861                 Foobar -- normal
 862                 :Foobar -- override special treatment of prefix (images, language links)
 863                 /Foobar -- convert to CurrentPage/Foobar
 864                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 865                 */
 866                 $c = substr($m[1],0,1);
 867                 $noforce = ($c != ":");
 868                 if( $c == "/" ) { # subpage
 869                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 870                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 871                                 $noslash=$m[1];
 872                         } else {
 873                                 $noslash=substr($m[1],1);
 874                         }
 875                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 876                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 877                                 if( "" == $text ) {
 878                                         $text= $m[1];
 879                                 } # this might be changed for ugliness reasons
 880                         } else {
 881                                 $link = $noslash; # no subpage allowed, use standard link
 882                         }
 883                 } elseif( $noforce ) { # no subpage
 884                         $link = $m[1];
 885                 } else {
 886                         $link = substr( $m[1], 1 );
 887                 }
 888                 if( "" == $text )
 889                         $text = $link;
 890
 891                 $nt = Title::newFromText( $link );
 892                 if( !$nt ) {
 893                         $s .= $prefix . "[[" . $line;
 894                         return $s;
 895                 }
 896                 $ns = $nt->getNamespace();
 897                 $iw = $nt->getInterWiki();
 898                 if( $noforce ) {
 899                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 900                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 901                                 return (trim($s) == '')? '': $s;
 902                         }
 903                         if( $ns == $image ) {
 904                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 905                                 $wgLinkCache->addImageLinkObj( $nt );
 906                                 return $s;
 907                         }
 908                         if ( $ns == $category ) {
 909                                 $t = $nt->getText() ;
 910                                 $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 911                                 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 912                                 $this->mOutput->mCategoryLinks[] = $t ;
 913                                 $s .= $prefix . $trail ;
 914                                 return $s ;
 915                         }
 916                 }
 917                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 918                     ( strpos( $link, "#" ) == FALSE ) ) {
 919                         # Self-links are handled specially; generally de-link and change to bold.
 920                         $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 921                         return $s;
 922                 }
 923
 924                 if( $ns == $media ) {
 925                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 926                         $wgLinkCache->addImageLinkObj( $nt );
 927                         return $s;
 928                 } elseif( $ns == $special ) {
 929                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 930                         return $s;
 931                 }
 932                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 933
 934                 wfProfileOut( $fname );
 935                 return $s;
 936         }
 937
 938         # Some functions here used by doBlockLevels()
 939         #
 940         /* private */ function closeParagraph()
 941         {
 942                 $result = "";
 943                 if ( '' != $this->mLastSection ) {
 944                         $result = "</" . $this->mLastSection  . ">\n";
 945                 }
 946                 $this->mInPre = false;
 947                 $this->mLastSection = "";
 948                 return $result;
 949         }
        # getCommon() returns the length of the longest common prefix of
        # both arguments, i.e. the number of leading characters they share.
        #
 953         /* private */ function getCommon( $st1, $st2 )
 954         {
 955                 $fl = strlen( $st1 );
 956                 $shorter = strlen( $st2 );
 957                 if ( $fl < $shorter ) { $shorter = $fl; }
 958
 959                 for ( $i = 0; $i < $shorter; ++$i ) {
 960                         if ( $st1{$i} != $st2{$i} ) { break; }
 961                 }
 962                 return $i;
 963         }
 964         # These next three functions open, continue, and close the list
 965         # element appropriate to the prefix character passed into them.
 966         #
 967         /* private */ function openList( $char )
 968     {
 969                 $result = $this->closeParagraph();
 970
 971                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 972                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 973                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 974                 else if ( ";" == $char ) {
 975                         $result .= "<dl><dt>";
 976                         $this->mDTopen = true;
 977                 }
 978                 else { $result = "<!-- ERR 1 -->"; }
 979
 980                 return $result;
 981         }
 982
 983         /* private */ function nextItem( $char )
 984         {
 985                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 986                 else if ( ":" == $char || ";" == $char ) {
 987                         $close = "</dd>";
 988                         if ( $this->mDTopen ) { $close = "</dt>"; }
 989                         if ( ";" == $char ) {
 990                                 $this->mDTopen = true;
 991                                 return $close . "<dt>";
 992                         } else {
 993                                 $this->mDTopen = false;
 994                                 return $close . "<dd>";
 995                         }
 996                 }
 997                 return "<!-- ERR 2 -->";
 998         }
 999
1000         /* private */function closeList( $char )
1001         {
1002                 if ( "*" == $char ) { $text = "</li></ul>"; }
1003                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1004                 else if ( ":" == $char ) {
1005                         if ( $this->mDTopen ) {
1006                                 $this->mDTopen = false;
1007                                 $text = "</dt></dl>";
1008                         } else {
1009                                 $text = "</dd></dl>";
1010                         }
1011                 }
1012                 else {  return "<!-- ERR 3 -->"; }
1013                 return $text."\n";
1014         }
1015
1016         /* private */ function doBlockLevels( $text, $linestart )
1017         {
1018                 $fname = "Parser::doBlockLevels";
1019                 wfProfileIn( $fname );
1020                 # Parsing through the text line by line.  The main thing
1021                 # happening here is handling of block-level elements p, pre,
1022                 # and making lists from lines starting with * # : etc.
1023                 #
1024                 $a = explode( "\n", $text );
1025
1026                 $lastPref = $text = $lastLine = '';
1027                 $this->mDTopen = $inBlockElem = false;
1028                 $npl = 0;
1029                 $pstack = false;
1030
1031                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1032                 foreach ( $a as $t ) {
1033                         $oLine = $t;
1034                         $opl = strlen( $lastPref );
1035                         $preCloseMatch = preg_match("/<\\/pre/i", $t );
1036                         $preOpenMatch = preg_match("/<pre/i", $t );
1037                         if (!$this->mInPre) {
1038                                 $this->mInPre = !empty($preOpenMatch);
1039                         }
1040                         if ( !$this->mInPre ) {
1041                                 $npl = strspn( $t, "*#:;" );
1042                                 $pref = substr( $t, 0, $npl );
1043                                 $pref2 = str_replace( ";", ":", $pref );
1044                                 $t = substr( $t, $npl );
1045                         } else {
1046                                 $npl = 0;
1047                                 $pref = $pref2 = '';
1048                         }
1049
1050                         // list generation
1051                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1052                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1053                                 if ( $pstack ) { $pstack = false; }
1054
1055                                 if ( ";" == substr( $pref, -1 ) ) {
1056                                         $cpos = strpos( $t, ":" );
1057                                         if ( false !== $cpos ) {
1058                                                 $term = substr( $t, 0, $cpos );
1059                                                 $text .= $term . $this->nextItem( ":" );
1060                                                 $t = substr( $t, $cpos + 1 );
1061                                         }
1062                                 }
1063                         } else if (0 != $npl || 0 != $opl) {
1064                                 $cpl = $this->getCommon( $pref, $lastPref );
1065                                 if ( $pstack ) { $pstack = false; }
1066
1067                                 while ( $cpl < $opl ) {
1068                                         $text .= $this->closeList( $lastPref{$opl-1} );
1069                                         --$opl;
1070                                 }
1071                                 if ( $npl <= $cpl && $cpl > 0 ) {
1072                                         $text .= $this->nextItem( $pref{$cpl-1} );
1073                                 }
1074                                 while ( $npl > $cpl ) {
1075                                         $char = substr( $pref, $cpl, 1 );
1076                                         $text .= $this->openList( $char );
1077
1078                                         if ( ";" == $char ) {
1079                                                 $cpos = strpos( $t, ":" );
1080                                                 if ( ! ( false === $cpos ) ) {
1081                                                         $term = substr( $t, 0, $cpos );
1082                                                         $text .= $term . $this->nextItem( ":" );
1083                                                         $t = substr( $t, $cpos + 1 );
1084                                                 }
1085                                         }
1086                                         ++$cpl;
1087                                 }
1088                                 $lastPref = $pref2;
1089                         }
1090                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1091                                 $uniq_prefix = UNIQ_PREFIX;
1092                                 // XXX: use a stack for nestable elements like span, table and div
1093                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1094                                 $closematch = preg_match(
1095                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1096                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1097                                 if ( $openmatch or $closematch ) {
1098                                         if ( $pstack ) { $pstack = false; }
1099                                         $text .= $this->closeParagraph();
1100                                         if($preOpenMatch and !$preCloseMatch) {
1101                                                 $this->mInPre = true;
1102                                         }
1103                                         if ( $closematch  ) {
1104                                                 $inBlockElem = false;
1105                                         } else {
1106                                                 $inBlockElem = true;
1107                                         }
1108                                 } else if ( !$inBlockElem ) {
1109                                         if ( " " == $t{0} ) {
1110                                                 // pre
1111                                                 if ($this->mLastSection != 'pre') {
1112                                                         $pstack = false;
1113                                                         $text .= $this->closeParagraph().'<pre>';
1114                                                         $this->mLastSection = 'pre';
1115                                                 }
1116                                         } else {
1117                                                 // paragraph
1118                                                 if ( '' == trim($t) ) {
1119                                                         if ( $pstack ) {
1120                                                                 $text .= $pstack.'<br/>';
1121                                                                 $pstack = false;
1122                                                                 $this->mLastSection = 'p';
1123                                                         } else {
1124                                                                 if ($this->mLastSection != 'p' ) {
1125                                                                         $text .= $this->closeParagraph();
1126                                                                         $this->mLastSection = '';
1127                                                                         $pstack = "<p>";
1128                                                                 } else {
1129                                                                         $pstack = '</p><p>';
1130                                                                 }
1131                                                         }
1132                                                 } else {
1133                                                         if ( $pstack ) {
1134                                                                 $text .= $pstack;
1135                                                                 $pstack = false;
1136                                                                 $this->mLastSection = 'p';
1137                                                         } else if ($this->mLastSection != 'p') {
1138                                                                 $text .= $this->closeParagraph().'<p>';
1139                                                                 $this->mLastSection = 'p';
1140                                                         }
1141                                                 }
1142                                         }
1143                                 }
1144                         }
1145                         if ($pstack === false) {
1146                                 $text .= $t."\n";
1147                         }
1148                 }
1149                 while ( $npl ) {
1150                         $text .= $this->closeList( $pref2{$npl-1} );
1151                         --$npl;
1152                 }
1153                 if ( "" != $this->mLastSection ) {
1154                         $text .= "</" . $this->mLastSection . ">";
1155                         $this->mLastSection = "";
1156                 }
1157
1158                 wfProfileOut( $fname );
1159                 return $text;
1160         }
1161
1162         function getVariableValue( $index ) {
1163                 global $wgLang, $wgSitename, $wgServer;
1164
1165                 switch ( $index ) {
1166                         case MAG_CURRENTMONTH:
1167                                 return date( "m" );
1168                         case MAG_CURRENTMONTHNAME:
1169                                 return $wgLang->getMonthName( date("n") );
1170                         case MAG_CURRENTMONTHNAMEGEN:
1171                                 return $wgLang->getMonthNameGen( date("n") );
1172                         case MAG_CURRENTDAY:
1173                                 return date("j");
1174                         case MAG_PAGENAME:
1175                                 return $this->mTitle->getText();
1176                         case MAG_NAMESPACE:
1177                                 return Namespace::getCanonicalName($this->mTitle->getNamespace());
1178                         case MAG_CURRENTDAYNAME:
1179                                 return $wgLang->getWeekdayName( date("w")+1 );
1180                         case MAG_CURRENTYEAR:
1181                                 return date( "Y" );
1182                         case MAG_CURRENTTIME:
1183                                 return $wgLang->time( wfTimestampNow(), false );
1184                         case MAG_NUMBEROFARTICLES:
1185                                 return wfNumberOfArticles();
1186                         case MAG_SITENAME:
1187                                 return $wgSitename;
1188                         case MAG_SERVER:
1189                                 return $wgServer;
1190                         default:
1191                                 return NULL;
1192                 }
1193         }
1194
1195         function initialiseVariables()
1196         {
1197                 global $wgVariableIDs;
1198                 $this->mVariables = array();
1199                 foreach ( $wgVariableIDs as $id ) {
1200                         $mw =& MagicWord::get( $id );
1201                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1202                 }
1203         }
1204
1205         /* private */ function replaceVariables( $text, $args = array() )
1206         {
1207                 global $wgLang, $wgScript, $wgArticlePath;
1208
1209                 $fname = "Parser::replaceVariables";
1210                 wfProfileIn( $fname );
1211
1212                 $bail = false;
1213                 if ( !$this->mVariables ) {
1214                         $this->initialiseVariables();
1215                 }
1216                 $titleChars = Title::legalChars();
1217                 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1218
1219                 # This function is called recursively. To keep track of arguments we need a stack:
1220                 array_push( $this->mArgStack, $args );
1221
1222                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1223                 $GLOBALS['wgCurParser'] =& $this;
1224                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1225
1226                 array_pop( $this->mArgStack );
1227
1228                 return $text;
1229         }
1230
1231         function braceSubstitution( $matches )
1232         {
1233                 global $wgLinkCache, $wgLang;
1234                 $fname = "Parser::braceSubstitution";
1235                 $found = false;
1236                 $nowiki = false;
1237                 $title = NULL;
1238
1239                 # $newline is an optional newline character before the braces
1240                 # $part1 is the bit before the first |, and must contain only title characters
1241                 # $args is a list of arguments, starting from index 0, not including $part1
1242
1243                 $newline = $matches[1];
1244                 $part1 = $matches[2];
1245                 # If the third subpattern matched anything, it will start with |
1246                 if ( $matches[3] !== "" ) {
1247                         $args = explode( "|", substr( $matches[3], 1 ) );
1248                 } else {
1249                         $args = array();
1250                 }
1251                 $argc = count( $args );
1252
1253                 # SUBST
1254                 $mwSubst =& MagicWord::get( MAG_SUBST );
1255                 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1256                         if ( $this->mOutputType != OT_WIKI ) {
1257                                 # Invalid SUBST not replaced at PST time
1258                                 # Return without further processing
1259                                 $text = $matches[0];
1260                                 $found = true;
1261                         }
1262                 } elseif ( $this->mOutputType == OT_WIKI ) {
1263                         # SUBST not found in PST pass, do nothing
1264                         $text = $matches[0];
1265                         $found = true;
1266                 }
1267
1268                 # MSG, MSGNW and INT
1269                 if ( !$found ) {
1270                         # Check for MSGNW:
1271                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1272                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1273                                 $nowiki = true;
1274                         } else {
1275                                 # Remove obsolete MSG:
1276                                 $mwMsg =& MagicWord::get( MAG_MSG );
1277                                 $mwMsg->matchStartAndRemove( $part1 );
1278                         }
1279
1280                         # Check if it is an internal message
1281                         $mwInt =& MagicWord::get( MAG_INT );
1282                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1283                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1284                                         $text = wfMsgReal( $part1, $args, true );
1285                                         $found = true;
1286                                 }
1287                         }
1288                 }
1289
1290                 # NS
1291                 if ( !$found ) {
1292                         # Check for NS: (namespace expansion)
1293                         $mwNs = MagicWord::get( MAG_NS );
1294                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1295                                 if ( intval( $part1 ) ) {
1296                                         $text = $wgLang->getNsText( intval( $part1 ) );
1297                                         $found = true;
1298                                 } else {
1299                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1300                                         if ( !is_null( $index ) ) {
1301                                                 $text = $wgLang->getNsText( $index );
1302                                                 $found = true;
1303                                         }
1304                                 }
1305                         }
1306                 }
1307
1308                 # LOCALURL and LOCALURLE
1309                 if ( !$found ) {
1310                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1311                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1312
1313                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1314                                 $func = 'getLocalURL';
1315                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1316                                 $func = 'escapeLocalURL';
1317                         } else {
1318                                 $func = '';
1319                         }
1320
1321                         if ( $func !== '' ) {
1322                                 $title = Title::newFromText( $part1 );
1323                                 if ( !is_null( $title ) ) {
1324                                         if ( $argc > 0 ) {
1325                                                 $text = $title->$func( $args[0] );
1326                                         } else {
1327                                                 $text = $title->$func();
1328                                         }
1329                                         $found = true;
1330                                 }
1331                         }
1332                 }
1333
1334                 # Internal variables
1335                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1336                         $text = $this->mVariables[$part1];
1337                         $found = true;
1338                         $this->mOutput->mContainsOldMagic = true;
1339                 }
1340
1341                 # Arguments input from the caller
1342                 $inputArgs = end( $this->mArgStack );
1343                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1344                         $text = $inputArgs[$part1];
1345                         $found = true;
1346                 }
1347
1348                 # Load from database
1349                 if ( !$found ) {
1350                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1351                         if ( !is_null( $title ) && !$title->isExternal() ) {
1352                                 # Check for excessive inclusion
1353                                 $dbk = $title->getPrefixedDBkey();
1354                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1355                                         $article = new Article( $title );
1356                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1357                                         if ( $articleContent !== false ) {
1358                                                 $found = true;
1359                                                 $text = $articleContent;
1360
1361                                         }
1362                                 }
1363
1364                                 # If the title is valid but undisplayable, make a link to it
1365                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1366                                         $text = "[[" . $title->getPrefixedText() . "]]";
1367                                         $found = true;
1368                                 }
1369                         }
1370                 }
1371
1372                 # Recursive parsing, escaping and link table handling
1373                 # Only for HTML output
1374                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1375                         $text = wfEscapeWikiText( $text );
1376                 } elseif ( $this->mOutputType == OT_HTML && $found ) {
1377                         # Clean up argument array
1378                         $assocArgs = array();
1379                         $index = 1;
1380                         foreach( $args as $arg ) {
1381                                 $eqpos = strpos( $arg, "=" );
1382                                 if ( $eqpos === false ) {
1383                                         $assocArgs[$index++] = $arg;
1384                                 } else {
1385                                         $name = trim( substr( $arg, 0, $eqpos ) );
1386                                         $value = trim( substr( $arg, $eqpos+1 ) );
1387                                         if ( $value === false ) {
1388                                                 $value = "";
1389                                         }
1390                                         if ( $name !== false ) {
1391                                                 $assocArgs[$name] = $value;
1392                                         }
1393                                 }
1394                         }
1395
1396                         # Do not enter included links in link table
1397                         if ( !is_null( $title ) ) {
1398                                 $wgLinkCache->suspend();
1399                         }
1400
1401                         # Run full parser on the included text
1402                         $text = $this->strip( $text, $this->mStripState );
1403                         $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
1404
1405                         # Add the result to the strip state for re-inclusion after
1406                         # the rest of the processing
1407                         $text = $this->insertStripItem( $text, $this->mStripState );
1408
1409                         # Resume the link cache and register the inclusion as a link
1410                         if ( !is_null( $title ) ) {
1411                                 $wgLinkCache->resume();
1412                                 $wgLinkCache->addLinkObj( $title );
1413                         }
1414                 }
1415
1416                 if ( !$found ) {
1417                         return $matches[0];
1418                 } else {
1419                         return $newline . $text;
1420                 }
1421         }
1422
1423         # Returns true if the function is allowed to include this entity
1424         function incrementIncludeCount( $dbk )
1425         {
1426                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1427                         $this->mIncludeCount[$dbk] = 0;
1428                 }
1429                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1430                         return true;
1431                 } else {
1432                         return false;
1433                 }
1434         }
1435
1436
1437         # Cleans up HTML, removes dangerous tags and attributes
1438         /* private */ function removeHTMLtags( $text )
1439         {
1440                 $fname = "Parser::removeHTMLtags";
1441                 wfProfileIn( $fname );
1442                 $htmlpairs = array( # Tags that must be closed
1443                         "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1444                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1445                         "strike", "strong", "tt", "var", "div", "center",
1446                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1447                         "ruby", "rt" , "rb" , "rp", "p"
1448                 );
1449                 $htmlsingle = array(
1450                         "br", "hr", "li", "dt", "dd"
1451                 );
1452                 $htmlnest = array( # Tags that can be nested--??
1453                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1454                         "dl", "font", "big", "small", "sub", "sup"
1455                 );
1456                 $tabletags = array( # Can only appear inside table
1457                         "td", "th", "tr"
1458                 );
1459
1460                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1461                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1462
1463                 $htmlattrs = $this->getHTMLattrs () ;
1464
1465                 # Remove HTML comments
1466                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1467
1468                 $bits = explode( "<", $text );
1469                 $text = array_shift( $bits );
1470                 $tagstack = array(); $tablestack = array();
1471
1472                 foreach ( $bits as $x ) {
1473                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1474                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1475                           $x, $regs );
1476                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1477                         error_reporting( $prev );
1478
1479                         $badtag = 0 ;
1480                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1481                                 # Check our stack
1482                                 if ( $slash ) {
1483                                         # Closing a tag...
1484                                         if ( ! in_array( $t, $htmlsingle ) &&
1485                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1486                                                 array_push( $tagstack, $ot );
1487                                                 $badtag = 1;
1488                                         } else {
1489                                                 if ( $t == "table" ) {
1490                                                         $tagstack = array_pop( $tablestack );
1491                                                 }
1492                                                 $newparams = "";
1493                                         }
1494                                 } else {
1495                                         # Keep track for later
1496                                         if ( in_array( $t, $tabletags ) &&
1497                                           ! in_array( "table", $tagstack ) ) {
1498                                                 $badtag = 1;
1499                                         } else if ( in_array( $t, $tagstack ) &&
1500                                           ! in_array ( $t , $htmlnest ) ) {
1501                                                 $badtag = 1 ;
1502                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1503                                                 if ( $t == "table" ) {
1504                                                         array_push( $tablestack, $tagstack );
1505                                                         $tagstack = array();
1506                                                 }
1507                                                 array_push( $tagstack, $t );
1508                                         }
1509                                         # Strip non-approved attributes from the tag
1510                                         $newparams = $this->fixTagAttributes($params);
1511
1512                                 }
1513                                 if ( ! $badtag ) {
1514                                         $rest = str_replace( ">", "&gt;", $rest );
1515                                         $text .= "<$slash$t $newparams$brace$rest";
1516                                         continue;
1517                                 }
1518                         }
1519                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1520                 }
1521                 # Close off any remaining tags
1522                 while ( $t = array_pop( $tagstack ) ) {
1523                         $text .= "</$t>\n";
1524                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1525                 }
1526                 wfProfileOut( $fname );
1527                 return $text;
1528         }
1529
1530 /*
1531  *
1532  * This function accomplishes several tasks:
1533  * 1) Auto-number headings if that option is enabled
1534  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1535  * 3) Add a Table of contents on the top for users who have enabled the option
1536  * 4) Auto-anchor headings
1537  *
1538  * It loops through all headlines, collects the necessary data, then splits up the
1539  * string and re-inserts the newly formatted headlines.
1540  *
1541  */
1542
1543         /* private */ function formatHeadings( $text )
1544         {
1545                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1546                 $doShowToc = $this->mOptions->getShowToc();
1547                 if( !$this->mTitle->userCanEdit() ) {
1548                         $showEditLink = 0;
1549                         $rightClickHack = 0;
1550                 } else {
1551                         $showEditLink = $this->mOptions->getEditSection();
1552                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1553                 }
1554
1555                 # Inhibit editsection links if requested in the page
1556                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1557                 if( $esw->matchAndRemove( $text ) ) {
1558                         $showEditLink = 0;
1559                 }
1560                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1561                 # do not add TOC
1562                 $mw =& MagicWord::get( MAG_NOTOC );
1563                 if( $mw->matchAndRemove( $text ) ) {
1564                         $doShowToc = 0;
1565                 }
1566
1567                 # never add the TOC to the Main Page. This is an entry page that should not
1568                 # be more than 1-2 screens large anyway
1569                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1570                         $doShowToc = 0;
1571                 }
1572
1573                 # Get all headlines for numbering them and adding funky stuff like [edit]
1574                 # links - this is for later, but we need the number of headlines right now
1575                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1576
1577                 # if there are fewer than 4 headlines in the article, do not show TOC
1578                 if( $numMatches < 4 ) {
1579                         $doShowToc = 0;
1580                 }
1581
1582                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1583                 # override above conditions and always show TOC
1584                 $mw =& MagicWord::get( MAG_FORCETOC );
1585                 if ($mw->matchAndRemove( $text ) ) {
1586                         $doShowToc = 1;
1587                 }
1588
1589
1590                 # We need this to perform operations on the HTML
1591                 $sk =& $this->mOptions->getSkin();
1592
1593                 # headline counter
1594                 $headlineCount = 0;
1595
1596                 # Ugh .. the TOC should have neat indentation levels which can be
1597                 # passed to the skin functions. These are determined here
1598                 $toclevel = 0;
1599                 $toc = "";
1600                 $full = "";
1601                 $head = array();
1602                 $sublevelCount = array();
1603                 $level = 0;
1604                 $prevlevel = 0;
1605                 foreach( $matches[3] as $headline ) {
1606                         $numbering = "";
1607                         if( $level ) {
1608                                 $prevlevel = $level;
1609                         }
1610                         $level = $matches[1][$headlineCount];
1611                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1612                                 # reset when we enter a new level
1613                                 $sublevelCount[$level] = 0;
1614                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1615                                 $toclevel += $level - $prevlevel;
1616                         }
1617                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1618                                 # reset when we step back a level
1619                                 $sublevelCount[$level+1]=0;
1620                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1621                                 $toclevel -= $prevlevel - $level;
1622                         }
1623                         # count number of headlines for each level
1624                         @$sublevelCount[$level]++;
1625                         if( $doNumberHeadings || $doShowToc ) {
1626                                 $dot = 0;
1627                                 for( $i = 1; $i <= $level; $i++ ) {
1628                                         if( !empty( $sublevelCount[$i] ) ) {
1629                                                 if( $dot ) {
1630                                                         $numbering .= ".";
1631                                                 }
1632                                                 $numbering .= $sublevelCount[$i];
1633                                                 $dot = 1;
1634                                         }
1635                                 }
1636                         }
1637
1638                         # The canonized header is a version of the header text safe to use for links
1639                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1640                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1641
1642                         # strip out HTML
1643                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1644                         $tocline = trim( $canonized_headline );
1645                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
1646                         $refer[$headlineCount] = $canonized_headline;
1647
1648                         # count how many in assoc. array so we can track dupes in anchors
1649                         @$refers[$canonized_headline]++;
1650                         $refcount[$headlineCount]=$refers[$canonized_headline];
1651
1652                         # Prepend the number to the heading text
1653
1654                         if( $doNumberHeadings || $doShowToc ) {
1655                                 $tocline = $numbering . " " . $tocline;
1656
1657                                 # Don't number the heading if it is the only one (looks silly)
1658                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1659                                         # the two are different if the line contains a link
1660                                         $headline=$numbering . " " . $headline;
1661                                 }
1662                         }
1663
1664                         # Create the anchor for linking from the TOC to the section
1665                         $anchor = $canonized_headline;
1666                         if($refcount[$headlineCount] > 1 ) {
1667                                 $anchor .= "_" . $refcount[$headlineCount];
1668                         }
1669                         if( $doShowToc ) {
1670                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1671                         }
1672                         if( $showEditLink ) {
1673                                 if ( empty( $head[$headlineCount] ) ) {
1674                                         $head[$headlineCount] = "";
1675                                 }
1676                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1677                         }
1678
1679                         # Add the edit section span
1680                         if( $rightClickHack ) {
1681                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1682                         }
1683
1684                         # give headline the correct <h#> tag
1685                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1686
1687                         $headlineCount++;
1688                 }
1689
1690                 if( $doShowToc ) {
1691                         $toclines = $headlineCount;
1692                         $toc .= $sk->tocUnindent( $toclevel );
1693                         $toc = $sk->tocTable( $toc );
1694                 }
1695
1696                 # split up and insert constructed headlines
1697
1698                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1699                 $i = 0;
1700
1701                 foreach( $blocks as $block ) {
1702                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1703                             # This is the [edit] link that appears for the top block of text when
1704                                 # section editing is enabled
1705
1706                                 # Disabled because it broke block formatting
1707                                 # For example, a bullet point in the top line
1708                                 # $full .= $sk->editSectionLink(0);
1709                         }
1710                         $full .= $block;
1711                         if( $doShowToc && !$i) {
1712                         # Top anchor now in skin
1713                                 $full = $full.$toc;
1714                         }
1715
1716                         if( !empty( $head[$i] ) ) {
1717                                 $full .= $head[$i];
1718                         }
1719                         $i++;
1720                 }
1721
1722                 return $full;
1723         }
1724
1725         /* private */ function doMagicISBN( &$tokenizer )
1726         {
1727                 global $wgLang;
1728
1729                 # Check whether next token is a text token
1730                 # If yes, fetch it and convert the text into a
1731                 # Special::BookSources link
1732                 $token = $tokenizer->previewToken();
1733                 while ( $token["type"] == "" )
1734                 {
1735                         $tokenizer->nextToken();
1736                         $token = $tokenizer->previewToken();
1737                 }
1738                 if ( $token["type"] == "text" )
1739                 {
1740                         $token = $tokenizer->nextToken();
1741                         $x = $token["text"];
1742                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1743
1744                         $isbn = $blank = "" ;
1745                         while ( " " == $x{0} ) {
1746                                 $blank .= " ";
1747                                 $x = substr( $x, 1 );
1748                         }
1749                         while ( strstr( $valid, $x{0} ) != false ) {
1750                                 $isbn .= $x{0};
1751                                 $x = substr( $x, 1 );
1752                         }
1753                         $num = str_replace( "-", "", $isbn );
1754                         $num = str_replace( " ", "", $num );
1755
1756                         if ( "" == $num ) {
1757                                 $text = "ISBN $blank$x";
1758                         } else {
1759                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1760                                 $text = "<a href=\"" .
1761                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1762                                         "\" class=\"internal\">ISBN $isbn</a>";
1763                                 $text .= $x;
1764                         }
1765                 } else {
1766                         $text = "ISBN ";
1767                 }
1768                 return $text;
1769         }
1770         /* private */ function doMagicRFC( &$tokenizer )
1771         {
1772                 global $wgLang;
1773
1774                 # Check whether next token is a text token
1775                 # If yes, fetch it and convert the text into a
1776                 # link to an RFC source
1777                 $token = $tokenizer->previewToken();
1778                 while ( $token["type"] == "" )
1779                 {
1780                         $tokenizer->nextToken();
1781                         $token = $tokenizer->previewToken();
1782                 }
1783                 if ( $token["type"] == "text" )
1784                 {
1785                         $token = $tokenizer->nextToken();
1786                         $x = $token["text"];
1787                         $valid = "0123456789";
1788
1789                         $rfc = $blank = "" ;
1790                         while ( " " == $x{0} ) {
1791                                 $blank .= " ";
1792                                 $x = substr( $x, 1 );
1793                         }
1794                         while ( strstr( $valid, $x{0} ) != false ) {
1795                                 $rfc .= $x{0};
1796                                 $x = substr( $x, 1 );
1797                         }
1798
1799                         if ( "" == $rfc ) {
1800                                 $text .= "RFC $blank$x";
1801                         } else {
1802                                 $url = wfmsg( "rfcurl" );
1803                                 $url = str_replace( "$1", $rfc, $url);
1804                                 $sk =& $this->mOptions->getSkin();
1805                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1806                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1807                         }
1808                 } else {
1809                         $text = "RFC ";
1810                 }
1811                 return $text;
1812         }
1813
1814         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1815         {
1816                 $this->mOptions = $options;
1817                 $this->mTitle =& $title;
1818                 $this->mOutputType = OT_WIKI;
1819
1820                 if ( $clearState ) {
1821                         $this->clearState();
1822                 }
1823
1824                 $stripState = false;
1825                 $pairs = array(
1826                         "\r\n" => "\n",
1827                         );
1828                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
1829                 // now with regexes
1830                 $pairs = array(
1831                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1832                         "/<br *?>/i" => "<br/>",
1833                 );
1834                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1835                 $text = $this->strip( $text, $stripState, false );
1836                 $text = $this->pstPass2( $text, $user );
1837                 $text = $this->unstrip( $text, $stripState );
1838                 return $text;
1839         }
1840
1841         /* private */ function pstPass2( $text, &$user )
1842         {
1843                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1844
1845                 # Variable replacement
1846                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1847                 $text = $this->replaceVariables( $text );
1848
1849                 # Signatures
1850                 #
1851                 $n = $user->getName();
1852                 $k = $user->getOption( "nickname" );
1853                 if ( "" == $k ) { $k = $n; }
1854                 if(isset($wgLocaltimezone)) {
1855                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1856                 }
1857                 /* Note: this is an ugly timezone hack for the European wikis */
1858                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1859                   " (" . date( "T" ) . ")";
1860                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1861
1862                 $text = preg_replace( "/~~~~~/", $d, $text );
1863                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1864                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1865                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1866                   Namespace::getUser() ) . ":$n|$k]]", $text );
1867
1868                 # Context links: [[|name]] and [[name (context)|]]
1869                 #
1870                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1871                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1872                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1873                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1874
1875                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1876                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1877                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1878                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1879                                                                                                                 # [[ns:page (cont)|]]
1880                 $context = "";
1881                 $t = $this->mTitle->getText();
1882                 if ( preg_match( $conpat, $t, $m ) ) {
1883                         $context = $m[2];
1884                 }
1885                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1886                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1887                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1888
1889                 if ( "" == $context ) {
1890                         $text = preg_replace( $p2, "[[\\1]]", $text );
1891                 } else {
1892                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1893                 }
1894
1895                 /*
1896                 $mw =& MagicWord::get( MAG_SUBST );
1897                 $wgCurParser = $this->fork();
1898                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1899                 $this->merge( $wgCurParser );
1900                 */
1901
1902                 # Trim trailing whitespace
1903                 # MAG_END (__END__) tag allows for trailing
1904                 # whitespace to be deliberately included
1905                 $text = rtrim( $text );
1906                 $mw =& MagicWord::get( MAG_END );
1907                 $mw->matchAndRemove( $text );
1908
1909                 return $text;
1910         }
1911
1912         # Set up some variables which are usually set up in parse()
1913         # so that an external function can call some class members with confidence
1914         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1915         {
1916                 $this->mTitle =& $title;
1917                 $this->mOptions = $options;
1918                 $this->mOutputType = $outputType;
1919                 if ( $clearState ) {
1920                         $this->clearState();
1921                 }
1922         }
1923
1924         function transformMsg( $text, $options ) {
1925                 global $wgTitle;
1926                 static $executing = false;
1927
1928                 # Guard against infinite recursion
1929                 if ( $executing ) {
1930                         return $text;
1931                 }
1932                 $executing = true;
1933
1934                 $this->mTitle = $wgTitle;
1935                 $this->mOptions = $options;
1936                 $this->mOutputType = OT_MSG;
1937                 $this->clearState();
1938                 $text = $this->replaceVariables( $text );
1939
1940                 $executing = false;
1941                 return $text;
1942         }
1943 }
1944
# Holds the result of a parse: the output text, the language links and
# category links that were collected, and the "contains old magic" flag.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        #   $text              output text
        #   $languageLinks     array of language links
        #   $categoryLinks     array of category links
        #   $containsOldMagic  boolean flag
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold another ParserOutput into this one: the other object's language
        # and category links are appended to this object's lists, and the
        # "contains old magic" flags are OR-ed together. mText is not touched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links, not from
                # this object's own language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1974
# Bundle of settings consulted while parsing. Each setting has a getter and
# a setter; initialiseFromUser() fills them all in from the global
# configuration variables and a user object.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # --- Getters: each returns the current value of one setting ---

        function getUseTeX()
        {
                return $this->mUseTeX;
        }

        function getUseCategoryMagic()
        {
                return $this->mUseCategoryMagic;
        }

        function getUseDynamicDates()
        {
                return $this->mUseDynamicDates;
        }

        function getInterwikiMagic()
        {
                return $this->mInterwikiMagic;
        }

        function getAllowExternalImages()
        {
                return $this->mAllowExternalImages;
        }

        function getSkin()
        {
                return $this->mSkin;
        }

        function getDateFormat()
        {
                return $this->mDateFormat;
        }

        function getEditSection()
        {
                return $this->mEditSection;
        }

        function getEditSectionOnRightClick()
        {
                return $this->mEditSectionOnRightClick;
        }

        function getNumberHeadings()
        {
                return $this->mNumberHeadings;
        }

        function getShowToc()
        {
                return $this->mShowToc;
        }

        # --- Setters: each hands the member and the new value to wfSetVar()
        # --- (wfSetRef() for the skin) and returns that helper's result

        function setUseTeX( $x )
        {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function setUseCategoryMagic( $x )
        {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        function setUseDynamicDates( $x )
        {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function setInterwikiMagic( $x )
        {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function setAllowExternalImages( $x )
        {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function setSkin( $x )
        {
                return wfSetRef( $this->mSkin, $x );
        }

        function setDateFormat( $x )
        {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function setEditSection( $x )
        {
                return wfSetVar( $this->mEditSection, $x );
        }

        function setEditSectionOnRightClick( $x )
        {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function setNumberHeadings( $x )
        {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function setShowToc( $x )
        {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Build a new ParserOptions object initialised from the given user.
        /* static */ function newFromUser( &$user )
        {
                $fresh = new ParserOptions;
                $fresh->initialiseFromUser( $user );
                return $fresh;
        }

        # Fill in every setting: the site-wide ones from the global
        # configuration variables, the rest from the user's skin and options.
        # If $userInput is empty, a new User object marked as loaded is used
        # in its place.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user settings
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2047
# Regex callback used in Parser::replaceVariables.
# Plain-function trampoline: hands the match array to braceSubstitution()
# on the parser object held in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2054
2055 ?>