includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         include_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  # Cap on how often any given page may be included (see the O(N^2) note above)
  define( 'MAX_INCLUDE_REPEAT', 5 );

  # Allowed values for $mOutputType
  define( 'OT_HTML', 1 );
  define( 'OT_WIKI', 2 );
  define( 'OT_MSG', 3 );

  # prefix for escaping, used in two functions at least
  define( 'UNIQ_PREFIX', 'NaodW29' );
  49
  50 class Parser
  51 {
  52         # Cleared with clearState():
  53         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  54         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  55
  56         # Temporary:
  57         var $mOptions, $mTitle, $mOutputType;
  58
  # Constructor: puts a newly created parser into its initial state
  # by delegating to clearState().
  function Parser()
  {
          $this->clearState();
  }
  63
  # Resets the members listed under "Cleared with clearState()" to their
  # initial values and attaches a new ParserOutput object.
  function clearState()
  {
          # Collections start out empty
          $this->mArgStack = array();
          $this->mStripState = array();
          $this->mIncludeCount = array();

          # Flags, counters and markers
          $this->mVariables = false;
          $this->mDTopen = false;
          $this->mLastSection = "";
          $this->mAutonumber = 0;

          # Fresh container for the result of the next parse
          $this->mOutput = new ParserOutput;
  }
  75
  76         # First pass--just handle <nowiki> sections, pass the rest off
  77         # to internalParse() which does all the real work.
  78         #
  79         # Returns a ParserOutput
  80         #
  # Renders wiki markup as HTML.
  #
  # $text       - wiki markup to convert
  # $title      - title object, kept by reference in $this->mTitle
  # $options    - options object, kept in $this->mOptions
  # $linestart  - forwarded to internalParse() and doBlockLevels()
  # $clearState - when true, clearState() runs before anything else
  #
  # Returns $this->mOutput after its text has been set to the generated HTML.
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
          $fname = "Parser::parse";
          wfProfileIn( $fname );

          if ( $clearState ) {
                  $this->clearState();
          }

          $this->mOptions = $options;
          $this->mTitle =& $title;
          $this->mOutputType = OT_HTML;

          # Protected sections are swapped for markers, the remainder is parsed,
          # then the markers are expanded again
          $text = $this->strip( $text, $this->mStripState );
          $text = $this->internalParse( $text, $linestart );
          $text = $this->unstrip( $text, $this->mStripState );

          # Clean up special characters, only run once, next-to-last before doBlockLevels
          $fixtags = array(
                  "/<hr *>/i" => '<hr/>',
                  "/<br *>/i" => '<br/>',
                  "/<center *>/i"=>'<div class="center">',
                  "/<\\/center *>/i" => '</div>',
                  # Clean up spare ampersands; note that we probably ought to be
                  # more careful about named entities.
                  # Hexadecimal references may only contain hex digits, hence the
                  # class A-Fa-f: a range of A-f would also admit G-Z and the
                  # punctuation between 'Z' and 'a'.
                  '/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
          );
          $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

          # only once and last
          $text = $this->doBlockLevels( $text, $linestart );

          $this->mOutput->setText( $text );
          wfProfileOut( $fname );
          return $this->mOutput;
  }
 117
 # Returns two random numbers in the range 0..0x7fffffff, each written in
 # lowercase hexadecimal and joined into a single string.
 /* static */ function getRandomString()
 {
         $first = dechex( mt_rand( 0, 0x7fffffff ) );
         $second = dechex( mt_rand( 0, 0x7fffffff ) );
         return $first . $second;
 }
 122
 123         # Replaces all occurrences of <$tag>content</$tag> in the text
 124         # with a random marker and returns the new text. The output parameter
 125         # $content will be an associative array filled with data in the form
 126         # $unique_marker => content.
 127
 128         # If $content is already set, the additional entries will be appended
 129
 # Cuts every <$tag>...</$tag> section out of $text and leaves a unique
 # marker in its place.
 #
 # $tag         - tag name, matched literally and case-insensitively
 # $text        - text to scan
 # $content     - output: receives marker => inner text; existing entries are kept
 # $uniq_prefix - string placed at the start of every generated marker
 #
 # Returns the text with the sections replaced by markers. A tag that is
 # opened but never closed takes everything up to the end of the text.
 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
         $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
         if ( !$content ) {
                 $content = array( );
         }

         # Quote the tag name so regex metacharacters in it cannot alter the
         # patterns, and build both patterns once instead of on every iteration
         $quotedTag = preg_quote( $tag, '/' );
         $openPattern = "/<\\s*{$quotedTag}\\s*>/i";
         $closePattern = "/<\\/\\s*{$quotedTag}\\s*>/i";

         $n = 1;
         $stripped = "";

         while ( "" != $text ) {
                 $p = preg_split( $openPattern, $text, 2 );
                 $stripped .= $p[0];
                 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                         # No further opening tag, or nothing follows it
                         $text = "";
                 } else {
                         $q = preg_split( $closePattern, $p[1], 2 );
                         $marker = $rnd . sprintf("%08X", $n++);
                         $content[$marker] = $q[0];
                         $stripped .= $marker;
                         # Without a closing tag there is no remainder to scan
                         $text = isset( $q[1] ) ? $q[1] : "";
                 }
         }
         return $stripped;
 }
 153
 154         # Strips <nowiki> and <pre>, plus <hiero> and <math> when those are enabled
 155         # Returns the text, and fills an array with data needed in unstrip()
 156         # If the $state is already a valid strip state, it adds to the state
 157         #
 # Takes the protected sections out of $text and records what each marker
 # stands for in $state, so that unstrip() can put them back later.
 #
 # $text  - text to process
 # $state - strip state; created when empty, otherwise extended
 #
 # Returns the text with the sections replaced by markers.
 function strip( $text, &$state )
 {
         $uniq_prefix = UNIQ_PREFIX;
         # Only OT_HTML gets rendered content; every other output type gets
         # the section wrapped in its original tag again
         $render = ( OT_HTML == $this->mOutputType );

         # Replace any instances of the placeholders
         #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

         # <nowiki>
         $nowiki_content = array();
         $text = Parser::extractTags( "nowiki", $text, $nowiki_content, $uniq_prefix );
         foreach ( array_keys( $nowiki_content ) as $marker ) {
                 $content = $nowiki_content[$marker];
                 if ( !$render ) {
                         $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
                 } else {
                         $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
                 }
         }

         # <hiero>, only when the extension is switched on
         $hiero_content = array();
         if ( $GLOBALS['wgUseWikiHiero'] ) {
                 $text = Parser::extractTags( "hiero", $text, $hiero_content, $uniq_prefix );
                 foreach ( array_keys( $hiero_content ) as $marker ) {
                         $content = $hiero_content[$marker];
                         if ( !$render ) {
                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
                         } else {
                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
                         }
                 }
         }

         # <math>, only when TeX is enabled in the options
         $math_content = array();
         if ( $this->mOptions->getUseTeX() ) {
                 $text = Parser::extractTags( "math", $text, $math_content, $uniq_prefix );
                 foreach ( array_keys( $math_content ) as $marker ) {
                         $content = $math_content[$marker];
                         if ( !$render ) {
                                 $math_content[$marker] = "<math>$content</math>";
                         } else {
                                 $math_content[$marker] = renderMath( $content );
                         }
                 }
         }

         # <pre>
         $pre_content = array();
         $text = Parser::extractTags( "pre", $text, $pre_content, $uniq_prefix );
         foreach ( array_keys( $pre_content ) as $marker ) {
                 $content = $pre_content[$marker];
                 if ( !$render ) {
                         $pre_content[$marker] = "<pre>$content</pre>";
                 } else {
                         $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
                 }
         }

         # No state yet: build it from scratch
         if ( !$state ) {
                 $item_content = array();
                 $state = array(
                   'nowiki' => $nowiki_content,
                   'hiero' => $hiero_content,
                   'math' => $math_content,
                   'pre' => $pre_content,
                   'item' => $item_content
                 );
                 return $text;
         }

         # Merge state with the pre-existing state; entries already present win
         $state['nowiki'] += $nowiki_content;
         $state['hiero'] += $hiero_content;
         $state['math'] += $math_content;
         $state['pre'] += $pre_content;
         return $text;
 }
 228
 # Puts the stripped sections back: every marker recorded in $state is
 # replaced in $text by the content stored for it.
 #
 # $text  - text containing markers
 # $state - strip state as filled by strip() / insertStripItem()
 #
 # Returns the text with the markers expanded. A state that is not an
 # array leaves the text unchanged.
 function unstrip( $text, &$state )
 {
         if ( !is_array( $state ) ) {
                 return $text;
         }

         # Must expand in reverse order, otherwise nested tags will be corrupted.
         # foreach over reversed copies is used instead of end()/prev(), which
         # cannot tell a stored value of false from the end of the array.
         foreach ( array_reverse( $state, true ) as $contentDict ) {
                 if ( !is_array( $contentDict ) ) {
                         continue;
                 }
                 foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
                         $text = str_replace( $marker, $content, $text );
                 }
         }

         return $text;
 }
 241
 242         # Add an item to the strip state
 243         # Returns the unique tag which must be inserted into the stripped text
 244         # The tag will be replaced with the original text in unstrip()
 245
 # Stores $text in the 'item' bucket of the strip state under a new marker.
 #
 # $text  - text to protect
 # $state - strip state; initialised with empty buckets when empty
 #
 # Returns the marker to insert into the stripped text.
 function insertStripItem( $text, &$state )
 {
         if ( !$state ) {
                 # One empty bucket for each kind of stripped content
                 $state = array(
                   'nowiki' => array(),
                   'hiero' => array(),
                   'math' => array(),
                   'pre' => array(),
                   'item' => array()
                 );
         }

         $marker = UNIQ_PREFIX . '-item' . Parser::getRandomString();
         $state['item'][$marker] = $text;
         return $marker;
 }
 261
 262         # This method generates the list of subcategories and pages for a category
 # Builds the HTML listing of subcategories and pages for a category page.
 #
 # Returns "" when category magic is switched off in the parser options or
 # when the current title is not in the category namespace; otherwise the
 # generated HTML.
 function categoryMagic ()
 {
         global $wgLang ;
         if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

         $cns = Namespace::getCategory() ;
         if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

         $r = "<br style=\"clear:both;\"/>\n";

         # The skin comes from the parser options, as in the other methods of
         # this class; the parser is meant to stay away from $wgUser
         $sk =& $this->mOptions->getSkin() ;

         $articles = array() ;
         $children = array() ;
         $data = array () ;
         $id = $this->mTitle->getArticleID() ;

         if( $id ) {
                 # For existing categories
                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
         } else {
                 # For non-existing categories
                 $t = wfStrencode( $this->mTitle->getPrefixedDBKey() );
                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ;
         }
         $res = wfQuery ( $sql, DB_READ ) ;
         while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
         wfFreeResult ( $res ) ;

         # For all pages that link to this category
         foreach ( $data AS $x )
         {
                 $t = $wgLang->getNsText ( $x->cur_namespace ) ;
                 if ( $t != "" ) $t .= ":" ;
                 $t .= $x->cur_title ;

                 if ( $x->cur_namespace == $cns ) {
                         array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
                 } else {
                         array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
                 }
         }

         # Showing subcategories
         if ( count ( $children ) > 0 )
         {
                 asort ( $children ) ;
                 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
                 $r .= implode ( ", " , $children ) ;
         }

         # Showing pages in this category
         if ( count ( $articles ) > 0 )
         {
                 $ti = $this->mTitle->getText() ;
                 asort ( $articles ) ;
                 $h =  wfMsg( "category_header", $ti );
                 $r .= "<h2>{$h}</h2>\n" ;
                 $r .= implode ( ", " , $articles ) ;
         }

         return $r ;
 }
 330
 # Returns the list of attribute names that may be kept on HTML tags.
 # Each name appears exactly once.
 function getHTMLattrs ()
 {
         $htmlattrs = array( # Allowed attributes--no scripting, etc.
                         "title", "align", "lang", "dir", "width", "height",
                         "bgcolor", "clear", /* BR */ "noshade", /* HR */
                         "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
                         /* FONT */ "type", "start", "value", "compact",
                         /* For various lists, mostly deprecated but safe */
                         "summary", "border", "frame", "rules",
                         "cellspacing", "cellpadding", "valign", "char",
                         "charoff", "colgroup", "col", "span", "abbr", "axis",
                         "headers", "scope", "rowspan", "colspan", /* Tables */
                         "id", "class", "name", "style" /* For CSS */
                         );
         return $htmlattrs ;
 }
 347
 # Cleans the attribute part of a tag.
 #
 # Attributes whose name is not returned by getHTMLattrs() are removed. If
 # what remains has a style attribute containing "expression", a URL scheme
 # ending in "tp://" / "tps://" or "url(", all attributes are dropped.
 #
 # $t - raw attribute text
 # Returns the cleaned, trimmed attribute text ("" for blank input).
 function fixTagAttributes ( $t )
 {
         if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

         # Strip non-approved attributes from the tag. A callback is used
         # rather than the /e modifier, so attribute text is never evaluated
         # as PHP code and quotes in values come through unchanged.
         $t = preg_replace_callback(
                 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
                 array( $this, 'filterTagAttribute' ),
                 $t);

         # Strip javascript "expression" from stylesheets. Brute force approach:
         # If anything offensive is found, all attributes of the HTML tag are dropped
         if( preg_match(
                 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
                 wfMungeToUtf8( $t ) ) )
         {
                 $t="";
         }

         return trim ( $t ) ;
 }

 # preg_replace_callback() helper for fixTagAttributes().
 #
 # $m - match array: [1] attribute name, [3] attribute value if one was given
 # Returns "name" or "name=value" for an allowed attribute, "" otherwise.
 /* private */ function filterTagAttribute ( $m )
 {
         if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() , true ) ) {
                 return "" ;
         }
         $value = isset ( $m[3] ) ? $m[3] : "" ;
         if ( $value !== "" ) {
                 return "{$m[1]}={$value}" ;
         }
         return $m[1] ;
 }
 370
 # Converts wiki table markup into HTML, one line at a time.
 #
 # Recognised line starts: "{|" opens a table, "|}" closes it, "|-" starts
 # a new row, "|" and "!" start td / th cells ("||" and "!!" separate cells
 # on one line) and "|+" starts a caption. Lines outside any table, and
 # lines inside a table that match none of these, are left as they are.
 #
 # $t - text to process
 # Returns the converted text; tables still open at the end are closed.
 function doTableStuff ( $t )
 {
         $t = explode ( "\n" , $t ) ;
         # The four arrays are stacks with one entry per currently open table
         $td = array () ; # Is currently a td tag open?
         $ltd = array () ; # Was it TD or TH?
         $tr = array () ; # Is currently a tr tag open?
         $ltr = array () ; # tr attributes
         foreach ( $t AS $k => $x )
         {
                 $x = trim ( $x ) ;
                 $fc = substr ( $x , 0 , 1 ) ;
                 if ( "{|" == substr ( $x , 0 , 2 ) )
                 {
                         # Everything after the two-character marker is attribute text;
                         # starting at offset 2 keeps the first character when no
                         # space follows "{|"
                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $tr , false ) ;
                         array_push ( $ltr , "" ) ;
                 }
                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
                 else if ( "|}" == substr ( $x , 0 , 2 ) )
                 {
                         $z = "</table>\n" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                 }
                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
                 {
                         $x = substr ( $x , 1 ) ;
                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
                         $z = "" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                         # The <tr> itself is written when the first cell of the row arrives
                         array_push ( $tr , false ) ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
                 }
                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
                 {
                         if ( "|+" == substr ( $x , 0 , 2 ) )
                         {
                                 $fc = "+" ;
                                 $x = substr ( $x , 1 ) ;
                         }
                         $after = substr ( $x , 1 ) ;
                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
                         $after = explode ( "||" , $after ) ;
                         $t[$k] = "" ;
                         foreach ( $after AS $theline )
                         {
                                 $z = "" ;
                                 if ( $fc != "+" )
                                 {
                                         # Cells need a row; open one if none is open yet
                                         $tra = array_pop ( $ltr ) ;
                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                                         array_push ( $tr , true ) ;
                                         array_push ( $ltr , "" ) ;
                                 }

                                 # Close the previous cell before opening the next one
                                 $l = array_pop ( $ltd ) ;
                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                                 if ( $fc == "|" ) $l = "td" ;
                                 else if ( $fc == "!" ) $l = "th" ;
                                 else if ( $fc == "+" ) $l = "caption" ;
                                 else $l = "" ;
                                 array_push ( $ltd , $l ) ;

                                 # "attributes|content" or just "content"
                                 $y = explode ( "|" , $theline , 2 ) ;
                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                                 $t[$k] .= $y ;
                                 array_push ( $td , true ) ;
                         }
                 }
         }

         # Closing open td, tr && table
         while ( count ( $td ) > 0 )
         {
                 # Close the cell with the tag that opened it (td, th or caption)
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
                 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
                 $t[] = "</table>" ;
         }

         $t = implode ( "\n" , $t ) ;
         return $t ;
 }
 470
 # Runs the individual transformation passes over already stripped text.
 #
 # $text      - stripped wiki text
 # $linestart - not used inside this method
 # $args      - passed on to replaceVariables()
 #
 # Returns the transformed text.
 function internalParse( $text, $linestart, $args = array() )
 {
         $fname = "Parser::internalParse";
         wfProfileIn( $fname );

         # The order of the passes below matters; do not rearrange
         $text = $this->removeHTMLtags( $text );
         $text = $this->replaceVariables( $text, $args );
         $text = $this->doHeadings( $text );

         if ( $this->mOptions->getUseDynamicDates() ) {
                 global $wgDateFormatter;
                 $dateFormat = $this->mOptions->getDateFormat();
                 $text = $wgDateFormatter->reformat( $dateFormat, $text );
         }

         $text = $this->replaceExternalLinks( $text );
         $text = $this->doTokenizedParser ( $text );
         $text = $this->doTableStuff ( $text ) ;
         $text = $this->formatHeadings( $text );

         $sk =& $this->mOptions->getSkin();
         $text = $sk->transformContent( $text );

         # The category listing is appended on the first call only.
         # NOTE(review): categoryMagicDone is not among the members reset by
         # clearState() -- confirm that this is intended.
         if ( !isset ( $this->categoryMagicDone ) ) {
                 $text .= $this->categoryMagic () ;
                 $this->categoryMagicDone = true ;
         }

         wfProfileOut( $fname );
         return $text;
 }
 501
 502
 # Turns lines of the form "== text ==" (one to six equals signs on each
 # side) into <h1>..<h6> elements.
 #
 # $text - text to process
 # Returns the text with heading lines converted.
 /* private */ function doHeadings( $text )
 {
         # Longest markers first, so "== x ==" is not taken for a level 1
         # heading whose content starts and ends with "="
         $level = 6;
         while ( $level >= 1 ) {
                 $marker = substr( "======", 0, $level );
                 $pattern = "/^{$marker}(.+){$marker}(\\s|$)/m";
                 $replacement = "<h{$level}>\\1</h{$level}>\\2";
                 $text = preg_replace( $pattern, $replacement, $text );
                 --$level;
         }
         return $text;
 }
 512
 513         # Note: we have to do external links before the internal ones,
 514         # and otherwise take great care in the order of things here, so
 515         # that we don't end up interpreting some URLs twice.
 516
 # Converts external links for every supported protocol, one protocol
 # after the other.
 #
 # $text - text to process
 # Returns the text with external links converted.
 /* private */ function replaceExternalLinks( $text )
 {
         $fname = "Parser::replaceExternalLinks";
         wfProfileIn( $fname );

         # protocol => value of the $autonumber argument; processed in this order
         $protocols = array(
                 "http" => true,
                 "https" => true,
                 "ftp" => false,
                 "irc" => false,
                 "gopher" => false,
                 "news" => false,
                 "mailto" => false
         );
         foreach ( $protocols as $protocol => $autonumber ) {
                 $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
         }

         wfProfileOut( $fname );
         return $text;
 }
 531
 # Converts the external links of a single protocol.
 #
 # Free-standing URLs become <a> elements (or images, see below); bracketed
 # links of the form "[url]" and "[url label]" become labelled <a> elements.
 #
 # $s          - text to process
 # $protocol   - URL scheme without the colon, e.g. "http"
 # $autonumber - when true, "[url]" links are labelled "[n]" using a running
 #               counter, and image URLs are turned into images if the
 #               options allow external images
 #
 # Returns the converted text.
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
         # Stand-in for the protocol inside generated markup; it is swapped back
         # once both patterns have run (presumably so that generated URLs are not
         # matched a second time)
         $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
         $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

         # This is the list of separators that should be ignored if they
         # are the last character of a URL but that should be included
         # if they occur within the URL, e.g. "go to www.foo.com, where .."
         # in this case, the last comma should not become part of the URL,
         # but in "www.foo.com/123,2342,32.htm" it should.
         $sep = ",;\.:";
         $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
         $images = "gif|png|jpg|jpeg";

         # PLEASE NOTE: The curly braces { } are not part of the regex,
         # they are interpreted as part of the string (used to tell PHP
         # that the content of the string should be inserted there).
         $imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
           "((?i){$images})([^{$uc}]|$)/";

         $urlPattern = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
         $sk =& $this->mOptions->getSkin();

         # Replacement template for the URL itself, using the stand-in protocol
         $urlTemplate = "{$unique}:\\3";

         if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
                 $imageTag = $sk->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
                 $s = preg_replace( $imagePattern, "\\1" . $imageTag . "\\6", $s );
         }

         $linkAttributes = $sk->getExternalLinkAttributes( $urlTemplate, wfEscapeHTML( $urlTemplate ) );
         $linkLabel = wfEscapeHTML( $urlTemplate );
         $anchor = "\\1" . "<a href=\"{$urlTemplate}\"" . $linkAttributes . ">" . $linkLabel . "</a>\\5";
         $s = preg_replace( $urlPattern, $anchor, $s );
         $s = str_replace( $unique, $protocol, $s );

         # Bracketed links: split at every "[protocol:". The leading space
         # guarantees a first piece even when the text starts with a link.
         $pieces = explode( "[{$protocol}:", " " . $s );
         $s = substr( array_shift( $pieces ), 1 );

         $bareLink = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
         $labelledLink = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

         foreach ( $pieces as $line ) {
                 if ( preg_match( $bareLink, $line, $m ) ) {
                         # [url]
                         $link = "{$protocol}:{$m[1]}";
                         $trail = $m[2];
                         $text = $autonumber
                                 ? "[" . ++$this->mAutonumber . "]"
                                 : wfEscapeHTML( $link );
                 } else if ( preg_match( $labelledLink, $line, $m ) ) {
                         # [url label]
                         $link = "{$protocol}:{$m[1]}";
                         $text = $m[2];
                         $trail = $m[3];
                 } else {
                         # Not a link after all: put the text back untouched
                         $s .= "[{$protocol}:" . $line;
                         continue;
                 }

                 $labelIsUrl = ( $link == $text )
                         || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link );
                 # Expand the URL for printable version, unless the label already shows it
                 $paren = $labelIsUrl
                         ? ""
                         : "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";

                 $la = $sk->getExternalLinkAttributes( $link, $text );
                 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
         }
         return $s;
 }
 598
 # Handles a ''' token: opens <strong> when it is closed, closes it otherwise.
 #
 # $state - quote state with the keys "em" and "strong"; each holds false
 #          while closed, otherwise the value stored when the tag was opened
 # $token - token array; its "pos" entry, when set, is stored as that value
 #
 # Returns the HTML to emit for the token.
 /* private */ function handle3Quotes( &$state, $token )
 {
         if ( $state["strong"] === false ) {
                 $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
                 return "<strong>";
         }

         # ''' lala ''lala ''' : the <em> was opened after the <strong>, so it is
         # closed first and reopened once the <strong> is closed
         $emOpenedLater = ( $state["em"] !== false && $state["em"] > $state["strong"] );
         $state["strong"] = FALSE;
         if ( $emOpenedLater ) {
                 return "</em></strong><em>";
         }
         return "</strong>";
 }
 616
 # Handles a '' token: opens <em> when it is closed, closes it otherwise.
 #
 # $state - quote state with the keys "em" and "strong"; each holds false
 #          while closed, otherwise the value stored when the tag was opened
 # $token - token array; its "pos" entry, when set, is stored as that value
 #
 # Returns the HTML to emit for the token.
 /* private */ function handle2Quotes( &$state, $token )
 {
         if ( $state["em"] === false ) {
                 $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
                 return "<em>";
         }

         # ''lala'''lala'' ....''' : the <strong> was opened after the <em>, so it
         # is closed first and reopened once the <em> is closed
         $strongOpenedLater = ( $state["strong"] !== false && $state["strong"] > $state["em"] );
         $state["em"] = FALSE;
         if ( $strongOpenedLater ) {
                 return "</strong></em><strong>";
         }
         return "</em>";
 }
 635
 636         /* private */ function handle5Quotes( &$state, $token )
 637         {
 638                 $s = "";
 639                 if ( $state["em"] !== false && $state["strong"] !== false ) {
 640                         if ( $state["em"] < $state["strong"] ) {
 641                                 $s .= "</strong></em>";
 642                         } else {
 643                                 $s .= "</em></strong>";
 644                         }
 645                         $state["strong"] = $state["em"] = FALSE;
 646                 } elseif ( $state["em"] !== false ) {
 647                         $s .= "</em><strong>";
 648                         $state["em"] = FALSE;
 649                         $state["strong"] = $token["pos"];
 650                 } elseif ( $state["strong"] !== false ) {
 651                         $s .= "</strong><em>";
 652                         $state["strong"] = FALSE;
 653                         $state["em"] = $token["pos"];
 654                 } else { # not $em and not $strong
 655                         $s .= "<strong><em>";
 656                         $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
 657                 }
 658                 return $s;
 659         }
 660
 661         /* private */ function doTokenizedParser( $str )
 662         {
 663                 global $wgLang; # for language specific parser hook
 664                 global $wgUploadDirectory, $wgUseTimeline;
 665
 666                 $tokenizer=Tokenizer::newFromString( $str );
 667                 $tokenStack = array();
 668
 669                 $s="";
 670                 $state["em"]      = FALSE;
 671                 $state["strong"]  = FALSE;
 672                 $tagIsOpen = FALSE;
 673                 $threeopen = false;
 674
 675                 # The tokenizer splits the text into tokens and returns them one by one.
 676                 # Every call to the tokenizer returns a new token.
 677                 while ( $token = $tokenizer->nextToken() )
 678                 {
 679                         switch ( $token["type"] )
 680                         {
 681                                 case "text":
 682                                         # simple text with no further markup
 683                                         $txt = $token["text"];
 684                                         break;
 685                                 case "blank":
 686                                         # Text that contains blanks that have to be converted to
 687                                         # non-breakable spaces for French.
 688                                         # U+202F NARROW NO-BREAK SPACE might be a better choice, but
 689                                         # browser support for Unicode spacing is poor.
 690                                         $txt = str_replace( " ", "&nbsp;", $token["text"] );
 691                                         break;
 692                                 case "[[[":
 693                                         # remember the tag opened with 3 [
 694                                         $threeopen = true;
 695                                 case "[[":
 696                                         # link opening tag.
 697                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 698                                         $tagIsOpen = TRUE;
 699                                         array_push( $tokenStack, $token );
 700                                         $txt="";
 701                                         break;
 702
 703                                 case "]]]":
 704                                 case "]]":
 705                                         # link close tag.
 706                                         # get text from stack, glue it together, and call the code to handle a
 707                                         # link
 708
 709                                         if ( count( $tokenStack ) == 0 )
 710                                         {
 711                                                 # stack empty. Found a ]] without an opening [[
 712                                                 $txt = "]]";
 713                                         } else {
 714                                                 $linkText = "";
 715                                                 $lastToken = array_pop( $tokenStack );
 716                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 717                                                 {
 718                                                         if( !empty( $lastToken["text"] ) ) {
 719                                                                 $linkText = $lastToken["text"] . $linkText;
 720                                                         }
 721                                                         $lastToken = array_pop( $tokenStack );
 722                                                 }
 723
 724                                                 $txt = $linkText ."]]";
 725
 726                                                 if( isset( $lastToken["text"] ) ) {
 727                                                         $prefix = $lastToken["text"];
 728                                                 } else {
 729                                                         $prefix = "";
 730                                                 }
 731                                                 $nextToken = $tokenizer->previewToken();
 732                                                 if ( $nextToken["type"] == "text" )
 733                                                 {
 734                                                         # Preview just looks at it. Now we have to fetch it.
 735                                                         $nextToken = $tokenizer->nextToken();
 736                                                         $txt .= $nextToken["text"];
 737                                                 }
 738                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 739
 740                                                 # did the tag start with 3 [ ?
 741                                                 if($threeopen) {
 742                                                         # show the first as text
 743                                                         $txt = "[".$txt;
 744                                                         $threeopen=false;
 745                                                 }
 746
 747                                         }
 748                                         $tagIsOpen = (count( $tokenStack ) != 0);
 749                                         break;
 750                                 case "----":
 751                                         $txt = "\n<hr />\n";
 752                                         break;
 753                                 case "'''":
 754                                         # This and the three next ones handle quotes
 755                                         $txt = $this->handle3Quotes( $state, $token );
 756                                         break;
 757                                 case "''":
 758                                         $txt = $this->handle2Quotes( $state, $token );
 759                                         break;
 760                                 case "'''''":
 761                                         $txt = $this->handle5Quotes( $state, $token );
 762                                         break;
 763                                 case "":
 764                                         # empty token
 765                                         $txt="";
 766                                         break;
 767                                 case "RFC ":
 768                                         if ( $tagIsOpen ) {
 769                                                 $txt = "RFC ";
 770                                         } else {
 771                                                 $txt = $this->doMagicRFC( $tokenizer );
 772                                         }
 773                                         break;
 774                                 case "ISBN ":
 775                                         if ( $tagIsOpen ) {
 776                                                 $txt = "ISBN ";
 777                                         } else {
 778                                                 $txt = $this->doMagicISBN( $tokenizer );
 779                                         }
 780                                         break;
 781                                 case "<timeline>":
 782                                         if ( $wgUseTimeline &&
 783                                              "" != ( $timelinesrc = $tokenizer->readAllUntil("&lt;/timeline&gt;") ) )
 784                                         {
 785                                                 $txt = renderTimeline( $timelinesrc );
 786                                         } else {
 787                                                 $txt=$token["text"];
 788                                         }
 789                                         break;
 790                                 default:
 791                                         # Call language specific Hook.
 792                                         $txt = $wgLang->processToken( $token, $tokenStack );
 793                                         if ( NULL == $txt ) {
 794                                                 # An unkown token. Highlight.
 795                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 796                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 797                                         }
 798                                         break;
 799                         }
 800                         # If we're parsing the interior of a link, don't append the interior to $s,
 801                         # but push it to the stack so it can be processed when a ]] token is found.
 802                         if ( $tagIsOpen  && $txt != "" ) {
 803                                 $token["type"] = "text";
 804                                 $token["text"] = $txt;
 805                                 array_push( $tokenStack, $token );
 806                         } else {
 807                                 $s .= $txt;
 808                         }
 809                 } #end while
 810                 if ( count( $tokenStack ) != 0 )
 811                 {
 812                         # still objects on stack. opened [[ tag without closing ]] tag.
 813                         $txt = "";
 814                         while ( $lastToken = array_pop( $tokenStack ) )
 815                         {
 816                                 if ( $lastToken["type"] == "text" )
 817                                 {
 818                                         $txt = $lastToken["text"] . $txt;
 819                                 } else {
 820                                         $txt = $lastToken["type"] . $txt;
 821                                 }
 822                         }
 823                         $s .= $txt;
 824                 }
 825                 return $s;
 826         }
 827
 828         /* private */ function handleInternalLink( $line, $prefix )
 829         {
 830                 global $wgLang, $wgLinkCache;
 831                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 832                 static $fname = "Parser::handleInternalLink" ;
 833                 wfProfileIn( $fname );
 834
 835                 wfProfileIn( "$fname-setup" );
 836                 static $tc = FALSE;
 837                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 838                 $sk =& $this->mOptions->getSkin();
 839
 840                 # Match a link having the form [[namespace:link|alternate]]trail
 841                 static $e1 = FALSE;
 842                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 843                 # Match the end of a line for a word that's not followed by whitespace,
 844                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 845                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 846                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 847                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 848
 849
 850                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 851                 static $image = FALSE;
 852                 static $special = FALSE;
 853                 static $media = FALSE;
 854                 static $category = FALSE;
 855                 if ( !$image ) { $image = Namespace::getImage(); }
 856                 if ( !$special ) { $special = Namespace::getSpecial(); }
 857                 if ( !$media ) { $media = Namespace::getMedia(); }
 858                 if ( !$category ) { $category = Namespace::getCategory(); ; }
 859
 860                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 861
 862                 wfProfileOut( "$fname-setup" );
 863                 $s = "";
 864
 865                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 866                         $text = $m[2];
 867                         $trail = $m[3];
 868                 } else { # Invalid form; output directly
 869                         $s .= $prefix . "[[" . $line ;
 870                         return $s;
 871                 }
 872
 873                 /* Valid link forms:
 874                 Foobar -- normal
 875                 :Foobar -- override special treatment of prefix (images, language links)
 876                 /Foobar -- convert to CurrentPage/Foobar
 877                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 878                 */
 879                 $c = substr($m[1],0,1);
 880                 $noforce = ($c != ":");
 881                 if( $c == "/" ) { # subpage
 882                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 883                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 884                                 $noslash=$m[1];
 885                         } else {
 886                                 $noslash=substr($m[1],1);
 887                         }
 888                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 889                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 890                                 if( "" == $text ) {
 891                                         $text= $m[1];
 892                                 } # this might be changed for ugliness reasons
 893                         } else {
 894                                 $link = $noslash; # no subpage allowed, use standard link
 895                         }
 896                 } elseif( $noforce ) { # no subpage
 897                         $link = $m[1];
 898                 } else {
 899                         $link = substr( $m[1], 1 );
 900                 }
 901                 if( "" == $text )
 902                         $text = $link;
 903
 904                 $nt = Title::newFromText( $link );
 905                 if( !$nt ) {
 906                         $s .= $prefix . "[[" . $line;
 907                         return $s;
 908                 }
 909                 $ns = $nt->getNamespace();
 910                 $iw = $nt->getInterWiki();
 911                 if( $noforce ) {
 912                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 913                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 914                                 return (trim($s) == '')? '': $s;
 915                         }
 916                         if( $ns == $image ) {
 917                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 918                                 $wgLinkCache->addImageLinkObj( $nt );
 919                                 return $s;
 920                         }
 921                         if ( $ns == $category ) {
 922                                 $t = $nt->getText() ;
 923                                 $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 924                                 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 925                                 $this->mOutput->mCategoryLinks[] = $t ;
 926                                 $s .= $prefix . $trail ;
 927                                 return $s ;
 928                         }
 929                 }
 930                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 931                     ( strpos( $link, "#" ) == FALSE ) ) {
 932                         # Self-links are handled specially; generally de-link and change to bold.
 933                         $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 934                         return $s;
 935                 }
 936
 937                 if( $ns == $media ) {
 938                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 939                         $wgLinkCache->addImageLinkObj( $nt );
 940                         return $s;
 941                 } elseif( $ns == $special ) {
 942                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 943                         return $s;
 944                 }
 945                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 946
 947                 wfProfileOut( $fname );
 948                 return $s;
 949         }
 950
 951         # Some functions here used by doBlockLevels()
 952         #
 953         /* private */ function closeParagraph()
 954         {
 955                 $result = "";
 956                 if ( '' != $this->mLastSection ) {
 957                         $result = "</" . $this->mLastSection  . ">\n";
 958                 }
 959                 $this->mInPre = false;
 960                 $this->mLastSection = "";
 961                 return $result;
 962         }
 963         # getCommon() returns the length of the longest common substring
 964         # of both arguments, starting at the beginning of both.
 965         #
 966         /* private */ function getCommon( $st1, $st2 )
 967         {
 968                 $fl = strlen( $st1 );
 969                 $shorter = strlen( $st2 );
 970                 if ( $fl < $shorter ) { $shorter = $fl; }
 971
 972                 for ( $i = 0; $i < $shorter; ++$i ) {
 973                         if ( $st1{$i} != $st2{$i} ) { break; }
 974                 }
 975                 return $i;
 976         }
 977         # These next three functions open, continue, and close the list
 978         # element appropriate to the prefix character passed into them.
 979         #
 980         /* private */ function openList( $char )
 981     {
 982                 $result = $this->closeParagraph();
 983
 984                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 985                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 986                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 987                 else if ( ";" == $char ) {
 988                         $result .= "<dl><dt>";
 989                         $this->mDTopen = true;
 990                 }
 991                 else { $result = "<!-- ERR 1 -->"; }
 992
 993                 return $result;
 994         }
 995
 996         /* private */ function nextItem( $char )
 997         {
 998                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 999                 else if ( ":" == $char || ";" == $char ) {
1000                         $close = "</dd>";
1001                         if ( $this->mDTopen ) { $close = "</dt>"; }
1002                         if ( ";" == $char ) {
1003                                 $this->mDTopen = true;
1004                                 return $close . "<dt>";
1005                         } else {
1006                                 $this->mDTopen = false;
1007                                 return $close . "<dd>";
1008                         }
1009                 }
1010                 return "<!-- ERR 2 -->";
1011         }
1012
1013         /* private */function closeList( $char )
1014         {
1015                 if ( "*" == $char ) { $text = "</li></ul>"; }
1016                 else if ( "#" == $char ) { $text = "</li></ol>"; }
1017                 else if ( ":" == $char ) {
1018                         if ( $this->mDTopen ) {
1019                                 $this->mDTopen = false;
1020                                 $text = "</dt></dl>";
1021                         } else {
1022                                 $text = "</dd></dl>";
1023                         }
1024                 }
1025                 else {  return "<!-- ERR 3 -->"; }
1026                 return $text."\n";
1027         }
1028
1029         /* private */ function doBlockLevels( $text, $linestart )
1030         {
1031                 $fname = "Parser::doBlockLevels";
1032                 wfProfileIn( $fname );
1033                 # Parsing through the text line by line.  The main thing
1034                 # happening here is handling of block-level elements p, pre,
1035                 # and making lists from lines starting with * # : etc.
1036                 #
1037                 $a = explode( "\n", $text );
1038
1039                 $lastPref = $text = $lastLine = '';
1040                 $this->mDTopen = $inBlockElem = false;
1041                 $npl = 0;
1042                 $pstack = false;
1043
1044                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1045                 foreach ( $a as $t ) {
1046                         $oLine = $t;
1047                         $opl = strlen( $lastPref );
1048                         $preCloseMatch = preg_match("/<\\/pre/i", $t );
1049                         $preOpenMatch = preg_match("/<pre/i", $t );
1050                         if (!$this->mInPre) {
1051                                 $this->mInPre = !empty($preOpenMatch);
1052                         }
1053                         if ( !$this->mInPre ) {
1054                                 $npl = strspn( $t, "*#:;" );
1055                                 $pref = substr( $t, 0, $npl );
1056                                 $pref2 = str_replace( ";", ":", $pref );
1057                                 $t = substr( $t, $npl );
1058                         } else {
1059                                 $npl = 0;
1060                                 $pref = $pref2 = '';
1061                         }
1062
1063                         // list generation
1064                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1065                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1066                                 if ( $pstack ) { $pstack = false; }
1067
1068                                 if ( ";" == substr( $pref, -1 ) ) {
1069                                         $cpos = strpos( $t, ":" );
1070                                         if ( false !== $cpos ) {
1071                                                 $term = substr( $t, 0, $cpos );
1072                                                 $text .= $term . $this->nextItem( ":" );
1073                                                 $t = substr( $t, $cpos + 1 );
1074                                         }
1075                                 }
1076                         } else if (0 != $npl || 0 != $opl) {
1077                                 $cpl = $this->getCommon( $pref, $lastPref );
1078                                 if ( $pstack ) { $pstack = false; }
1079
1080                                 while ( $cpl < $opl ) {
1081                                         $text .= $this->closeList( $lastPref{$opl-1} );
1082                                         --$opl;
1083                                 }
1084                                 if ( $npl <= $cpl && $cpl > 0 ) {
1085                                         $text .= $this->nextItem( $pref{$cpl-1} );
1086                                 }
1087                                 while ( $npl > $cpl ) {
1088                                         $char = substr( $pref, $cpl, 1 );
1089                                         $text .= $this->openList( $char );
1090
1091                                         if ( ";" == $char ) {
1092                                                 $cpos = strpos( $t, ":" );
1093                                                 if ( ! ( false === $cpos ) ) {
1094                                                         $term = substr( $t, 0, $cpos );
1095                                                         $text .= $term . $this->nextItem( ":" );
1096                                                         $t = substr( $t, $cpos + 1 );
1097                                                 }
1098                                         }
1099                                         ++$cpl;
1100                                 }
1101                                 $lastPref = $pref2;
1102                         }
1103                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1104                                 $uniq_prefix = UNIQ_PREFIX;
1105                                 // XXX: use a stack for nestable elements like span, table and div
1106                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1107                                 $closematch = preg_match(
1108                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1109                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1110                                 if ( $openmatch or $closematch ) {
1111                                         if ( $pstack ) { $pstack = false; }
1112                                         $text .= $this->closeParagraph();
1113                                         if($preOpenMatch and !$preCloseMatch) {
1114                                                 $this->mInPre = true;
1115                                         }
1116                                         if ( $closematch  ) {
1117                                                 $inBlockElem = false;
1118                                         } else {
1119                                                 $inBlockElem = true;
1120                                         }
1121                                 } else if ( !$inBlockElem ) {
1122                                         if ( " " == $t{0} ) {
1123                                                 // pre
1124                                                 if ($this->mLastSection != 'pre') {
1125                                                         $pstack = false;
1126                                                         $text .= $this->closeParagraph().'<pre>';
1127                                                         $this->mLastSection = 'pre';
1128                                                 }
1129                                         } else {
1130                                                 // paragraph
1131                                                 if ( '' == trim($t) ) {
1132                                                         if ( $pstack ) {
1133                                                                 $text .= $pstack.'<br/>';
1134                                                                 $pstack = false;
1135                                                                 $this->mLastSection = 'p';
1136                                                         } else {
1137                                                                 if ($this->mLastSection != 'p' ) {
1138                                                                         $text .= $this->closeParagraph();
1139                                                                         $this->mLastSection = '';
1140                                                                         $pstack = "<p>";
1141                                                                 } else {
1142                                                                         $pstack = '</p><p>';
1143                                                                 }
1144                                                         }
1145                                                 } else {
1146                                                         if ( $pstack ) {
1147                                                                 $text .= $pstack;
1148                                                                 $pstack = false;
1149                                                                 $this->mLastSection = 'p';
1150                                                         } else if ($this->mLastSection != 'p') {
1151                                                                 $text .= $this->closeParagraph().'<p>';
1152                                                                 $this->mLastSection = 'p';
1153                                                         }
1154                                                 }
1155                                         }
1156                                 }
1157                         }
1158                         if ($pstack === false) {
1159                                 $text .= $t."\n";
1160                         }
1161                 }
1162                 while ( $npl ) {
1163                         $text .= $this->closeList( $pref2{$npl-1} );
1164                         --$npl;
1165                 }
1166                 if ( "" != $this->mLastSection ) {
1167                         $text .= "</" . $this->mLastSection . ">";
1168                         $this->mLastSection = "";
1169                 }
1170
1171                 wfProfileOut( $fname );
1172                 return $text;
1173         }
1174
1175         function getVariableValue( $index ) {
1176                 global $wgLang, $wgSitename, $wgServer;
1177
1178                 switch ( $index ) {
1179                         case MAG_CURRENTMONTH:
1180                                 return date( "m" );
1181                         case MAG_CURRENTMONTHNAME:
1182                                 return $wgLang->getMonthName( date("n") );
1183                         case MAG_CURRENTMONTHNAMEGEN:
1184                                 return $wgLang->getMonthNameGen( date("n") );
1185                         case MAG_CURRENTDAY:
1186                                 return date("j");
1187                         case MAG_PAGENAME:
1188                                 return $this->mTitle->getText();
1189                         case MAG_NAMESPACE:
1190                                 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1191                                 return $wgLang->getNsText($this->mTitle->getNamespace()); // Patch  by Dori
1192                         case MAG_CURRENTDAYNAME:
1193                                 return $wgLang->getWeekdayName( date("w")+1 );
1194                         case MAG_CURRENTYEAR:
1195                                 return date( "Y" );
1196                         case MAG_CURRENTTIME:
1197                                 return $wgLang->time( wfTimestampNow(), false );
1198                         case MAG_NUMBEROFARTICLES:
1199                                 return wfNumberOfArticles();
1200                         case MAG_SITENAME:
1201                                 return $wgSitename;
1202                         case MAG_SERVER:
1203                                 return $wgServer;
1204                         default:
1205                                 return NULL;
1206                 }
1207         }
1208
1209         function initialiseVariables()
1210         {
1211                 global $wgVariableIDs;
1212                 $this->mVariables = array();
1213                 foreach ( $wgVariableIDs as $id ) {
1214                         $mw =& MagicWord::get( $id );
1215                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1216                 }
1217         }
1218
1219         /* private */ function replaceVariables( $text, $args = array() )
1220         {
1221                 global $wgLang, $wgScript, $wgArticlePath;
1222
1223                 $fname = "Parser::replaceVariables";
1224                 wfProfileIn( $fname );
1225
1226                 $bail = false;
1227                 if ( !$this->mVariables ) {
1228                         $this->initialiseVariables();
1229                 }
1230                 $titleChars = Title::legalChars();
1231                 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1232
1233                 # This function is called recursively. To keep track of arguments we need a stack:
1234                 array_push( $this->mArgStack, $args );
1235
1236                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1237                 $GLOBALS['wgCurParser'] =& $this;
1238                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1239
1240                 array_pop( $this->mArgStack );
1241
1242                 return $text;
1243         }
1244
1245         function braceSubstitution( $matches )
1246         {
1247                 global $wgLinkCache, $wgLang;
1248                 $fname = "Parser::braceSubstitution";
1249                 $found = false;
1250                 $nowiki = false;
1251                 $title = NULL;
1252
1253                 # $newline is an optional newline character before the braces
1254                 # $part1 is the bit before the first |, and must contain only title characters
1255                 # $args is a list of arguments, starting from index 0, not including $part1
1256
1257                 $newline = $matches[1];
1258                 $part1 = $matches[2];
1259                 # If the third subpattern matched anything, it will start with |
1260                 if ( $matches[3] !== "" ) {
1261                         $args = explode( "|", substr( $matches[3], 1 ) );
1262                 } else {
1263                         $args = array();
1264                 }
1265                 $argc = count( $args );
1266
1267                 # SUBST
1268                 $mwSubst =& MagicWord::get( MAG_SUBST );
1269                 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1270                         if ( $this->mOutputType != OT_WIKI ) {
1271                                 # Invalid SUBST not replaced at PST time
1272                                 # Return without further processing
1273                                 $text = $matches[0];
1274                                 $found = true;
1275                         }
1276                 } elseif ( $this->mOutputType == OT_WIKI ) {
1277                         # SUBST not found in PST pass, do nothing
1278                         $text = $matches[0];
1279                         $found = true;
1280                 }
1281
1282                 # MSG, MSGNW and INT
1283                 if ( !$found ) {
1284                         # Check for MSGNW:
1285                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1286                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1287                                 $nowiki = true;
1288                         } else {
1289                                 # Remove obsolete MSG:
1290                                 $mwMsg =& MagicWord::get( MAG_MSG );
1291                                 $mwMsg->matchStartAndRemove( $part1 );
1292                         }
1293
1294                         # Check if it is an internal message
1295                         $mwInt =& MagicWord::get( MAG_INT );
1296                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1297                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1298                                         $text = wfMsgReal( $part1, $args, true );
1299                                         $found = true;
1300                                 }
1301                         }
1302                 }
1303
1304                 # NS
1305                 if ( !$found ) {
1306                         # Check for NS: (namespace expansion)
1307                         $mwNs = MagicWord::get( MAG_NS );
1308                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1309                                 if ( intval( $part1 ) ) {
1310                                         $text = $wgLang->getNsText( intval( $part1 ) );
1311                                         $found = true;
1312                                 } else {
1313                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1314                                         if ( !is_null( $index ) ) {
1315                                                 $text = $wgLang->getNsText( $index );
1316                                                 $found = true;
1317                                         }
1318                                 }
1319                         }
1320                 }
1321
1322                 # LOCALURL and LOCALURLE
1323                 if ( !$found ) {
1324                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1325                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1326
1327                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1328                                 $func = 'getLocalURL';
1329                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1330                                 $func = 'escapeLocalURL';
1331                         } else {
1332                                 $func = '';
1333                         }
1334
1335                         if ( $func !== '' ) {
1336                                 $title = Title::newFromText( $part1 );
1337                                 if ( !is_null( $title ) ) {
1338                                         if ( $argc > 0 ) {
1339                                                 $text = $title->$func( $args[0] );
1340                                         } else {
1341                                                 $text = $title->$func();
1342                                         }
1343                                         $found = true;
1344                                 }
1345                         }
1346                 }
1347
1348                 # Internal variables
1349                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1350                         $text = $this->mVariables[$part1];
1351                         $found = true;
1352                         $this->mOutput->mContainsOldMagic = true;
1353                 }
1354
1355                 # Arguments input from the caller
1356                 $inputArgs = end( $this->mArgStack );
1357                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1358                         $text = $inputArgs[$part1];
1359                         $found = true;
1360                 }
1361
1362                 # Load from database
1363                 if ( !$found ) {
1364                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1365                         if ( !is_null( $title ) && !$title->isExternal() ) {
1366                                 # Check for excessive inclusion
1367                                 $dbk = $title->getPrefixedDBkey();
1368                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1369                                         $article = new Article( $title );
1370                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1371                                         if ( $articleContent !== false ) {
1372                                                 $found = true;
1373                                                 $text = $articleContent;
1374
1375                                         }
1376                                 }
1377
1378                                 # If the title is valid but undisplayable, make a link to it
1379                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1380                                         $text = "[[" . $title->getPrefixedText() . "]]";
1381                                         $found = true;
1382                                 }
1383                         }
1384                 }
1385
1386                 # Recursive parsing, escaping and link table handling
1387                 # Only for HTML output
1388                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1389                         $text = wfEscapeWikiText( $text );
1390                 } elseif ( $this->mOutputType == OT_HTML && $found ) {
1391                         # Clean up argument array
1392                         $assocArgs = array();
1393                         $index = 1;
1394                         foreach( $args as $arg ) {
1395                                 $eqpos = strpos( $arg, "=" );
1396                                 if ( $eqpos === false ) {
1397                                         $assocArgs[$index++] = $arg;
1398                                 } else {
1399                                         $name = trim( substr( $arg, 0, $eqpos ) );
1400                                         $value = trim( substr( $arg, $eqpos+1 ) );
1401                                         if ( $value === false ) {
1402                                                 $value = "";
1403                                         }
1404                                         if ( $name !== false ) {
1405                                                 $assocArgs[$name] = $value;
1406                                         }
1407                                 }
1408                         }
1409
1410                         # Do not enter included links in link table
1411                         if ( !is_null( $title ) ) {
1412                                 $wgLinkCache->suspend();
1413                         }
1414
1415                         # Run full parser on the included text
1416                         $text = $this->strip( $text, $this->mStripState );
1417                         $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
1418
1419                         # Add the result to the strip state for re-inclusion after
1420                         # the rest of the processing
1421                         $text = $this->insertStripItem( $text, $this->mStripState );
1422
1423                         # Resume the link cache and register the inclusion as a link
1424                         if ( !is_null( $title ) ) {
1425                                 $wgLinkCache->resume();
1426                                 $wgLinkCache->addLinkObj( $title );
1427                         }
1428                 }
1429
1430                 if ( !$found ) {
1431                         return $matches[0];
1432                 } else {
1433                         return $newline . $text;
1434                 }
1435         }
1436
1437         # Returns true if the function is allowed to include this entity
1438         function incrementIncludeCount( $dbk )
1439         {
1440                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1441                         $this->mIncludeCount[$dbk] = 0;
1442                 }
1443                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1444                         return true;
1445                 } else {
1446                         return false;
1447                 }
1448         }
1449
1450
1451         # Cleans up HTML, removes dangerous tags and attributes
1452         /* private */ function removeHTMLtags( $text )
1453         {
1454                 $fname = "Parser::removeHTMLtags";
1455                 wfProfileIn( $fname );
1456                 $htmlpairs = array( # Tags that must be closed
1457                         "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1458                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1459                         "strike", "strong", "tt", "var", "div", "center",
1460                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1461                         "ruby", "rt" , "rb" , "rp", "p"
1462                 );
1463                 $htmlsingle = array(
1464                         "br", "hr", "li", "dt", "dd"
1465                 );
1466                 $htmlnest = array( # Tags that can be nested--??
1467                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1468                         "dl", "font", "big", "small", "sub", "sup"
1469                 );
1470                 $tabletags = array( # Can only appear inside table
1471                         "td", "th", "tr"
1472                 );
1473
1474                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1475                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1476
1477                 $htmlattrs = $this->getHTMLattrs () ;
1478
1479                 # Remove HTML comments
1480                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1481
1482                 $bits = explode( "<", $text );
1483                 $text = array_shift( $bits );
1484                 $tagstack = array(); $tablestack = array();
1485
1486                 foreach ( $bits as $x ) {
1487                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1488                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1489                           $x, $regs );
1490                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1491                         error_reporting( $prev );
1492
1493                         $badtag = 0 ;
1494                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1495                                 # Check our stack
1496                                 if ( $slash ) {
1497                                         # Closing a tag...
1498                                         if ( ! in_array( $t, $htmlsingle ) &&
1499                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1500                                                 array_push( $tagstack, $ot );
1501                                                 $badtag = 1;
1502                                         } else {
1503                                                 if ( $t == "table" ) {
1504                                                         $tagstack = array_pop( $tablestack );
1505                                                 }
1506                                                 $newparams = "";
1507                                         }
1508                                 } else {
1509                                         # Keep track for later
1510                                         if ( in_array( $t, $tabletags ) &&
1511                                           ! in_array( "table", $tagstack ) ) {
1512                                                 $badtag = 1;
1513                                         } else if ( in_array( $t, $tagstack ) &&
1514                                           ! in_array ( $t , $htmlnest ) ) {
1515                                                 $badtag = 1 ;
1516                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1517                                                 if ( $t == "table" ) {
1518                                                         array_push( $tablestack, $tagstack );
1519                                                         $tagstack = array();
1520                                                 }
1521                                                 array_push( $tagstack, $t );
1522                                         }
1523                                         # Strip non-approved attributes from the tag
1524                                         $newparams = $this->fixTagAttributes($params);
1525
1526                                 }
1527                                 if ( ! $badtag ) {
1528                                         $rest = str_replace( ">", "&gt;", $rest );
1529                                         $text .= "<$slash$t $newparams$brace$rest";
1530                                         continue;
1531                                 }
1532                         }
1533                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1534                 }
1535                 # Close off any remaining tags
1536                 while ( $t = array_pop( $tagstack ) ) {
1537                         $text .= "</$t>\n";
1538                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1539                 }
1540                 wfProfileOut( $fname );
1541                 return $text;
1542         }
1543
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text is the HTML to process. The return value is that HTML with every
 * <h1>..<h6> element replaced by its rebuilt version and, when the table of
 * contents is shown, the TOC inserted after the text preceding the first heading.
 *
 */

        /* private */ function formatHeadings( $text )
        {
                $doNumberHeadings = $this->mOptions->getNumberHeadings();
                $doShowToc = $this->mOptions->getShowToc();
                if( !$this->mTitle->userCanEdit() ) {
                        $showEditLink = 0;
                        $rightClickHack = 0;
                } else {
                        $showEditLink = $this->mOptions->getEditSection();
                        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
                }

                # Inhibit editsection links if requested in the page
                $esw =& MagicWord::get( MAG_NOEDITSECTION );
                if( $esw->matchAndRemove( $text ) ) {
                        $showEditLink = 0;
                }
                # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
                # do not add TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if( $mw->matchAndRemove( $text ) ) {
                        $doShowToc = 0;
                }

                # never add the TOC to the Main Page. This is an entry page that should not
                # be more than 1-2 screens large anyway
                if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
                        $doShowToc = 0;
                }

                # Get all headlines for numbering them and adding funky stuff like [edit]
                # links - this is for later, but we need the number of headlines right now
                # $matches[1] = level digit, $matches[2] = rest of the opening tag
                # including ">", $matches[3] = headline content
                $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

                # if there are fewer than 4 headlines in the article, do not show TOC
                if( $numMatches < 4 ) {
                        $doShowToc = 0;
                }

                # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
                # override above conditions and always show TOC
                $mw =& MagicWord::get( MAG_FORCETOC );
                if ($mw->matchAndRemove( $text ) ) {
                        $doShowToc = 1;
                }


                # We need this to perform operations on the HTML
                $sk =& $this->mOptions->getSkin();

                # headline counter
                $headlineCount = 0;

                # Ugh .. the TOC should have neat indentation levels which can be
                # passed to the skin functions. These are determined here
                $toclevel = 0;
                $toc = "";
                $full = "";
                $head = array();          # rebuilt HTML per headline index
                $sublevelCount = array(); # headlines seen so far, per heading level
                $refer = array();         # canonized headline per headline index
                $refers = array();        # occurrences per canonized headline
                $refcount = array();      # occurrence number per headline index
                $level = 0;
                $prevlevel = 0;
                foreach( $matches[3] as $headline ) {
                        $numbering = "";
                        if( $level ) {
                                $prevlevel = $level;
                        }
                        $level = $matches[1][$headlineCount];
                        if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
                                # reset when we enter a new level
                                $sublevelCount[$level] = 0;
                                $toc .= $sk->tocIndent( $level - $prevlevel );
                                $toclevel += $level - $prevlevel;
                        }
                        if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
                                # reset when we step back a level
                                $sublevelCount[$level+1]=0;
                                $toc .= $sk->tocUnindent( $prevlevel - $level );
                                $toclevel -= $prevlevel - $level;
                        }
                        # count number of headlines for each level
                        if( !isset( $sublevelCount[$level] ) ) {
                                $sublevelCount[$level] = 0;
                        }
                        $sublevelCount[$level]++;
                        if( $doNumberHeadings || $doShowToc ) {
                                # Build "1.2.3" from the counters of all levels up to this one,
                                # skipping levels that have no headline yet
                                $dot = 0;
                                for( $i = 1; $i <= $level; $i++ ) {
                                        if( !empty( $sublevelCount[$i] ) ) {
                                                if( $dot ) {
                                                        $numbering .= ".";
                                                }
                                                $numbering .= $sublevelCount[$i];
                                                $dot = 1;
                                        }
                                }
                        }

                        # The canonized header is a version of the header text safe to use for links
                        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

                        # strip out HTML
                        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                        $tocline = trim( $canonized_headline );
                        # NOTE(review): a double quote is not among the replaced characters, yet the
                        # result is placed inside name="..." further down - confirm this is acceptable.
                        $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
                        $refer[$headlineCount] = $canonized_headline;

                        # count how many in assoc. array so we can track dupes in anchors
                        if( !isset( $refers[$canonized_headline] ) ) {
                                $refers[$canonized_headline] = 0;
                        }
                        $refers[$canonized_headline]++;
                        $refcount[$headlineCount]=$refers[$canonized_headline];

                        # Prepend the number to the heading text

                        if( $doNumberHeadings || $doShowToc ) {
                                $tocline = $numbering . " " . $tocline;

                                # Don't number the heading if it is the only one (looks silly)
                                if( $doNumberHeadings && count( $matches[3] ) > 1) {
                                        # the two are different if the line contains a link
                                        $headline=$numbering . " " . $headline;
                                }
                        }

                        # Create the anchor for linking from the TOC to the section
                        # (second and later duplicates get "_<n>" appended)
                        $anchor = $canonized_headline;
                        if($refcount[$headlineCount] > 1 ) {
                                $anchor .= "_" . $refcount[$headlineCount];
                        }
                        if( $doShowToc ) {
                                $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                        }

                        # Assemble the replacement for this headline: optional edit link first
                        $headHtml = "";
                        if( $showEditLink ) {
                                $headHtml .= $sk->editSectionLink($headlineCount+1);
                        }

                        # Add the edit section span
                        if( $rightClickHack ) {
                                $headline = $sk->editSectionScript($headlineCount+1,$headline);
                        }

                        # give headline the correct <h#> tag
                        $headHtml .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
                        $head[$headlineCount] = $headHtml;

                        $headlineCount++;
                }

                if( $doShowToc ) {
                        # Close the indentation levels still open, then wrap the lines
                        $toc .= $sk->tocUnindent( $toclevel );
                        $toc = $sk->tocTable( $toc );
                }

                # split up and insert constructed headlines

                $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
                $i = 0;

                foreach( $blocks as $block ) {
                        # An [edit] link for the top block of text used to be added here when
                        # section editing is enabled. Disabled because it broke block formatting,
                        # for example a bullet point in the top line:
                        # $full .= $sk->editSectionLink(0);
                        $full .= $block;
                        if( $doShowToc && !$i) {
                        # Top anchor now in skin
                                $full = $full.$toc;
                        }

                        # The i-th rebuilt headline follows the i-th block
                        if( !empty( $head[$i] ) ) {
                                $full .= $head[$i];
                        }
                        $i++;
                }

                return $full;
        }
1738
1739         /* private */ function doMagicISBN( &$tokenizer )
1740         {
1741                 global $wgLang;
1742
1743                 # Check whether next token is a text token
1744                 # If yes, fetch it and convert the text into a
1745                 # Special::BookSources link
1746                 $token = $tokenizer->previewToken();
1747                 while ( $token["type"] == "" )
1748                 {
1749                         $tokenizer->nextToken();
1750                         $token = $tokenizer->previewToken();
1751                 }
1752                 if ( $token["type"] == "text" )
1753                 {
1754                         $token = $tokenizer->nextToken();
1755                         $x = $token["text"];
1756                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1757
1758                         $isbn = $blank = "" ;
1759                         while ( " " == $x{0} ) {
1760                                 $blank .= " ";
1761                                 $x = substr( $x, 1 );
1762                         }
1763                         while ( strstr( $valid, $x{0} ) != false ) {
1764                                 $isbn .= $x{0};
1765                                 $x = substr( $x, 1 );
1766                         }
1767                         $num = str_replace( "-", "", $isbn );
1768                         $num = str_replace( " ", "", $num );
1769
1770                         if ( "" == $num ) {
1771                                 $text = "ISBN $blank$x";
1772                         } else {
1773                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1774                                 $text = "<a href=\"" .
1775                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1776                                         "\" class=\"internal\">ISBN $isbn</a>";
1777                                 $text .= $x;
1778                         }
1779                 } else {
1780                         $text = "ISBN ";
1781                 }
1782                 return $text;
1783         }
# Turn the text that follows an "RFC" magic word into a link to that RFC.
#
# Tokens whose type is the empty string are skipped first. If the next token
# is a text token it is consumed and its text is split into leading spaces,
# a run of decimal digits (the RFC number) and the remainder.
#
# Parameters:
#   $tokenizer - tokenizer object providing previewToken() and nextToken();
#                it is advanced past every token consumed here.
# Returns:
#   "RFC "                       when the next token is not a text token;
#   "RFC " . <token text>        when the text does not start with a number;
#   <a href='URL'...>RFC n</a>   followed by the rest of the token text
#                                otherwise, where URL is the "rfcurl" message
#                                with "$1" replaced by the number.
/* private */ function doMagicRFC( &$tokenizer )
{
        # Skip tokens with an empty type
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" ) {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }

        # Only a text token can carry the RFC number; leave anything else
        # unconsumed for the caller.
        if ( $token["type"] != "text" ) {
                return "RFC ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];

        # strspn() measures the leading runs without indexing into the string,
        # so an empty or all-blank token is handled without reading past its
        # end. The (string) casts cover substr() returning false at the end of
        # the string on older PHP versions.
        $numBlanks = strspn( $x, " " );
        $blank = str_repeat( " ", $numBlanks );
        $x = (string)substr( $x, $numBlanks );

        $numDigits = strspn( $x, "0123456789" );
        $rfc = (string)substr( $x, 0, $numDigits );
        $x = (string)substr( $x, $numDigits );

        if ( 0 == $numDigits ) {
                # No number follows: give the text back unchanged. This is a
                # plain value, not an append; there is no earlier $text.
                return "RFC $blank$x";
        }

        $url = wfmsg( "rfcurl" );
        $url = str_replace( "$1", $rfc, $url);
        $sk =& $this->mOptions->getSkin();
        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
        return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1827
# Transform wiki markup before it is saved; the result is wiki markup again
# (output type OT_WIKI), not HTML.
#
# Parameters:
#   $text       - wikitext to transform
#   $title      - title object of the page; stored by reference in mTitle
#   $user       - user object passed on to pstPass2()
#   $options    - parser options; stored in mOptions
#   $clearState - when true (the default) clearState() is called first
# Returns:
#   the transformed wikitext
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOptions = $options;
        $this->mTitle =& $title;
        $this->mOutputType = OT_WIKI;

        if ( $clearState ) {
                $this->clearState();
        }

        $stripState = false;

        # Plain string replacements
        $pairs = array(
                "\r\n" => "\n",
                );
        $text = str_replace(array_keys($pairs), array_values($pairs), $text);

        # Now with regexes: normalise <br> variants.
        # The attribute part is matched with [^>\n]+ rather than .+ so that a
        # match can never run from one tag into a later one on the same line.
        # With the greedy dot, "<br>text<br clear=all>" was replaced as a
        # whole and the text in between was lost.
        $pairs = array(
                "/<br[^>\\n]+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
                "/<br *?>/i" => "<br/>",
        );
        $text = preg_replace(array_keys($pairs), array_values($pairs), $text);

        # Stripped sections are set aside for pass 2 and put back afterwards
        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $text;
}
1854
# Second pass of the pre-save transform: substitution, signatures, context
# links and trailing-whitespace handling.
#
# Parameters:
#   $text - wikitext as prepared by preSaveTransform()
#   $user - user object; its getName() and getOption( "nickname" ) supply
#           the signature
# Returns:
#   the transformed wikitext
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }

        /* Note: this is an ugly timezone hack for the European wikis */
        $useLocalTz = isset( $wgLocaltimezone );
        if ( $useLocalTz ) {
                $oldtz = getenv("TZ");
                putenv("TZ=$wgLocaltimezone");
        }
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if ( $useLocalTz ) {
                putenv("TZ=$oldtz");
        }

        # The tilde markers are literal strings, so str_replace() is used.
        # With preg_replace() a user name or nickname containing "$1", "\1"
        # and the like would be read as a backreference and the signature
        # would come out mangled. Longest marker first, so that five tildes
        # are not consumed as four plus one.
        $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~~", $d, $text );
        $text = str_replace( "~~~~", "$userLink $d", $text );
        $text = str_replace( "~~~", $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # The context is the parenthesised suffix of the current page title,
        # if it has one.
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }

        # Most specific pattern first: $p4 must run before $p3, which would
        # otherwise also match the [[ns:page (cont)|]] form.
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1925
# Prime the members that are usually set up in parse(), so that an external
# function can call some class members with confidence.
#
# Parameters:
#   $title      - title object; stored by reference in mTitle
#   $options    - parser options; stored in mOptions
#   $outputType - value for mOutputType
#   $clearState - when true (the default) clearState() is called afterwards
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
1937
# Run variable replacement over a message text with output type OT_MSG.
#
# The static flag guards against infinite recursion: if this function is
# entered again while a call is still in progress, the text comes back
# untouched.
#
# Parameters:
#   $text    - message text
#   $options - parser options; stored in mOptions
# Returns:
#   the text after replaceVariables(), or the unchanged text on re-entry
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $executing = false;

        if ( !$executing ) {
                $executing = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $executing = false;
        }

        return $text;
}
1957 }
1958
# Holds the result of a parse: the output text, the language links, the
# category links and a flag saying whether old magic was encountered.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor; every argument is optional and is stored as given.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        # (presumably the previous value -- confirm against wfSetVar)
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold another ParserOutput into this one: its language links and
        # category links are appended to the respective lists and the
        # old-magic flags are OR-ed. The text is left alone.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links, not from
                # this object's language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1988
# Bundle of settings that steer a parse. The values are taken from site
# globals and from a user's preferences, see initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # --- Readers: each hands back the stored member unchanged ---

        function getUseTeX()
        {
                return $this->mUseTeX;
        }

        function getUseCategoryMagic()
        {
                return $this->mUseCategoryMagic;
        }

        function getUseDynamicDates()
        {
                return $this->mUseDynamicDates;
        }

        function getInterwikiMagic()
        {
                return $this->mInterwikiMagic;
        }

        function getAllowExternalImages()
        {
                return $this->mAllowExternalImages;
        }

        function getSkin()
        {
                return $this->mSkin;
        }

        function getDateFormat()
        {
                return $this->mDateFormat;
        }

        function getEditSection()
        {
                return $this->mEditSection;
        }

        function getEditSectionOnRightClick()
        {
                return $this->mEditSectionOnRightClick;
        }

        function getNumberHeadings()
        {
                return $this->mNumberHeadings;
        }

        function getShowToc()
        {
                return $this->mShowToc;
        }

        # --- Writers: delegate to wfSetVar(), or wfSetRef() for the skin,
        #     and pass that helper's return value through ---

        function setUseTeX( $x )
        {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function setUseCategoryMagic( $x )
        {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        function setUseDynamicDates( $x )
        {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function setInterwikiMagic( $x )
        {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function setAllowExternalImages( $x )
        {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function setSkin( $x )
        {
                return wfSetRef( $this->mSkin, $x );
        }

        function setDateFormat( $x )
        {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function setEditSection( $x )
        {
                return wfSetVar( $this->mEditSection, $x );
        }

        function setEditSectionOnRightClick( $x )
        {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function setNumberHeadings( $x )
        {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function setShowToc( $x )
        {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Build a new ParserOptions object initialised from the given user.
        /* static */ function newFromUser( &$user )
        {
                $opts = new ParserOptions;
                $opts->initialiseFromUser( $user );
                return $opts;
        }

        # Fill every member: the site-wide switches come from globals, the
        # rest from the user's skin and options. When $userInput is empty a
        # fresh User object, marked as loaded, stands in for it.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $source =& $userInput;
                } else {
                        $source = new User;
                        $source->setLoaded( true );
                }

                # Site configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # User preferences
                $this->mSkin =& $source->getSkin();
                $this->mDateFormat = $source->getOption( "date" );
                $this->mEditSection = $source->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $source->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $source->getOption( "numberheadings" );
                $this->mShowToc = $source->getOption( "showtoc" );
        }

}
2061
# Regex callback, used in Parser::replaceVariables.
# A plain function is needed as the callback, so this one simply forwards
# the match array to braceSubstitution() on the parser held in the global
# $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->braceSubstitution( $matches );
}
2068
2069 ?>