includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         require_once('extensions/wikihiero/wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         require_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  40 define( "MAX_INCLUDE_REPEAT", 5 );
  41
  42 # Allowed values for $mOutputType
  43 define( "OT_HTML", 1 );
  44 define( "OT_WIKI", 2 );
  45 define( "OT_MSG", 3 );
  46
  47 # string parameter for extractTags which will cause it
  48 # to strip HTML comments in addition to regular
  49 # <XML>-style tags. This should not be anything we
  50 # may want to use in wikisyntax
  51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
  52
  53 # prefix for escaping, used in two functions at least
  54 define( "UNIQ_PREFIX", "NaodW29");
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  65         function Parser()
  66         {
  67                 $this->clearState();
  68         }
  69
  70         function clearState()
  71         {
  72                 $this->mOutput = new ParserOutput;
  73                 $this->mAutonumber = 0;
  74                 $this->mLastSection = "";
  75                 $this->mDTopen = false;
  76                 $this->mVariables = false;
  77                 $this->mIncludeCount = array();
  78                 $this->mStripState = array();
  79                 $this->mArgStack = array();
  80         }
  81
  82         # First pass--just handle <nowiki> sections, pass the rest off
  83         # to internalParse() which does all the real work.
  84         #
  85         # Returns a ParserOutput
  86         #
  87         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  88         {
  89                 global $wgUseTidy;
  90                 $fname = "Parser::parse";
  91                 wfProfileIn( $fname );
  92
  93                 if ( $clearState ) {
  94                         $this->clearState();
  95                 }
  96
  97                 $this->mOptions = $options;
  98                 $this->mTitle =& $title;
  99                 $this->mOutputType = OT_HTML;
 100
 101                 $stripState = NULL;
 102                 $text = $this->strip( $text, $this->mStripState );
 103                 $text = $this->internalParse( $text, $linestart );
 104                 $text = $this->unstrip( $text, $this->mStripState );
 105                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 106                 if(!$wgUseTidy) {
 107                         $fixtags = array(
 108                                 "/<hr *>/i" => '<hr/>',
 109                                 "/<br *>/i" => '<br/>',
 110                                 "/<center *>/i"=>'<div class="center">',
 111                                 "/<\\/center *>/i" => '</div>',
 112                                 # Clean up spare ampersands; note that we probably ought to be
 113                                 # more careful about named entities.
 114                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 115                         );
 116                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 117                 } else {
 118                         $fixtags = array(
 119                                 "/<center *>/i"=>'<div class="center">',
 120                                 "/<\\/center *>/i" => '</div>'
 121                         );
 122                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 123                 }
 124                 # only once and last
 125                 $text = $this->doBlockLevels( $text, $linestart );
 126                 if($wgUseTidy) {
 127                         $text = $this->tidy($text);
 128                 }
 129                 $this->mOutput->setText( $text );
 130                 wfProfileOut( $fname );
 131                 return $this->mOutput;
 132         }
 133
 134         /* static */ function getRandomString()
 135         {
 136                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 137         }
 138
        # Replaces all occurrences of <$tag>content</$tag> in the text
        # with a random marker and returns the new text. The output parameter
        # $content will be an associative array filled with data of the form
        # $unique_marker => content.
 143
 144         # If $content is already set, the additional entries will be appended
 145
 146         # If $tag is set to STRIP_COMMENTS, the function will extract
 147         # <!-- HTML comments -->
 148
 149         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 150                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 151                 if ( !$content ) {
 152                         $content = array( );
 153                 }
 154                 $n = 1;
 155                 $stripped = "";
 156
 157                 while ( "" != $text ) {
 158                         if($tag==STRIP_COMMENTS) {
 159                                 $p = preg_split( "/<!--/i", $text, 2 );
 160                         } else {
 161                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 162                         }
 163                         $stripped .= $p[0];
 164                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 165                                 $text = "";
 166                         } else {
 167                                 if($tag==STRIP_COMMENTS) {
 168                                         $q = preg_split( "/-->/i", $p[1], 2 );
 169                                 } else {
 170                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 171                                 }
 172                                 $marker = $rnd . sprintf("%08X", $n++);
 173                                 $content[$marker] = $q[0];
 174                                 $stripped .= $marker;
 175                                 $text = $q[1];
 176                         }
 177                 }
 178                 return $stripped;
 179         }
 180
        # Strips and renders <nowiki>, <hiero>, <timeline>, <math> and <pre>
        # When the output type is OT_HTML, performs necessary rendering operations on plugins
        # Returns the text, and fills an array with data needed in unstrip()
        # If the $state is already a valid strip state, it adds to the state
 185
 186         # When $stripcomments is set, HTML comments <!-- like this -->
 187         # will be stripped in addition to other tags. This is important
 188         # for section editing, where these comments cause confusion when
 189         # counting the sections in the wikisource
 190         function strip( $text, &$state, $stripcomments = false )
 191         {
 192                 $render = ($this->mOutputType == OT_HTML);
 193                 $nowiki_content = array();
 194                 $hiero_content = array();
 195                 $timeline_content = array();
 196                 $math_content = array();
 197                 $pre_content = array();
 198                 $comment_content = array();
 199
 200                 # Replace any instances of the placeholders
 201                 $uniq_prefix = UNIQ_PREFIX;
 202                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 203
 204                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 205                 foreach( $nowiki_content as $marker => $content ){
 206                         if( $render ){
 207                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 208                         } else {
 209                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 210                         }
 211                 }
 212
 213                 $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 214                 foreach( $hiero_content as $marker => $content ){
 215                         if( $render && $GLOBALS['wgUseWikiHiero']){
 216                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 217                         } else {
 218                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
 219                         }
 220                 }
 221
 222                 $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
 223                 foreach( $timeline_content as $marker => $content ){
 224                         if( $render && $GLOBALS['wgUseTimeline']){
 225                                 $timeline_content[$marker] = renderTimeline( $content );
 226                         } else {
 227                                 $timeline_content[$marker] = "<timeline>$content</timeline>";
 228                         }
 229                 }
 230
 231                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 232                 foreach( $math_content as $marker => $content ){
 233                         if( $render ) {
 234                                 if( $this->mOptions->getUseTeX() ) {
 235                                         $math_content[$marker] = renderMath( $content );
 236                                 } else {
 237                                         $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
 238                                 }
 239                         } else {
 240                                 $math_content[$marker] = "<math>$content</math>";
 241                         }
 242                 }
 243
 244                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 245                 foreach( $pre_content as $marker => $content ){
 246                         if( $render ){
 247                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 248                         } else {
 249                                 $pre_content[$marker] = "<pre>$content</pre>";
 250                         }
 251                 }
 252                 if($stripcomments) {
 253                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 254                         foreach( $comment_content as $marker => $content ){
 255                                 $comment_content[$marker] = "<!--$content-->";
 256                         }
 257                 }
 258
 259                 # Merge state with the pre-existing state, if there is one
 260                 if ( $state ) {
 261                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 262                         $state['hiero'] = $state['hiero'] + $hiero_content;
 263                         $state['timeline'] = $state['timeline'] + $timeline_content;
 264                         $state['math'] = $state['math'] + $math_content;
 265                         $state['pre'] = $state['pre'] + $pre_content;
 266                         $state['comment'] = $state['comment'] + $comment_content;
 267                 } else {
 268                         $state = array(
 269                           'nowiki' => $nowiki_content,
 270                           'hiero' => $hiero_content,
 271                           'timeline' => $timeline_content,
 272                           'math' => $math_content,
 273                           'pre' => $pre_content,
 274                           'comment' => $comment_content
 275                         );
 276                 }
 277                 return $text;
 278         }
 279
 280         function unstrip( $text, &$state )
 281         {
 282                 # Must expand in reverse order, otherwise nested tags will be corrupted
 283                 $contentDict = end( $state );
 284                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 285                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 286                                 $text = str_replace( key( $contentDict ), $content, $text );
 287                         }
 288                 }
 289
 290                 return $text;
 291         }
 292
 293         # Add an item to the strip state
 294         # Returns the unique tag which must be inserted into the stripped text
 295         # The tag will be replaced with the original text in unstrip()
 296
 297         function insertStripItem( $text, &$state )
 298         {
 299                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 300                 if ( !$state ) {
 301                         $state = array(
 302                           'nowiki' => array(),
 303                           'hiero' => array(),
 304                           'math' => array(),
 305                           'pre' => array()
 306                         );
 307                 }
 308                 $state['item'][$rnd] = $text;
 309                 return $rnd;
 310         }
 311
 312         # This method generates the list of subcategories and pages for a category
 313         function categoryMagic ()
 314         {
 315                 global $wgLang , $wgUser ;
 316                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 317
 318                 $cns = Namespace::getCategory() ;
 319                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 320
 321                 $r = "<br style=\"clear:both;\"/>\n";
 322
 323
 324                 $sk =& $wgUser->getSkin() ;
 325
 326                 $articles = array() ;
 327                 $children = array() ;
 328                 $data = array () ;
 329                 $id = $this->mTitle->getArticleID() ;
 330
 331                 # FIXME: add limits
 332                 $t = wfStrencode( $this->mTitle->getDBKey() );
 333                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 334                 $res = wfQuery ( $sql, DB_READ ) ;
 335                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 336
 337                 # For all pages that link to this category
 338                 foreach ( $data AS $x )
 339                 {
 340                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 341                         if ( $t != "" ) $t .= ":" ;
 342                         $t .= $x->cur_title ;
 343
 344                         if ( $x->cur_namespace == $cns ) {
 345                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 346                         } else {
 347                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 348                         }
 349                 }
 350                 wfFreeResult ( $res ) ;
 351
 352                 # Showing subcategories
 353                 if ( count ( $children ) > 0 ) {
 354                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 355                         $r .= implode ( ", " , $children ) ;
 356                 }
 357
 358                 # Showing pages in this category
 359                 if ( count ( $articles ) > 0 ) {
 360                         $ti = $this->mTitle->getText() ;
 361                         $h =  wfMsg( "category_header", $ti );
 362                         $r .= "<h2>{$h}</h2>\n" ;
 363                         $r .= implode ( ", " , $articles ) ;
 364                 }
 365
 366
 367                 return $r ;
 368         }
 369
 370         function getHTMLattrs ()
 371         {
 372                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 373                                 "title", "align", "lang", "dir", "width", "height",
 374                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 375                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 376                                 /* FONT */ "type", "start", "value", "compact",
 377                                 /* For various lists, mostly deprecated but safe */
 378                                 "summary", "width", "border", "frame", "rules",
 379                                 "cellspacing", "cellpadding", "valign", "char",
 380                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 381                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 382                                 "id", "class", "name", "style" /* For CSS */
 383                                 );
 384                 return $htmlattrs ;
 385         }
 386
 387         function fixTagAttributes ( $t )
 388         {
 389                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 390                 $htmlattrs = $this->getHTMLattrs() ;
 391
 392                 # Strip non-approved attributes from the tag
 393                 $t = preg_replace(
 394                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 395                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 396                         $t);
 397                 # Strip javascript "expression" from stylesheets. Brute force approach:
 398                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 399
 400                 if( preg_match(
 401                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 402                         wfMungeToUtf8( $t ) ) )
 403                 {
 404                         $t="";
 405                 }
 406
 407                 return trim ( $t ) ;
 408         }
 409
 410         /* interface with html tidy, used if $wgUseTidy = true */
 411         function tidy ( $text ) {
 412                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 413                 global $wgInputEncoding, $wgOutputEncoding;
 414                 $cleansource = '';
 415                 switch(strtoupper($wgOutputEncoding)) {
 416                         case 'ISO-8859-1':
 417                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 418                                 break;
 419                         case 'UTF-8':
 420                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 421                                 break;
 422                         default:
 423                                 $wgTidyOpts .= ' -raw';
 424                         }
 425
 426                 $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 427 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 428 '<head><title>test</title></head><body>'.$text.'</body></html>';
 429                 $descriptorspec = array(
 430                         0 => array("pipe", "r"),
 431                         1 => array("pipe", "w"),
 432                         2 => array("file", "/dev/null", "a")
 433                 );
 434                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 435                 if (is_resource($process)) {
 436                         fwrite($pipes[0], $text);
 437                         fclose($pipes[0]);
 438                         while (!feof($pipes[1])) {
 439                                 $cleansource .= fgets($pipes[1], 1024);
 440                         }
 441                         fclose($pipes[1]);
 442                         $return_value = proc_close($process);
 443                 }
 444                 if( $cleansource == '' && $text != '') {
 445                         return '<h2>'.wfMsg('seriousxhtmlerrors').'</h2><pre>'.htmlspecialchars($text).'</pre>';
 446                 } else {
 447                         return $cleansource;
 448                 }
 449         }
 450
 451         function doTableStuff ( $t )
 452         {
 453                 $t = explode ( "\n" , $t ) ;
 454                 $td = array () ; # Is currently a td tag open?
 455                         $ltd = array () ; # Was it TD or TH?
 456                         $tr = array () ; # Is currently a tr tag open?
 457                         $ltr = array () ; # tr attributes
 458                         foreach ( $t AS $k => $x )
 459                         {
 460                                 $x = trim ( $x ) ;
 461                                 $fc = substr ( $x , 0 , 1 ) ;
 462                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 463                                 {
 464                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 465                                         array_push ( $td , false ) ;
 466                                         array_push ( $ltd , "" ) ;
 467                                         array_push ( $tr , false ) ;
 468                                         array_push ( $ltr , "" ) ;
 469                                 }
 470                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 471                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 472                                 {
 473                                         $z = "</table>\n" ;
 474                                         $l = array_pop ( $ltd ) ;
 475                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 476                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 477                                         array_pop ( $ltr ) ;
 478                                         $t[$k] = $z ;
 479                                 }
 480                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 481                                                 {
 482                                                 $z = trim ( substr ( $x , 2 ) ) ;
 483                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 484                                                 }*/
 485                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 486                                 {
 487                                         $x = substr ( $x , 1 ) ;
 488                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 489                                         $z = "" ;
 490                                         $l = array_pop ( $ltd ) ;
 491                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 492                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 493                                         array_pop ( $ltr ) ;
 494                                         $t[$k] = $z ;
 495                                         array_push ( $tr , false ) ;
 496                                         array_push ( $td , false ) ;
 497                                         array_push ( $ltd , "" ) ;
 498                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 499                                 }
 500                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 501                                 {
 502                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 503                                         {
 504                                                 $fc = "+" ;
 505                                                 $x = substr ( $x , 1 ) ;
 506                                         }
 507                                         $after = substr ( $x , 1 ) ;
 508                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 509                                         $after = explode ( "||" , $after ) ;
 510                                         $t[$k] = "" ;
 511                                         foreach ( $after AS $theline )
 512                                         {
 513                                                 $z = "" ;
 514                                                 if ( $fc != "+" )
 515                                                 {
 516                                                         $tra = array_pop ( $ltr ) ;
 517                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 518                                                         array_push ( $tr , true ) ;
 519                                                         array_push ( $ltr , "" ) ;
 520                                                 }
 521
 522                                                 $l = array_pop ( $ltd ) ;
 523                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 524                                                 if ( $fc == "|" ) $l = "td" ;
 525                                                 else if ( $fc == "!" ) $l = "th" ;
 526                                                 else if ( $fc == "+" ) $l = "caption" ;
 527                                                 else $l = "" ;
 528                                                 array_push ( $ltd , $l ) ;
 529                                                 $y = explode ( "|" , $theline , 2 ) ;
 530                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 531                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 532                                                 $t[$k] .= $y ;
 533                                                 array_push ( $td , true ) ;
 534                                         }
 535                                 }
 536                         }
 537
 538                 # Closing open td, tr && table
 539                 while ( count ( $td ) > 0 )
 540                 {
 541                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 542                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 543                         $t[] = "</table>" ;
 544                 }
 545
 546                 $t = implode ( "\n" , $t ) ;
 547                 #               $t = $this->removeHTMLtags( $t );
 548                 return $t ;
 549         }
 550
 551         # Parses the text and adds the result to the strip state
 552         # Returns the strip tag
 553         function stripParse( $text, $linestart, $args )
 554         {
 555                 $text = $this->strip( $text, $this->mStripState );
 556                 $text = $this->internalParse( $text, $linestart, $args, false );
 557                 if( $linestart ) {
 558                         $text = "\n" . $text;
 559                 }
 560                 return $this->insertStripItem( $text, $this->mStripState );
 561         }
 562
 563         function internalParse( $text, $linestart, $args = array(), $isMain=true )
 564         {
 565                 $fname = "Parser::internalParse";
 566                 wfProfileIn( $fname );
 567
 568                 $text = $this->removeHTMLtags( $text );
 569                 $text = $this->replaceVariables( $text, $args );
 570
 571                 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr/>", $text );
 572
 573                 $text = $this->doHeadings( $text );
 574                 if($this->mOptions->getUseDynamicDates()) {
 575                         global $wgDateFormatter;
 576                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 577                 }
 578                 $text = $this->doAllQuotes( $text );
 579                 $text = $this->replaceExternalLinks( $text );
 580                 $text = $this->replaceInternalLinks ( $text );
 581                 //$text = $this->doTokenizedParser ( $text );
 582                 $text = $this->doTableStuff ( $text ) ;
 583                 $text = $this->magicISBN( $text );
 584                 $text = $this->magicRFC( $text );
 585                 $text = $this->formatHeadings( $text, $isMain );
 586                 $sk =& $this->mOptions->getSkin();
 587                 $text = $sk->transformContent( $text );
 588
 589                 if ( !isset ( $this->categoryMagicDone ) ) {
 590                         $text .= $this->categoryMagic () ;
 591                         $this->categoryMagicDone = true ;
 592                 }
 593
 594                 wfProfileOut( $fname );
 595                 return $text;
 596         }
 597
 598
 599         /* private */ function doHeadings( $text )
 600         {
 601                 for ( $i = 6; $i >= 1; --$i ) {
 602                         $h = substr( "======", 0, $i );
 603                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 604                           "<h{$i}>\\1</h{$i}>\\2", $text );
 605                 }
 606                 return $text;
 607         }
 608
 609         /* private */ function doAllQuotes( $text )
 610         {
 611                 $outtext = "";
 612                 $lines = explode( "\r\n", $text );
 613                 foreach ( $lines as $line ) {
 614                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\r\n";
 615                 }
 616                 return $outtext;
 617         }
 618
 # Convert the apostrophe runs of one piece of text into <em>/<strong> markup.
 #
 # Each call handles the earliest remaining pair of apostrophes (the match is
 # ungreedy) and recurses on what follows it. A pair followed by a third
 # apostrophe is handled by the first switch below, a plain pair by the second.
 #
 # $pre  - text read earlier that still has to be emitted inside the
 #         element(s) that are currently open
 # $text - the text that remains to be scanned
 # $mode - which markup is open: "", "em", "strong", "emstrong", "strongem"
 #         or "both"
 #
 # Returns the converted text.
 /* private */ function doQuotes( $pre, $text, $mode )
 {
         if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
                 # No apostrophe pair left: wrap the remainder according to $mode.
                 $strongText = ($text == "") ? "" : "<strong>{$text}</strong>";
                 $emText = ($text == "") ? "" : "<em>{$text}</em>";
                 $nothingLeft = ($pre == "") && ($text == "");
                 switch ( $mode ) {
                         case "":
                                 return $pre . $text;
                         case "em":
                                 return $pre . $emText;
                         case "strong":
                                 return $pre . $strongText;
                         case "strongem":
                                 return $nothingLeft ? "" : "<strong>{$pre}{$emText}</strong>";
                         default:
                                 return $nothingLeft ? "" : "<em>{$pre}{$strongText}</em>";
                 }
         }

         $head = $m[1];
         $rest = $m[2];
         $strongHead = ($head == "") ? "" : "<strong>{$head}</strong>";
         $emHead = ($head == "") ? "" : "<em>{$head}</em>";

         if ( substr( $rest, 0, 1 ) == "'" ) {
                 # The pair is followed by a third apostrophe; consume it as well.
                 $rest = substr( $rest, 1 );
                 switch ( $mode ) {
                         case "em":
                                 return $this->doQuotes( $head, $rest, ($head == "") ? "both" : "emstrong" );
                         case "strong":
                                 return $strongHead . $this->doQuotes( "", $rest, "" );
                         case "emstrong":
                         case "both":
                                 return $this->doQuotes( "", $pre . $strongHead . $rest, "em" );
                         case "strongem":
                                 return "<strong>{$pre}{$emHead}</strong>" . $this->doQuotes( "", $rest, "em" );
                         default:
                                 return $head . $this->doQuotes( "", $rest, "strong" );
                 }
         }

         # A plain pair of apostrophes.
         switch ( $mode ) {
                 case "strong":
                         return $this->doQuotes( $head, $rest, ($head == "") ? "both" : "strongem" );
                 case "em":
                         return $emHead . $this->doQuotes( "", $rest, "" );
                 case "emstrong":
                         return "<em>{$pre}{$strongHead}</em>" . $this->doQuotes( "", $rest, "strong" );
                 case "strongem":
                 case "both":
                         return $this->doQuotes( "", $pre . $emHead . $rest, "strong" );
                 default:
                         return $head . $this->doQuotes( "", $rest, "em" );
         }
 }
 666
 667         # Note: we have to do external links before the internal ones,
 668         # and otherwise take great care in the order of things here, so
 669         # that we don't end up interpreting some URLs twice.
 670
 # Replace external links for every supported protocol, one protocol at a
 # time and in a fixed order. Only http and https links are auto-numbered.
 # Returns the text with the links replaced.
 /* private */ function replaceExternalLinks( $text )
 {
         $fname = "Parser::replaceExternalLinks";
         wfProfileIn( $fname );

         # protocol => passed on as the $autonumber argument
         $protocols = array(
                 "http" => true,
                 "https" => true,
                 "ftp" => false,
                 "irc" => false,
                 "gopher" => false,
                 "news" => false,
                 "mailto" => false
         );
         foreach ( $protocols as $protocol => $autonumber ) {
                 $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
         }

         wfProfileOut( $fname );
         return $text;
 }
 685
 # Replace the external links of a single protocol in $s.
 #
 # Free-standing URLs are rewritten first (as images when $autonumber is set
 # and external images are allowed, otherwise as plain links), then bracketed
 # links of the form [url] and [url label].
 #
 # $s          - the text to process
 # $protocol   - protocol name without the colon, e.g. "http"
 # $autonumber - when true, [url] links are labelled "[n]" from a running
 #               counter; it is also a precondition for inline images
 #
 # Returns the text with the links replaced.
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
         # Stand-in for the protocol inside markup generated below, so that a
         # URL which has already been rewritten is not matched a second time.
         # It is swapped back once the free-standing URLs are done.
         $placeholder = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
         $urlChars = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

         # Separators that are ignored when they are the last character of a
         # URL but kept when they occur inside it: in "go to www.foo.com, where"
         # the comma is not part of the URL, in "www.foo.com/123,2342,32.htm"
         # it is.
         $separators = ",;\.:";
         $fileChars = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
         $imageExts = "gif|png|jpg|jpeg";

         # The curly braces are PHP string interpolation, not regex syntax.
         $freeImage = "/(^|[^\\[])({$protocol}:)([{$urlChars}{$separators}]+)\\/([{$fileChars}]+)\\." .
                 "((?i){$imageExts})([^{$urlChars}]|$)/";
         $freeUrl = "/(^|[^\\[])({$protocol}:)(([{$urlChars}]|[{$separators}][{$urlChars}])+)" .
                 "([^{$urlChars}{$separators}]|[{$separators}]|$)/";

         $sk =& $this->mOptions->getSkin();

         # Use img tags only for HTTP urls
         if ( $autonumber && $this->mOptions->getAllowExternalImages() ) {
                 $imageMarkup = $sk->makeImage( "{$placeholder}:\\3/\\4.\\5", "\\4.\\5" );
                 $s = preg_replace( $freeImage, "\\1" . $imageMarkup . "\\6", $s );
         }

         $target = "{$placeholder}:\\3";
         $attributes = $sk->getExternalLinkAttributes( $target, wfEscapeHTML( $target ) );
         $caption = wfEscapeHTML( $target );
         $s = preg_replace( $freeUrl, "\\1<a href=\"{$target}\"{$attributes}>{$caption}</a>\\5", $s );
         $s = str_replace( $placeholder, $protocol, $s );

         # Bracketed links: split on the opening "[protocol:" and inspect what
         # follows each split point.
         $segments = explode( "[{$protocol}:", " " . $s );
         $s = array_shift( $segments );
         $s = substr( $s, 1 );

         $bareForm = "/^([{$urlChars}{$separators}]+)](.*)\$/sD";
         $labelledForm = "/^([{$urlChars}{$separators}]+)\\s+([^\\]]+)](.*)\$/sD";

         foreach ( $segments as $segment ) {
                 if ( preg_match( $bareForm, $segment, $m ) ) {
                         $link = "{$protocol}:{$m[1]}";
                         $trail = $m[2];
                         if ( $autonumber ) {
                                 $text = "[" . ++$this->mAutonumber . "]";
                         } else {
                                 $text = wfEscapeHTML( $link );
                         }
                 } elseif ( preg_match( $labelledForm, $segment, $m ) ) {
                         $link = "{$protocol}:{$m[1]}";
                         $text = $m[2];
                         $trail = $m[3];
                 } else {
                         # Not a well-formed bracketed link; put the text back as it was.
                         $s .= "[{$protocol}:" . $segment;
                         continue;
                 }

                 $labelShowsUrl = ( $link == $text ) ||
                         preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link );
                 if ( $labelShowsUrl ) {
                         $paren = "";
                 } else {
                         # Expand the URL for printable version
                         $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
                 }

                 $la = $sk->getExternalLinkAttributes( $link, $text );
                 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
         }
         return $s;
 }
 752
 753
 # Replace [[internal link]] markup in $s with rendered links.
 #
 # The text is split on "[[" and every fragment is matched against the form
 # "target|label]]trail". Depending on the target, a fragment becomes:
 #   - an interlanguage link, collected in mOutput->mLanguageLinks and not
 #     rendered inline
 #   - an image
 #   - a category link, collected in mOutput->mCategoryLinks and not rendered
 #     inline
 #   - a self link, a media link or a special-page link
 #   - an ordinary page link
 # Fragments that do not form a valid link are emitted unchanged.
 #
 # Returns the text with the links replaced, or '' when the text contained an
 # interlanguage link and nothing but whitespace is left.
 /* private */ function replaceInternalLinks( $s )
 {
         global $wgLang, $wgLinkCache;
         global $wgNamespacesWithSubpages;
         static $fname = "Parser::replaceInternalLink" ;
         wfProfileIn( $fname );

         wfProfileIn( "$fname-setup" );
         static $tc = FALSE;
         # the % is needed to support urlencoded titles as well
         if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
         $sk =& $this->mOptions->getSkin();

         # A space is prepended so the first element always exists even when
         # the text starts with "[["; it is stripped again right away.
         $a = explode( "[[", " " . $s );
         $s = array_shift( $a );
         $s = substr( $s, 1 );

         # Match a link having the form [[namespace:link|alternate]]trail
         static $e1 = FALSE;
         if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
         # Match the end of a line for a word that's not followed by whitespace,
         # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
         static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';

         # Special and Media are pseudo-namespaces; no pages actually exist in them
         static $image = FALSE;
         static $special = FALSE;
         static $media = FALSE;
         static $category = FALSE;
         if ( !$image ) { $image = Namespace::getImage(); }
         if ( !$special ) { $special = Namespace::getSpecial(); }
         if ( !$media ) { $media = Namespace::getMedia(); }
         if ( !$category ) { $category = Namespace::getCategory(); }

         $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

         if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
                 $new_prefix = $m[2];
                 $s = $m[1];
         } else {
                 $new_prefix="";
         }

         wfProfileOut( "$fname-setup" );

         # Set once an interlanguage link has been swallowed; see the end.
         $sawLanguageLink = false;

         foreach ( $a as $line ) {
                 # The prefix was cut from the text in front of the first link
                 # only, so it belongs to that link and to none of the later ones.
                 $prefix = $new_prefix;
                 $new_prefix = "";

                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
                         $text = $m[2];
                         # fix up urlencoded title texts
                         if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
                         $trail = $m[3];
                 } else { # Invalid form; output directly
                         $s .= $prefix . "[[" . $line ;
                         continue;
                 }

                 /* Valid link forms:
                 Foobar -- normal
                 :Foobar -- override special treatment of prefix (images, language links)
                 /Foobar -- convert to CurrentPage/Foobar
                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
                 */
                 $c = substr($m[1],0,1);
                 $noforce = ($c != ":");
                 if( $c == "/" ) { # subpage
                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
                                 $noslash=$m[1];
                         } else {
                                 $noslash=substr($m[1],1);
                         }
                         if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
                                 if( "" == $text ) {
                                         $text= $m[1];
                                 } # this might be changed for ugliness reasons
                         } else {
                                 $link = $noslash; # no subpage allowed, use standard link
                         }
                 } elseif( $noforce ) { # no subpage
                         $link = $m[1];
                 } else {
                         $link = substr( $m[1], 1 );
                 }
                 $wasblank = ( "" == $text );
                 if( $wasblank )
                         $text = $link;

                 $nt = Title::newFromText( $link );
                 if( !$nt ) {
                         # Not a usable title; output the markup unchanged.
                         $s .= $prefix . "[[" . $line;
                         continue;
                 }
                 $ns = $nt->getNamespace();
                 $iw = $nt->getInterWiki();
                 if( $noforce ) {
                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
                                 # Interlanguage link: record it and carry on with the
                                 # remaining fragments. Returning here would discard
                                 # every link that follows.
                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
                                 $s .= $prefix . $trail ;
                                 $sawLanguageLink = true;
                                 continue;
                         }
                         if ( $ns == $image ) {
                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
                                 $wgLinkCache->addImageLinkObj( $nt );
                                 continue;
                         } else if ( $ns == $category ) {
                                 $t = $nt->getText() ;
                                 $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

                                 $wgLinkCache->suspend(); # Don't save in links/brokenlinks
                                 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
                                 $wgLinkCache->resume();

                                 $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
                                 $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
                                 $this->mOutput->mCategoryLinks[] = $t ;
                                 $s .= $prefix . $trail ;
                                 continue;
                         }
                 }
                 # strpos() returns 0 for a leading "#", so the test has to be strict.
                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
                 ( strpos( $link, "#" ) === FALSE ) ) {
                         # Self-links are handled specially; generally de-link and change to bold.
                         $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
                         continue;
                 }

                 if( $ns == $media ) {
                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
                         $wgLinkCache->addImageLinkObj( $nt );
                         continue;
                 } elseif( $ns == $special ) {
                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
                         continue;
                 }
                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
         }

         # Profiling is closed exactly once, here, to balance the single
         # wfProfileIn() at the top.
         wfProfileOut( $fname );

         # Text that consisted only of interlanguage links and whitespace
         # produces no output at all.
         if ( $sawLanguageLink && trim( $s ) == '' ) {
                 return '';
         }
         return $s;
 }
 908
 909         # Some functions here used by doBlockLevels()
 910         #
 # Build the closing tag for the section recorded in mLastSection, if any,
 # and reset the paragraph state (mInPre and mLastSection).
 # Returns the closing tag followed by a newline, or "" when nothing was open.
 /* private */ function closeParagraph()
 {
         $closing = ( '' != $this->mLastSection )
                 ? "</" . $this->mLastSection  . ">\n"
                 : "";

         $this->mInPre = false;
         $this->mLastSection = "";

         return $closing;
 }
 # getCommon() returns the length of the longest common prefix of both
 # arguments, i.e. the number of leading characters in which they agree.
 #
 # $st1, $st2 - the strings to compare
 #
 # Returns 0 when either string is empty or the first characters differ.
 /* private */ function getCommon( $st1, $st2 )
 {
         $limit = min( strlen( $st1 ), strlen( $st2 ) );

         # Square brackets are used for the string offsets; the curly-brace
         # form is no longer accepted by current PHP versions.
         for ( $i = 0; $i < $limit; ++$i ) {
                 if ( $st1[$i] !== $st2[$i] ) { break; }
         }
         return $i;
 }
 935         # These next three functions open, continue, and close the list
 936         # element appropriate to the prefix character passed into them.
 937         #
 # Close any open paragraph and open the list that belongs to the prefix
 # character $char: "*" -> <ul>, "#" -> <ol>, ":" and ";" -> <dl>.
 # Opening a ";" item also records that a <dt> is open.
 # For any other character only an error comment is returned.
 /* private */ function openList( $char )
 {
         $result = $this->closeParagraph();

         switch ( $char ) {
                 case "*":
                         $result .= "<ul><li>";
                         break;
                 case "#":
                         $result .= "<ol><li>";
                         break;
                 case ":":
                         $result .= "<dl><dd>";
                         break;
                 case ";":
                         $result .= "<dl><dt>";
                         $this->mDTopen = true;
                         break;
                 default:
                         $result = "<!-- ERR 1 -->";
         }

         return $result;
 }
 953
 # Return the markup that ends the current list item and starts the next
 # one for the prefix character $char.
 #
 # For definition lists the element being closed depends on mDTopen, which
 # is then updated: true after a ";" item, false after a ":" item.
 # Unknown characters yield an error comment.
 /* private */ function nextItem( $char )
 {
         if ( "*" == $char || "#" == $char ) {
                 return "</li><li>";
         }
         if ( ":" != $char && ";" != $char ) {
                 return "<!-- ERR 2 -->";
         }

         $close = $this->mDTopen ? "</dt>" : "</dd>";
         $this->mDTopen = ( ";" == $char );

         return $close . ( $this->mDTopen ? "<dt>" : "<dd>" );
 }
 970
 # Return the markup that closes the list belonging to the prefix character
 # $char, followed by a newline.
 #
 # A definition list is closed with </dt> when a term is still open
 # (mDTopen, which is cleared) and with </dd> otherwise.
 # Any character other than "*", "#" or ":" yields an error comment.
 /* private */function closeList( $char )
 {
         switch ( $char ) {
                 case "*":
                         $text = "</li></ul>";
                         break;
                 case "#":
                         $text = "</li></ol>";
                         break;
                 case ":":
                         if ( $this->mDTopen ) {
                                 $this->mDTopen = false;
                                 $text = "</dt></dl>";
                         } else {
                                 $text = "</dd></dl>";
                         }
                         break;
                 default:
                         return "<!-- ERR 3 -->";
         }
         return $text."\n";
 }
 986
 # Turn line-oriented wiki markup into block-level HTML.
 #
 # Walks $text line by line. Lines starting with * # : ; open, continue and
 # close (nested) lists; lines starting with a space go into <pre>; other
 # lines are wrapped in <p>. Lines that contain one of the block-level HTML
 # tags matched below close the open paragraph and are passed through.
 #
 # $text      - the text to process
 # $linestart - when false, the first line is copied to the output as is
 #              instead of being processed like the other lines
 #
 # Returns the processed text.
 /* private */ function doBlockLevels( $text, $linestart ) {
         $fname = "Parser::doBlockLevels";
         wfProfileIn( $fname );

         # Parsing through the text line by line.  The main thing
         # happening here is handling of block-level elements p, pre,
         # and making lists from lines starting with * # : etc.
         #
         $textLines = explode( "\n", $text );

         $lastPrefix = $output = $lastLine = '';
         $this->mDTopen = $inBlockElem = false;
         $prefixLength = 0;
         # Either false, or markup held back on a blank line until it is known
         # what the next line looks like.
         $paragraphStack = false;

         if ( !$linestart ) {
                 $output .= array_shift( $textLines );
         }
         foreach ( $textLines as $oLine ) {
                 $lastPrefixLength = strlen( $lastPrefix );
                 $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
                 $preOpenMatch = preg_match("/<pre/i", $oLine );
                 if (!$this->mInPre) {
                         $this->mInPre = !empty($preOpenMatch);
                 }
                 if ( !$this->mInPre ) {
                         # Multiple prefixes may abut each other for nested lists.
                         $prefixLength = strspn( $oLine, "*#:;" );
                         $pref = substr( $oLine, 0, $prefixLength );

                         # ";" and ":" items live in the same <dl>, so they are
                         # treated as the same prefix when comparing with the
                         # previous line.
                         $pref2 = str_replace( ";", ":", $pref );
                         $t = substr( $oLine, $prefixLength );
                 } else {
                         # Don't interpret any other prefixes in preformatted text
                         $prefixLength = 0;
                         $pref = $pref2 = '';
                         $t = $oLine;
                 }

                 # List generation
                 if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
                         # Same as the last item, so no need to deal with nesting or opening stuff
                         $output .= $this->nextItem( substr( $pref, -1 ) );
                         $paragraphStack = false;

                         if ( ";" == substr( $pref, -1 ) ) {
                                 # The one nasty exception: definition lists work like this:
                                 # ; title : definition text
                                 # So we check for : in the remainder text to split up the
                                 # title and definition, without b0rking links.
                                 # FIXME: This is not foolproof. Something better in Tokenizer might help.
                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                                         $term = $match[1];
                                         $output .= $term . $this->nextItem( ":" );
                                         $t = $match[2];
                                 }
                         }
                 } elseif( $prefixLength || $lastPrefixLength ) {
                         # Either open or close a level...
                         $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
                         $paragraphStack = false;

                         # Close the levels the previous line had beyond the common part.
                         while( $commonPrefixLength < $lastPrefixLength ) {
                                 $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                                 --$lastPrefixLength;
                         }
                         if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                                 $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
                         }
                         # Open the levels this line has beyond the common part.
                         while ( $prefixLength > $commonPrefixLength ) {
                                 $char = substr( $pref, $commonPrefixLength, 1 );
                                 $output .= $this->openList( $char );

                                 if ( ";" == $char ) {
                                         # FIXME: This is dupe of code above
                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                                                 $term = $match[1];
                                                 $output .= $term . $this->nextItem( ":" );
                                                 $t = $match[2];
                                         }
                                 }
                                 ++$commonPrefixLength;
                         }
                         $lastPrefix = $pref2;
                 }
                 if( 0 == $prefixLength ) {
                         # No prefix (not in list)--go to paragraph mode
                         $uniq_prefix = UNIQ_PREFIX;
                         // XXX: use a stack for nestable elements like span, table and div
                         $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
                         $closematch = preg_match(
                                 "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
                                 "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
                         if ( $openmatch or $closematch ) {
                                 $paragraphStack = false;
                                 $output .= $this->closeParagraph();
                                 if($preOpenMatch and !$preCloseMatch) {
                                         $this->mInPre = true;
                                 }
                                 if ( $closematch  ) {
                                         $inBlockElem = false;
                                 } else {
                                         $inBlockElem = true;
                                 }
                         } else if ( !$inBlockElem && !$this->mInPre ) {
                                 # The emptiness test comes first so that the first
                                 # character is never read from an empty line.
                                 if ( '' != $t and " " == $t[0] and trim($t) != '' ) {
                                         // pre
                                         if ($this->mLastSection != 'pre') {
                                                 $paragraphStack = false;
                                                 $output .= $this->closeParagraph().'<pre>';
                                                 $this->mLastSection = 'pre';
                                         }
                                 } else {
                                         // paragraph
                                         if ( '' == trim($t) ) {
                                                 if ( $paragraphStack ) {
                                                         # Second blank line in a row.
                                                         $output .= $paragraphStack.'<br/>';
                                                         $paragraphStack = false;
                                                         $this->mLastSection = 'p';
                                                 } else {
                                                         if ($this->mLastSection != 'p' ) {
                                                                 $output .= $this->closeParagraph();
                                                                 $this->mLastSection = '';
                                                                 $paragraphStack = "<p>";
                                                         } else {
                                                                 $paragraphStack = '</p><p>';
                                                         }
                                                 }
                                         } else {
                                                 if ( $paragraphStack ) {
                                                         $output .= $paragraphStack;
                                                         $paragraphStack = false;
                                                         $this->mLastSection = 'p';
                                                 } else if ($this->mLastSection != 'p') {
                                                         $output .= $this->closeParagraph().'<p>';
                                                         $this->mLastSection = 'p';
                                                 }
                                         }
                                 }
                         }
                 }
                 # A line is not emitted while paragraph markup is being held back.
                 if ($paragraphStack === false) {
                         $output .= $t."\n";
                 }
         }
         # Close whatever list levels the last line left open.
         while ( $prefixLength ) {
                 $output .= $this->closeList( $pref2[$prefixLength-1] );
                 --$prefixLength;
         }
         if ( "" != $this->mLastSection ) {
                 $output .= "</" . $this->mLastSection . ">";
                 $this->mLastSection = "";
         }

         wfProfileOut( $fname );
         return $output;
 }
1145
# Return the current value of the magic variable identified by $index
# (one of the MAG_* constants handled below), or NULL for any other index.
function getVariableValue( $index ) {
        global $wgLang, $wgSitename, $wgServer;

        $value = NULL;
        switch ( $index ) {
                case MAG_CURRENTMONTH:
                        $value = date( "m" );
                        break;
                case MAG_CURRENTMONTHNAME:
                        $value = $wgLang->getMonthName( date("n") );
                        break;
                case MAG_CURRENTMONTHNAMEGEN:
                        $value = $wgLang->getMonthNameGen( date("n") );
                        break;
                case MAG_CURRENTDAY:
                        $value = date("j");
                        break;
                case MAG_PAGENAME:
                        $value = $this->mTitle->getText();
                        break;
                case MAG_NAMESPACE:
                        # Localised namespace name rather than the canonical one
                        $value = $wgLang->getNsText($this->mTitle->getNamespace());
                        break;
                case MAG_CURRENTDAYNAME:
                        # date("w") counts from 0, the weekday lookup is given 1-7
                        $value = $wgLang->getWeekdayName( date("w")+1 );
                        break;
                case MAG_CURRENTYEAR:
                        $value = date( "Y" );
                        break;
                case MAG_CURRENTTIME:
                        $value = $wgLang->time( wfTimestampNow(), false );
                        break;
                case MAG_NUMBEROFARTICLES:
                        $value = wfNumberOfArticles();
                        break;
                case MAG_SITENAME:
                        $value = $wgSitename;
                        break;
                case MAG_SERVER:
                        $value = $wgServer;
                        break;
        }
        return $value;
}
1179
# Rebuilds $this->mVariables from scratch.
#
# For every ID in $wgVariableIDs the matching MagicWord object is fetched
# and its addToArray() is handed $this->mVariables together with the value
# getVariableValue() returns for that ID.
function initialiseVariables()
{
        global $wgVariableIDs;

        $this->mVariables = array();
        foreach ( $wgVariableIDs as $variableId ) {
                $magicWord =& MagicWord::get( $variableId );
                $currentValue = $this->getVariableValue( $variableId );
                $magicWord->addToArray( $this->mVariables, $currentValue );
        }
}
1189
# Replaces {{{argument}}} and {{...}} constructs in $text.
#
# $text  wiki text to process
# $args  arguments available to {{{...}}} placeholders while this call is
#        active; pushed onto $this->mArgStack for the duration of the call
#
# Returns the text after both regex passes. The work on each match is done
# by the callbacks named "wfArgSubstitution" and "wfBraceSubstitution";
# they are defined outside this function and presumably find this parser
# through $GLOBALS['wgCurParser'], which is bound below.
/* private */ function replaceVariables( $text, $args = array() )
{
        $fname = "Parser::replaceVariables";
        wfProfileIn( $fname );

        # Variable table is built lazily on first use
        if ( !$this->mVariables ) {
                $this->initialiseVariables();
        }
        $titleChars = Title::legalChars();

        # This function is called recursively. To keep track of arguments we need a stack:
        array_push( $this->mArgStack, $args );

        # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
        $GLOBALS['wgCurParser'] =& $this;

        # Argument substitution is only done when producing HTML
        if ( $this->mOutputType == OT_HTML ) {
                $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
        }

        # Double brace substitution: optional newline, title characters,
        # then an optional "|"-introduced argument part (may span lines, /s)
        $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
        $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

        # Drop this call's arguments again
        array_pop( $this->mArgStack );

        wfProfileOut( $fname );
        return $text;
}
1223
1224         function braceSubstitution( $matches )
1225         {
                     # Builds the replacement for one {{...}} match.
                     #
                     # $matches has the layout of the double-brace regex in
                     # replaceVariables(): [0] the whole match, [1] an optional leading
                     # newline, [2] the text before the first "|", [3] the remainder
                     # (starting with "|") or "".
                     #
                     # The handlers below are tried in a fixed order until one sets $found.
                     # Returns the replacement text, or $matches[0] unchanged if no handler
                     # applied.
                     # NOTE(review): presumably reached through the "wfBraceSubstitution"
                     # callback named in replaceVariables() -- confirm.
1226                 global $wgLinkCache, $wgLang;
                     # NOTE(review): $fname is assigned but not used anywhere in this function
1227                 $fname = "Parser::braceSubstitution";
                     # $found:   a handler produced $text
                     # $nowiki:  MSGNW prefix seen, result gets escaped instead of parsed
                     # $noparse: result must not be run through the parser again
1228                 $found = false;
1229                 $nowiki = false;
1230                 $noparse = false;
1231
                     # Stays NULL unless the LOCALURL or the database handler creates a Title
1232                 $title = NULL;
1233
1234                 # $newline is an optional newline character before the braces
1235                 # $part1 is the bit before the first |, and must contain only title characters
1236                 # $args is a list of arguments, starting from index 0, not including $part1
1237
1238                 $newline = $matches[1];
1239                 $part1 = $matches[2];
1240                 # If the third subpattern matched anything, it will start with |
1241                 if ( $matches[3] !== "" ) {
1242                         $args = explode( "|", substr( $matches[3], 1 ) );
1243                 } else {
1244                         $args = array();
1245                 }
1246                 $argc = count( $args );
1247
1248                 # {{{}}}
                     # A match that contains a triple brace is handed back as-is and unparsed
1249                 if ( strpos( $matches[0], "{{{" ) !== false ) {
1250                         $text = $matches[0];
1251                         $found = true;
1252                         $noparse = true;
1253                 }
1254
1255                 # SUBST
                     # With a SUBST prefix in the OT_WIKI pass $found stays false, so the
                     # handlers below go on to expand the construct. Without the prefix in
                     # the OT_WIKI pass the match is returned untouched.
1256                 if ( !$found ) {
1257                         $mwSubst =& MagicWord::get( MAG_SUBST );
1258                         if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1259                                 if ( $this->mOutputType != OT_WIKI ) {
1260                                         # Invalid SUBST not replaced at PST time
1261                                         # Return without further processing
1262                                         $text = $matches[0];
1263                                         $found = true;
1264                                         $noparse= true;
1265                                 }
1266                         } elseif ( $this->mOutputType == OT_WIKI ) {
1267                                 # SUBST not found in PST pass, do nothing
1268                                 $text = $matches[0];
1269                                 $found = true;
1270                         }
1271                 }
1272
1273                 # MSG, MSGNW and INT
1274                 if ( !$found ) {
1275                         # Check for MSGNW:
1276                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1277                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1278                                 $nowiki = true;
1279                         } else {
1280                                 # Remove obsolete MSG:
1281                                 $mwMsg =& MagicWord::get( MAG_MSG );
1282                                 $mwMsg->matchStartAndRemove( $part1 );
1283                         }
1284
1285                         # Check if it is an internal message
                             # Internal messages count against the include limit under the key "int:<name>"
1286                         $mwInt =& MagicWord::get( MAG_INT );
1287                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1288                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1289                                         $text = wfMsgReal( $part1, $args, true );
1290                                         $found = true;
1291                                 }
1292                         }
1293                 }
1294
1295                 # NS
1296                 if ( !$found ) {
1297                         # Check for NS: (namespace expansion)
                             # NOTE(review): MagicWord::get() is bound with =& above but plainly
                             # assigned here and in the LOCALURL handler -- confirm this is intended.
1298                         $mwNs = MagicWord::get( MAG_NS );
1299                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
                                     # intval() is 0 for "0" and for non-numeric text, so both
                                     # of those take the name lookup in the else branch
1300                                 if ( intval( $part1 ) ) {
1301                                         $text = $wgLang->getNsText( intval( $part1 ) );
1302                                         $found = true;
1303                                 } else {
1304                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1305                                         if ( !is_null( $index ) ) {
1306                                                 $text = $wgLang->getNsText( $index );
1307                                                 $found = true;
1308                                         }
1309                                 }
1310                         }
1311                 }
1312
1313                 # LOCALURL and LOCALURLE
1314                 if ( !$found ) {
1315                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1316                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1317
                             # $func names the Title method to call; '' means neither prefix matched
1318                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1319                                 $func = 'getLocalURL';
1320                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1321                                 $func = 'escapeLocalURL';
1322                         } else {
1323                                 $func = '';
1324                         }
1325
1326                         if ( $func !== '' ) {
                                     # $title is set here, so for HTML output the link cache
                                     # handling at the end of this function applies to it too
1327                                 $title = Title::newFromText( $part1 );
1328                                 if ( !is_null( $title ) ) {
                                             # The first argument, if any, is passed on to the URL method
1329                                         if ( $argc > 0 ) {
1330                                                 $text = $title->$func( $args[0] );
1331                                         } else {
1332                                                 $text = $title->$func();
1333                                         }
1334                                         $found = true;
1335                                 }
1336                         }
1337                 }
1338
1339                 # Internal variables
                     # $this->mVariables is the table filled by initialiseVariables()
1340                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1341                         $text = $this->mVariables[$part1];
1342                         $found = true;
1343                         $this->mOutput->mContainsOldMagic = true;
1344                 }
1345 /*
1346                 # Arguments input from the caller
1347                 $inputArgs = end( $this->mArgStack );
1348                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1349                         $text = $inputArgs[$part1];
1350                         $found = true;
1351                 }
1352 */
1353                 # Load from database
1354                 if ( !$found ) {
                             # NOTE(review): NS_TEMPLATE is presumably the default namespace
                             # for names without a prefix -- confirm against Title::newFromText()
1355                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1356                         if ( !is_null( $title ) && !$title->isExternal() ) {
1357                                 # Check for excessive inclusion
1358                                 $dbk = $title->getPrefixedDBkey();
1359                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1360                                         $article = new Article( $title );
1361                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1362                                         if ( $articleContent !== false ) {
1363                                                 $found = true;
1364                                                 $text = $articleContent;
1365
1366                                         }
1367                                 }
1368
1369                                 # If the title is valid but undisplayable, make a link to it
                                     # (reached when the include limit was hit or no content came back)
1370                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1371                                         $text = "[[" . $title->getPrefixedText() . "]]";
1372                                         $found = true;
1373                                 }
1374                         }
1375                 }
1376
1377                 # Recursive parsing, escaping and link table handling
1378                 # Only for HTML output
1379                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1380                         $text = wfEscapeWikiText( $text );
1381                 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1382                         # Clean up argument array
                             # Arguments without "=" are numbered from 1 in order of appearance;
                             # name=value arguments are keyed by the trimmed name
1383                         $assocArgs = array();
1384                         $index = 1;
1385                         foreach( $args as $arg ) {
1386                                 $eqpos = strpos( $arg, "=" );
1387                                 if ( $eqpos === false ) {
1388                                         $assocArgs[$index++] = $arg;
1389                                 } else {
1390                                         $name = trim( substr( $arg, 0, $eqpos ) );
1391                                         $value = trim( substr( $arg, $eqpos+1 ) );
                                             # NOTE(review): trim() returns a string, so the two
                                             # comparisons with false below look like they can
                                             # never change the outcome -- confirm
1392                                         if ( $value === false ) {
1393                                                 $value = "";
1394                                         }
1395                                         if ( $name !== false ) {
1396                                                 $assocArgs[$name] = $value;
1397                                         }
1398                                 }
1399                         }
1400
1401                         # Do not enter included links in link table
1402                         if ( !is_null( $title ) ) {
1403                                 $wgLinkCache->suspend();
1404                         }
1405
1406                         # Run full parser on the included text
                             # (bool)$newline tells stripParse() whether a newline preceded the braces
1407                         $text = $this->stripParse( $text, (bool)$newline, $assocArgs );
1408
1409                         # Resume the link cache and register the inclusion as a link
1410                         if ( !is_null( $title ) ) {
1411                                 $wgLinkCache->resume();
1412                                 $wgLinkCache->addLinkObj( $title );
1413                         }
1414                 }
1415
                     # Nothing applied: leave the original text in place
1416                 if ( !$found ) {
1417                         return $matches[0];
1418                 } else {
1419                         return $text;
1420                 }
1421         }
1422
# Triple brace replacement -- used for template arguments.
#
# $matches: [0] the whole {{{...}}} match, [1] an optional leading newline,
# [2] the argument name. The name is trimmed and looked up in the argument
# set on top of $this->mArgStack. A known argument is run through
# stripParse(); an unknown one leaves the matched text unchanged.
function argSubstitution( $matches )
{
        $callerArgs = end( $this->mArgStack );
        $argName = trim( $matches[2] );

        if ( !array_key_exists( $argName, $callerArgs ) ) {
                # Not supplied by the caller: keep the placeholder text
                return $matches[0];
        }

        $hadNewline = (bool)$matches[1];
        return $this->stripParse( $callerArgs[$argName], $hadNewline, array() );
}
1437
# Counts one more inclusion of $dbk in $this->mIncludeCount and reports
# whether the inclusion is still allowed: true while the running count for
# that key is at most MAX_INCLUDE_REPEAT, false afterwards.
function incrementIncludeCount( $dbk )
{
        # First sighting of this key starts its counter at zero
        if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
                $this->mIncludeCount[$dbk] = 0;
        }

        $this->mIncludeCount[$dbk]++;

        return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1450
1451
# Cleans up HTML, removes dangerous tags and attributes.
#
# $text is cut at every "<". Each piece that parses as a tag whose name is
# on the whitelist is re-emitted with its attributes passed through
# fixTagAttributes(); every other piece is emitted with "<" and ">" escaped.
# The whitelist is empty unless $wgUserHtml is set.
#
# Without $wgUseTidy a tag stack is kept as well: table cells/rows are only
# accepted inside a table, non-nestable tags may not be opened twice, a
# closing tag is only accepted if it matches the innermost open tag, and
# tags still open at the end are closed.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
        global $wgUseTidy, $wgUserHtml;
        $fname = "Parser::removeHTMLtags";
        wfProfileIn( $fname );

        if( $wgUserHtml ) {
                $htmlpairs = array( # Tags that must be closed
                        "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
                        "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
                        "strike", "strong", "tt", "var", "div", "center",
                        "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
                        "ruby", "rt" , "rb" , "rp", "p"
                );
                $htmlsingle = array(
                        "br", "hr", "li", "dt", "dd"
                );
                $htmlnest = array( # Tags that can be nested--??
                        "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
                        "dl", "font", "big", "small", "sub", "sup"
                );
                $tabletags = array( # Can only appear inside table
                        "td", "th", "tr"
                );
        } else {
                $htmlpairs = array();
                $htmlsingle = array();
                $htmlnest = array();
                $tabletags = array();
        }

        # Table cells and rows are treated like single tags (no stack entry)
        $htmlsingle = array_merge( $tabletags, $htmlsingle );
        $htmlelements = array_merge( $htmlsingle, $htmlpairs );

        # NOTE(review): the result is not used below; the call is kept in case
        # getHTMLattrs() has side effects -- confirm
        $htmlattrs = $this->getHTMLattrs () ;

        # Remove HTML comments
        # NOTE(review): the pattern has a single capturing group, so "$2"
        # presumably expands to nothing -- confirm
        $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );

        # Pieces of a tag: optional slash, name, attributes, closing brace, trailing text
        $tagRegex = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

        $bits = explode( "<", $text );
        $text = array_shift( $bits );
        if(!$wgUseTidy) {
                $tagstack = array(); $tablestack = array();
                foreach ( $bits as $x ) {
                        # Anything that does not look like a tag is escaped as a whole.
                        # Testing the match result replaces the former error_reporting()
                        # juggling around list().
                        if ( !preg_match( $tagRegex, $x, $regs ) ) {
                                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                                continue;
                        }
                        list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
                        $t = strtolower( $t );

                        $badtag = 0 ;
                        if ( in_array( $t, $htmlelements ) ) {
                                # Check our stack
                                if ( $slash ) {
                                        # Closing a tag...
                                        if ( in_array( $t, $htmlsingle ) ) {
                                                # Single tags are never on the stack
                                                $newparams = "";
                                        } else {
                                                # Compare against the innermost open tag. The old
                                                # form "( count(..) && $ot = array_pop(..) ) != $t"
                                                # compared a boolean with $t and so accepted any
                                                # closing tag while the stack was non-empty.
                                                $ot = count( $tagstack ) ? array_pop( $tagstack ) : NULL;
                                                if ( $ot === $t ) {
                                                        if ( $t == "table" ) {
                                                                # Back to the stack of the enclosing context
                                                                $tagstack = array_pop( $tablestack );
                                                        }
                                                        $newparams = "";
                                                } else {
                                                        # Mismatch: restore the stack, escape the tag
                                                        if ( !is_null( $ot ) ) {
                                                                array_push( $tagstack, $ot );
                                                        }
                                                        $badtag = 1;
                                                }
                                        }
                                } else {
                                        # Keep track for later
                                        if ( in_array( $t, $tabletags ) &&
                                        ! in_array( "table", $tagstack ) ) {
                                                $badtag = 1;
                                        } else if ( in_array( $t, $tagstack ) &&
                                        ! in_array ( $t , $htmlnest ) ) {
                                                $badtag = 1 ;
                                        } else if ( ! in_array( $t, $htmlsingle ) ) {
                                                if ( $t == "table" ) {
                                                        # A table starts a fresh stack; the outer one is parked
                                                        array_push( $tablestack, $tagstack );
                                                        $tagstack = array();
                                                }
                                                array_push( $tagstack, $t );
                                        }
                                        # Strip non-approved attributes from the tag
                                        $newparams = $this->fixTagAttributes($params);

                                }
                                if ( ! $badtag ) {
                                        $rest = str_replace( ">", "&gt;", $rest );
                                        $text .= "<$slash$t $newparams$brace$rest";
                                        continue;
                                }
                        }
                        $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                }
                # Close off any remaining tags
                while ( $t = array_pop( $tagstack ) ) {
                        $text .= "</$t>\n";
                        if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
                }
        } else {
                # this might be possible using tidy itself
                foreach ( $bits as $x ) {
                        $allowed = false;
                        if ( preg_match( $tagRegex, $x, $regs ) ) {
                                list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
                                $t = strtolower( $t );
                                $allowed = in_array( $t, $htmlelements );
                        }
                        if ( $allowed ) {
                                $newparams = $this->fixTagAttributes($params);
                                $rest = str_replace( ">", "&gt;", $rest );
                                $text .= "<$slash$t $newparams$brace$rest";
                        } else {
                                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                        }
                }
        }
        wfProfileOut( $fname );
        return $text;
}
1568
1569
1570 /*
1571  *
1572  * This function accomplishes several tasks:
1573  * 1) Auto-number headings if that option is enabled
1574  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1575  * 3) Add a Table of contents on the top for users who have enabled the option
1576  * 4) Auto-anchor headings
1577  *
1578  * It loops through all headlines, collects the necessary data, then splits up the
1579  * string and re-inserts the newly formatted headlines.
1580  *
1581  */
1582
1583         /* private */ function formatHeadings( $text, $isMain=true )
1584         {
                     # $text:   HTML in which headings already are <h1>..<h6> elements
                     #          (they are located with the regexes below)
                     # $isMain: the table of contents is only inserted when this is true
                     #
                     # Returns $text with every heading replaced by its rebuilt version
                     # (anchor, optional numbering, optional edit link / right-click
                     # script) and, if enabled, the TOC appended to the first block.
1585                 global $wgInputEncoding;
1586
1587                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1588                 $doShowToc = $this->mOptions->getShowToc();
                     # No edit links or right-click editing on pages the user cannot edit
1589                 if( !$this->mTitle->userCanEdit() ) {
1590                         $showEditLink = 0;
1591                         $rightClickHack = 0;
1592                 } else {
1593                         $showEditLink = $this->mOptions->getEditSection();
1594                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1595                 }
1596
1597                 # Inhibit editsection links if requested in the page
1598                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1599                 if( $esw->matchAndRemove( $text ) ) {
1600                         $showEditLink = 0;
1601                 }
1602                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1603                 # do not add TOC
1604                 $mw =& MagicWord::get( MAG_NOTOC );
1605                 if( $mw->matchAndRemove( $text ) ) {
1606                         $doShowToc = 0;
1607                 }
1608
1609                 # never add the TOC to the Main Page. This is an entry page that should not
1610                 # be more than 1-2 screens large anyway
1611                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1612                         $doShowToc = 0;
1613                 }
1614
1615                 # Get all headlines for numbering them and adding funky stuff like [edit]
1616                 # links - this is for later, but we need the number of headlines right now
                     # $matches[1] = heading levels, [2] = rest of the opening tag up to and
                     # including ">", [3] = heading contents
1617                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1618
1619                 # if there are fewer than 4 headlines in the article, do not show TOC
1620                 if( $numMatches < 4 ) {
1621                         $doShowToc = 0;
1622                 }
1623
1624                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1625                 # override above conditions and always show TOC
1626                 $mw =& MagicWord::get( MAG_FORCETOC );
1627                 if ($mw->matchAndRemove( $text ) ) {
1628                         $doShowToc = 1;
1629                 }
1630
1631
1632                 # We need this to perform operations on the HTML
1633                 $sk =& $this->mOptions->getSkin();
1634
1635                 # headline counter
1636                 $headlineCount = 0;
1637
1638                 # Ugh .. the TOC should have neat indentation levels which can be
1639                 # passed to the skin functions. These are determined here
                     # $toc:  TOC markup built so far      $full: output text built so far
                     # $head: rebuilt heading per headline index
                     # $sublevelCount: running heading count per level, used for numbering
1640                 $toclevel = 0;
1641                 $toc = "";
1642                 $full = "";
1643                 $head = array();
1644                 $sublevelCount = array();
1645                 $level = 0;
1646                 $prevlevel = 0;
1647                 foreach( $matches[3] as $headline ) {
1648                         $numbering = "";
                             # $prevlevel keeps the level of the previous headline (0 before the first)
1649                         if( $level ) {
1650                                 $prevlevel = $level;
1651                         }
1652                         $level = $matches[1][$headlineCount];
1653                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1654                                 # reset when we enter a new level
1655                                 $sublevelCount[$level] = 0;
1656                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1657                                 $toclevel += $level - $prevlevel;
1658                         }
1659                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1660                                 # reset when we step back a level
1661                                 $sublevelCount[$level+1]=0;
1662                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1663                                 $toclevel -= $prevlevel - $level;
1664                         }
1665                         # count number of headlines for each level
                             # (@ hides the notice for a level that has no entry yet)
1666                         @$sublevelCount[$level]++;
                             # Join the non-empty counters of levels 1..$level with dots, e.g. "2.1"
1667                         if( $doNumberHeadings || $doShowToc ) {
1668                                 $dot = 0;
1669                                 for( $i = 1; $i <= $level; $i++ ) {
1670                                         if( !empty( $sublevelCount[$i] ) ) {
1671                                                 if( $dot ) {
1672                                                         $numbering .= ".";
1673                                                 }
1674                                                 $numbering .= $sublevelCount[$i];
1675                                                 $dot = 1;
1676                                         }
1677                                 }
1678                         }
1679
1680                         # The canonized header is a version of the header text safe to use for links
1681                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1682                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1683
1684                         # strip out HTML
1685                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                             # $tocline is the tag-free, trimmed heading text shown in the TOC
1686                         $tocline = trim( $canonized_headline );
                             # Anchor text: entities decoded, URL-encoded, then runs of the listed
                             # characters collapsed to "_"
1687                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
1688                         # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
1689                         $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
                             # NOTE(review): $refer is written here but not read in this function
1690                         $refer[$headlineCount] = $canonized_headline;
1691
1692                         # count how many in assoc. array so we can track dupes in anchors
                             # ($refers and $refcount are not initialised above; @ hides the notice)
1693                         @$refers[$canonized_headline]++;
1694                         $refcount[$headlineCount]=$refers[$canonized_headline];
1695
1696                         # Prepend the number to the heading text
1697
1698                         if( $doNumberHeadings || $doShowToc ) {
1699                                 $tocline = $numbering . " " . $tocline;
1700
1701                                 # Don't number the heading if it is the only one (looks silly)
1702                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1703                                         # the two are different if the line contains a link
1704                                         $headline=$numbering . " " . $headline;
1705                                 }
1706                         }
1707
1708                         # Create the anchor for linking from the TOC to the section
                             # Repeated headings get "_2", "_3", ... appended
1709                         $anchor = $canonized_headline;
1710                         if($refcount[$headlineCount] > 1 ) {
1711                                 $anchor .= "_" . $refcount[$headlineCount];
1712                         }
1713                         if( $doShowToc ) {
1714                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1715                         }
                             # The edit link goes in front of the heading; sections are numbered from 1
1716                         if( $showEditLink ) {
1717                                 if ( empty( $head[$headlineCount] ) ) {
1718                                         $head[$headlineCount] = "";
1719                                 }
1720                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1721                         }
1722
1723                         # Add the edit section span
1724                         if( $rightClickHack ) {
1725                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1726                         }
1727
1728                         # give headline the correct <h#> tag
                             # $matches[2] re-inserts the original attributes and the closing ">"
1729                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1730
1731                         $headlineCount++;
1732                 }
1733
1734                 if( $doShowToc ) {
                             # NOTE(review): $toclines is assigned but not read in this function
1735                         $toclines = $headlineCount;
                             # Close whatever indentation is still open, then wrap the TOC
1736                         $toc .= $sk->tocUnindent( $toclevel );
1737                         $toc = $sk->tocTable( $toc );
1738                 }
1739
1740                 # split up and insert constructed headlines
1741
                     # Same heading pattern as above without capture groups, so $blocks[$i]
                     # is the text in front of headline $i
1742                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1743                 $i = 0;
1744
1745                 foreach( $blocks as $block ) {
                             # This if has no active statements: its only statement is commented out
1746                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1747                             # This is the [edit] link that appears for the top block of text when
1748                                 # section editing is enabled
1749
1750                                 # Disabled because it broke block formatting
1751                                 # For example, a bullet point in the top line
1752                                 # $full .= $sk->editSectionLink(0);
1753                         }
1754                         $full .= $block;
                             # The TOC follows the first block (the text before the first heading)
1755                         if( $doShowToc && !$i && $isMain) {
1756                         # Top anchor now in skin
1757                                 $full = $full.$toc;
1758                         }
1759
1760                         if( !empty( $head[$i] ) ) {
1761                                 $full .= $head[$i];
1762                         }
1763                         $i++;
1764                 }
1765
1766                 return $full;
1767         }
1768
1769         /* private */ function magicISBN( $text )
1770         {
1771                 global $wgLang;
1772
1773                 $a = split( "ISBN ", " $text" );
1774                 if ( count ( $a ) < 2 ) return $text;
1775                 $text = substr( array_shift( $a ), 1);
1776                 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1777
1778                 foreach ( $a as $x ) {
1779                         $isbn = $blank = "" ;
1780                         while ( " " == $x{0} ) {
1781                                 $blank .= " ";
1782                                 $x = substr( $x, 1 );
1783                         }
1784                         while ( strstr( $valid, $x{0} ) != false ) {
1785                                 $isbn .= $x{0};
1786                                 $x = substr( $x, 1 );
1787                         }
1788                         $num = str_replace( "-", "", $isbn );
1789                         $num = str_replace( " ", "", $num );
1790
1791                         if ( "" == $num ) {
1792                                 $text .= "ISBN $blank$x";
1793                         } else {
1794                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1795                                 $text .= "<a href=\"" .
1796                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1797                                         "\" class=\"internal\">ISBN $isbn</a>";
1798                                 $text .= $x;
1799                         }
1800                 }
1801                 return $text;
1802         }
1803         /* private */ function magicRFC( $text )
1804         {
1805                 global $wgLang;
1806
1807                 $a = split( "ISBN ", " $text" );
1808                 if ( count ( $a ) < 2 ) return $text;
1809                 $text = substr( array_shift( $a ), 1);
1810                 $valid = "0123456789";
1811
1812                 foreach ( $a as $x ) {
1813                         $rfc = $blank = "" ;
1814                         while ( " " == $x{0} ) {
1815                                 $blank .= " ";
1816                                 $x = substr( $x, 1 );
1817                         }
1818                         while ( strstr( $valid, $x{0} ) != false ) {
1819                                 $rfc .= $x{0};
1820                                 $x = substr( $x, 1 );
1821                         }
1822
1823                         if ( "" == $rfc ) {
1824                                 $text .= "RFC $blank$x";
1825                         } else {
1826                                 $url = wfmsg( "rfcurl" );
1827                                 $url = str_replace( "$1", $rfc, $url);
1828                                 $sk =& $this->mOptions->getSkin();
1829                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1830                                 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1831                         }
1832                 }
1833                 return $text;
1834         }
1835
1836         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1837         {
1838                 $this->mOptions = $options;
1839                 $this->mTitle =& $title;
1840                 $this->mOutputType = OT_WIKI;
1841
1842                 if ( $clearState ) {
1843                         $this->clearState();
1844                 }
1845
1846                 $stripState = false;
1847                 $pairs = array(
1848                         "\r\n" => "\n",
1849                         );
1850                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
1851                 // now with regexes
1852                 $pairs = array(
1853                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1854                         "/<br *?>/i" => "<br/>",
1855                 );
1856                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1857                 $text = $this->strip( $text, $stripState, false );
1858                 $text = $this->pstPass2( $text, $user );
1859                 $text = $this->unstrip( $text, $stripState );
1860                 return $text;
1861         }
1862
1863         /* private */ function pstPass2( $text, &$user )
1864         {
1865                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1866
1867                 # Variable replacement
1868                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1869                 $text = $this->replaceVariables( $text );
1870
1871                 # Signatures
1872                 #
1873                 $n = $user->getName();
1874                 $k = $user->getOption( "nickname" );
1875                 if ( "" == $k ) { $k = $n; }
1876                 if(isset($wgLocaltimezone)) {
1877                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1878                 }
1879                 /* Note: this is an ugly timezone hack for the European wikis */
1880                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1881                   " (" . date( "T" ) . ")";
1882                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1883
1884                 $text = preg_replace( "/~~~~~/", $d, $text );
1885                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1886                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1887                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1888                   Namespace::getUser() ) . ":$n|$k]]", $text );
1889
1890                 # Context links: [[|name]] and [[name (context)|]]
1891                 #
1892                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1893                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1894                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1895                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1896
1897                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1898                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1899                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1900                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1901                                                                                                                 # [[ns:page (cont)|]]
1902                 $context = "";
1903                 $t = $this->mTitle->getText();
1904                 if ( preg_match( $conpat, $t, $m ) ) {
1905                         $context = $m[2];
1906                 }
1907                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1908                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1909                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1910
1911                 if ( "" == $context ) {
1912                         $text = preg_replace( $p2, "[[\\1]]", $text );
1913                 } else {
1914                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1915                 }
1916
1917                 /*
1918                 $mw =& MagicWord::get( MAG_SUBST );
1919                 $wgCurParser = $this->fork();
1920                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1921                 $this->merge( $wgCurParser );
1922                 */
1923
1924                 # Trim trailing whitespace
1925                 # MAG_END (__END__) tag allows for trailing
1926                 # whitespace to be deliberately included
1927                 $text = rtrim( $text );
1928                 $mw =& MagicWord::get( MAG_END );
1929                 $mw->matchAndRemove( $text );
1930
1931                 return $text;
1932         }
1933
1934         # Set up some variables which are usually set up in parse()
1935         # so that an external function can call some class members with confidence
1936         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1937         {
1938                 $this->mTitle =& $title;
1939                 $this->mOptions = $options;
1940                 $this->mOutputType = $outputType;
1941                 if ( $clearState ) {
1942                         $this->clearState();
1943                 }
1944         }
1945
1946         function transformMsg( $text, $options ) {
1947                 global $wgTitle;
1948                 static $executing = false;
1949
1950                 # Guard against infinite recursion
1951                 if ( $executing ) {
1952                         return $text;
1953                 }
1954                 $executing = true;
1955
1956                 $this->mTitle = $wgTitle;
1957                 $this->mOptions = $options;
1958                 $this->mOutputType = OT_MSG;
1959                 $this->clearState();
1960                 $text = $this->replaceVariables( $text );
1961
1962                 $executing = false;
1963                 return $text;
1964         }
1965 }
1966
# Value object holding the result of a parse: the output text plus the
# language links, category links and "old magic" flag collected with it.
class ParserOutput
{
        # mText:             output text
        # mLanguageLinks:    array of language links
        # mCategoryLinks:    array of category links
        # mContainsOldMagic: boolean flag, see containsOldMagic()
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor; every argument is optional and is stored as given
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        # (presumably the previous value -- confirm against wfSetVar)
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the links and flag of another ParserOutput into this one.
        # The text is not merged.
        #
        # $other: the ParserOutput whose links and flag are added
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links. This used to append
                # this object's own language links, which dropped $other's categories
                # and polluted the category list.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1996
# Bundle of settings handed to the parser. Five values are copied from site
# globals and the rest from a user object; see initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # --- Accessor / mutator pairs, one pair per option ---
        # Every setter returns the result of wfSetVar() (wfSetRef() for the skin).

        function getUseTeX()
        {
                return $this->mUseTeX;
        }
        function setUseTeX( $x )
        {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function getUseCategoryMagic()
        {
                return $this->mUseCategoryMagic;
        }
        function setUseCategoryMagic( $x )
        {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        function getUseDynamicDates()
        {
                return $this->mUseDynamicDates;
        }
        function setUseDynamicDates( $x )
        {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function getInterwikiMagic()
        {
                return $this->mInterwikiMagic;
        }
        function setInterwikiMagic( $x )
        {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function getAllowExternalImages()
        {
                return $this->mAllowExternalImages;
        }
        function setAllowExternalImages( $x )
        {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function getSkin()
        {
                return $this->mSkin;
        }
        function setSkin( $x )
        {
                return wfSetRef( $this->mSkin, $x );
        }

        function getDateFormat()
        {
                return $this->mDateFormat;
        }
        function setDateFormat( $x )
        {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function getEditSection()
        {
                return $this->mEditSection;
        }
        function setEditSection( $x )
        {
                return wfSetVar( $this->mEditSection, $x );
        }

        function getEditSectionOnRightClick()
        {
                return $this->mEditSectionOnRightClick;
        }
        function setEditSectionOnRightClick( $x )
        {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function getNumberHeadings()
        {
                return $this->mNumberHeadings;
        }
        function setNumberHeadings( $x )
        {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function getShowToc()
        {
                return $this->mShowToc;
        }
        function setShowToc( $x )
        {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Create a ParserOptions object and initialise it from $user
        /* static */ function newFromUser( &$user )
        {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fill in every option: the site-wide ones from their globals, the
        # per-user ones from $userInput. A false/empty $userInput is replaced by
        # a fresh User object marked as loaded.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $source =& $userInput;
                } else {
                        $source = new User;
                        $source->setLoaded( true );
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user settings
                $this->mSkin =& $source->getSkin();
                $this->mDateFormat = $source->getOption( "date" );
                $this->mEditSection = $source->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $source->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $source->getOption( "numberheadings" );
                $this->mShowToc = $source->getOption( "showtoc" );
        }
}
2069
2070 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches )
{
        global $wgCurParser;

        # Plain-function adapter: hand the regex match array to the parser
        # object currently held in $wgCurParser and pass its result back
        $replacement = $wgCurParser->braceSubstitution( $matches );
        return $replacement;
}
2076
function wfArgSubstitution( $matches )
{
        global $wgCurParser;

        # Plain-function adapter: hand the regex match array to the parser
        # object currently held in $wgCurParser and pass its result back
        $replacement = $wgCurParser->argSubstitution( $matches );
        return $replacement;
}
2082
2083 ?>