includes/Parser.php

   1 <?php
   2
   3 /**
   4  * File for Parser and related classes
   5  *
   6  * @package MediaWiki
   7  * @version $Id$
   8  */
   9
  10 /**
  11  * Variable substitution O(N^2) attack
  12  *
  13  * Without countermeasures, it would be possible to attack the parser by saving
  14  * a page filled with a large number of inclusions of large pages. The size of
  15  * the generated page would be proportional to the square of the input size.
  16  * Hence, we limit the number of inclusions of any given page, thus bringing any
  17  * attack back to O(N).
  18  */
  19 define( 'MAX_INCLUDE_REPEAT', 100 );
  20 define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million
  21
  22 # Allowed values for $mOutputType
  23 define( 'OT_HTML', 1 );
  24 define( 'OT_WIKI', 2 );
  25 define( 'OT_MSG' , 3 );
  26
  27 # string parameter for extractTags which will cause it
  28 # to strip HTML comments in addition to regular
  29 # <XML>-style tags. This should not be anything we
  30 # may want to use in wikisyntax
  31 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
  32
  33 # prefix for escaping, used in two functions at least
  34 define( 'UNIQ_PREFIX', 'NaodW29');
  35
  36 # Constants needed for external link processing
  37 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
  38 define( 'HTTP_PROTOCOLS', 'http|https' );
  39 # Everything except bracket, space, or control characters
  40 define( 'EXT_LINK_URL_CLASS', '[^]<>\\x00-\\x20\\x7F]' );
  41 # Including space
  42 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
  43 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
  44 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
  45 define( 'EXT_LINK_BRACKETED',  '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
  46 define( 'EXT_IMAGE_REGEX',
  47         '/^('.HTTP_PROTOCOLS.':)'.  # Protocol
  48         '('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
  49         '('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
  50 );
  51
  52 /**
  53  * PHP Parser
  54  *
  55  * Processes wiki markup
  56  *
  57  * <pre>
  58  * There are three main entry points into the Parser class:
  59  * parse()
  60  *   produces HTML output
   61  * preSaveTransform()
  62  *   produces altered wiki markup.
  63  * transformMsg()
  64  *   performs brace substitution on MediaWiki messages
  65  *
  66  * Globals used:
  67  *    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  68  *
  69  * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  70  *
  71  * settings:
  72  *  $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  73  *  $wgNamespacesWithSubpages, $wgAllowExternalImages*,
  74  *  $wgLocaltimezone
  75  *
  76  *  * only within ParserOptions
  77  * </pre>
  78  *
  79  * @package MediaWiki
  80  */
  81 class Parser
  82 {
  83         /**#@+
  84          * @access private
  85          */
  86         # Persistent:
  87         var $mTagHooks;
  88
  89         # Cleared with clearState():
  90         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  91         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  92
  93         # Temporary:
  94         var $mOptions, $mTitle, $mOutputType,
  95             $mTemplates,        // cache of already loaded templates, avoids
  96                                 // multiple SQL queries for the same string
  97             $mTemplatePath;     // stores an unsorted hash of all the templates already loaded
  98                                 // in this path. Used for loop detection.
  99
 100         /**#@-*/
 101
 102         /**
 103          * Constructor
 104          *
 105          * @access public
 106          */
 107         function Parser() {
 108                 $this->mTemplates = array();
 109                 $this->mTemplatePath = array();
 110                 $this->mTagHooks = array();
 111                 $this->clearState();
 112         }
 113
 114         /**
 115          * Clear Parser state
 116          *
 117          * @access private
 118          */
 119         function clearState() {
 120                 $this->mOutput = new ParserOutput;
 121                 $this->mAutonumber = 0;
 122                 $this->mLastSection = "";
 123                 $this->mDTopen = false;
 124                 $this->mVariables = false;
 125                 $this->mIncludeCount = array();
 126                 $this->mStripState = array();
 127                 $this->mArgStack = array();
 128                 $this->mInPre = false;
 129         }
 130
 131         /**
 132          * First pass--just handle <nowiki> sections, pass the rest off
 133          * to internalParse() which does all the real work.
 134          *
  135          * @access public
 136          * @return ParserOutput a ParserOutput
 137          */
 138         function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
 139                 global $wgUseTidy;
 140                 $fname = 'Parser::parse';
 141                 wfProfileIn( $fname );
 142
 143                 if ( $clearState ) {
 144                         $this->clearState();
 145                 }
 146
 147                 $this->mOptions = $options;
 148                 $this->mTitle =& $title;
 149                 $this->mOutputType = OT_HTML;
 150
 151                 $stripState = NULL;
 152                 $text = $this->strip( $text, $this->mStripState );
 153                 $text = $this->internalParse( $text, $linestart );
 154                 $text = $this->unstrip( $text, $this->mStripState );
 155                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 156                 if(!$wgUseTidy) {
 157                         $fixtags = array(
 158                                 # french spaces, last one Guillemet-left
 159                                 # only if there is something before the space
 160                                 '/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;\\2',
 161                                 # french spaces, Guillemet-right
 162                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 163                                 '/<hr *>/i' => '<hr />',
 164                                 '/<br *>/i' => '<br />',
 165                                 '/<center *>/i' => '<div class="center">',
 166                                 '/<\\/center *>/i' => '</div>',
 167                                 # Clean up spare ampersands; note that we probably ought to be
 168                                 # more careful about named entities.
 169                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 170                         );
 171                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 172                 } else {
 173                         $fixtags = array(
 174                                 # french spaces, last one Guillemet-left
 175                                 '/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
 176                                 # french spaces, Guillemet-right
 177                                 '/(\\302\\253) /i' => '\\1&nbsp;',
 178                                 '/<center *>/i' => '<div class="center">',
 179                                 '/<\\/center *>/i' => '</div>'
 180                         );
 181                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 182                 }
 183                 # only once and last
 184                 $text = $this->doBlockLevels( $text, $linestart );
 185                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 186                 $this->mOutput->setText( $text );
 187                 wfProfileOut( $fname );
 188                 return $this->mOutput;
 189         }
 190
 191         /**
 192          * Get a random string
 193          *
 194          * @access private
 195          * @static
 196          */
 197         function getRandomString() {
 198                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 199         }
 200
 201         /**
 202          * Replaces all occurrences of <$tag>content</$tag> in the text
 203          * with a random marker and returns the new text. the output parameter
 204          * $content will be an associative array filled with data on the form
 205          * $unique_marker => content.
 206          *
 207          * If $content is already set, the additional entries will be appended
 208          * If $tag is set to STRIP_COMMENTS, the function will extract
 209          * <!-- HTML comments -->
 210          *
 211          * @access private
 212          * @static
 213          */
 214         function extractTags($tag, $text, &$content, $uniq_prefix = ''){
 215                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 216                 if ( !$content ) {
 217                         $content = array( );
 218                 }
 219                 $n = 1;
 220                 $stripped = '';
 221
 222                 while ( '' != $text ) {
 223                         if($tag==STRIP_COMMENTS) {
 224                                 $p = preg_split( '/<!--/i', $text, 2 );
 225                         } else {
 226                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 227                         }
 228                         $stripped .= $p[0];
 229                         if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
 230                                 $text = '';
 231                         } else {
 232                                 if($tag==STRIP_COMMENTS) {
 233                                         $q = preg_split( '/-->/i', $p[1], 2 );
 234                                 } else {
 235                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 236                                 }
 237                                 $marker = $rnd . sprintf('%08X', $n++);
 238                                 $content[$marker] = $q[0];
 239                                 $stripped .= $marker;
 240                                 $text = $q[1];
 241                         }
 242                 }
 243                 return $stripped;
 244         }
 245
 246         /**
 247          * Strips and renders nowiki, pre, math, hiero
 248          * If $render is set, performs necessary rendering operations on plugins
 249          * Returns the text, and fills an array with data needed in unstrip()
 250          * If the $state is already a valid strip state, it adds to the state
 251          *
 252          * @param bool $stripcomments when set, HTML comments <!-- like this -->
 253          *  will be stripped in addition to other tags. This is important
 254          *  for section editing, where these comments cause confusion when
 255          *  counting the sections in the wikisource
 256          *
 257          * @access private
 258          */
 259         function strip( $text, &$state, $stripcomments = false ) {
 260                 $render = ($this->mOutputType == OT_HTML);
 261                 $html_content = array();
 262                 $nowiki_content = array();
 263                 $math_content = array();
 264                 $pre_content = array();
 265                 $comment_content = array();
 266                 $ext_content = array();
 267
 268                 # Replace any instances of the placeholders
 269                 $uniq_prefix = UNIQ_PREFIX;
 270                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 271
 272                 # html
 273                 global $wgRawHtml, $wgWhitelistEdit;
 274                 if( $wgRawHtml && $wgWhitelistEdit ) {
 275                         $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
 276                         foreach( $html_content as $marker => $content ) {
 277                                 if ($render ) {
 278                                         # Raw and unchecked for validity.
 279                                         $html_content[$marker] = $content;
 280                                 } else {
 281                                         $html_content[$marker] = '<html>'.$content.'</html>';
 282                                 }
 283                         }
 284                 }
 285
 286                 # nowiki
 287                 $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
 288                 foreach( $nowiki_content as $marker => $content ) {
 289                         if( $render ){
 290                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 291                         } else {
 292                                 $nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
 293                         }
 294                 }
 295
 296                 # math
 297                 $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
 298                 foreach( $math_content as $marker => $content ){
 299                         if( $render ) {
 300                                 if( $this->mOptions->getUseTeX() ) {
 301                                         $math_content[$marker] = renderMath( $content );
 302                                 } else {
 303                                         $math_content[$marker] = '&lt;math&gt;'.$content.'&lt;math&gt;';
 304                                 }
 305                         } else {
 306                                 $math_content[$marker] = '<math>'.$content.'</math>';
 307                         }
 308                 }
 309
 310                 # pre
 311                 $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
 312                 foreach( $pre_content as $marker => $content ){
 313                         if( $render ){
 314                                 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
 315                         } else {
 316                                 $pre_content[$marker] = '<pre>'.$content.'</pre>';
 317                         }
 318                 }
 319
 320                 # Comments
 321                 if($stripcomments) {
 322                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 323                         foreach( $comment_content as $marker => $content ){
 324                                 $comment_content[$marker] = '<!--'.$content.'-->';
 325                         }
 326                 }
 327
 328                 # Extensions
 329                 foreach ( $this->mTagHooks as $tag => $callback ) {
 330                         $ext_contents[$tag] = array();
 331                         $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
 332                         foreach( $ext_content[$tag] as $marker => $content ) {
 333                                 if ( $render ) {
 334                                         $ext_content[$tag][$marker] = $callback( $content );
 335                                 } else {
 336                                         $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
 337                                 }
 338                         }
 339                 }
 340
 341                 # Merge state with the pre-existing state, if there is one
 342                 if ( $state ) {
 343                         $state['html'] = $state['html'] + $html_content;
 344                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 345                         $state['math'] = $state['math'] + $math_content;
 346                         $state['pre'] = $state['pre'] + $pre_content;
 347                         $state['comment'] = $state['comment'] + $comment_content;
 348
 349                         foreach( $ext_content as $tag => $array ) {
 350                                 if ( array_key_exists( $tag, $state ) ) {
 351                                         $state[$tag] = $state[$tag] + $array;
 352                                 }
 353                         }
 354                 } else {
 355                         $state = array(
 356                           'html' => $html_content,
 357                           'nowiki' => $nowiki_content,
 358                           'math' => $math_content,
 359                           'pre' => $pre_content,
 360                           'comment' => $comment_content,
 361                         ) + $ext_content;
 362                 }
 363                 return $text;
 364         }
 365
 366         /**
  367          * restores pre, math, and hiero removed by strip()
 368          *
 369          * always call unstripNoWiki() after this one
 370          * @access private
 371          */
 372         function unstrip( $text, &$state ) {
 373                 # Must expand in reverse order, otherwise nested tags will be corrupted
 374                 $contentDict = end( $state );
 375                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 376                         if( key($state) != 'nowiki' && key($state) != 'html') {
 377                                 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 378                                         $text = str_replace( key( $contentDict ), $content, $text );
 379                                 }
 380                         }
 381                 }
 382
 383                 return $text;
 384         }
 385
 386         /**
 387          * always call this after unstrip() to preserve the order
 388          *
 389          * @access private
 390          */
 391         function unstripNoWiki( $text, &$state ) {
 392                 # Must expand in reverse order, otherwise nested tags will be corrupted
 393                 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
 394                         $text = str_replace( key( $state['nowiki'] ), $content, $text );
 395                 }
 396
 397                 global $wgRawHtml;
 398                 if ($wgRawHtml) {
 399                         for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
 400                                 $text = str_replace( key( $state['html'] ), $content, $text );
 401                         }
 402                 }
 403
 404                 return $text;
 405         }
 406
 407         /**
 408          * Add an item to the strip state
 409          * Returns the unique tag which must be inserted into the stripped text
 410          * The tag will be replaced with the original text in unstrip()
 411          *
 412          * @access private
 413          */
 414         function insertStripItem( $text, &$state ) {
 415                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 416                 if ( !$state ) {
 417                         $state = array(
 418                           'html' => array(),
 419                           'nowiki' => array(),
 420                           'math' => array(),
 421                           'pre' => array()
 422                         );
 423                 }
 424                 $state['item'][$rnd] = $text;
 425                 return $rnd;
 426         }
 427
 428         /**
 429          * Return allowed HTML attributes
 430          *
 431          * @access private
 432          */
 433         function getHTMLattrs () {
 434                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 435                                 'title', 'align', 'lang', 'dir', 'width', 'height',
 436                                 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
 437                                 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
 438                                 /* FONT */ 'type', 'start', 'value', 'compact',
 439                                 /* For various lists, mostly deprecated but safe */
 440                                 'summary', 'width', 'border', 'frame', 'rules',
 441                                 'cellspacing', 'cellpadding', 'valign', 'char',
 442                                 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
 443                                 'headers', 'scope', 'rowspan', 'colspan', /* Tables */
 444                                 'id', 'class', 'name', 'style' /* For CSS */
 445                                 );
 446                 return $htmlattrs ;
 447         }
 448
 449         /**
 450          * Remove non approved attributes and javascript in css
 451          *
 452          * @access private
 453          */
 454         function fixTagAttributes ( $t ) {
 455                 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
 456                 $htmlattrs = $this->getHTMLattrs() ;
 457
 458                 # Strip non-approved attributes from the tag
 459                 $t = preg_replace(
 460                         '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
 461                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 462                         $t);
 463
 464                 $t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557
 465
 466                 # Strip javascript "expression" from stylesheets. Brute force approach:
 467                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 468
 469                 if( preg_match(
 470                         '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
 471                         wfMungeToUtf8( $t ) ) )
 472                 {
 473                         $t='';
 474                 }
 475
 476                 return trim ( $t ) ;
 477         }
 478
 479         /**
 480          * interface with html tidy, used if $wgUseTidy = true
 481          *
 482          * @access public
 483          * @static
 484          */
 485         function tidy ( $text ) {
 486                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 487                 global $wgInputEncoding, $wgOutputEncoding;
 488                 $fname = 'Parser::tidy';
 489                 wfProfileIn( $fname );
 490
 491                 $cleansource = '';
 492                 $opts = '';
 493                 switch(strtoupper($wgOutputEncoding)) {
 494                         case 'ISO-8859-1':
 495                                 $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 496                                 break;
 497                         case 'UTF-8':
 498                                 $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 499                                 break;
 500                         default:
 501                                 $opts .= ' -raw';
 502                         }
 503
 504                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 505 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 506 '<head><title>test</title></head><body>'.$text.'</body></html>';
 507                 $descriptorspec = array(
 508                         0 => array('pipe', 'r'),
 509                         1 => array('pipe', 'w'),
 510                         2 => array('file', '/dev/null', 'a')
 511                 );
 512                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes);
 513                 if (is_resource($process)) {
 514                         fwrite($pipes[0], $wrappedtext);
 515                         fclose($pipes[0]);
 516                         while (!feof($pipes[1])) {
 517                                 $cleansource .= fgets($pipes[1], 1024);
 518                         }
 519                         fclose($pipes[1]);
 520                         $return_value = proc_close($process);
 521                 }
 522
 523                 wfProfileOut( $fname );
 524
 525                 if( $cleansource == '' && $text != '') {
 526                         wfDebug( "Tidy error detected!\n" );
 527                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 528                 } else {
 529                         return $cleansource;
 530                 }
 531         }
 532
 533         /**
 534          * parse the wiki syntax used to render tables
 535          *
 536          * @access private
 537          */
 538         function doTableStuff ( $t ) {
 539                 $fname = 'Parser::doTableStuff';
 540                 wfProfileIn( $fname );
 541
 542                 $t = explode ( "\n" , $t ) ;
 543                 $td = array () ; # Is currently a td tag open?
 544                 $ltd = array () ; # Was it TD or TH?
 545                 $tr = array () ; # Is currently a tr tag open?
 546                 $ltr = array () ; # tr attributes
 547                 $indent_level = 0; # indent level of the table
 548                 foreach ( $t AS $k => $x )
 549                 {
 550                         $x = trim ( $x ) ;
 551                         $fc = substr ( $x , 0 , 1 ) ;
 552                         if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
 553                                 $indent_level = strlen( $matches[1] );
 554                                 $t[$k] = "\n" .
 555                                         str_repeat( '<dl><dd>', $indent_level ) .
 556                                         '<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
 557                                 array_push ( $td , false ) ;
 558                                 array_push ( $ltd , '' ) ;
 559                                 array_push ( $tr , false ) ;
 560                                 array_push ( $ltr , '' ) ;
 561                         }
 562                         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 563                         else if ( '|}' == substr ( $x , 0 , 2 ) ) {
 564                                 $z = "</table>\n" ;
 565                                 $l = array_pop ( $ltd ) ;
 566                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 567                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 568                                 array_pop ( $ltr ) ;
 569                                 $t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
 570                         }
 571                         else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
 572                                 $x = substr ( $x , 1 ) ;
 573                                 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 574                                 $z = '' ;
 575                                 $l = array_pop ( $ltd ) ;
 576                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 577                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 578                                 array_pop ( $ltr ) ;
 579                                 $t[$k] = $z ;
 580                                 array_push ( $tr , false ) ;
 581                                 array_push ( $td , false ) ;
 582                                 array_push ( $ltd , '' ) ;
 583                                 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 584                         }
 585                         else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
 586                                 # $x is a table row
 587                                 if ( '|+' == substr ( $x , 0 , 2 ) ) {
 588                                         $fc = '+' ;
 589                                         $x = substr ( $x , 1 ) ;
 590                                 }
 591                                 $after = substr ( $x , 1 ) ;
 592                                 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
 593                                 $after = explode ( '||' , $after ) ;
 594                                 $t[$k] = '' ;
 595
 596                                 # Loop through each table cell
 597                                 foreach ( $after AS $theline )
 598                                 {
 599                                         $z = '' ;
 600                                         if ( $fc != '+' )
 601                                         {
 602                                                 $tra = array_pop ( $ltr ) ;
 603                                                 if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
 604                                                 array_push ( $tr , true ) ;
 605                                                 array_push ( $ltr , '' ) ;
 606                                         }
 607
 608                                         $l = array_pop ( $ltd ) ;
 609                                         if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 610                                         if ( $fc == '|' ) $l = 'td' ;
 611                                         else if ( $fc == '!' ) $l = 'th' ;
 612                                         else if ( $fc == '+' ) $l = 'caption' ;
 613                                         else $l = '' ;
 614                                         array_push ( $ltd , $l ) ;
 615
 616                                         # Cell parameters
 617                                         $y = explode ( '|' , $theline , 2 ) ;
 618                                         # Note that a '|' inside an invalid link should not
 619                                         # be mistaken as delimiting cell parameters
 620                                         if ( strpos( $y[0], '[[' ) !== false ) {
 621                                                 $y = array ($theline);
 622                                         }
 623                                         if ( count ( $y ) == 1 )
 624                                                 $y = "{$z}<{$l}>{$y[0]}" ;
 625                                         else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 626                                         $t[$k] .= $y ;
 627                                         array_push ( $td , true ) ;
 628                                 }
 629                         }
 630                 }
 631
 632                 # Closing open td, tr && table
 633                 while ( count ( $td ) > 0 )
 634                 {
 635                         if ( array_pop ( $td ) ) $t[] = '</td>' ;
 636                         if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
 637                         $t[] = '</table>' ;
 638                 }
 639
 640                 $t = implode ( "\n" , $t ) ;
 641                 #               $t = $this->removeHTMLtags( $t );
 642                 wfProfileOut( $fname );
 643                 return $t ;
 644         }
 645
 646         /**
 647          * Helper function for parse() that transforms wiki markup into
 648          * HTML. Only called for $mOutputType == OT_HTML.
 649          *
 650          * @access private
 651          */
 652         function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
 653         global $wgContLang;
 654
 655                 $fname = 'Parser::internalParse';
 656                 wfProfileIn( $fname );
 657
 658                 $text = $this->removeHTMLtags( $text );
 659                 $text = $this->replaceVariables( $text, $args );
 660
 661                 $text = $wgContLang->convert($text);
 662
 663                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
 664
 665                 $text = $this->doHeadings( $text );
 666                 if($this->mOptions->getUseDynamicDates()) {
 667                         global $wgDateFormatter;
 668                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 669                 }
 670                 $text = $this->doAllQuotes( $text );
 671                 $text = $this->replaceInternalLinks ( $text );
 672                 # Another call to replace links and images inside captions of images
 673                 $text = $this->replaceInternalLinks ( $text );
 674                 $text = $this->replaceExternalLinks( $text );
 675                 $text = $this->doMagicLinks( $text );
 676                 $text = $this->doTableStuff( $text );
 677                 $text = $this->formatHeadings( $text, $isMain );
 678                 $sk =& $this->mOptions->getSkin();
 679                 $text = $sk->transformContent( $text );
 680
 681                 wfProfileOut( $fname );
 682                 return $text;
 683         }
 684
 685         /**
 686          * Replace special strings like "ISBN xxx" and "RFC xxx" with
 687          * magic external links.
 688          *
 689          * @access private
 690          */
 691         function &doMagicLinks( &$text ) {
 692                 global $wgUseGeoMode;
 693                 $text = $this->magicISBN( $text );
 694                 if ( isset( $wgUseGeoMode ) && $wgUseGeoMode ) {
 695                         $text = $this->magicGEO( $text );
 696                 }
 697                 $text = $this->magicRFC( $text );
 698                 return $text;
 699         }
 700
 701         /**
 702          * Parse ^^ tokens and return html
 703          *
 704          * @access private
 705          */
 706         function doExponent ( $text ) {
 707                 $fname = 'Parser::doExponent';
 708                 wfProfileIn( $fname);
 709                 $text = preg_replace('/\^\^(.*)\^\^/','<small><sup>\\1</sup></small>', $text);
 710                 wfProfileOut( $fname);
 711                 return $text;
 712         }
 713
 714         /**
 715          * Parse headers and return html
 716          *
 717          * @access private
 718          */
 719         function doHeadings( $text ) {
 720                 $fname = 'Parser::doHeadings';
 721                 wfProfileIn( $fname );
 722                 for ( $i = 6; $i >= 1; --$i ) {
 723                         $h = substr( '======', 0, $i );
 724                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 725                           "<h{$i}>\\1</h{$i}>\\2", $text );
 726                 }
 727                 wfProfileOut( $fname );
 728                 return $text;
 729         }
 730
 731         /**
 732          * Replace single quotes with HTML markup
 733          * @access private
 734          * @return string the altered text
 735          */
 736         function doAllQuotes( $text ) {
 737                 $fname = 'Parser::doAllQuotes';
 738                 wfProfileIn( $fname );
 739                 $outtext = '';
 740                 $lines = explode( "\n", $text );
 741                 foreach ( $lines as $line ) {
 742                         $outtext .= $this->doQuotes ( $line ) . "\n";
 743                 }
 744                 $outtext = substr($outtext, 0,-1);
 745                 wfProfileOut( $fname );
 746                 return $outtext;
 747         }
 748
 749         /**
 750          * Helper function for doAllQuotes()
 751          * @access private
 752          */
 753         function doQuotes( $text ) {
 754                 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 755                 if (count ($arr) == 1)
 756                         return $text;
 757                 else
 758                 {
 759                         # First, do some preliminary work. This may shift some apostrophes from
 760                         # being mark-up to being text. It also counts the number of occurrences
 761                         # of bold and italics mark-ups.
 762                         $i = 0;
 763                         $numbold = 0;
 764                         $numitalics = 0;
 765                         foreach ($arr as $r)
 766                         {
 767                                 if (($i % 2) == 1)
 768                                 {
 769                                         # If there are ever four apostrophes, assume the first is supposed to
 770                                         # be text, and the remaining three constitute mark-up for bold text.
 771                                         if (strlen ($arr[$i]) == 4)
 772                                         {
 773                                                 $arr[$i-1] .= "'";
 774                                                 $arr[$i] = "'''";
 775                                         }
 776                                         # If there are more than 5 apostrophes in a row, assume they're all
 777                                         # text except for the last 5.
 778                                         else if (strlen ($arr[$i]) > 5)
 779                                         {
 780                                                 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
 781                                                 $arr[$i] = "'''''";
 782                                         }
 783                                         # Count the number of occurrences of bold and italics mark-ups.
 784                                         # We are not counting sequences of five apostrophes.
 785                                         if (strlen ($arr[$i]) == 2) $numitalics++;  else
 786                                         if (strlen ($arr[$i]) == 3) $numbold++;     else
 787                                         if (strlen ($arr[$i]) == 5) { $numitalics++; $numbold++; }
 788                                 }
 789                                 $i++;
 790                         }
 791
 792                         # If there is an odd number of both bold and italics, it is likely
 793                         # that one of the bold ones was meant to be an apostrophe followed
 794                         # by italics. Which one we cannot know for certain, but it is more
 795                         # likely to be one that has a single-letter word before it.
 796                         if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
 797                         {
 798                                 $i = 0;
 799                                 $firstsingleletterword = -1;
 800                                 $firstmultiletterword = -1;
 801                                 $firstspace = -1;
 802                                 foreach ($arr as $r)
 803                                 {
 804                                         if (($i % 2 == 1) and (strlen ($r) == 3))
 805                                         {
 806                                                 $x1 = substr ($arr[$i-1], -1);
 807                                                 $x2 = substr ($arr[$i-1], -2, 1);
 808                                                 if ($x1 == ' ') {
 809                                                         if ($firstspace == -1) $firstspace = $i;
 810                                                 } else if ($x2 == ' ') {
 811                                                         if ($firstsingleletterword == -1) $firstsingleletterword = $i;
 812                                                 } else {
 813                                                         if ($firstmultiletterword == -1) $firstmultiletterword = $i;
 814                                                 }
 815                                         }
 816                                         $i++;
 817                                 }
 818
 819                                 # If there is a single-letter word, use it!
 820                                 if ($firstsingleletterword > -1)
 821                                 {
 822                                         $arr [ $firstsingleletterword ] = "''";
 823                                         $arr [ $firstsingleletterword-1 ] .= "'";
 824                                 }
 825                                 # If not, but there's a multi-letter word, use that one.
 826                                 else if ($firstmultiletterword > -1)
 827                                 {
 828                                         $arr [ $firstmultiletterword ] = "''";
 829                                         $arr [ $firstmultiletterword-1 ] .= "'";
 830                                 }
 831                                 # ... otherwise use the first one that has neither.
 832                                 # (notice that it is possible for all three to be -1 if, for example,
 833                                 # there is only one pentuple-apostrophe in the line)
 834                                 else if ($firstspace > -1)
 835                                 {
 836                                         $arr [ $firstspace ] = "''";
 837                                         $arr [ $firstspace-1 ] .= "'";
 838                                 }
 839                         }
 840
 841                         # Now let's actually convert our apostrophic mush to HTML!
 842                         $output = '';
 843                         $buffer = '';
 844                         $state = '';
 845                         $i = 0;
 846                         foreach ($arr as $r)
 847                         {
 848                                 if (($i % 2) == 0)
 849                                 {
 850                                         if ($state == 'both')
 851                                                 $buffer .= $r;
 852                                         else
 853                                                 $output .= $r;
 854                                 }
 855                                 else
 856                                 {
 857                                         if (strlen ($r) == 2)
 858                                         {
 859                                                 if ($state == 'i')
 860                                                 { $output .= '</i>'; $state = ''; }
 861                                                 else if ($state == 'bi')
 862                                                 { $output .= '</i>'; $state = 'b'; }
 863                                                 else if ($state == 'ib')
 864                                                 { $output .= '</b></i><b>'; $state = 'b'; }
 865                                                 else if ($state == 'both')
 866                                                 { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
 867                                                 else # $state can be 'b' or ''
 868                                                 { $output .= '<i>'; $state .= 'i'; }
 869                                         }
 870                                         else if (strlen ($r) == 3)
 871                                         {
 872                                                 if ($state == 'b')
 873                                                 { $output .= '</b>'; $state = ''; }
 874                                                 else if ($state == 'bi')
 875                                                 { $output .= '</i></b><i>'; $state = 'i'; }
 876                                                 else if ($state == 'ib')
 877                                                 { $output .= '</b>'; $state = 'i'; }
 878                                                 else if ($state == 'both')
 879                                                 { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
 880                                                 else # $state can be 'i' or ''
 881                                                 { $output .= '<b>'; $state .= 'b'; }
 882                                         }
 883                                         else if (strlen ($r) == 5)
 884                                         {
 885                                                 if ($state == 'b')
 886                                                 { $output .= '</b><i>'; $state = 'i'; }
 887                                                 else if ($state == 'i')
 888                                                 { $output .= '</i><b>'; $state = 'b'; }
 889                                                 else if ($state == 'bi')
 890                                                 { $output .= '</i></b>'; $state = ''; }
 891                                                 else if ($state == 'ib')
 892                                                 { $output .= '</b></i>'; $state = ''; }
 893                                                 else if ($state == 'both')
 894                                                 { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
 895                                                 else # ($state == '')
 896                                                 { $buffer = ''; $state = 'both'; }
 897                                         }
 898                                 }
 899                                 $i++;
 900                         }
 901                         # Now close all remaining tags.  Notice that the order is important.
 902                         if ($state == 'b' || $state == 'ib')
 903                                 $output .= '</b>';
 904                         if ($state == 'i' || $state == 'bi' || $state == 'ib')
 905                                 $output .= '</i>';
 906                         if ($state == 'bi')
 907                                 $output .= '</b>';
 908                         if ($state == 'both')
 909                                 $output .= '<b><i>'.$buffer.'</i></b>';
 910                         return $output;
 911                 }
 912         }
 913
 914         /**
 915          * Replace external links
 916          *
 917          * Note: we have to do external links before the internal ones,
 918          * and otherwise take great care in the order of things here, so
 919          * that we don't end up interpreting some URLs twice.
 920          *
 921          * @access private
 922          */
 923         function replaceExternalLinks( $text ) {
 924                 $fname = 'Parser::replaceExternalLinks';
 925                 wfProfileIn( $fname );
 926
 927                 $sk =& $this->mOptions->getSkin();
 928                 $linktrail = wfMsgForContent('linktrail');
 929                 $bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 930
 931                 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
 932
 933                 $i = 0;
 934                 while ( $i<count( $bits ) ) {
 935                         $url = $bits[$i++];
 936                         $protocol = $bits[$i++];
 937                         $text = $bits[$i++];
 938                         $trail = $bits[$i++];
 939
 940                         # If the link text is an image URL, replace it with an <img> tag
 941                         # This happened by accident in the original parser, but some people used it extensively
 942                         $img = $this->maybeMakeImageLink( $text );
 943                         if ( $img !== false ) {
 944                                 $text = $img;
 945                         }
 946
 947                         $dtrail = '';
 948
 949                         # No link text, e.g. [http://domain.tld/some.link]
 950                         if ( $text == '' ) {
 951                                 # Autonumber if allowed
 952                                 if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
 953                                         $text = '[' . ++$this->mAutonumber . ']';
 954                                 } else {
 955                                         # Otherwise just use the URL
 956                                         $text = htmlspecialchars( $url );
 957                                 }
 958                         } else {
 959                                 # Have link text, e.g. [http://domain.tld/some.link text]s
 960                                 # Check for trail
 961                                 if ( preg_match( $linktrail, $trail, $m2 ) ) {
 962                                         $dtrail = $m2[1];
 963                                         $trail = $m2[2];
 964                                 }
 965                         }
 966
 967                         $encUrl = htmlspecialchars( $url );
 968                         # Bit in parentheses showing the URL for the printable version
 969                         if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, '/' ) . "/?$!", $url ) ) {
 970                                 $paren = '';
 971                         } else {
 972                                 # Expand the URL for printable version
 973                                 if ( ! $sk->suppressUrlExpansion() ) {
 974                                         $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
 975                                 } else {
 976                                         $paren = '';
 977                                 }
 978                         }
 979
 980                         # Process the trail (i.e. everything after this link up until start of the next link),
 981                         # replacing any non-bracketed links
 982                         $trail = $this->replaceFreeExternalLinks( $trail );
 983
 984                         $la = $sk->getExternalLinkAttributes( $url, $text );
 985
 986                         # Use the encoded URL
 987                         # This means that users can paste URLs directly into the text
 988                         # Funny characters like &ouml; aren't valid in URLs anyway
 989                         # This was changed in August 2004
 990                         $s .= "<a href=\"{$url}\"{$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
 991                 }
 992
 993                 wfProfileOut( $fname );
 994                 return $s;
 995         }
 996
 997         /**
 998          * Replace anything that looks like a URL with a link
 999          * @access private
1000          */
1001         function replaceFreeExternalLinks( $text ) {
1002                 $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1003                 $s = array_shift( $bits );
1004                 $i = 0;
1005
1006                 $sk =& $this->mOptions->getSkin();
1007
1008                 while ( $i < count( $bits ) ){
1009                         $protocol = $bits[$i++];
1010                         $remainder = $bits[$i++];
1011
1012                         if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
1013                                 # Found some characters after the protocol that look promising
1014                                 $url = $protocol . $m[1];
1015                                 $trail = $m[2];
1016
1017                                 # Move trailing punctuation to $trail
1018                                 $sep = ',;\.:!?';
1019                                 # If there is no left bracket, then consider right brackets fair game too
1020                                 if ( strpos( $url, '(' ) === false ) {
1021                                         $sep .= ')';
1022                                 }
1023
1024                                 $numSepChars = strspn( strrev( $url ), $sep );
1025                                 if ( $numSepChars ) {
1026                                         $trail = substr( $url, -$numSepChars ) . $trail;
1027                                         $url = substr( $url, 0, -$numSepChars );
1028                                 }
1029
1030                                 # Replace &amp; from obsolete syntax with &
1031                                 $url = str_replace( '&amp;', '&', $url );
1032
1033                                 # Is this an external image?
1034                                 $text = $this->maybeMakeImageLink( $url );
1035                                 if ( $text === false ) {
1036                                         # Not an image, make a link
1037                                         $text = $sk->makeExternalLink( $url, $url );
1038                                 }
1039                                 $s .= $text . $trail;
1040                         } else {
1041                                 $s .= $protocol . $remainder;
1042                         }
1043                 }
1044                 return $s;
1045         }
1046
1047         /**
1048          * make an image if it's allowed
1049          * @access private
1050          */
1051         function maybeMakeImageLink( $url ) {
1052                 $sk =& $this->mOptions->getSkin();
1053                 $text = false;
1054                 if ( $this->mOptions->getAllowExternalImages() ) {
1055                         if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
1056                                 # Image found
1057                                 $text = $sk->makeImage( htmlspecialchars( $url ) );
1058                         }
1059                 }
1060                 return $text;
1061         }
1062
1063         /**
1064          * Process [[ ]] wikilinks
1065          *
1066          * @access private
1067          */
1068         function replaceInternalLinks( $s ) {
1069                 global $wgLang, $wgContLang, $wgLinkCache;
1070                 static $fname = 'Parser::replaceInternalLinks' ;
1071                 wfProfileIn( $fname );
1072
1073                 wfProfileIn( $fname.'-setup' );
1074                 static $tc = FALSE;
1075                 # the % is needed to support urlencoded titles as well
1076                 if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
1077                 $sk =& $this->mOptions->getSkin();
1078
1079                 $redirect = MagicWord::get ( MAG_REDIRECT ) ;
1080
1081                 $a = explode( '[[', ' ' . $s );
1082                 $s = array_shift( $a );
1083                 $s = substr( $s, 1 );
1084
1085                 # Match a link having the form [[namespace:link|alternate]]trail
1086                 static $e1 = FALSE;
1087                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
1088                 # Match the end of a line for a word that's not followed by whitespace,
1089                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
1090                 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
1091
1092                 $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
1093                 # Special and Media are pseudo-namespaces; no pages actually exist in them
1094
1095                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
1096
1097                 if ( $useLinkPrefixExtension ) {
1098                         if ( preg_match( $e2, $s, $m ) ) {
1099                                 $first_prefix = $m[2];
1100                                 $s = $m[1];
1101                         } else {
1102                                 $first_prefix = false;
1103                         }
1104                 } else {
1105                         $prefix = '';
1106                 }
1107
1108                 wfProfileOut( $fname.'-setup' );
1109
1110                 # start procedeeding each line
1111                 foreach ( $a as $line ) {
1112                         wfProfileIn( $fname.'-prefixhandling' );
1113                         if ( $useLinkPrefixExtension ) {
1114                                 if ( preg_match( $e2, $s, $m ) ) {
1115                                         $prefix = $m[2];
1116                                         $s = $m[1];
1117                                 } else {
1118                                         $prefix='';
1119                                 }
1120                                 # first link
1121                                 if($first_prefix) {
1122                                         $prefix = $first_prefix;
1123                                         $first_prefix = false;
1124                                 }
1125                         }
1126                         wfProfileOut( $fname.'-prefixhandling' );
1127
1128                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1129                                 $text = $m[2];
1130                                 # fix up urlencoded title texts
1131                                 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1132                                 $trail = $m[3];
1133                         } else { # Invalid form; output directly
1134                                 $s .= $prefix . '[[' . $line ;
1135                                 continue;
1136                         }
1137
1138                         # Don't allow internal links to pages containing
1139                         # PROTO: where PROTO is a valid URL protocol; these
1140                         # should be external links.
1141                         if (preg_match('/((?:'.URL_PROTOCOLS.'):)/', $m[1])) {
1142                                 $s .= $prefix . '[[' . $line ;
1143                                 continue;
1144                         }
1145
1146                         # Make subpage if necessary
1147                         $link = $this->maybeDoSubpageLink( $m[1], $text );
1148
1149                         $noforce = (substr($m[1], 0, 1) != ':');
1150                         if (!$noforce) {
1151                                 # Strip off leading ':'
1152                                 $link = substr($link, 1);
1153                         }
1154
1155                         $wasblank = ( '' == $text );
1156                         if( $wasblank ) $text = $link;
1157
1158                         $nt = Title::newFromText( $link );
1159                         if( !$nt ) {
1160                                 $s .= $prefix . '[[' . $line;
1161                                 continue;
1162                         }
1163
1164                         $ns = $nt->getNamespace();
1165                         $iw = $nt->getInterWiki();
1166
1167                         # Link not escaped by : , create the various objects
1168                         if( $noforce ) {
1169
1170                                 # Interwikis
1171                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
1172                                         array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
1173                                         $tmp = $prefix . $trail ;
1174                                         $s .= (trim($tmp) == '')? '': $tmp;
1175                                         continue;
1176                                 }
1177
1178                                 if ( $ns == NS_IMAGE ) {
1179                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1180                                         $wgLinkCache->addImageLinkObj( $nt );
1181                                         continue;
1182                                 }
1183
1184                                 if ( $ns == NS_CATEGORY ) {
1185                                         $t = $nt->getText() ;
1186                                         $nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).':'.$t ) ;
1187
1188                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1189                                         $pPLC=$sk->postParseLinkColour();
1190                                         $sk->postParseLinkColour( false );
1191                                         $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1192                                         $sk->postParseLinkColour( $pPLC );
1193                                         $wgLinkCache->resume();
1194
1195                                         if ( $wasblank ) {
1196                                                 if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
1197                                                         $sortkey = $this->mTitle->getText();
1198                                                 } else {
1199                                                         $sortkey = $this->mTitle->getPrefixedText();
1200                                                 }
1201                                         } else {
1202                                                 $sortkey = $text;
1203                                         }
1204                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1205                                         $this->mOutput->mCategoryLinks[] = $t ;
1206                                         $s .= $prefix . $trail ;
1207                                         continue;
1208                                 }
1209                         }
1210
1211                         if( ( $nt->getPrefixedText() === $this->mTitle->getPrefixedText() ) &&
1212                             ( strpos( $link, '#' ) === FALSE ) ) {
1213                                 # Self-links are handled specially; generally de-link and change to bold.
1214                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1215                                 continue;
1216                         }
1217
1218                         if( $ns == NS_MEDIA ) {
1219                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1220                                 $wgLinkCache->addImageLinkObj( $nt );
1221                                 continue;
1222                         } elseif( $ns == NS_SPECIAL ) {
1223                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1224                                 continue;
1225                         }
1226                         $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1227                 }
1228                 wfProfileOut( $fname );
1229                 return $s;
1230         }
1231
1232         /**
1233          * Handle link to subpage if necessary
1234          * @param $target string the source of the link
1235          * @param &$text the link text, modified as necessary
1236          * @return string the full name of the link
1237          * @access private
1238          */
1239         function maybeDoSubpageLink($target, &$text) {
1240                 # Valid link forms:
1241                 # Foobar -- normal
1242                 # :Foobar -- override special treatment of prefix (images, language links)
1243                 # /Foobar -- convert to CurrentPage/Foobar
1244                 # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1245                 global $wgNamespacesWithSubpages;
1246
1247                 $fname = 'Parser::maybeDoSubpageLink';
1248                 wfProfileIn( $fname );
1249                 # Look at the first character
1250                 if( $target{0} == '/' ) {
1251                         # / at end means we don't want the slash to be shown
1252                         if(substr($target,-1,1)=='/') {
1253                                 $target=substr($target,1,-1);
1254                                 $noslash=$target;
1255                         } else {
1256                                 $noslash=substr($target,1);
1257                         }
1258
1259                         # Some namespaces don't allow subpages
1260                         if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
1261                                 # subpages allowed here
1262                                 $ret = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
1263                                 if( '' === $text ) {
1264                                         $text = $target;
1265                                 } # this might be changed for ugliness reasons
1266                         } else {
1267                                 # no subpage allowed, use standard link
1268                                 $ret = $target;
1269                         }
1270                 } else {
1271                         # no subpage
1272                         $ret = $target;
1273                 }
1274
1275                 wfProfileOut( $fname );
1276                 return $ret;
1277         }
1278
1279         /**#@+
1280          * Used by doBlockLevels()
1281          * @access private
1282          */
1283         /* private */ function closeParagraph() {
1284                 $result = '';
1285                 if ( '' != $this->mLastSection ) {
1286                         $result = '</' . $this->mLastSection  . ">\n";
1287                 }
1288                 $this->mInPre = false;
1289                 $this->mLastSection = '';
1290                 return $result;
1291         }
1292         # getCommon() returns the length of the longest common substring
1293         # of both arguments, starting at the beginning of both.
1294         #
1295         /* private */ function getCommon( $st1, $st2 ) {
1296                 $fl = strlen( $st1 );
1297                 $shorter = strlen( $st2 );
1298                 if ( $fl < $shorter ) { $shorter = $fl; }
1299
1300                 for ( $i = 0; $i < $shorter; ++$i ) {
1301                         if ( $st1{$i} != $st2{$i} ) { break; }
1302                 }
1303                 return $i;
1304         }
1305         # These next three functions open, continue, and close the list
1306         # element appropriate to the prefix character passed into them.
1307         #
1308         /* private */ function openList( $char ) {
1309                 $result = $this->closeParagraph();
1310
1311                 if ( '*' == $char ) { $result .= '<ul><li>'; }
1312                 else if ( '#' == $char ) { $result .= '<ol><li>'; }
1313                 else if ( ':' == $char ) { $result .= '<dl><dd>'; }
1314                 else if ( ';' == $char ) {
1315                         $result .= '<dl><dt>';
1316                         $this->mDTopen = true;
1317                 }
1318                 else { $result = '<!-- ERR 1 -->'; }
1319
1320                 return $result;
1321         }
1322
1323         /* private */ function nextItem( $char ) {
1324                 if ( '*' == $char || '#' == $char ) { return '</li><li>'; }
1325                 else if ( ':' == $char || ';' == $char ) {
1326                         $close = '</dd>';
1327                         if ( $this->mDTopen ) { $close = '</dt>'; }
1328                         if ( ';' == $char ) {
1329                                 $this->mDTopen = true;
1330                                 return $close . '<dt>';
1331                         } else {
1332                                 $this->mDTopen = false;
1333                                 return $close . '<dd>';
1334                         }
1335                 }
1336                 return '<!-- ERR 2 -->';
1337         }
1338
1339         /* private */ function closeList( $char ) {
1340                 if ( '*' == $char ) { $text = '</li></ul>'; }
1341                 else if ( '#' == $char ) { $text = '</li></ol>'; }
1342                 else if ( ':' == $char ) {
1343                         if ( $this->mDTopen ) {
1344                                 $this->mDTopen = false;
1345                                 $text = '</dt></dl>';
1346                         } else {
1347                                 $text = '</dd></dl>';
1348                         }
1349                 }
1350                 else {  return '<!-- ERR 3 -->'; }
1351                 return $text."\n";
1352         }
1353         /**#@-*/
1354
1355         /**
1356          * Make lists from lines starting with ':', '*', '#', etc.
1357          *
1358          * @access private
1359          * @return string the lists rendered as HTML
1360          */
1361         function doBlockLevels( $text, $linestart ) {
1362                 $fname = 'Parser::doBlockLevels';
1363                 wfProfileIn( $fname );
1364
1365                 # Parsing through the text line by line.  The main thing
1366                 # happening here is handling of block-level elements p, pre,
1367                 # and making lists from lines starting with * # : etc.
1368                 #
1369                 $textLines = explode( "\n", $text );
1370
1371                 $lastPrefix = $output = $lastLine = '';
1372                 $this->mDTopen = $inBlockElem = false;
1373                 $prefixLength = 0;
1374                 $paragraphStack = false;
1375
1376                 if ( !$linestart ) {
1377                         $output .= array_shift( $textLines );
1378                 }
1379                 foreach ( $textLines as $oLine ) {
1380                         $lastPrefixLength = strlen( $lastPrefix );
1381                         $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
1382                         $preOpenMatch = preg_match('/<pre/i', $oLine );
1383                         if ( !$this->mInPre ) {
1384                                 # Multiple prefixes may abut each other for nested lists.
1385                                 $prefixLength = strspn( $oLine, '*#:;' );
1386                                 $pref = substr( $oLine, 0, $prefixLength );
1387
1388                                 # eh?
1389                                 $pref2 = str_replace( ';', ':', $pref );
1390                                 $t = substr( $oLine, $prefixLength );
1391                                 $this->mInPre = !empty($preOpenMatch);
1392                         } else {
1393                                 # Don't interpret any other prefixes in preformatted text
1394                                 $prefixLength = 0;
1395                                 $pref = $pref2 = '';
1396                                 $t = $oLine;
1397                         }
1398
1399                         # List generation
1400                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1401                                 # Same as the last item, so no need to deal with nesting or opening stuff
1402                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1403                                 $paragraphStack = false;
1404
1405                                 if ( substr( $pref, -1 ) == ';') {
1406                                         # The one nasty exception: definition lists work like this:
1407                                         # ; title : definition text
1408                                         # So we check for : in the remainder text to split up the
1409                                         # title and definition, without b0rking links.
1410                                         if ($this->findColonNoLinks($t, $term, $t2) !== false) {
1411                                                 $t = $t2;
1412                                                 $output .= $term . $this->nextItem( ':' );
1413                                         }
1414                                 }
1415                         } elseif( $prefixLength || $lastPrefixLength ) {
1416                                 # Either open or close a level...
1417                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1418                                 $paragraphStack = false;
1419
1420                                 while( $commonPrefixLength < $lastPrefixLength ) {
1421                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1422                                         --$lastPrefixLength;
1423                                 }
1424                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1425                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1426                                 }
1427                                 while ( $prefixLength > $commonPrefixLength ) {
1428                                         $char = substr( $pref, $commonPrefixLength, 1 );
1429                                         $output .= $this->openList( $char );
1430
1431                                         if ( ';' == $char ) {
1432                                                 # FIXME: This is dupe of code above
1433                                                 if ($this->findColonNoLinks($t, $term, $t2) !== false) {
1434                                                         $t = $t2;
1435                                                         $output .= $term . $this->nextItem( ':' );
1436                                                 }
1437                                         }
1438                                         ++$commonPrefixLength;
1439                                 }
1440                                 $lastPrefix = $pref2;
1441                         }
1442                         if( 0 == $prefixLength ) {
1443                                 # No prefix (not in list)--go to paragraph mode
1444                                 $uniq_prefix = UNIQ_PREFIX;
1445                                 // XXX: use a stack for nestable elements like span, table and div
1446                                 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1447                                 $closematch = preg_match(
1448                                         '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1449                                         '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
1450                                 if ( $openmatch or $closematch ) {
1451                                         $paragraphStack = false;
1452                                         $output .= $this->closeParagraph();
1453                                         if($preOpenMatch and !$preCloseMatch) {
1454                                                 $this->mInPre = true;
1455                                         }
1456                                         if ( $closematch ) {
1457                                                 $inBlockElem = false;
1458                                         } else {
1459                                                 $inBlockElem = true;
1460                                         }
1461                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1462                                         if ( ' ' == $t{0} and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
1463                                                 // pre
1464                                                 if ($this->mLastSection != 'pre') {
1465                                                         $paragraphStack = false;
1466                                                         $output .= $this->closeParagraph().'<pre>';
1467                                                         $this->mLastSection = 'pre';
1468                                                 }
1469                                                 $t = substr( $t, 1 );
1470                                         } else {
1471                                                 // paragraph
1472                                                 if ( '' == trim($t) ) {
1473                                                         if ( $paragraphStack ) {
1474                                                                 $output .= $paragraphStack.'<br />';
1475                                                                 $paragraphStack = false;
1476                                                                 $this->mLastSection = 'p';
1477                                                         } else {
1478                                                                 if ($this->mLastSection != 'p' ) {
1479                                                                         $output .= $this->closeParagraph();
1480                                                                         $this->mLastSection = '';
1481                                                                         $paragraphStack = '<p>';
1482                                                                 } else {
1483                                                                         $paragraphStack = '</p><p>';
1484                                                                 }
1485                                                         }
1486                                                 } else {
1487                                                         if ( $paragraphStack ) {
1488                                                                 $output .= $paragraphStack;
1489                                                                 $paragraphStack = false;
1490                                                                 $this->mLastSection = 'p';
1491                                                         } else if ($this->mLastSection != 'p') {
1492                                                                 $output .= $this->closeParagraph().'<p>';
1493                                                                 $this->mLastSection = 'p';
1494                                                         }
1495                                                 }
1496                                         }
1497                                 }
1498                         }
1499                         if ($paragraphStack === false) {
1500                                 $output .= $t."\n";
1501                         }
1502                 }
1503                 while ( $prefixLength ) {
1504                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1505                         --$prefixLength;
1506                 }
1507                 if ( '' != $this->mLastSection ) {
1508                         $output .= '</' . $this->mLastSection . '>';
1509                         $this->mLastSection = '';
1510                 }
1511
1512                 wfProfileOut( $fname );
1513                 return $output;
1514         }
1515
1516         /**
1517          * Split up a string on ':', ignoring any occurences inside
1518          * <a>..</a> or <span>...</span>
1519          * @param $str string the string to split
1520          * @param &$before string set to everything before the ':'
1521          * @param &$after string set to everything after the ':'
1522          * return string the position of the ':', or false if none found
1523          */
1524         function findColonNoLinks($str, &$before, &$after) {
1525                 # I wonder if we should make this count all tags, not just <a>
1526                 # and <span>. That would prevent us from matching a ':' that
1527                 # comes in the middle of italics other such formatting....
1528                 # -- Wil
1529                 $fname = 'Parser::findColonNoLinks';
1530                 wfProfileIn( $fname );
1531                 $pos = 0;
1532                 do {
1533                         $colon = strpos($str, ':', $pos);
1534
1535                         if ($colon !== false) {
1536                                 $before = substr($str, 0, $colon);
1537                                 $after = substr($str, $colon + 1);
1538
1539                                 # Skip any ':' within <a> or <span> pairs
1540                                 $a = substr_count($before, '<a');
1541                                 $s = substr_count($before, '<span');
1542                                 $ca = substr_count($before, '</a>');
1543                                 $cs = substr_count($before, '</span>');
1544
1545                                 if ($a <= $ca and $s <= $cs) {
1546                                         # Tags are balanced before ':'; ok
1547                                         break;
1548                                 }
1549                                 $pos = $colon + 1;
1550                         }
1551                 } while ($colon !== false);
1552                 wfProfileOut( $fname );
1553                 return $colon;
1554         }
1555
1556         /**
1557          * Return value of a magic variable (like PAGENAME)
1558          *
1559          * @access private
1560          */
1561         function getVariableValue( $index ) {
1562                 global $wgContLang, $wgSitename, $wgServer;
1563
1564                 switch ( $index ) {
1565                         case MAG_CURRENTMONTH:
1566                                 return $wgContLang->formatNum( date( 'm' ) );
1567                         case MAG_CURRENTMONTHNAME:
1568                                 return $wgContLang->getMonthName( date('n') );
1569                         case MAG_CURRENTMONTHNAMEGEN:
1570                                 return $wgContLang->getMonthNameGen( date('n') );
1571                         case MAG_CURRENTDAY:
1572                                 return $wgContLang->formatNum( date('j') );
1573                         case MAG_PAGENAME:
1574                                 return $this->mTitle->getText();
1575                         case MAG_PAGENAMEE:
1576                                 return $this->mTitle->getPartialURL();
1577                         case MAG_NAMESPACE:
1578                                 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1579                                 return $wgContLang->getNsText($this->mTitle->getNamespace()); # Patch by Dori
1580                         case MAG_CURRENTDAYNAME:
1581                                 return $wgContLang->getWeekdayName( date('w')+1 );
1582                         case MAG_CURRENTYEAR:
1583                                 return $wgContLang->formatNum( date( 'Y' ) );
1584                         case MAG_CURRENTTIME:
1585                                 return $wgContLang->time( wfTimestampNow(), false );
1586                         case MAG_NUMBEROFARTICLES:
1587                                 return $wgContLang->formatNum( wfNumberOfArticles() );
1588                         case MAG_SITENAME:
1589                                 return $wgSitename;
1590                         case MAG_SERVER:
1591                                 return $wgServer;
1592                         default:
1593                                 return NULL;
1594                 }
1595         }
1596
1597         /**
1598          * initialise the magic variables (like CURRENTMONTHNAME)
1599          *
1600          * @access private
1601          */
1602         function initialiseVariables() {
1603                 $fname = 'Parser::initialiseVariables';
1604                 wfProfileIn( $fname );
1605                 global $wgVariableIDs;
1606                 $this->mVariables = array();
1607                 foreach ( $wgVariableIDs as $id ) {
1608                         $mw =& MagicWord::get( $id );
1609                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1610                 }
1611                 wfProfileOut( $fname );
1612         }
1613
1614         /**
1615          * Replace magic variables, templates, and template arguments
1616          * with the appropriate text. Templates are substituted recursively,
1617          * taking care to avoid infinite loops.
1618          *
1619          * Note that the substitution depends on value of $mOutputType:
1620          *  OT_WIKI: only {{subst:}} templates
1621          *  OT_MSG: only magic variables
1622          *  OT_HTML: all templates and magic variables
1623          *
1624          * @param string $tex The text to transform
1625          * @param array $args Key-value pairs representing template parameters to substitute
1626          * @access private
1627          */
1628         function replaceVariables( $text, $args = array() ) {
1629                 global $wgLang, $wgScript, $wgArticlePath;
1630
1631                 # Prevent too big inclusions
1632                 if(strlen($text)> MAX_INCLUDE_SIZE)
1633                 return $text;
1634
1635                 $fname = 'Parser::replaceVariables';
1636                 wfProfileIn( $fname );
1637
1638                 $titleChars = Title::legalChars();
1639
1640                 # This function is called recursively. To keep track of arguments we need a stack:
1641                 array_push( $this->mArgStack, $args );
1642
1643                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1644                 $GLOBALS['wgCurParser'] =& $this;
1645
1646                 # Variable substitution
1647                 $text = preg_replace_callback( "/{{([$titleChars]*?)}}/", 'wfVariableSubstitution', $text );
1648
1649                 if ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI ) {
1650                         # Argument substitution
1651                         $text = preg_replace_callback( "/{{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
1652                 }
1653                 # Template substitution
1654                 $regex = '/(\\n|{)?{{(['.$titleChars.']*)(\\|.*?|)}}/s';
1655                 $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );
1656
1657                 array_pop( $this->mArgStack );
1658
1659                 wfProfileOut( $fname );
1660                 return $text;
1661         }
1662
1663         /**
1664          * Replace magic variables
1665          * @access private
1666          */
1667         function variableSubstitution( $matches ) {
1668                 if ( !$this->mVariables ) {
1669                         $this->initialiseVariables();
1670                 }
1671                 $skip = false;
1672                 if ( $this->mOutputType == OT_WIKI ) {
1673                         # Do only magic variables prefixed by SUBST
1674                         $mwSubst =& MagicWord::get( MAG_SUBST );
1675                         if (!$mwSubst->matchStartAndRemove( $matches[1] ))
1676                                 $skip = true;
1677                         # Note that if we don't substitute the variable below,
1678                         # we don't remove the {{subst:}} magic word, in case
1679                         # it is a template rather than a magic variable.
1680                 }
1681                 if ( !$skip && array_key_exists( $matches[1], $this->mVariables ) ) {
1682                         $text = $this->mVariables[$matches[1]];
1683                         $this->mOutput->mContainsOldMagic = true;
1684                 } else {
1685                         $text = $matches[0];
1686                 }
1687                 return $text;
1688         }
1689
1690         # Split template arguments
1691         function getTemplateArgs( $argsString ) {
1692                 if ( $argsString === '' ) {
1693                         return array();
1694                 }
1695
1696                 $args = explode( '|', substr( $argsString, 1 ) );
1697
1698                 # If any of the arguments contains a '[[' but no ']]', it needs to be
1699                 # merged with the next arg because the '|' character between belongs
1700                 # to the link syntax and not the template parameter syntax.
1701                 $argc = count($args);
1702                 $i = 0;
1703                 for ( $i = 0; $i < $argc-1; $i++ ) {
1704                         if ( substr_count ( $args[$i], '[[' ) != substr_count ( $args[$i], ']]' ) ) {
1705                                 $args[$i] .= '|'.$args[$i+1];
1706                                 array_splice($args, $i+1, 1);
1707                                 $i--;
1708                                 $argc--;
1709                         }
1710                 }
1711
1712                 return $args;
1713         }
1714
1715         /**
1716          * Return the text of a template, after recursively
1717          * replacing any variables or templates within the template.
1718          *
1719          * @param array $matches The parts of the template
1720          *  $matches[1]: the title, i.e. the part before the |
1721          *  $matches[2]: the parameters (including a leading |), if  any
1722          * @return string the text of the template
1723          * @access private
1724          */
1725         function braceSubstitution( $matches ) {
1726                 global $wgLinkCache, $wgContLang;
1727                 $fname = 'Parser::braceSubstitution';
1728                 $found = false;
1729                 $nowiki = false;
1730                 $noparse = false;
1731
1732                 $title = NULL;
1733
1734                 # Need to know if the template comes at the start of a line,
1735                 # to treat the beginning of the template like the beginning
1736                 # of a line for tables and block-level elements.
1737                 $linestart = $matches[1];
1738
1739                 # $part1 is the bit before the first |, and must contain only title characters
1740                 # $args is a list of arguments, starting from index 0, not including $part1
1741
1742                 $part1 = $matches[2];
1743                 # If the third subpattern matched anything, it will start with |
1744
1745                 $args = $this->getTemplateArgs($matches[3]);
1746                 $argc = count( $args );
1747
1748                 # Don't parse {{{}}} because that's only for template arguments
1749                 if ( $linestart === '{' ) {
1750                         $text = $matches[0];
1751                         $found = true;
1752                         $noparse = true;
1753                 }
1754
1755                 # SUBST
1756                 if ( !$found ) {
1757                         $mwSubst =& MagicWord::get( MAG_SUBST );
1758                         if ( $mwSubst->matchStartAndRemove( $part1 ) xor ($this->mOutputType == OT_WIKI) ) {
1759                                 # One of two possibilities is true:
1760                                 # 1) Found SUBST but not in the PST phase
1761                                 # 2) Didn't find SUBST and in the PST phase
1762                                 # In either case, return without further processing
1763                                 $text = $matches[0];
1764                                 $found = true;
1765                                 $noparse = true;
1766                         }
1767                 }
1768
1769                 # MSG, MSGNW and INT
1770                 if ( !$found ) {
1771                         # Check for MSGNW:
1772                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1773                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1774                                 $nowiki = true;
1775                         } else {
1776                                 # Remove obsolete MSG:
1777                                 $mwMsg =& MagicWord::get( MAG_MSG );
1778                                 $mwMsg->matchStartAndRemove( $part1 );
1779                         }
1780
1781                         # Check if it is an internal message
1782                         $mwInt =& MagicWord::get( MAG_INT );
1783                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1784                                 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1785                                         $text = $linestart . wfMsgReal( $part1, $args, true );
1786                                         $found = true;
1787                                 }
1788                         }
1789                 }
1790
1791                 # NS
1792                 if ( !$found ) {
1793                         # Check for NS: (namespace expansion)
1794                         $mwNs = MagicWord::get( MAG_NS );
1795                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1796                                 if ( intval( $part1 ) ) {
1797                                         $text = $linestart . $wgContLang->getNsText( intval( $part1 ) );
1798                                         $found = true;
1799                                 } else {
1800                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1801                                         if ( !is_null( $index ) ) {
1802                                                 $text = $linestart . $wgContLang->getNsText( $index );
1803                                                 $found = true;
1804                                         }
1805                                 }
1806                         }
1807                 }
1808
1809                 # LOCALURL and LOCALURLE
1810                 if ( !$found ) {
1811                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1812                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1813
1814                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1815                                 $func = 'getLocalURL';
1816                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1817                                 $func = 'escapeLocalURL';
1818                         } else {
1819                                 $func = '';
1820                         }
1821
1822                         if ( $func !== '' ) {
1823                                 $title = Title::newFromText( $part1 );
1824                                 if ( !is_null( $title ) ) {
1825                                         if ( $argc > 0 ) {
1826                                                 $text = $linestart . $title->$func( $args[0] );
1827                                         } else {
1828                                                 $text = $linestart . $title->$func();
1829                                         }
1830                                         $found = true;
1831                                 }
1832                         }
1833                 }
1834
1835                 # GRAMMAR
1836                 if ( !$found && $argc == 1 ) {
1837                         $mwGrammar =& MagicWord::get( MAG_GRAMMAR );
1838                         if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
1839                                 $text = $linestart . $wgContLang->convertGrammar( $args[0], $part1 );
1840                                 $found = true;
1841                         }
1842                 }
1843
1844                 # Template table test
1845
1846                 # Did we encounter this template already? If yes, it is in the cache
1847                 # and we need to check for loops.
1848                 if ( !$found && isset( $this->mTemplates[$part1] ) ) {
1849                         # set $text to cached message.
1850                         $text = $linestart . $this->mTemplates[$part1];
1851                         $found = true;
1852
1853                         # Infinite loop test
1854                         if ( isset( $this->mTemplatePath[$part1] ) ) {
1855                                 $noparse = true;
1856                                 $found = true;
1857                                 $text .= '<!-- WARNING: template loop detected -->';
1858                         }
1859                 }
1860
1861                 # Load from database
1862                 $itcamefromthedatabase = false;
1863                 if ( !$found ) {
1864                         $ns = NS_TEMPLATE;
1865                         $part1 = $this->maybeDoSubpageLink( $part1, $subpage='' );
1866                         if ($subpage !== '') {
1867                                 $ns = $this->mTitle->getNamespace();
1868                         }
1869                         $title = Title::newFromText( $part1, $ns );
1870                         if ( !is_null( $title ) && !$title->isExternal() ) {
1871                                 # Check for excessive inclusion
1872                                 $dbk = $title->getPrefixedDBkey();
1873                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1874                                         # This should never be reached.
1875                                         $article = new Article( $title );
1876                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1877                                         if ( $articleContent !== false ) {
1878                                                 $found = true;
1879                                                 $text = $linestart . $articleContent;
1880                                                 $itcamefromthedatabase = true;
1881                                         }
1882                                 }
1883
1884                                 # If the title is valid but undisplayable, make a link to it
1885                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1886                                         $text = $linestart . '[['.$title->getPrefixedText().']]';
1887                                         $found = true;
1888                                 }
1889
1890                                 # Template cache array insertion
1891                                 $this->mTemplates[$part1] = $text;
1892                         }
1893                 }
1894
1895                 # Recursive parsing, escaping and link table handling
1896                 # Only for HTML output
1897                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1898                         $text = wfEscapeWikiText( $text );
1899                 } elseif ( ($this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI) && $found && !$noparse) {
1900                         # Clean up argument array
1901                         $assocArgs = array();
1902                         $index = 1;
1903                         foreach( $args as $arg ) {
1904                                 $eqpos = strpos( $arg, '=' );
1905                                 if ( $eqpos === false ) {
1906                                         $assocArgs[$index++] = $arg;
1907                                 } else {
1908                                         $name = trim( substr( $arg, 0, $eqpos ) );
1909                                         $value = trim( substr( $arg, $eqpos+1 ) );
1910                                         if ( $value === false ) {
1911                                                 $value = '';
1912                                         }
1913                                         if ( $name !== false ) {
1914                                                 $assocArgs[$name] = $value;
1915                                         }
1916                                 }
1917                         }
1918
1919                         # Add a new element to the templace recursion path
1920                         $this->mTemplatePath[$part1] = 1;
1921
1922                         $text = $this->strip( $text, $this->mStripState );
1923                         $text = $this->removeHTMLtags( $text );
1924                         $text = $this->replaceVariables( $text, $assocArgs );
1925
1926                         # Resume the link cache and register the inclusion as a link
1927                         if ( $this->mOutputType == OT_HTML && !is_null( $title ) ) {
1928                                 $wgLinkCache->addLinkObj( $title );
1929                         }
1930
1931                         # If the template begins with a table or block-level
1932                         # element, it should be treated as beginning a new line.
1933                         if ($linestart !== '\n' && preg_match('/^({\\||:|;|#|\*)/', $text)) {
1934                                 $text = "\n" . $text;
1935                         }
1936                 }
1937
1938                 # Empties the template path
1939                 $this->mTemplatePath = array();
1940                 if ( !$found ) {
1941                         return $matches[0];
1942                 } else {
1943                         # replace ==section headers==
1944                         # XXX this needs to go away once we have a better parser.
1945                         if ( $this->mOutputType != OT_WIKI && $itcamefromthedatabase ) {
1946                                 if( !is_null( $title ) )
1947                                         $encodedname = base64_encode($title->getPrefixedDBkey());
1948                                 else
1949                                         $encodedname = base64_encode("");
1950                                 $m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
1951                                         PREG_SPLIT_DELIM_CAPTURE);
1952                                 $text = '';
1953                                 $nsec = 0;
1954                                 for( $i = 0; $i < count($m); $i += 2 ) {
1955                                         $text .= $m[$i];
1956                                         if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
1957                                         $hl = $m[$i + 1];
1958                                         if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
1959                                                 $text .= $hl;
1960                                                 continue;
1961                                         }
1962                                         preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
1963                                         $text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
1964                                                 . $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];
1965
1966                                         $nsec++;
1967                                 }
1968                         }
1969                 }
1970
1971                 # Empties the template path
1972                 $this->mTemplatePath = array();
1973                 if ( !$found ) {
1974                         return $matches[0];
1975                 } else {
1976                         return $text;
1977                 }
1978         }
1979
1980         /**
1981          * Triple brace replacement -- used for template arguments
1982          * @access private
1983          */
1984         function argSubstitution( $matches ) {
1985                 $arg = trim( $matches[1] );
1986                 $text = $matches[0];
1987                 $inputArgs = end( $this->mArgStack );
1988
1989                 if ( array_key_exists( $arg, $inputArgs ) ) {
1990                         $text = $inputArgs[$arg];
1991                 }
1992
1993                 return $text;
1994         }
1995
1996         /**
1997          * Returns true if the function is allowed to include this entity
1998          * @access private
1999          */
2000         function incrementIncludeCount( $dbk ) {
2001                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
2002                         $this->mIncludeCount[$dbk] = 0;
2003                 }
2004                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
2005                         return true;
2006                 } else {
2007                         return false;
2008                 }
2009         }
2010
2011
2012         /**
2013          * Cleans up HTML, removes dangerous tags and attributes, and
2014          * removes HTML comments
2015          * @access private
2016          */
2017         function removeHTMLtags( $text ) {
2018                 global $wgUseTidy, $wgUserHtml;
2019                 $fname = 'Parser::removeHTMLtags';
2020                 wfProfileIn( $fname );
2021
2022                 if( $wgUserHtml ) {
2023                         $htmlpairs = array( # Tags that must be closed
2024                                 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
2025                                 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
2026                                 'strike', 'strong', 'tt', 'var', 'div', 'center',
2027                                 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
2028                                 'ruby', 'rt' , 'rb' , 'rp', 'p'
2029                         );
2030                         $htmlsingle = array(
2031                                 'br', 'hr', 'li', 'dt', 'dd'
2032                         );
2033                         $htmlnest = array( # Tags that can be nested--??
2034                                 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
2035                                 'dl', 'font', 'big', 'small', 'sub', 'sup'
2036                         );
2037                         $tabletags = array( # Can only appear inside table
2038                                 'td', 'th', 'tr'
2039                         );
2040                 } else {
2041                         $htmlpairs = array();
2042                         $htmlsingle = array();
2043                         $htmlnest = array();
2044                         $tabletags = array();
2045                 }
2046
2047                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
2048                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
2049
2050                 $htmlattrs = $this->getHTMLattrs () ;
2051
2052                 # Remove HTML comments
2053                 $text = $this->removeHTMLcomments( $text );
2054
2055                 $bits = explode( '<', $text );
2056                 $text = array_shift( $bits );
2057                 if(!$wgUseTidy) {
2058                         $tagstack = array(); $tablestack = array();
2059                         foreach ( $bits as $x ) {
2060                                 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
2061                                 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2062                                 $x, $regs );
2063                                 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2064                                 error_reporting( $prev );
2065
2066                                 $badtag = 0 ;
2067                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2068                                         # Check our stack
2069                                         if ( $slash ) {
2070                                                 # Closing a tag...
2071                                                 if ( ! in_array( $t, $htmlsingle ) &&
2072                                                 ( $ot = @array_pop( $tagstack ) ) != $t ) {
2073                                                         @array_push( $tagstack, $ot );
2074                                                         $badtag = 1;
2075                                                 } else {
2076                                                         if ( $t == 'table' ) {
2077                                                                 $tagstack = array_pop( $tablestack );
2078                                                         }
2079                                                         $newparams = '';
2080                                                 }
2081                                         } else {
2082                                                 # Keep track for later
2083                                                 if ( in_array( $t, $tabletags ) &&
2084                                                 ! in_array( 'table', $tagstack ) ) {
2085                                                         $badtag = 1;
2086                                                 } else if ( in_array( $t, $tagstack ) &&
2087                                                 ! in_array ( $t , $htmlnest ) ) {
2088                                                         $badtag = 1 ;
2089                                                 } else if ( ! in_array( $t, $htmlsingle ) ) {
2090                                                         if ( $t == 'table' ) {
2091                                                                 array_push( $tablestack, $tagstack );
2092                                                                 $tagstack = array();
2093                                                         }
2094                                                         array_push( $tagstack, $t );
2095                                                 }
2096                                                 # Strip non-approved attributes from the tag
2097                                                 $newparams = $this->fixTagAttributes($params);
2098
2099                                         }
2100                                         if ( ! $badtag ) {
2101                                                 $rest = str_replace( '>', '&gt;', $rest );
2102                                                 $text .= "<$slash$t $newparams$brace$rest";
2103                                                 continue;
2104                                         }
2105                                 }
2106                                 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
2107                         }
2108                         # Close off any remaining tags
2109                         while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
2110                                 $text .= "</$t>\n";
2111                                 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
2112                         }
2113                 } else {
2114                         # this might be possible using tidy itself
2115                         foreach ( $bits as $x ) {
2116                                 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2117                                 $x, $regs );
2118                                 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2119                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2120                                         $newparams = $this->fixTagAttributes($params);
2121                                         $rest = str_replace( '>', '&gt;', $rest );
2122                                         $text .= "<$slash$t $newparams$brace$rest";
2123                                 } else {
2124                                         $text .= '&lt;' . str_replace( '>', '&gt;', $x);
2125                                 }
2126                         }
2127                 }
2128                 wfProfileOut( $fname );
2129                 return $text;
2130         }
2131
2132         /**
2133          * Remove '<!--', '-->', and everything between.
2134          * To avoid leaving blank lines, when a comment is both preceded
2135          * and followed by a newline (ignoring spaces), trim leading and
2136          * trailing spaces and one of the newlines.
2137          *
2138          * @access private
2139          */
2140         function removeHTMLcomments( $text ) {
2141                 $fname='Parser::removeHTMLcomments';
2142                 wfProfileIn( $fname );
2143                 while (($start = strpos($text, '<!--')) !== false) {
2144                         $end = strpos($text, '-->', $start + 4);
2145                         if ($end === false) {
2146                                 # Unterminated comment; bail out
2147                                 break;
2148                         }
2149
2150                         $end += 3;
2151
2152                         # Trim space and newline if the comment is both
2153                         # preceded and followed by a newline
2154                         $spaceStart = max($start - 1, 0);
2155                         $spaceLen = $end - $spaceStart;
2156                         while (substr($text, $spaceStart, 1) === ' ' && $spaceStart > 0) {
2157                                 $spaceStart--;
2158                                 $spaceLen++;
2159                         }
2160                         while (substr($text, $spaceStart + $spaceLen, 1) === ' ')
2161                                 $spaceLen++;
2162                         if (substr($text, $spaceStart, 1) === "\n" and substr($text, $spaceStart + $spaceLen, 1) === "\n") {
2163                                 # Remove the comment, leading and trailing
2164                                 # spaces, and leave only one newline.
2165                                 $text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
2166                         }
2167                         else {
2168                                 # Remove just the comment.
2169                                 $text = substr_replace($text, '', $start, $end - $start);
2170                         }
2171                 }
2172                 wfProfileOut( $fname );
2173                 return $text;
2174         }
2175
2176         /**
2177          * This function accomplishes several tasks:
2178          * 1) Auto-number headings if that option is enabled
2179          * 2) Add an [edit] link to sections for logged in users who have enabled the option
2180          * 3) Add a Table of contents on the top for users who have enabled the option
2181          * 4) Auto-anchor headings
2182          *
2183          * It loops through all headlines, collects the necessary data, then splits up the
2184          * string and re-inserts the newly formatted headlines.
2185          * @access private
2186          */
2187         /* private */ function formatHeadings( $text, $isMain=true ) {
2188                 global $wgInputEncoding, $wgMaxTocLevel, $wgContLang, $wgLinkHolders;
2189
2190                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
2191                 $doShowToc = $this->mOptions->getShowToc();
2192                 $forceTocHere = false;
2193                 if( !$this->mTitle->userCanEdit() ) {
2194                         $showEditLink = 0;
2195                         $rightClickHack = 0;
2196                 } else {
2197                         $showEditLink = $this->mOptions->getEditSection();
2198                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
2199                 }
2200
2201                 # Inhibit editsection links if requested in the page
2202                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
2203                 if( $esw->matchAndRemove( $text ) ) {
2204                         $showEditLink = 0;
2205                 }
2206                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
2207                 # do not add TOC
2208                 $mw =& MagicWord::get( MAG_NOTOC );
2209                 if( $mw->matchAndRemove( $text ) ) {
2210                         $doShowToc = 0;
2211                 }
2212
2213                 # never add the TOC to the Main Page. This is an entry page that should not
2214                 # be more than 1-2 screens large anyway
2215                 if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
2216                         $doShowToc = 0;
2217                 }
2218
2219                 # Get all headlines for numbering them and adding funky stuff like [edit]
2220                 # links - this is for later, but we need the number of headlines right now
2221                 $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );
2222
2223                 # if there are fewer than 4 headlines in the article, do not show TOC
2224                 if( $numMatches < 4 ) {
2225                         $doShowToc = 0;
2226                 }
2227
2228                 # if the string __TOC__ (not case-sensitive) occurs in the HTML,
2229                 # override above conditions and always show TOC at that place
2230                 $mw =& MagicWord::get( MAG_TOC );
2231                 if ($mw->match( $text ) ) {
2232                         $doShowToc = 1;
2233                         $forceTocHere = true;
2234                 } else {
2235                         # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
2236                         # override above conditions and always show TOC above first header
2237                         $mw =& MagicWord::get( MAG_FORCETOC );
2238                         if ($mw->matchAndRemove( $text ) ) {
2239                                 $doShowToc = 1;
2240                         }
2241                 }
2242
2243
2244
2245                 # We need this to perform operations on the HTML
2246                 $sk =& $this->mOptions->getSkin();
2247
2248                 # headline counter
2249                 $headlineCount = 0;
2250                 $sectionCount = 0; # headlineCount excluding template sections
2251
2252                 # Ugh .. the TOC should have neat indentation levels which can be
2253                 # passed to the skin functions. These are determined here
2254                 $toclevel = 0;
2255                 $toc = '';
2256                 $full = '';
2257                 $head = array();
2258                 $sublevelCount = array();
2259                 $level = 0;
2260                 $prevlevel = 0;
2261                 foreach( $matches[3] as $headline ) {
2262                         $istemplate = 0;
2263                         $templatetitle = "";
2264                         $templatesection = 0;
2265
2266                         if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) {
2267                                 $istemplate = 1;
2268                                 $templatetitle = base64_decode($mat[1]);
2269                                 $templatesection = 1 + (int)base64_decode($mat[2]);
2270                                 $headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline);
2271                         }
2272
2273                         $numbering = '';
2274                         if( $level ) {
2275                                 $prevlevel = $level;
2276                         }
2277                         $level = $matches[1][$headlineCount];
2278                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
2279                                 # reset when we enter a new level
2280                                 $sublevelCount[$level] = 0;
2281                                 $toc .= $sk->tocIndent( $level - $prevlevel );
2282                                 $toclevel += $level - $prevlevel;
2283                         }
2284                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
2285                                 # reset when we step back a level
2286                                 $sublevelCount[$level+1]=0;
2287                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
2288                                 $toclevel -= $prevlevel - $level;
2289                         }
2290                         # count number of headlines for each level
2291                         @$sublevelCount[$level]++;
2292                         if( $doNumberHeadings || $doShowToc ) {
2293                                 $dot = 0;
2294                                 for( $i = 1; $i <= $level; $i++ ) {
2295                                         if( !empty( $sublevelCount[$i] ) ) {
2296                                                 if( $dot ) {
2297                                                         $numbering .= '.';
2298                                                 }
2299                                                 $numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
2300                                                 $dot = 1;
2301                                         }
2302                                 }
2303                         }
2304
2305                         # The canonized header is a version of the header text safe to use for links
2306                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
2307                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
2308                         $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState );
2309
2310                         # Remove link placeholders by the link text.
2311                         #     <!--LINK number-->
2312                         # turns into
2313                         #     link text with suffix
2314                         $canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
2315                                                             "\$wgLinkHolders['texts'][\$1]",
2316                                                             $canonized_headline );
2317
2318                         # strip out HTML
2319                         $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
2320                         $tocline = trim( $canonized_headline );
2321                         $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
2322                         $replacearray = array(
2323                                 '%3A' => ':',
2324                                 '%' => '.'
2325                         );
2326                         $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
2327                         $refer[$headlineCount] = $canonized_headline;
2328
2329                         # count how many in assoc. array so we can track dupes in anchors
2330                         @$refers[$canonized_headline]++;
2331                         $refcount[$headlineCount]=$refers[$canonized_headline];
2332
2333                         # Prepend the number to the heading text
2334
2335                         if( $doNumberHeadings || $doShowToc ) {
2336                                 $tocline = $numbering . ' ' . $tocline;
2337
2338                                 # Don't number the heading if it is the only one (looks silly)
2339                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
2340                                         # the two are different if the line contains a link
2341                                         $headline=$numbering . ' ' . $headline;
2342                                 }
2343                         }
2344
2345                         # Create the anchor for linking from the TOC to the section
2346                         $anchor = $canonized_headline;
2347                         if($refcount[$headlineCount] > 1 ) {
2348                                 $anchor .= '_' . $refcount[$headlineCount];
2349                         }
2350                         if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
2351                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
2352                         }
2353                         if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
2354                                 if ( empty( $head[$headlineCount] ) ) {
2355                                         $head[$headlineCount] = '';
2356                                 }
2357                                 if( $istemplate )
2358                                         $head[$headlineCount] .= $sk->editSectionLinkForOther($templatetitle, $templatesection);
2359                                 else
2360                                         $head[$headlineCount] .= $sk->editSectionLink($sectionCount+1);
2361                         }
2362
2363                         # Add the edit section span
2364                         if( $rightClickHack ) {
2365                                 if( $istemplate )
2366                                         $headline = $sk->editSectionScriptForOther($templatetitle, $templatesection, $headline);
2367                                 else
2368                                         $headline = $sk->editSectionScript($sectionCount+1,$headline);
2369                         }
2370
2371                         # give headline the correct <h#> tag
2372                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';
2373
2374                         $headlineCount++;
2375                         if( !$istemplate )
2376                                 $sectionCount++;
2377                 }
2378
2379                 if( $doShowToc ) {
2380                         $toclines = $headlineCount;
2381                         $toc .= $sk->tocUnindent( $toclevel );
2382                         $toc = $sk->tocTable( $toc );
2383                 }
2384
2385                 # split up and insert constructed headlines
2386
2387                 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
2388                 $i = 0;
2389
2390                 foreach( $blocks as $block ) {
2391                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
2392                                 # This is the [edit] link that appears for the top block of text when
2393                                 # section editing is enabled
2394
2395                                 # Disabled because it broke block formatting
2396                                 # For example, a bullet point in the top line
2397                                 # $full .= $sk->editSectionLink(0);
2398                         }
2399                         $full .= $block;
2400                         if( $doShowToc && !$i && $isMain && !$forceTocHere) {
2401                         # Top anchor now in skin
2402                                 $full = $full.$toc;
2403                         }
2404
2405                         if( !empty( $head[$i] ) ) {
2406                                 $full .= $head[$i];
2407                         }
2408                         $i++;
2409                 }
2410                 if($forceTocHere) {
2411                         $mw =& MagicWord::get( MAG_TOC );
2412                         return $mw->replace( $toc, $full );
2413                 } else {
2414                         return $full;
2415                 }
2416         }
2417
2418         /**
2419          * Return an HTML link for the "ISBN 123456" text
2420          * @access private
2421          */
2422         function magicISBN( $text ) {
2423                 global $wgLang;
2424                 $fname = 'Parser::magicISBN';
2425                 wfProfileIn( $fname );
2426
2427                 $a = split( 'ISBN ', ' '.$text );
2428                 if ( count ( $a ) < 2 ) {
2429                         wfProfileOut( $fname );
2430                         return $text;
2431                 }
2432                 $text = substr( array_shift( $a ), 1);
2433                 $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';
2434
2435                 foreach ( $a as $x ) {
2436                         $isbn = $blank = '' ;
2437                         while ( ' ' == $x{0} ) {
2438                                 $blank .= ' ';
2439                                 $x = substr( $x, 1 );
2440                         }
2441                         if ( $x == '' ) { # blank isbn
2442                                 $text .= "ISBN $blank";
2443                                 continue;
2444                         }
2445                         while ( strstr( $valid, $x{0} ) != false ) {
2446                                 $isbn .= $x{0};
2447                                 $x = substr( $x, 1 );
2448                         }
2449                         $num = str_replace( '-', '', $isbn );
2450                         $num = str_replace( ' ', '', $num );
2451
2452                         if ( '' == $num ) {
2453                                 $text .= "ISBN $blank$x";
2454                         } else {
2455                                 $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
2456                                 $text .= '<a href="' .
2457                                 $titleObj->escapeLocalUrl( 'isbn='.$num ) .
2458                                         "\" class=\"internal\">ISBN $isbn</a>";
2459                                 $text .= $x;
2460                         }
2461                 }
2462                 wfProfileOut( $fname );
2463                 return $text;
2464         }
2465
2466         /**
2467          * Return an HTML link for the "GEO ..." text
2468          * @access private
2469          */
2470         function magicGEO( $text ) {
2471                 global $wgLang, $wgUseGeoMode;
2472                 $fname = 'Parser::magicGEO';
2473                 wfProfileIn( $fname );
2474
2475                 # These next five lines are only for the ~35000 U.S. Census Rambot pages...
2476                 $directions = array ( 'N' => 'North' , 'S' => 'South' , 'E' => 'East' , 'W' => 'West' ) ;
2477                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['N']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2478                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['N']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
2479                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['S']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2480                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['S']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
2481
2482                 $a = split( 'GEO ', ' '.$text );
2483                 if ( count ( $a ) < 2 ) {
2484                         wfProfileOut( $fname );
2485                         return $text;
2486                 }
2487                 $text = substr( array_shift( $a ), 1);
2488                 $valid = '0123456789.+-:';
2489
2490                 foreach ( $a as $x ) {
2491                         $geo = $blank = '' ;
2492                         while ( ' ' == $x{0} ) {
2493                                 $blank .= ' ';
2494                                 $x = substr( $x, 1 );
2495                         }
2496                         while ( strstr( $valid, $x{0} ) != false ) {
2497                                 $geo .= $x{0};
2498                                 $x = substr( $x, 1 );
2499                         }
2500                         $num = str_replace( '+', '', $geo );
2501                         $num = str_replace( ' ', '', $num );
2502
2503                         if ( '' == $num || count ( explode ( ':' , $num , 3 ) ) < 2 ) {
2504                                 $text .= "GEO $blank$x";
2505                         } else {
2506                                 $titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
2507                                 $text .= '<a href="' .
2508                                 $titleObj->escapeLocalUrl( 'coordinates='.$num ) .
2509                                         "\" class=\"internal\">GEO $geo</a>";
2510                                 $text .= $x;
2511                         }
2512                 }
2513                 wfProfileOut( $fname );
2514                 return $text;
2515         }
2516
2517         /**
2518          * Return an HTML link for the "RFC 1234" text
2519          * @access private
2520          * @param string $text text to be processed
2521          */
2522         function magicRFC( $text ) {
2523                 global $wgLang;
2524
2525                 $valid = '0123456789';
2526                 $internal = false;
2527
2528                 $a = split( 'RFC ', ' '.$text );
2529                 if ( count ( $a ) < 2 ) return $text;
2530                 $text = substr( array_shift( $a ), 1);
2531
2532                 /* Check if RFC keyword is preceed by [[.
2533                  * This test is made here cause of the array_shift above
2534                  * that prevent the test to be done in the foreach.
2535                  */
2536                 if(substr($text, -2) == '[[') { $internal = true; }
2537
2538                 foreach ( $a as $x ) {
2539                         /* token might be empty if we have RFC RFC 1234 */
2540                         if($x=='') {
2541                                 $text.='RFC ';
2542                                 continue;
2543                                 }
2544
2545                         $rfc = $blank = '' ;
2546
2547                         /** remove and save whitespaces in $blank */
2548                         while ( $x{0} == ' ' ) {
2549                                 $blank .= ' ';
2550                                 $x = substr( $x, 1 );
2551                         }
2552
2553                         /** remove and save the rfc number in $rfc */
2554                         while ( strstr( $valid, $x{0} ) != false ) {
2555                                 $rfc .= $x{0};
2556                                 $x = substr( $x, 1 );
2557                         }
2558
2559                         if ( $rfc == '') {
2560                                 /* call back stripped spaces*/
2561                                 $text .= "RFC $blank$x";
2562                         } elseif( $internal) {
2563                                 /* normal link */
2564                                 $text .= "RFC $rfc$x";
2565                         } else {
2566                                 /* build the external link*/
2567                                 $url = wfmsg( 'rfcurl' );
2568                                 $url = str_replace( '$1', $rfc, $url);
2569                                 $sk =& $this->mOptions->getSkin();
2570                                 $la = $sk->getExternalLinkAttributes( $url, 'RFC '.$rfc );
2571                                 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
2572                         }
2573
2574                         /* Check if the next RFC keyword is preceed by [[ */
2575                         $internal = (substr($x,-2) == '[[');
2576                 }
2577                 return $text;
2578         }
2579
2580         /**
2581          * Transform wiki markup when saving a page by doing \r\n -> \n
2582          * conversion, substitting signatures, {{subst:}} templates, etc.
2583          *
2584          * @param string $text the text to transform
2585          * @param Title &$title the Title object for the current article
2586          * @param User &$user the User object describing the current user
2587          * @param ParserOptions $options parsing options
2588          * @param bool $clearState whether to clear the parser state first
2589          * @return string the altered wiki markup
2590          * @access public
2591          */
2592         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
2593                 $this->mOptions = $options;
2594                 $this->mTitle =& $title;
2595                 $this->mOutputType = OT_WIKI;
2596
2597                 if ( $clearState ) {
2598                         $this->clearState();
2599                 }
2600
2601                 $stripState = false;
2602                 $pairs = array(
2603                         "\r\n" => "\n",
2604                         );
2605                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
2606                 // now with regexes
2607                 /*
2608                 $pairs = array(
2609                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
2610                         "/<br *?>/i" => "<br />",
2611                 );
2612                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
2613                 */
2614                 $text = $this->strip( $text, $stripState, false );
2615                 $text = $this->pstPass2( $text, $user );
2616                 $text = $this->unstrip( $text, $stripState );
2617                 $text = $this->unstripNoWiki( $text, $stripState );
2618                 return $text;
2619         }
2620
2621         /**
2622          * Pre-save transform helper function
2623          * @access private
2624          */
2625         function pstPass2( $text, &$user ) {
2626                 global $wgLang, $wgContLang, $wgLocaltimezone, $wgCurParser;
2627
2628                 # Variable replacement
2629                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
2630                 $text = $this->replaceVariables( $text );
2631
2632                 # Signatures
2633                 #
2634                 $n = $user->getName();
2635                 $k = $user->getOption( 'nickname' );
2636                 if ( '' == $k ) { $k = $n; }
2637                 if(isset($wgLocaltimezone)) {
2638                         $oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone);
2639                 }
2640                 /* Note: this is an ugly timezone hack for the European wikis */
2641                 $d = $wgContLang->timeanddate( date( 'YmdHis' ), false ) .
2642                   ' (' . date( 'T' ) . ')';
2643                 if(isset($wgLocaltimezone)) putenv('TZ='.$oldtzs);
2644
2645                 $text = preg_replace( '/~~~~~/', $d, $text );
2646                 $text = preg_replace( '/~~~~/', '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]] $d", $text );
2647                 $text = preg_replace( '/~~~/', '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]]", $text );
2648
2649                 # Context links: [[|name]] and [[name (context)|]]
2650                 #
2651                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
2652                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
2653                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
2654                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
2655
2656                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
2657                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
2658                 $p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/";                # [[namespace:page|]] and [[:namespace:page|]]
2659                 $p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]
2660                 $context = '';
2661                 $t = $this->mTitle->getText();
2662                 if ( preg_match( $conpat, $t, $m ) ) {
2663                         $context = $m[2];
2664                 }
2665                 $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
2666                 $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
2667                 $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );
2668
2669                 if ( '' == $context ) {
2670                         $text = preg_replace( $p2, '[[\\1]]', $text );
2671                 } else {
2672                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
2673                 }
2674
2675                 # Trim trailing whitespace
2676                 # MAG_END (__END__) tag allows for trailing
2677                 # whitespace to be deliberately included
2678                 $text = rtrim( $text );
2679                 $mw =& MagicWord::get( MAG_END );
2680                 $mw->matchAndRemove( $text );
2681
2682                 return $text;
2683         }
2684
2685         /**
2686          * Set up some variables which are usually set up in parse()
2687          * so that an external function can call some class members with confidence
2688          * @access public
2689          */
2690         function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
2691                 $this->mTitle =& $title;
2692                 $this->mOptions = $options;
2693                 $this->mOutputType = $outputType;
2694                 if ( $clearState ) {
2695                         $this->clearState();
2696                 }
2697         }
2698
2699         /**
2700          * Transform a MediaWiki message by replacing magic variables.
2701          *
2702          * @param string $text the text to transform
2703          * @param ParserOptions $options  options
2704          * @return string the text with variables substituted
2705          * @access public
2706          */
2707         function transformMsg( $text, $options ) {
2708                 global $wgTitle;
2709                 static $executing = false;
2710
2711                 # Guard against infinite recursion
2712                 if ( $executing ) {
2713                         return $text;
2714                 }
2715                 $executing = true;
2716
2717                 $this->mTitle = $wgTitle;
2718                 $this->mOptions = $options;
2719                 $this->mOutputType = OT_MSG;
2720                 $this->clearState();
2721                 $text = $this->replaceVariables( $text );
2722
2723                 $executing = false;
2724                 return $text;
2725         }
2726
2727         /**
2728          * Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2729          * Callback will be called with the text within
2730          * Transform and return the text within
2731          * @access public
2732          */
2733         function setHook( $tag, $callback ) {
2734                 $oldVal = @$this->mTagHooks[$tag];
2735                 $this->mTagHooks[$tag] = $callback;
2736                 return $oldVal;
2737         }
2738 }
2739
/**
 * Value holder for the result of a parse: the output text plus the language
 * links, category links and "contains old magic" flag that go with it, and a
 * cache timestamp.
 *
 * @package MediaWiki
 */
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        /**
         * @param string $text output text
         * @param array $languageLinks language links
         * @param array $categoryLinks category links
         * @param bool $containsOldMagic "contains old magic" flag
         */
        function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = '';
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        /**
         * Fold another ParserOutput into this one: the other object's language
         * links and category links are appended to this object's lists, and the
         * "contains old magic" flags are OR-ed. The text and the cache time are
         * left untouched.
         *
         * @param ParserOutput $other the output to merge in
         */
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links must come from $other, not from our own language links
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2777
/**
 * Options controlling how the Parser behaves. Part of the values come from
 * site-wide globals, the rest from the preferences of a User.
 *
 * @package MediaWiki
 */
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Getters

        function getUseTeX() {
                return $this->mUseTeX;
        }

        function getUseDynamicDates() {
                return $this->mUseDynamicDates;
        }

        function getInterwikiMagic() {
                return $this->mInterwikiMagic;
        }

        function getAllowExternalImages() {
                return $this->mAllowExternalImages;
        }

        function getSkin() {
                return $this->mSkin;
        }

        function getDateFormat() {
                return $this->mDateFormat;
        }

        function getEditSection() {
                return $this->mEditSection;
        }

        function getEditSectionOnRightClick() {
                return $this->mEditSectionOnRightClick;
        }

        function getNumberHeadings() {
                return $this->mNumberHeadings;
        }

        function getShowToc() {
                return $this->mShowToc;
        }

        # Setters; each returns whatever wfSetVar() returns

        function setUseTeX( $x ) {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function setUseDynamicDates( $x ) {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function setInterwikiMagic( $x ) {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function setAllowExternalImages( $x ) {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function setDateFormat( $x ) {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function setEditSection( $x ) {
                return wfSetVar( $this->mEditSection, $x );
        }

        function setEditSectionOnRightClick( $x ) {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function setNumberHeadings( $x ) {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function setShowToc( $x ) {
                return wfSetVar( $this->mShowToc, $x );
        }

        /**
         * Store the skin by reference; nothing is returned.
         */
        function setSkin( &$x ) {
                $this->mSkin =& $x;
        }

        /**
         * Build a ParserOptions object initialised from the given user.
         *
         * @param User &$user user to take the preferences from
         * @return ParserOptions
         */
        /* static */ function newFromUser( &$user ) {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        /**
         * Fill in every option: the site-wide ones from globals, the others
         * from the user's skin and preferences. When $userInput is empty a
         * blank User, marked as loaded, is used instead.
         *
         * @param User &$userInput user to take the preferences from
         */
        function initialiseFromUser( &$userInput ) {
                global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                $fname = 'ParserOptions::initialiseFromUser';
                wfProfileIn( $fname );

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Skin lookup is profiled separately
                wfProfileIn( $fname.'-skin' );
                $this->mSkin =& $user->getSkin();
                wfProfileOut( $fname.'-skin' );

                # Per-user preferences
                $this->mDateFormat = $user->getOption( 'date' );
                $this->mEditSection = $user->getOption( 'editsection' );
                $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
                $this->mNumberHeadings = $user->getOption( 'numberheadings' );
                $this->mShowToc = $user->getOption( 'showtoc' );

                wfProfileOut( $fname );
        }


}
2857
# Regex callbacks, used in Parser::replaceVariables

/**
 * Forward a regex match to braceSubstitution() on the global $wgCurParser.
 *
 * @param array $matches match array handed over by the regex function
 * @return mixed whatever $wgCurParser->braceSubstitution() returns
 */
function wfBraceSubstitution( $matches ) {
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2863
/**
 * Forward a regex match to argSubstitution() on the global $wgCurParser.
 *
 * @param array $matches match array handed over by the regex function
 * @return mixed whatever $wgCurParser->argSubstitution() returns
 */
function wfArgSubstitution( $matches ) {
        return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2868
/**
 * Forward a regex match to variableSubstitution() on the global $wgCurParser.
 *
 * @param array $matches match array handed over by the regex function
 * @return mixed whatever $wgCurParser->variableSubstitution() returns
 */
function wfVariableSubstitution( $matches ) {
        return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2873
/**
 * Return the total number of articles.
 *
 * Calls wfLoadSiteStats() first, then hands back the global
 * $wgNumberOfArticles.
 *
 * @return mixed current value of $wgNumberOfArticles
 */
function wfNumberOfArticles() {
        wfLoadSiteStats();

        global $wgNumberOfArticles;
        return $wgNumberOfArticles;
}
2883
/**
 * Get various statistics from the database.
 *
 * Fills the globals $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles from
 * row 1 of the site_stats table. Nothing is queried when $wgNumberOfArticles
 * already differs from -1, and the globals are left alone when the query
 * returns false.
 *
 * @private
 */
function wfLoadSiteStats() {
        global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

        # Anything other than -1 means there is nothing left to do
        if ( $wgNumberOfArticles != -1 ) {
                return;
        }

        $fields = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
        $conds = array( 'ss_row_id' => 1 );

        $dbr =& wfGetDB( DB_SLAVE );
        $row = $dbr->getArray( 'site_stats', $fields, $conds, 'wfLoadSiteStats' );

        if ( $row !== false ) {
                $wgTotalViews = $row->ss_total_views;
                $wgTotalEdits = $row->ss_total_edits;
                $wgNumberOfArticles = $row->ss_good_articles;
        }
}
2907
/**
 * Escape only the characters ", > and < as HTML entities. Ampersands are
 * left untouched.
 *
 * @param string $in text to escape
 * @return string the escaped text
 */
function wfEscapeHTMLTagsOnly( $in ) {
        $entities = array(
                '"' => '&quot;',
                '>' => '&gt;',
                '<' => '&lt;',
        );
        return str_replace( array_keys( $entities ), array_values( $entities ), $in );
}
2914
2915 ?>