includes/Parser.php

   1 <?php
   2
   3 /**
   4  * File for Parser and related classes
   5  *
   6  * @package MediaWiki
   7  * @version $Id$
   8  */
   9
  10 /**
  11  * Update this version number when the ParserOutput format
  12  * changes in an incompatible way, so the parser cache
  13  * can automatically discard old data.
  14  */
  15 define( 'MW_PARSER_VERSION', '1.4.0' ); # version of the ParserOutput format; see the note above
  16
  17 /**
  18  * Variable substitution O(N^2) attack
  19  *
  20  * Without countermeasures, it would be possible to attack the parser by saving
  21  * a page filled with a large number of inclusions of large pages. The size of
  22  * the generated page would be proportional to the square of the input size.
  23  * Hence, we limit the number of inclusions of any given page, thus bringing any
  24  * attack back to O(N).
  25  */
  26
  27 define( 'MAX_INCLUDE_REPEAT', 100 ); # cap on inclusions of any one page (see the note above)
  28 define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million (unit not visible here; presumably bytes -- TODO confirm)
  29
     # NOTE(review): not used in this part of the file; the RLH_ prefix suggests a
     # flag for replaceLinkHolders() -- confirm against its definition.
  30 define( 'RLH_FOR_UPDATE', 1 );
  31
  32 # Allowed values for $mOutputType
  33 define( 'OT_HTML', 1 ); # assigned by parse()
  34 define( 'OT_WIKI', 2 ); # presumably used by preSaveTransform() -- TODO confirm
  35 define( 'OT_MSG' , 3 ); # presumably used by transformMsg() -- TODO confirm
  36
  37 # string parameter for extractTags which will cause it
  38 # to strip HTML comments in addition to regular
  39 # <XML>-style tags. This should not be anything we
  40 # may want to use in wikisyntax
  41 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' ); # compared against $tag in extractTags()
  42
  43 # Prefix of the unique markers that stand in for stripped text;
     # used by strip() and insertStripItem()
  44 define( 'UNIQ_PREFIX', 'NaodW29');
  45
  46 # Constants needed for external link processing
     # Regex alternation of the URL schemes accepted by EXT_LINK_BRACKETED
  47 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
     # Regex alternation of the URL schemes accepted by EXT_IMAGE_REGEX
  48 define( 'HTTP_PROTOCOLS', 'http|https' );
  49 # URL characters: everything except ], <, >, double quote, space, or control characters
  50 define( 'EXT_LINK_URL_CLASS', '[^]<>"\\x00-\\x20\\x7F]' );
  51 # Link text characters: everything except ] and control characters (space is allowed)
  52 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
     # Characters allowed in the file name part matched by EXT_IMAGE_REGEX
  53 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
     # File extensions accepted by EXT_IMAGE_REGEX (matched case-insensitively there)
  54 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
     # Matches "[url text]": group 1 = whole URL, group 2 = protocol,
     # group 3 = link text (may be empty)
  55 define( 'EXT_LINK_BRACKETED',  '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
     # Matches a whole string that is an image URL: group 1 = protocol with its colon,
     # group 2 = everything up to the last slash, group 3 = file name, group 4 = extension
  56 define( 'EXT_IMAGE_REGEX',
  57         '/^('.HTTP_PROTOCOLS.':)'.  # Protocol
  58         '('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
  59         '('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
  60 );
  61
  62 /**
  63  * PHP Parser
  64  *
  65  * Processes wiki markup
  66  *
  67  * <pre>
  68  * There are three main entry points into the Parser class:
  69  * parse()
  70  *   produces HTML output
  71  * preSaveTransform()
  72  *   produces altered wiki markup.
  73  * transformMsg()
  74  *   performs brace substitution on MediaWiki messages
  75  *
  76  * Globals used:
  77  *    objects:   $wgLang, $wgDateFormatter, $wgLinkCache
  78  *
  79  * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  80  *
  81  * settings:
  82  *  $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  83  *  $wgNamespacesWithSubpages, $wgAllowExternalImages*,
  84  *  $wgLocaltimezone
  85  *
  86  *  * only within ParserOptions
  87  * </pre>
  88  *
  89  * @package MediaWiki
  90  */
  91 class Parser
  92 {
  93         /**#@+
  94          * @access private
  95          */
  96         # Persistent: set up by the constructor and not touched by clearState()
  97         var $mTagHooks;     // tag name => callback; strip() runs the callback on each <tag>...</tag> body
  98
  99         # Cleared with clearState():
 100         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
 101         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
 102
 103         # Temporary: $mOptions, $mTitle and $mOutputType are assigned on every parse() call;
             # $mTemplates and $mTemplatePath are initialised by the constructor.
 104         var $mOptions, $mTitle, $mOutputType,
 105             $mTemplates,        // cache of already loaded templates, avoids
 106                                 // multiple SQL queries for the same string
 107             $mTemplatePath;     // stores an unsorted hash of all the templates already loaded
 108                                 // in this path. Used for loop detection.
 109
 110         /**#@-*/
 111
 112         /**
 113          * Constructor
 114          *
 115          * @access public
 116          */
 117         function Parser() {
 118                 # Members that clearState() leaves alone are initialised here, once
 119                 $this->mTagHooks = array();
 120                 $this->mTemplates = $this->mTemplatePath = array();
 121                 $this->clearState(); # everything else starts from the cleared state
 122         }
 123
 124         /**
 125          * Clear Parser state
 126          *
 127          * @access private
 128          */
 129         function clearState() {
 130                 $this->mOutput = new ParserOutput();
 131                 # Counter, section marker and flags go back to their initial values
 132                 $this->mAutonumber = 0;
 133                 $this->mLastSection = '';
 134                 $this->mDTopen = $this->mInPre = $this->mVariables = false;
 135                 # Per-parse collections start out empty
 136                 $this->mIncludeCount = array();
 137                 $this->mArgStack = array();
 138                 $this->mStripState = array();
 139         }
 140
 141         /**
 142          * First pass--just handle <nowiki> sections, pass the rest off
 143          * to internalParse() which does all the real work.
 144          *
 145          * @access public
 146          * @return ParserOutput a ParserOutput
 147          */
 148         function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
                     # Converts wiki markup to HTML and returns the ParserOutput holding it.
                     #   $text        wiki markup to convert
                     #   $title       stored by reference in $this->mTitle
                     #   $options     stored in $this->mOptions
                     #   $linestart   handed on to internalParse() and doBlockLevels()
                     #   $clearState  when true, clearState() is called before anything else
 149                 global $wgUseTidy, $wgContLang;
 150                 $fname = 'Parser::parse';
 151                 wfProfileIn( $fname );
 152
 153                 if ( $clearState ) {
 154                         $this->clearState();
 155                 }
 156
 157                 $this->mOptions = $options;
 158                 $this->mTitle =& $title;
 159                 $this->mOutputType = OT_HTML;
 160
 161                 # Put placeholder markers in place of <nowiki>, <pre>, <math> etc. first
 162                 $text = $this->strip( $text, $this->mStripState );
 163
 164                 $text = $this->internalParse( $text, $linestart );
 165                 $text = $this->unstrip( $text, $this->mStripState );
 166                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 167                 if(!$wgUseTidy) {
 168                         $fixtags = array(
 169                                 # french spaces: non-breaking space before ? : ; ! and \302\273,
 170                                 # only if there is something before the space
 171                                 '/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;',
 172                                 # french spaces: non-breaking space after \302\253
 173                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 174                                 '/<hr *>/i' => '<hr />',
 175                                 '/<br *>/i' => '<br />',
 176                                 '/<center *>/i' => '<div class="center">',
 177                                 '/<\\/center *>/i' => '</div>',
 178                                 # Clean up spare ampersands; note that we probably ought to be
 179                                 # more careful about named entities.
 180                                 '/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 181                         );
 182                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 183                 } else {
 184                         $fixtags = array(
 185                                 # french spaces: non-breaking space before ? : ; ! and \302\273
 186                                 '/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
 187                                 # french spaces: non-breaking space after \302\253
 188                                 '/(\\302\\253) /i' => '\\1&nbsp;',
 189                                 '/<center *>/i' => '<div class="center">',
 190                                 '/<\\/center *>/i' => '</div>'
 191                         );
 192                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 193                 }
 194                 # only once and last
 195                 $text = $this->doBlockLevels( $text, $linestart );
 196
 197                 $this->replaceLinkHolders( $text );
 198                 $text = $wgContLang->convert($text);
 199
 200                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 201                 # tidy, when enabled, sees the text with all markers already restored
 202                 if ($wgUseTidy) {
 203                         $text = Parser::tidy($text);
 204                 }
 205
 206                 $this->mOutput->setText( $text );
 207                 wfProfileOut( $fname );
 208                 return $this->mOutput;
 209         }
 210
 211         /**
 212          * Get a random string
 213          *
 214          * @access private
 215          * @static
 216          */
 217         function getRandomString() {
 218                 return sprintf( '%x%x', mt_rand( 0, 0x7fffffff ), mt_rand( 0, 0x7fffffff ) ); # two draws, lowercase hex
 219         }
 220
 221         /**
 222          * Replaces all occurrences of <$tag>content</$tag> in the text
 223          * with a random marker and returns the new text. The output parameter
 224          * $content will be an associative array filled with data of the form
 225          * $unique_marker => content.
 226          *
 227          * If $content is already set, the additional entries will be appended
 228          * If $tag is set to STRIP_COMMENTS, the function will extract
 229          * <!-- HTML comments -->
 230          *
 231          * @access private
 232          * @static
 233          */
 234         function extractTags($tag, $text, &$content, $uniq_prefix = ''){
                     # Swaps every <$tag>...</$tag> section (or <!-- ... --> when $tag is
                     # STRIP_COMMENTS) for a unique marker and returns the resulting text.
                     #   $tag          tag name, matched literally and case-insensitively
                     #   $text         text to scan
                     #   $content      receives marker => section body; existing entries are kept
                     #   $uniq_prefix  prepended to every marker
                     # A section that is never closed takes the rest of the text as its body.
 235                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 236                 if ( !$content ) {
 237                         $content = array( );
 238                 }
 239                 $n = 1;
 240                 $stripped = '';
 241                 # The patterns do not change between iterations, so build them once
 242                 if ( $tag == STRIP_COMMENTS ) {
 243                         $openPattern = '/<!--/i';
 244                         $closePattern = '/-->/i';
 245                 } else {
 246                         # Quote the name so regex metacharacters in it cannot alter the pattern
 247                         $quotedTag = preg_quote( $tag, '/' );
 248                         $openPattern = "/<\\s*$quotedTag\\s*>/i";
 249                         $closePattern = "/<\\/\\s*$quotedTag\\s*>/i";
 250                 }
 251                 while ( '' != $text ) {
 252                         $p = preg_split( $openPattern, $text, 2 );
 253                         $stripped .= $p[0];
 254                         if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
 255                                 $text = '';
 256                         } else {
 257                                 $q = preg_split( $closePattern, $p[1], 2 );
 258                                 $marker = $rnd . sprintf('%08X', $n++);
 259                                 $content[$marker] = $q[0];
 260                                 $stripped .= $marker;
 261                                 $text = isset( $q[1] ) ? $q[1] : ''; # no closing tag: nothing left to scan
 262                         }
 263                 }
 264                 return $stripped;
 265         }
 265
 266         /**
 267          * Strips html, nowiki, math, pre, gallery and extension tags, rendering their
 268          * content when the output type is OT_HTML and keeping the original tags otherwise.
 269          * Returns the text, and fills an array with data needed in unstrip()
 270          * If the $state is already a valid strip state, it adds to the state
 271          *
 272          * @param bool $stripcomments when set, HTML comments <!-- like this -->
 273          *  will be stripped in addition to other tags. This is important
 274          *  for section editing, where these comments cause confusion when
 275          *  counting the sections in the wikisource
 276          *
 277          * @access private
 278          */
 279         function strip( $text, &$state, $stripcomments = false ) {
                     # Replaces special sections of $text with unique markers and records, per
                     # section type, what each marker has to be replaced with later.
                     #   $text           text to process
                     #   $state          strip state to create (when empty) or add to
                     #   $stripcomments  when true, HTML comments are stripped as well
                     # Returns the text with markers in place of the stripped sections.
 280                 $render = ($this->mOutputType == OT_HTML);
 281                 $html_content = array();
 282                 $nowiki_content = array();
 283                 $math_content = array();
 284                 $pre_content = array();
 285                 $comment_content = array();
 286                 $ext_content = array();
 287                 $gallery_content = array();
 288
 289                 # Replace any instances of the placeholders
 290                 $uniq_prefix = UNIQ_PREFIX;
 291                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 292
 293                 # html
 294                 global $wgRawHtml, $wgWhitelistEdit;
 295                 if( $wgRawHtml && $wgWhitelistEdit ) {
 296                         $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
 297                         foreach( $html_content as $marker => $content ) {
 298                                 if ($render ) {
 299                                         # Raw and unchecked for validity.
 300                                         $html_content[$marker] = $content;
 301                                 } else {
 302                                         $html_content[$marker] = '<html>'.$content.'</html>';
 303                                 }
 304                         }
 305                 }
 306
 307                 # nowiki
 308                 $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
 309                 foreach( $nowiki_content as $marker => $content ) {
 310                         if( $render ){
 311                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 312                         } else {
 313                                 $nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
 314                         }
 315                 }
 316
 317                 # math
 318                 $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
 319                 foreach( $math_content as $marker => $content ){
 320                         if( $render ) {
 321                                 if( $this->mOptions->getUseTeX() ) {
 322                                         $math_content[$marker] = renderMath( $content );
 323                                 } else {
 324                                         $math_content[$marker] = '&lt;math&gt;'.$content.'&lt;/math&gt;';
 325                                 }
 326                         } else {
 327                                 $math_content[$marker] = '<math>'.$content.'</math>';
 328                         }
 329                 }
 330
 331                 # pre
 332                 $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
 333                 foreach( $pre_content as $marker => $content ){
 334                         if( $render ){
 335                                 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
 336                         } else {
 337                                 $pre_content[$marker] = '<pre>'.$content.'</pre>';
 338                         }
 339                 }
 340
 341                 # gallery
 342                 $text = Parser::extractTags('gallery', $text, $gallery_content, $uniq_prefix);
 343                 foreach( $gallery_content as $marker => $content ) {
 344                         require_once( 'ImageGallery.php' );
 345                         if ( $render ) {
 346                                 $gallery_content[$marker] = Parser::renderImageGallery( $content );
 347                         } else {
 348                                 $gallery_content[$marker] = '<gallery>'.$content.'</gallery>';
 349                         }
 350                 }
 351
 352                 # Comments
 353                 if($stripcomments) {
 354                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 355                         foreach( $comment_content as $marker => $content ){
 356                                 $comment_content[$marker] = '<!--'.$content.'-->';
 357                         }
 358                 }
 359
 360                 # Extensions
 361                 foreach ( $this->mTagHooks as $tag => $callback ) {
 362                         $ext_content[$tag] = array();
 363                         $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
 364                         foreach( $ext_content[$tag] as $marker => $content ) {
 365                                 if ( $render ) {
 366                                         $ext_content[$tag][$marker] = $callback( $content );
 367                                 } else {
 368                                         $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
 369                                 }
 370                         }
 371                 }
 372
 373                 # Everything stripped in this call, keyed by section name
 374                 $stripped = array(
 375                   'html' => $html_content,
 376                   'nowiki' => $nowiki_content,
 377                   'math' => $math_content,
 378                   'pre' => $pre_content,
 379                   'comment' => $comment_content,
 380                   'gallery' => $gallery_content,
 381                 ) + $ext_content;
 382
 383                 # Merge state with the pre-existing state, if there is one
 384                 if ( $state ) {
 385                         foreach ( $stripped as $section => $items ) {
 386                                 if ( isset( $state[$section] ) ) {
 387                                         $state[$section] = $state[$section] + $items;
 388                                 } else {
 389                                         # Section missing from the old state (e.g. a state created by
 390                                         # insertStripItem()): add it instead of failing or dropping it
 391                                         $state[$section] = $items;
 392                                 }
 393                         }
 394                 } else {
 395                         $state = $stripped;
 396                 }
 397                 return $text;
 398         }
 399
 400         /**
 401          * restores everything removed by strip() except the nowiki and html sections
 402          *
 403          * always call unstripNoWiki() after this one
 404          * @access private
 405          */
 406         function unstrip( $text, &$state ) {
 407                 # Back to front: sections stripped later can hold markers of earlier ones
 408                 for ( $section = end( $state ); $section !== false; $section = prev( $state ) ) {
 409                         $type = key( $state );
 410                         if ( $type == 'nowiki' || $type == 'html' ) {
 411                                 continue; # these two are handled by unstripNoWiki()
 412                         }
 413                         for ( $replacement = end( $section ); $replacement !== false; $replacement = prev( $section ) ) {
 414                                 $text = str_replace( key( $section ), $replacement, $text );
 415                         }
 416                 }
 417                 return $text;
 418         }
 419
 420         /**
 421          * always call this after unstrip() to preserve the order
 422          *
 423          * @access private
 424          */
 425         function unstripNoWiki( $text, &$state ) {
 426                 global $wgRawHtml;
 427                 # nowiki sections are always restored, html sections only if $wgRawHtml is set
 428                 $sections = array( 'nowiki' );
 429                 if ( $wgRawHtml ) {
 430                         $sections[] = 'html';
 431                 }
 432                 foreach ( $sections as $section ) {
 433                         # Must expand in reverse order, otherwise nested tags will be corrupted
 434                         for ( $content = end( $state[$section] ); $content !== false; $content = prev( $state[$section] ) ) {
 435                                 $text = str_replace( key( $state[$section] ), $content, $text );
 436                         }
 437                 }
 438                 return $text;
 439         }
 440
 441         /**
 442          * Add an item to the strip state
 443          * Returns the unique tag which must be inserted into the stripped text
 444          * The tag will be replaced with the original text in unstrip()
 445          *
 446          * @access private
 447          */
 448         function insertStripItem( $text, &$state ) {
                     # Stores $text in the 'item' section of $state under a fresh unique marker
                     # and returns that marker, which the caller inserts into the stripped text.
 449                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 450                 if ( !$state ) {
 451                         # Create every built-in section that strip() merges into when it is
 452                         # handed an existing state, including 'comment' and 'gallery'
 453                         $state = array(
 454                           'html' => array(), 'nowiki' => array(), 'math' => array(),
 455                           'pre' => array(), 'comment' => array(), 'gallery' => array()
 456                         );
 457                 }
 458                 $state['item'][$rnd] = $text;
 459                 return $rnd;
 460         }
 461
 462         /**
 463          * Return allowed HTML attributes
 464          *
 465          * @access private
 466          */
 467         function getHTMLattrs () {
 468                 # Allowed attributes--no scripting, etc.
 469                 return array(
 470                         'title', 'align', 'lang', 'dir', 'width', 'height', 'bgcolor',
 471                         'clear',                                  # BR
 472                         'noshade',                                # HR
 473                         'cite',                                   # BLOCKQUOTE, Q
 474                         'size', 'face', 'color',                  # FONT
 475                         'type', 'start', 'value', 'compact',      # For various lists, mostly deprecated but safe
 476                         'summary', 'width', 'border', 'frame', 'rules', 'cellspacing', # 'width' is listed a second time here
 477                         'cellpadding', 'valign', 'char', 'charoff', 'colgroup', 'col',
 478                         'span', 'abbr', 'axis', 'headers', 'scope', 'rowspan', 'colspan', # Tables
 479                         'id', 'class', 'name', 'style'            # For CSS
 480                 );
 481         }
 482
 483         /**
 484          * Remove non approved attributes and javascript in css
 485          *
 486          * @access private
 487          */
 488         function fixTagAttributes ( $t ) {
                     # Takes the attribute part of a tag and returns it with every attribute
                     # that is not on the getHTMLattrs() whitelist removed; returns '' when
                     # the input is blank or a style attribute looks like it carries script.
 489                 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
 490
 491                 # Strip non-approved attributes from the tag. The replacement is computed by a
 492                 # callback rather than the /e modifier, so attribute text is never run as PHP.
 493                 $t = preg_replace_callback(
 494                         '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/',
 495                         array( $this, 'filterTagAttribute' ),
 496                         $t);
 497
 498                 $t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557
 499
 500                 # Strip javascript "expression" from stylesheets. Brute force approach:
 501                 # If anything offensive is found, all attributes of the HTML tag are dropped
 502
 503                 if( preg_match(
 504                         '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
 505                         wfMungeToUtf8( $t ) ) )
 506                 {
 507                         $t='';
 508                 }
 509
 510                 return trim ( $t ) ;
 511         }

             /**
              * preg_replace_callback() helper for fixTagAttributes(): keeps one
              * attribute (its name, plus "=value" when a value was matched) if the
              * lowercased name is on the getHTMLattrs() whitelist, drops it otherwise.
              *
              * @access private
              */
             function filterTagAttribute( $matches ) {
                     if ( !in_array( strtolower( $matches[1] ), $this->getHTMLattrs() ) ) {
                             return '';
                     }
                     # Group 3 is absent when the attribute was written without a value
                     $hasValue = isset( $matches[3] ) && $matches[3] !== '';
                     return $hasValue ? $matches[1] . '=' . $matches[3] : $matches[1];
             }
 512
 513         /**
 514          * interface with html tidy, used if $wgUseTidy = true
 515          *
 516          * @access public
 517          * @static
 518          */
 519         function tidy ( $text ) {
 520                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 521                 global $wgInputEncoding, $wgOutputEncoding;
 522                 $fname = 'Parser::tidy';
 523                 wfProfileIn( $fname );
 524
 525                 # -latin1 / -utf8 only when input and output encodings are the same; else -raw
 526                 $encoding = strtoupper( $wgOutputEncoding );
 527                 $opts = ' -raw';
 528                 if ( $wgInputEncoding == $wgOutputEncoding ) {
 529                         if ( $encoding == 'ISO-8859-1' ) {
 530                                 $opts = ' -latin1';
 531                         } elseif ( $encoding == 'UTF-8' ) {
 532                                 $opts = ' -utf8';
 533                         }
 534                 }
 535
 536                 # Wrap the fragment in a minimal XHTML document before handing it to tidy
 537                 $doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' .
 538                         ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';
 539                 $wrappedtext = $doctype . '<html><head><title>test</title></head><body>' . $text . '</body></html>';
 540                 # stdin and stdout of the child are pipes, its stderr goes to /dev/null
 541                 $descriptorspec = array(
 542                         0 => array('pipe', 'r'),
 543                         1 => array('pipe', 'w'),
 544                         2 => array('file', '/dev/null', 'a')
 545                 );
 546                 $command = $wgTidyBin . ' -config ' . $wgTidyConf . ' ' . $wgTidyOpts . $opts;
 547                 $cleansource = '';
 548                 $process = proc_open( $command, $descriptorspec, $pipes );
 549                 if ( is_resource( $process ) ) {
 550                         fwrite( $pipes[0], $wrappedtext );
 551                         fclose( $pipes[0] );
 552                         while ( !feof( $pipes[1] ) ) {
 553                                 $cleansource .= fgets( $pipes[1], 1024 );
 554                         }
 555                         fclose( $pipes[1] );
 556                         proc_close( $process );
 557                 }
 558                 wfProfileOut( $fname );
 559
 560                 if ( $cleansource != '' || $text == '' ) {
 561                         return $cleansource;
 562                 }
 563                 wfDebug( "Tidy error detected!\n" );
 564                 return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 565         }
 566
 567         /**
 568          * parse the wiki syntax used to render tables
 569          *
 570          * @access private
 571          */
 572         function doTableStuff ( $t ) {
                     # Converts wiki table markup to HTML one line at a time and returns the
                     # resulting text. Lines are trimmed and then recognised by their first
                     # characters: "{|" (optionally after colons) opens a table, "|}" closes
                     # it, "|-" starts a row, "|+" is a caption, "|" a cell and "!" a header cell.
 573                 $fname = 'Parser::doTableStuff';
 574                 wfProfileIn( $fname );
 575
 576                 $t = explode ( "\n" , $t ) ;
                     # The four arrays below are parallel stacks: "{|" pushes one entry on
                     # each and "|}" pops it again, so every open table has its own entries.
 577                 $td = array () ; # Is currently a td tag open?
 578                 $ltd = array () ; # Was it TD or TH?
 579                 $tr = array () ; # Is currently a tr tag open?
 580                 $ltr = array () ; # tr attributes
                     # NOTE(review): a single value rather than a stack, so a nested "{|"
                     # overwrites the indent level of the table around it -- confirm intent.
 581                 $indent_level = 0; # indent level of the table
 582                 foreach ( $t AS $k => $x )
 583                 {
 584                         $x = trim ( $x ) ;
 585                         $fc = substr ( $x , 0 , 1 ) ;
                             # Table start: each leading colon becomes one <dl><dd> wrapper,
                             # the rest of the line becomes the <table> attributes
 586                         if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
 587                                 $indent_level = strlen( $matches[1] );
 588                                 $t[$k] = "\n" .
 589                                         str_repeat( '<dl><dd>', $indent_level ) .
 590                                         '<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
 591                                 array_push ( $td , false ) ;
 592                                 array_push ( $ltd , '' ) ;
 593                                 array_push ( $tr , false ) ;
 594                                 array_push ( $ltr , '' ) ;
 595                         }
                             # No table is open: the line is left as it is
 596                         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
                             # Table end: close the open cell and row, then the table and its indent
 597                         else if ( '|}' == substr ( $x , 0 , 2 ) ) {
 598                                 $z = "</table>\n" ;
 599                                 $l = array_pop ( $ltd ) ;
 600                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 601                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 602                                 array_pop ( $ltr ) ;
 603                                 $t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
 604                         }
                             # New row: close the open cell and row; what follows the dashes is
                             # kept as the attributes of the next <tr>
 605                         else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
 606                                 $x = substr ( $x , 1 ) ;
 607                                 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 608                                 $z = '' ;
 609                                 $l = array_pop ( $ltd ) ;
 610                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 611                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 612                                 array_pop ( $ltr ) ;
 613                                 $t[$k] = $z ;
 614                                 array_push ( $tr , false ) ;
 615                                 array_push ( $td , false ) ;
 616                                 array_push ( $ltd , '' ) ;
 617                                 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 618                         }
 619                         else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
 620                                 # $x is a table row
 621                                 if ( '|+' == substr ( $x , 0 , 2 ) ) {
 622                                         $fc = '+' ;
 623                                         $x = substr ( $x , 1 ) ;
 624                                 }
                                     # Drop the marker character; "||" separates cells on one line,
                                     # and on header lines "!!" is accepted as well
 625                                 $after = substr ( $x , 1 ) ;
 626                                 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
 627                                 $after = explode ( '||' , $after ) ;
 628                                 $t[$k] = '' ;
 629
 630                                 # Loop through each table cell
 631                                 foreach ( $after AS $theline )
 632                                 {
 633                                         $z = '' ;
                                             # Anything but a caption needs an open row: emit <tr>
                                             # with the stored attributes if none is open yet
 634                                         if ( $fc != '+' )
 635                                         {
 636                                                 $tra = array_pop ( $ltr ) ;
 637                                                 if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
 638                                                 array_push ( $tr , true ) ;
 639                                                 array_push ( $ltr , '' ) ;
 640                                         }
 641
                                             # Close the previous cell, then remember the new cell's tag
 642                                         $l = array_pop ( $ltd ) ;
 643                                         if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 644                                         if ( $fc == '|' ) $l = 'td' ;
 645                                         else if ( $fc == '!' ) $l = 'th' ;
 646                                         else if ( $fc == '+' ) $l = 'caption' ;
 647                                         else $l = '' ;
 648                                         array_push ( $ltd , $l ) ;
 649
 650                                         # Cell parameters
 651                                         $y = explode ( '|' , $theline , 2 ) ;
 652                                         # Note that a '|' inside an invalid link should not
 653                                         # be mistaken as delimiting cell parameters
 654                                         if ( strpos( $y[0], '[[' ) !== false ) {
 655                                                 $y = array ($theline);
 656                                         }
 657                                         if ( count ( $y ) == 1 )
 658                                                 $y = "{$z}<{$l}>{$y[0]}" ;
                                             # NOTE(review): the doubled "$y = $y =" is redundant
 659                                         else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 660                                         $t[$k] .= $y ;
 661                                         array_push ( $td , true ) ;
 662                                 }
 663                         }
 664                 }
 665
 666                 # Closing open td, tr && table
                     # NOTE(review): unlike the "|}" branch this always emits </td>, even when
                     # $ltd recorded 'th' or 'caption', and it adds no </dd></dl> for indented
                     # tables -- looks inconsistent, confirm before relying on it.
 667                 while ( count ( $td ) > 0 )
 668                 {
 669                         if ( array_pop ( $td ) ) $t[] = '</td>' ;
 670                         if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
 671                         $t[] = '</table>' ;
 672                 }
 673
 674                 $t = implode ( "\n" , $t ) ;
 675                 #               $t = $this->removeHTMLtags( $t );
 676                 wfProfileOut( $fname );
 677                 return $t ;
 678         }
 679
 680         /**
 681          * Helper function for parse() that transforms wiki markup into
 682          * HTML. Only called for $mOutputType == OT_HTML.
 683          *
 684          * @access private
 685          */
 686         function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
 687                 global $wgContLang;
 688
 689                 $fname = 'Parser::internalParse';
 690                 wfProfileIn( $fname );
 691
 692                 $text = $this->removeHTMLtags( $text );
 693                 $text = $this->replaceVariables( $text, $args );
 694
 695                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
 696
 697                 $text = $this->doHeadings( $text );
 698                 if($this->mOptions->getUseDynamicDates()) {
 699                         global $wgDateFormatter;
 700                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 701                 }
 702                 $text = $this->doAllQuotes( $text );
 703                 $text = $this->replaceInternalLinks ( $text );
 704                 $text = $this->replaceExternalLinks( $text );
 705
 706                 # replaceInternalLinks may sometimes leave behind
 707                 # absolute URLs, which have to be masked to hide them from replaceExternalLinks
 708                 $text = str_replace("http-noparse://","http://",$text);
 709
 710                 $text = $this->doMagicLinks( $text );
 711                 $text = $this->doTableStuff( $text );
 712                 $text = $this->formatHeadings( $text, $isMain );
 713                 $sk =& $this->mOptions->getSkin();
 714                 $text = $sk->transformContent( $text );
 715
 716                 wfProfileOut( $fname );
 717                 return $text;
 718         }
 719
 720         /**
 721          * Replace special strings like "ISBN xxx" and "RFC xxx" with
 722          * magic external links.
 723          *
 724          * @access private
 725          */
 726         function &doMagicLinks( &$text ) {
 727                 global $wgUseGeoMode;
 728                 $text = $this->magicISBN( $text );
 729                 if ( isset( $wgUseGeoMode ) && $wgUseGeoMode ) {
 730                         $text = $this->magicGEO( $text );
 731                 }
 732                 $text = $this->magicRFC( $text, 'RFC ', 'rfcurl' );
 733                 $text = $this->magicRFC( $text, 'PMID ', 'pubmedurl' );
 734                 return $text;
 735         }
 736
 737         /**
 738          * Parse ^^ tokens and return html
 739          *
 740          * @access private
 741          */
 742         function doExponent( $text ) {
 743                 $fname = 'Parser::doExponent';
 744                 wfProfileIn( $fname );
 745                 $text = preg_replace('/\^\^(.*)\^\^/','<small><sup>\\1</sup></small>', $text);
 746                 wfProfileOut( $fname );
 747                 return $text;
 748         }
 749
 750         /**
 751          * Parse headers and return html
 752          *
 753          * @access private
 754          */
 755         function doHeadings( $text ) {
 756                 $fname = 'Parser::doHeadings';
 757                 wfProfileIn( $fname );
 758                 for ( $i = 6; $i >= 1; --$i ) {
 759                         $h = substr( '======', 0, $i );
 760                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 761                           "<h{$i}>\\1</h{$i}>\\2", $text );
 762                 }
 763                 wfProfileOut( $fname );
 764                 return $text;
 765         }
 766
 767         /**
 768          * Replace single quotes with HTML markup
 769          * @access private
 770          * @return string the altered text
 771          */
 772         function doAllQuotes( $text ) {
 773                 $fname = 'Parser::doAllQuotes';
 774                 wfProfileIn( $fname );
 775                 $outtext = '';
 776                 $lines = explode( "\n", $text );
 777                 foreach ( $lines as $line ) {
 778                         $outtext .= $this->doQuotes ( $line ) . "\n";
 779                 }
 780                 $outtext = substr($outtext, 0,-1);
 781                 wfProfileOut( $fname );
 782                 return $outtext;
 783         }
 784
 785         /**
 786          * Helper function for doAllQuotes()
 787          * @access private
 788          */
 789         function doQuotes( $text ) {
 790                 $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 791                 if ( count( $arr ) == 1 )
 792                         return $text;
 793                 else
 794                 {
 795                         # First, do some preliminary work. This may shift some apostrophes from
 796                         # being mark-up to being text. It also counts the number of occurrences
 797                         # of bold and italics mark-ups.
 798                         $i = 0;
 799                         $numbold = 0;
 800                         $numitalics = 0;
 801                         foreach ( $arr as $r )
 802                         {
 803                                 if ( ( $i % 2 ) == 1 )
 804                                 {
 805                                         # If there are ever four apostrophes, assume the first is supposed to
 806                                         # be text, and the remaining three constitute mark-up for bold text.
 807                                         if ( strlen( $arr[$i] ) == 4 )
 808                                         {
 809                                                 $arr[$i-1] .= "'";
 810                                                 $arr[$i] = "'''";
 811                                         }
 812                                         # If there are more than 5 apostrophes in a row, assume they're all
 813                                         # text except for the last 5.
 814                                         else if ( strlen( $arr[$i] ) > 5 )
 815                                         {
 816                                                 $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 );
 817                                                 $arr[$i] = "'''''";
 818                                         }
 819                                         # Count the number of occurrences of bold and italics mark-ups.
 820                                         # We are not counting sequences of five apostrophes.
 821                                         if ( strlen( $arr[$i] ) == 2 ) $numitalics++;  else
 822                                         if ( strlen( $arr[$i] ) == 3 ) $numbold++;     else
 823                                         if ( strlen( $arr[$i] ) == 5 ) { $numitalics++; $numbold++; }
 824                                 }
 825                                 $i++;
 826                         }
 827
 828                         # If there is an odd number of both bold and italics, it is likely
 829                         # that one of the bold ones was meant to be an apostrophe followed
 830                         # by italics. Which one we cannot know for certain, but it is more
 831                         # likely to be one that has a single-letter word before it.
 832                         if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) )
 833                         {
 834                                 $i = 0;
 835                                 $firstsingleletterword = -1;
 836                                 $firstmultiletterword = -1;
 837                                 $firstspace = -1;
 838                                 foreach ( $arr as $r )
 839                                 {
 840                                         if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) )
 841                                         {
 842                                                 $x1 = substr ($arr[$i-1], -1);
 843                                                 $x2 = substr ($arr[$i-1], -2, 1);
 844                                                 if ($x1 == ' ') {
 845                                                         if ($firstspace == -1) $firstspace = $i;
 846                                                 } else if ($x2 == ' ') {
 847                                                         if ($firstsingleletterword == -1) $firstsingleletterword = $i;
 848                                                 } else {
 849                                                         if ($firstmultiletterword == -1) $firstmultiletterword = $i;
 850                                                 }
 851                                         }
 852                                         $i++;
 853                                 }
 854
 855                                 # If there is a single-letter word, use it!
 856                                 if ($firstsingleletterword > -1)
 857                                 {
 858                                         $arr [ $firstsingleletterword ] = "''";
 859                                         $arr [ $firstsingleletterword-1 ] .= "'";
 860                                 }
 861                                 # If not, but there's a multi-letter word, use that one.
 862                                 else if ($firstmultiletterword > -1)
 863                                 {
 864                                         $arr [ $firstmultiletterword ] = "''";
 865                                         $arr [ $firstmultiletterword-1 ] .= "'";
 866                                 }
 867                                 # ... otherwise use the first one that has neither.
 868                                 # (notice that it is possible for all three to be -1 if, for example,
 869                                 # there is only one pentuple-apostrophe in the line)
 870                                 else if ($firstspace > -1)
 871                                 {
 872                                         $arr [ $firstspace ] = "''";
 873                                         $arr [ $firstspace-1 ] .= "'";
 874                                 }
 875                         }
 876
 877                         # Now let's actually convert our apostrophic mush to HTML!
 878                         $output = '';
 879                         $buffer = '';
 880                         $state = '';
 881                         $i = 0;
 882                         foreach ($arr as $r)
 883                         {
 884                                 if (($i % 2) == 0)
 885                                 {
 886                                         if ($state == 'both')
 887                                                 $buffer .= $r;
 888                                         else
 889                                                 $output .= $r;
 890                                 }
 891                                 else
 892                                 {
 893                                         if (strlen ($r) == 2)
 894                                         {
 895                                                 if ($state == 'i')
 896                                                 { $output .= '</i>'; $state = ''; }
 897                                                 else if ($state == 'bi')
 898                                                 { $output .= '</i>'; $state = 'b'; }
 899                                                 else if ($state == 'ib')
 900                                                 { $output .= '</b></i><b>'; $state = 'b'; }
 901                                                 else if ($state == 'both')
 902                                                 { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
 903                                                 else # $state can be 'b' or ''
 904                                                 { $output .= '<i>'; $state .= 'i'; }
 905                                         }
 906                                         else if (strlen ($r) == 3)
 907                                         {
 908                                                 if ($state == 'b')
 909                                                 { $output .= '</b>'; $state = ''; }
 910                                                 else if ($state == 'bi')
 911                                                 { $output .= '</i></b><i>'; $state = 'i'; }
 912                                                 else if ($state == 'ib')
 913                                                 { $output .= '</b>'; $state = 'i'; }
 914                                                 else if ($state == 'both')
 915                                                 { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
 916                                                 else # $state can be 'i' or ''
 917                                                 { $output .= '<b>'; $state .= 'b'; }
 918                                         }
 919                                         else if (strlen ($r) == 5)
 920                                         {
 921                                                 if ($state == 'b')
 922                                                 { $output .= '</b><i>'; $state = 'i'; }
 923                                                 else if ($state == 'i')
 924                                                 { $output .= '</i><b>'; $state = 'b'; }
 925                                                 else if ($state == 'bi')
 926                                                 { $output .= '</i></b>'; $state = ''; }
 927                                                 else if ($state == 'ib')
 928                                                 { $output .= '</b></i>'; $state = ''; }
 929                                                 else if ($state == 'both')
 930                                                 { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
 931                                                 else # ($state == '')
 932                                                 { $buffer = ''; $state = 'both'; }
 933                                         }
 934                                 }
 935                                 $i++;
 936                         }
 937                         # Now close all remaining tags.  Notice that the order is important.
 938                         if ($state == 'b' || $state == 'ib')
 939                                 $output .= '</b>';
 940                         if ($state == 'i' || $state == 'bi' || $state == 'ib')
 941                                 $output .= '</i>';
 942                         if ($state == 'bi')
 943                                 $output .= '</b>';
 944                         if ($state == 'both')
 945                                 $output .= '<b><i>'.$buffer.'</i></b>';
 946                         return $output;
 947                 }
 948         }
 949
 950         /**
 951          * Replace external links
 952          *
 953          * Note: this is all very hackish and the order of execution matters a lot.
 954          * Make sure to run maintenance/parserTests.php if you change this code.
 955          *
 956          * @access private
 957          */
 958         function replaceExternalLinks( $text ) {
 959                 $fname = 'Parser::replaceExternalLinks';
 960                 wfProfileIn( $fname );
 961
 962                 $sk =& $this->mOptions->getSkin();
 963                 $linktrail = wfMsgForContent('linktrail');
 964                 $bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 965
 966                 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
 967
 968                 $i = 0;
 969                 while ( $i<count( $bits ) ) {
 970                         $url = $bits[$i++];
 971                         $protocol = $bits[$i++];
 972                         $text = $bits[$i++];
 973                         $trail = $bits[$i++];
 974
 975                         # The characters '<' and '>' (which were escaped by
 976                         # removeHTMLtags()) should not be included in
 977                         # URLs, per RFC 2396.
 978                         if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
 979                                 $text = substr($url, $m2[0][1]) . ' ' . $text;
 980                                 $url = substr($url, 0, $m2[0][1]);
 981                         }
 982
 983                         # If the link text is an image URL, replace it with an <img> tag
 984                         # This happened by accident in the original parser, but some people used it extensively
 985                         $img = $this->maybeMakeImageLink( $text );
 986                         if ( $img !== false ) {
 987                                 $text = $img;
 988                         }
 989
 990                         $dtrail = '';
 991
 992                         # No link text, e.g. [http://domain.tld/some.link]
 993                         if ( $text == '' ) {
 994                                 # Autonumber if allowed
 995                                 if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
 996                                         $text = '[' . ++$this->mAutonumber . ']';
 997                                 } else {
 998                                         # Otherwise just use the URL
 999                                         $text = htmlspecialchars( $url );
1000                                 }
1001                         } else {
1002                                 # Have link text, e.g. [http://domain.tld/some.link text]s
1003                                 # Check for trail
1004                                 if ( preg_match( $linktrail, $trail, $m2 ) ) {
1005                                         $dtrail = $m2[1];
1006                                         $trail = $m2[2];
1007                                 }
1008                         }
1009
1010                         $encUrl = htmlspecialchars( $url );
1011                         # Bit in parentheses showing the URL for the printable version
1012                         if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, '/' ) . "/?$!", $url ) ) {
1013                                 $paren = '';
1014                         } else {
1015                                 # Expand the URL for printable version
1016                                 if ( ! $sk->suppressUrlExpansion() ) {
1017                                         $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
1018                                 } else {
1019                                         $paren = '';
1020                                 }
1021                         }
1022
1023                         # Process the trail (i.e. everything after this link up until start of the next link),
1024                         # replacing any non-bracketed links
1025                         $trail = $this->replaceFreeExternalLinks( $trail );
1026
1027                         # Use the encoded URL
1028                         # This means that users can paste URLs directly into the text
1029                         # Funny characters like &ouml; aren't valid in URLs anyway
1030                         # This was changed in August 2004
1031                         $s .= $sk->makeExternalLink( $url, $text, false ) . $dtrail. $paren . $trail;
1032                 }
1033
1034                 wfProfileOut( $fname );
1035                 return $s;
1036         }
1037
1038         /**
1039          * Replace anything that looks like a URL with a link
1040          * @access private
1041          */
1042         function replaceFreeExternalLinks( $text ) {
1043                 $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1044                 $s = array_shift( $bits );
1045                 $i = 0;
1046
1047                 $sk =& $this->mOptions->getSkin();
1048
1049                 while ( $i < count( $bits ) ){
1050                         $protocol = $bits[$i++];
1051                         $remainder = $bits[$i++];
1052
1053                         if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
1054                                 # Found some characters after the protocol that look promising
1055                                 $url = $protocol . $m[1];
1056                                 $trail = $m[2];
1057
1058                                 # The characters '<' and '>' (which were escaped by
1059                                 # removeHTMLtags()) should not be included in
1060                                 # URLs, per RFC 2396.
1061                                 if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
1062                                         $trail = substr($url, $m2[0][1]) . $trail;
1063                                         $url = substr($url, 0, $m2[0][1]);
1064                                 }
1065
1066                                 # Move trailing punctuation to $trail
1067                                 $sep = ',;\.:!?';
1068                                 # If there is no left bracket, then consider right brackets fair game too
1069                                 if ( strpos( $url, '(' ) === false ) {
1070                                         $sep .= ')';
1071                                 }
1072
1073                                 $numSepChars = strspn( strrev( $url ), $sep );
1074                                 if ( $numSepChars ) {
1075                                         $trail = substr( $url, -$numSepChars ) . $trail;
1076                                         $url = substr( $url, 0, -$numSepChars );
1077                                 }
1078
1079                                 # Replace &amp; from obsolete syntax with &.
1080                                 # All HTML entities will be escaped by makeExternalLink()
1081                                 # or maybeMakeImageLink()
1082                                 $url = str_replace( '&amp;', '&', $url );
1083
1084                                 # Is this an external image?
1085                                 $text = $this->maybeMakeImageLink( $url );
1086                                 if ( $text === false ) {
1087                                         # Not an image, make a link
1088                                         $text = $sk->makeExternalLink( $url, $url );
1089                                 }
1090                                 $s .= $text . $trail;
1091                         } else {
1092                                 $s .= $protocol . $remainder;
1093                         }
1094                 }
1095                 return $s;
1096         }
1097
1098         /**
1099          * make an image if it's allowed
1100          * @access private
1101          */
1102         function maybeMakeImageLink( $url ) {
1103                 $sk =& $this->mOptions->getSkin();
1104                 $text = false;
1105                 if ( $this->mOptions->getAllowExternalImages() ) {
1106                         if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
1107                                 # Image found
1108                                 $text = $sk->makeImage( htmlspecialchars( $url ) );
1109                         }
1110                 }
1111                 return $text;
1112         }
1113
1114         /**
1115          * Process [[ ]] wikilinks
1116          *
1117          * @access private
1118          */
1119
1120         function replaceInternalLinks( $s ) {
1121                 global $wgLang, $wgContLang, $wgLinkCache;
1122                 global $wgDisableLangConversion;
1123                 static $fname = 'Parser::replaceInternalLinks' ;
1124
1125                 wfProfileIn( $fname );
1126
1127                 wfProfileIn( $fname.'-setup' );
1128                 static $tc = FALSE;
1129                 # the % is needed to support urlencoded titles as well
1130                 if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
1131
1132                 $sk =& $this->mOptions->getSkin();
1133                 global $wgUseOldExistenceCheck;
1134                 # "Post-parse link colour check" works only on wiki text since it's now
1135                 # in Parser. Enable it, then disable it when we're done.
1136                 $saveParseColour = $sk->postParseLinkColour( !$wgUseOldExistenceCheck );
1137
1138                 $redirect = MagicWord::get ( MAG_REDIRECT ) ;
1139
1140                 #split the entire text string on occurences of [[
1141                 $a = explode( '[[', ' ' . $s );
1142                 #get the first element (all text up to first [[), and remove the space we added
1143                 $s = array_shift( $a );
1144                 $s = substr( $s, 1 );
1145
1146                 # Match a link having the form [[namespace:link|alternate]]trail
1147                 static $e1 = FALSE;
1148                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
1149                 # Match cases where there is no "]]", which might still be images
1150                 static $e1_img = FALSE;
1151                 if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; }
1152                 # Match the end of a line for a word that's not followed by whitespace,
1153                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
1154                 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
1155
1156                 $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
1157
1158                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
1159
1160                 if ( $useLinkPrefixExtension ) {
1161                         if ( preg_match( $e2, $s, $m ) ) {
1162                                 $first_prefix = $m[2];
1163                                 $s = $m[1];
1164                         } else {
1165                                 $first_prefix = false;
1166                         }
1167                 } else {
1168                         $prefix = '';
1169                 }
1170
1171                 $selflink = $this->mTitle->getPrefixedText();
1172                 wfProfileOut( $fname.'-setup' );
1173
1174                 $checkVariantLink = sizeof($wgContLang->getVariants())>1;
1175                 # Loop for each link
1176                 for ($k = 0; isset( $a[$k] ); $k++) {
1177                         $line = $a[$k];
1178                         if ( $useLinkPrefixExtension ) {
1179                                 wfProfileIn( $fname.'-prefixhandling' );
1180                                 if ( preg_match( $e2, $s, $m ) ) {
1181                                         $prefix = $m[2];
1182                                         $s = $m[1];
1183                                 } else {
1184                                         $prefix='';
1185                                 }
1186                                 # first link
1187                                 if($first_prefix) {
1188                                         $prefix = $first_prefix;
1189                                         $first_prefix = false;
1190                                 }
1191                                 wfProfileOut( $fname.'-prefixhandling' );
1192                         }
1193
1194                         $might_be_img = false;
1195
1196                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1197                                 $text = $m[2];
1198                                 # fix up urlencoded title texts
1199                                 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1200                                 $trail = $m[3];
1201                         } elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption
1202                                 $might_be_img = true;
1203                                 $text = $m[2];
1204                                 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1205                                 $trail = "";
1206                         } else { # Invalid form; output directly
1207                                 $s .= $prefix . '[[' . $line ;
1208                                 continue;
1209                         }
1210
1211                         # Don't allow internal links to pages containing
1212                         # PROTO: where PROTO is a valid URL protocol; these
1213                         # should be external links.
1214                         if (preg_match('/^((?:'.URL_PROTOCOLS.'):)/', $m[1])) {
1215                                 $s .= $prefix . '[[' . $line ;
1216                                 continue;
1217                         }
1218
1219                         # Make subpage if necessary
1220                         $link = $this->maybeDoSubpageLink( $m[1], $text );
1221
1222                         $noforce = (substr($m[1], 0, 1) != ':');
1223                         if (!$noforce) {
1224                                 # Strip off leading ':'
1225                                 $link = substr($link, 1);
1226                         }
1227
1228                         $nt =& Title::newFromText( $this->unstripNoWiki($link, $this->mStripState) );
1229                         if( !$nt ) {
1230                                 $s .= $prefix . '[[' . $line;
1231                                 continue;
1232                         }
1233
1234                         #check other language variants of the link
1235                         #if the article does not exist
1236                         if( $checkVariantLink
1237                             && $nt->getArticleID() == 0 ) {
1238                                 $wgContLang->findVariantLink($link, $nt);
1239                         }
1240
1241                         $ns = $nt->getNamespace();
1242                         $iw = $nt->getInterWiki();
1243
1244                         if ($might_be_img) { # if this is actually an invalid link
1245                                 if ($ns == NS_IMAGE && $noforce) { #but might be an image
1246                                         $found = false;
1247                                         while (isset ($a[$k+1]) ) {
1248                                                 #look at the next 'line' to see if we can close it there
1249                                                 $next_line =  array_shift(array_splice( $a, $k + 1, 1) );
1250                                                 if( preg_match("/^(.*?]].*?)]](.*)$/sD", $next_line, $m) ) {
1251                                                 # the first ]] closes the inner link, the second the image
1252                                                         $found = true;
1253                                                         $text .= '[[' . $m[1];
1254                                                         $trail = $m[2];
1255                                                         break;
1256                                                 } elseif( preg_match("/^.*?]].*$/sD", $next_line, $m) ) {
1257                                                         #if there's exactly one ]] that's fine, we'll keep looking
1258                                                         $text .= '[[' . $m[0];
1259                                                 } else {
1260                                                         #if $next_line is invalid too, we need look no further
1261                                                         $text .= '[[' . $next_line;
1262                                                         break;
1263                                                 }
1264                                         }
1265                                         if ( !$found ) {
1266                                                 # we couldn't find the end of this imageLink, so output it raw
1267                                                 #but don't ignore what might be perfectly normal links in the text we've examined
1268                                                 $text = $this->replaceInternalLinks($text);
1269                                                 $s .= $prefix . '[[' . $link . '|' . $text;
1270                                                 # note: no $trail, because without an end, there *is* no trail
1271                                                 continue;
1272                                         }
1273                                 } else { #it's not an image, so output it raw
1274                                         $s .= $prefix . '[[' . $link . '|' . $text;
1275                                         # note: no $trail, because without an end, there *is* no trail
1276                                         continue;
1277                                 }
1278                         }
1279
1280                         $wasblank = ( '' == $text );
1281                         if( $wasblank ) $text = $link;
1282
1283
1284                         # Link not escaped by : , create the various objects
1285                         if( $noforce ) {
1286
1287                                 # Interwikis
1288                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
1289                                         array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
1290                                         $tmp = $prefix . $trail ;
1291                                         $s .= (trim($tmp) == '')? '': $tmp;
1292                                         continue;
1293                                 }
1294
1295                                 if ( $ns == NS_IMAGE ) {
1296                                         wfProfileIn( "$fname-image" );
1297
1298                                         # recursively parse links inside the image caption
1299                                         # actually, this will parse them in any other parameters, too,
1300                                         # but it might be hard to fix that, and it doesn't matter ATM
1301                                         $text = $this->replaceExternalLinks($text);
1302                                         $text = $this->replaceInternalLinks($text);
1303
1304                                         # replace the image with a link-holder so that replaceExternalLinks() can't mess with it
1305                                         $s .= $prefix . $this->insertStripItem( $sk->makeImageLinkObj( $nt, $text ), $this->mStripState ) . $trail;
1306                                         $wgLinkCache->addImageLinkObj( $nt );
1307
1308                                         wfProfileOut( "$fname-image" );
1309                                         continue;
1310                                 }
1311
1312                                 if ( $ns == NS_CATEGORY ) {
1313                                         wfProfileIn( "$fname-category" );
1314                                         $t = $nt->getText();
1315
1316                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1317                                         $pPLC=$sk->postParseLinkColour();
1318                                         $sk->postParseLinkColour( false );
1319                                         $t = $sk->makeLinkObj( $nt, $t, '', '' , $prefix );
1320                                         $sk->postParseLinkColour( $pPLC );
1321                                         $wgLinkCache->resume();
1322
1323                                         if ( $wasblank ) {
1324                                                 if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
1325                                                         $sortkey = $this->mTitle->getText();
1326                                                 } else {
1327                                                         $sortkey = $this->mTitle->getPrefixedText();
1328                                                 }
1329                                         } else {
1330                                                 $sortkey = $text;
1331                                         }
1332                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1333                                         $this->mOutput->addCategoryLink( $t );
1334                                         $s .= $prefix . $trail ;
1335
1336                                         wfProfileOut( "$fname-category" );
1337                                         continue;
1338                                 }
1339                         }
1340
1341                         if( ( $nt->getPrefixedText() === $selflink ) &&
1342                             ( $nt->getFragment() === '' ) ) {
1343                                 # Self-links are handled specially; generally de-link and change to bold.
1344                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1345                                 continue;
1346                         }
1347
1348                         # Special and Media are pseudo-namespaces; no pages actually exist in them
1349                         if( $ns == NS_MEDIA ) {
1350                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text, true ) . $trail;
1351                                 $wgLinkCache->addImageLinkObj( $nt );
1352                                 continue;
1353                         } elseif( $ns == NS_SPECIAL ) {
1354                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1355                                 continue;
1356                         }
1357                         $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1358                 }
1359                 $sk->postParseLinkColour( $saveParseColour );
1360                 wfProfileOut( $fname );
1361                 return $s;
1362         }
1363
/**
 * Handle link to subpage if necessary
 *
 * Valid link forms:
 *   Foobar   -- normal
 *   :Foobar  -- override special treatment of prefix (images, language links)
 *   /Foobar  -- convert to CurrentPage/Foobar
 *   /Foobar/ -- convert to CurrentPage/Foobar, strip the slashes from the text
 *
 * The target is only rewritten when it starts with '/' AND the namespace of
 * the page being parsed is enabled in $wgNamespacesWithSubpages. In every
 * other case the target is returned exactly as it was passed in.
 *
 * @param string $target the source of the link
 * @param string &$text the link text, modified as necessary (only filled in
 *                      when it is the empty string and a subpage link is made)
 * @return string the full name of the link
 * @access private
 */
function maybeDoSubpageLink($target, &$text) {
	global $wgNamespacesWithSubpages;

	$fname = 'Parser::maybeDoSubpageLink';
	wfProfileIn( $fname );

	# Default: not a subpage link, use the target untouched
	$ret = $target;

	# Look at the first character. substr() is used instead of a string
	# offset so that an empty target does not raise a notice.
	# Some namespaces don't allow subpages; check that before touching
	# $target so that "/Foobar/" is not silently turned into "Foobar" there.
	if( substr( $target, 0, 1 ) == '/'
	    && !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) {
		if( substr( $target, -1, 1 ) == '/' ) {
			# / at end means we don't want the slash to be shown
			$target = substr( $target, 1, -1 );
			$noslash = $target;
		} else {
			$noslash = substr( $target, 1 );
		}

		$ret = $this->mTitle->getPrefixedText() . '/' . trim( $noslash );
		if( '' === $text ) {
			$text = $target;
		} # this might be changed for ugliness reasons
	}

	wfProfileOut( $fname );
	return $ret;
}
1410
1411         /**#@+
1412          * Used by doBlockLevels()
1413          * @access private
1414          */
/* private */ function closeParagraph() {
	# Emits the closing tag for the section recorded in mLastSection (if
	# any) and resets both the section and the "inside <pre>" state.
	$openSection = $this->mLastSection;

	$this->mLastSection = '';
	$this->mInPre = false;

	return ( '' != $openSection ) ? '</' . $openSection . ">\n" : '';
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading bytes that are identical in
# $st1 and $st2. Returns 0 if either string is empty.
#
/* private */ function getCommon( $st1, $st2 ) {
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets: the {} string offset form is no longer
	# accepted by current PHP versions.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
# These next three functions open, continue, and close the list
# element appropriate to the prefix character passed into them.
#
# openList() first closes any open paragraph section, then appends the
# opening tags for the list type selected by $char.
/* private */ function openList( $char ) {
	$html = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$html .= '<ul><li>';
			break;
		case '#':
			$html .= '<ol><li>';
			break;
		case ':':
			$html .= '<dl><dd>';
			break;
		case ';':
			$html .= '<dl><dt>';
			# remember that a <dt> is open so it gets the right closing tag
			$this->mDTopen = true;
			break;
		default:
			# unknown prefix character: the marker replaces the result
			$html = '<!-- ERR 1 -->';
	}

	return $html;
}
1454
/* private */ function nextItem( $char ) {
	# Closes the current list item and opens the next one at the same
	# level. For definition lists the closing tag depends on whether a
	# <dt> is currently open (mDTopen), and mDTopen is updated to reflect
	# the item being opened.
	switch ( $char ) {
		case '*':
		case '#':
			return '</li><li>';
		case ':':
		case ';':
			$closeTag = $this->mDTopen ? '</dt>' : '</dd>';
			$opensTerm = ( ';' == $char );
			$this->mDTopen = $opensTerm;
			return $closeTag . ( $opensTerm ? '<dt>' : '<dd>' );
	}
	return '<!-- ERR 2 -->';
}
1470
/* private */ function closeList( $char ) {
	# Returns the closing tags (followed by a newline) for the list type
	# selected by $char. Only '*', '#' and ':' are recognised here; any
	# other character yields the error marker without a newline.
	switch ( $char ) {
		case '*':
			$closing = '</li></ul>';
			break;
		case '#':
			$closing = '</li></ol>';
			break;
		case ':':
			if ( $this->mDTopen ) {
				# a term was left open: close it instead of a <dd>
				$this->mDTopen = false;
				$closing = '</dt></dl>';
			} else {
				$closing = '</dd></dl>';
			}
			break;
		default:
			return '<!-- ERR 3 -->';
	}
	return $closing . "\n";
}
1485         /**#@-*/
1486
/**
 * Make lists from lines starting with ':', '*', '#', etc.
 *
 * The text is processed line by line. Lines starting with a run of the
 * characters * # : ; open, continue or close (nested) lists; all other
 * lines go through paragraph handling, which emits <p>, <pre> (for lines
 * starting with a space) and closes the current section when a line
 * contains one of the block-level tags matched below.
 *
 * @access private
 * @param string $text the text to process
 * @param bool $linestart if false, the first line of $text is copied to the
 *                        output as-is and not treated as the start of a line
 * @return string the lists rendered as HTML
 */
function doBlockLevels( $text, $linestart ) {
	$fname = 'Parser::doBlockLevels';
	wfProfileIn( $fname );

	# Parsing through the text line by line.  The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$pref2 = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Either false, or the paragraph markup held back after a blank line
	# until we know what the following line looks like.
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match('/<\\/pre/i', $oLine );
		$preOpenMatch = preg_match('/<pre/i', $oLine );
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$pref = substr( $oLine, 0, $prefixLength );

			# ';' and ':' belong to the same list type (<dl>), so they are
			# treated as equal when comparing with the previous prefix.
			$pref2 = str_replace( ';', ':', $pref );
			$t = substr( $oLine, $prefixLength );
			$this->mInPre = !empty($preOpenMatch);
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( substr( $pref, -1 ) == ';') {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				if ($this->findColonNoLinks($t, $term, $t2) !== false) {
					$t = $t2;
					$output .= $term . $this->nextItem( ':' );
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Close every level of the previous prefix that is not shared
			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( substr( $lastPrefix, $lastPrefixLength - 1, 1 ) );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( substr( $pref, $commonPrefixLength - 1, 1 ) );
			}
			# Open every level of the new prefix that is not shared
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' == $char ) {
					# FIXME: This is dupe of code above
					if ($this->findColonNoLinks($t, $term, $t2) !== false) {
						$t = $t2;
						$output .= $term . $this->nextItem( ':' );
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			wfProfileIn( "$fname-paragraph" );
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
			$closematch = preg_match(
				'/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
				'<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/iS', $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# substr() instead of a string offset: $t is the empty string
				# for every blank line, and reading offset 0 of an empty
				# string raises a notice.
				if ( ' ' === substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
					$t = substr( $t, 1 );
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
			wfProfileOut( "$fname-paragraph" );
		}
		# While paragraph markup is being held back the (blank) line
		# itself is not written to the output.
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close whatever list levels the last line left open
	while ( $prefixLength ) {
		$output .= $this->closeList( substr( $pref2, $prefixLength - 1, 1 ) );
		--$prefixLength;
	}
	if ( '' != $this->mLastSection ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	wfProfileOut( $fname );
	return $output;
}
1649
/**
 * Split up a string on ':', ignoring any occurrences inside
 * <a>..</a> or <span>...</span>
 *
 * A ':' is accepted when, in the text before it, there are at least as
 * many closing </a> tags as opening <a tags and at least as many closing
 * </span> tags as opening <span tags.
 *
 * @param string $str the string to split
 * @param string &$before set to everything before the ':'
 * @param string &$after set to everything after the ':'
 * @return mixed the position (int) of the ':', or false if none found.
 *               When false is returned, $before and $after must not be
 *               relied upon: they may hold the split around a rejected ':'.
 */
function findColonNoLinks($str, &$before, &$after) {
	# I wonder if we should make this count all tags, not just <a>
	# and <span>. That would prevent us from matching a ':' that
	# comes in the middle of italics other such formatting....
	# -- Wil
	$fname = 'Parser::findColonNoLinks';
	wfProfileIn( $fname );
	$pos = 0;
	do {
		$colon = strpos($str, ':', $pos);

		if ($colon !== false) {
			$before = substr($str, 0, $colon);
			$after = substr($str, $colon + 1);

			# Skip any ':' within <a> or <span> pairs.
			# The opening anchor is matched with a word boundary so that
			# other tags starting with "a" (<abbr, <address, ...) are not
			# mistaken for an unclosed <a>.
			$a = preg_match_all('/<a\b/', $before, $anchorMatches);
			$s = substr_count($before, '<span');
			$ca = substr_count($before, '</a>');
			$cs = substr_count($before, '</span>');

			if ($a <= $ca and $s <= $cs) {
				# Tags are balanced before ':'; ok
				break;
			}
			# Unbalanced: continue searching after this ':'
			$pos = $colon + 1;
		}
	} while ($colon !== false);
	wfProfileOut( $fname );
	return $colon;
}
1689
/**
 * Return value of a magic variable (like PAGENAME)
 *
 * Date, time and article-count values are memoised in a function-static
 * array keyed by $index; values taken from the current title and from
 * $wgSitename / $wgServer are returned directly without being cached.
 *
 * @param mixed $index one of the MAG_* variable identifiers
 * @return mixed the value, or NULL if $index is not handled here
 * @access private
 */
function getVariableValue( $index ) {
	global $wgContLang, $wgSitename, $wgServer;

	/**
	 * Some of these require message or data lookups and can be
	 * expensive to check many times.
	 */
	static $varCache = array();
	if( isset( $varCache[$index] ) ) {
		return $varCache[$index];
	}

	# Cases that "break" produce a cacheable $value; the others
	# return straight away.
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = $wgContLang->formatNum( date( 'm' ) );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgContLang->getMonthName( date('n') );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgContLang->getMonthNameGen( date('n') );
			break;
		case MAG_CURRENTDAY:
			$value = $wgContLang->formatNum( date('j') );
			break;
		case MAG_PAGENAME:
			return $this->mTitle->getText();
		case MAG_PAGENAMEE:
			return $this->mTitle->getPartialURL();
		case MAG_NAMESPACE:
			# namespace text as provided by the content language object
			# (patch by Dori), not Namespace::getCanonicalName()
			return $wgContLang->getNsText($this->mTitle->getNamespace());
		case MAG_CURRENTDAYNAME:
			# date('w') is 0-based, getWeekdayName() is called 1-based
			$value = $wgContLang->getWeekdayName( date('w')+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = $wgContLang->formatNum( date( 'Y' ) );
			break;
		case MAG_CURRENTTIME:
			$value = $wgContLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = $wgContLang->formatNum( wfNumberOfArticles() );
			break;
		case MAG_SITENAME:
			return $wgSitename;
		case MAG_SERVER:
			return $wgServer;
		default:
			return NULL;
	}

	$varCache[$index] = $value;
	return $value;
}
1737
/**
 * initialise the magic variables (like CURRENTMONTHNAME)
 *
 * Resets $this->mVariables and, for every id in $wgVariableIDs, lets the
 * matching MagicWord object add the variable's current value to it.
 *
 * @access private
 */
function initialiseVariables() {
	global $wgVariableIDs;

	$fname = 'Parser::initialiseVariables';
	wfProfileIn( $fname );

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$magicWord->addToArray( $this->mVariables, $this->getVariableValue( $variableId ) );
	}

	wfProfileOut( $fname );
}
1754
/**
 * Replace magic variables, templates, and template arguments
 * with the appropriate text. Templates are substituted recursively,
 * taking care to avoid infinite loops.
 *
 * Note that the substitution depends on value of $mOutputType:
 *  OT_WIKI: only {{subst:}} templates
 *  OT_MSG: only magic variables
 *  OT_HTML: all templates and magic variables
 *
 * @param string $text The text to transform
 * @param array $args Key-value pairs representing template parameters to substitute
 * @return string The transformed text; text longer than MAX_INCLUDE_SIZE
 *                bytes is returned unchanged
 * @access private
 */
function replaceVariables( $text, $args = array() ) {
	# Prevent too big inclusions
	if( strlen( $text ) > MAX_INCLUDE_SIZE ) {
		return $text;
	}

	$fname = 'Parser::replaceVariables';
	wfProfileIn( $fname );

	$titleChars = Title::legalChars();

	# This function is called recursively. To keep track of arguments we need a stack:
	# $args stays on it for the duration of the three substitution passes below.
	array_push( $this->mArgStack, $args );

	# Variable substitution
	$text = preg_replace_callback( "/{{([$titleChars]*?)}}/", array( &$this, 'variableSubstitution' ), $text );

	if ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI ) {
		# Argument substitution
		$text = preg_replace_callback( "/{{{([$titleChars]*?)}}}/", array( &$this, 'argSubstitution' ), $text );
	}
	# Template substitution
	$regex = '/(\\n|{)?{{(['.$titleChars.']*)(\\|.*?|)}}/s';
	$text = preg_replace_callback( $regex, array( &$this, 'braceSubstitution' ), $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1801
/**
 * Replace magic variables
 *
 * Callback for preg_replace_callback() in replaceVariables().
 *
 * @param array $matches regex match; [0] is the whole {{...}} text,
 *                       [1] the name between the braces
 * @return string the variable's value, or the original text if it is not
 *                substituted
 * @access private
 */
function variableSubstitution( $matches ) {
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	if ( $this->mOutputType == OT_WIKI ) {
		# Do only magic variables prefixed by SUBST
		$substWord =& MagicWord::get( MAG_SUBST );
		if ( !$substWord->matchStartAndRemove( $matches[1] ) ) {
			return $matches[0];
		}
		# Note that if we don't substitute the variable below,
		# we don't remove the {{subst:}} magic word, in case
		# it is a template rather than a magic variable.
	}

	if ( !array_key_exists( $matches[1], $this->mVariables ) ) {
		# Not a known variable: leave the text untouched
		return $matches[0];
	}

	$this->mOutput->mContainsOldMagic = true;
	return $this->mVariables[$matches[1]];
}
1828
/**
 * Split a template argument string into individual arguments.
 *
 * The string is split on '|'. If an argument opens more '[[' than it
 * closes with ']]', the following '|' belongs to link syntax rather than
 * to the template parameter syntax, so the argument is merged with the
 * next piece until it is balanced or the pieces run out. A stray ']]'
 * without a matching '[[' does not trigger merging.
 *
 * @param string $argsString Either '' or the arguments with a leading '|'
 * @return array list of argument strings, indexed from 0
 */
function getTemplateArgs( $argsString ) {
        if ( $argsString === '' ) {
                return array();
        }

        # Drop the leading '|' before splitting
        $pieces = explode( '|', substr( $argsString, 1 ) );
        $count = count( $pieces );

        $args = array();
        for ( $i = 0; $i < $count; $i++ ) {
                $arg = $pieces[$i];
                # Re-attach following pieces while a link is still open
                while ( $i < $count - 1
                        && substr_count( $arg, '[[' ) > substr_count( $arg, ']]' ) ) {
                        $arg .= '|' . $pieces[++$i];
                }
                $args[] = $arg;
        }

        return $args;
}
1853
/**
 * Return the text of a template, after recursively
 * replacing any variables or templates within the template.
 *
 * Used by replaceVariables() as the callback for the template pattern.
 * Besides templates it also handles the SUBST, MSG, MSGNW, INT, NS,
 * LOCALURL, LOCALURLE and GRAMMAR magic words.
 *
 * @param array $matches The parts of the template
 *  $matches[0]: the complete matched text
 *  $matches[1]: a newline or '{' directly before the braces, if any
 *  $matches[2]: the title, i.e. the part before the |
 *  $matches[3]: the parameters (including a leading |), if any
 * @return string the text of the template, or $matches[0] unchanged if
 *  nothing could be substituted
 * @access private
 */
function braceSubstitution( $matches ) {
        global $wgLinkCache, $wgContLang;
        $found = false;
        $nowiki = false;
        $noparse = false;

        $title = NULL;

        # Need to know if the template comes at the start of a line,
        # to treat the beginning of the template like the beginning
        # of a line for tables and block-level elements.
        $linestart = $matches[1];

        # $part1 is the bit before the first |, and must contain only title characters
        # $args is a list of arguments, starting from index 0, not including $part1

        $part1 = $matches[2];
        # If the third subpattern matched anything, it will start with |

        $args = $this->getTemplateArgs($matches[3]);
        $argc = count( $args );

        # Don't parse {{{}}} because that's only for template arguments
        if ( $linestart === '{' ) {
                $text = $matches[0];
                $found = true;
                $noparse = true;
        }

        # SUBST
        if ( !$found ) {
                $mwSubst =& MagicWord::get( MAG_SUBST );
                if ( $mwSubst->matchStartAndRemove( $part1 ) xor ($this->mOutputType == OT_WIKI) ) {
                        # One of two possibilities is true:
                        # 1) Found SUBST but not in the PST phase
                        # 2) Didn't find SUBST and in the PST phase
                        # In either case, return without further processing
                        $text = $matches[0];
                        $found = true;
                        $noparse = true;
                }
        }

        # MSG, MSGNW and INT
        if ( !$found ) {
                # Check for MSGNW:
                $mwMsgnw =& MagicWord::get( MAG_MSGNW );
                if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
                        $nowiki = true;
                } else {
                        # Remove obsolete MSG:
                        $mwMsg =& MagicWord::get( MAG_MSG );
                        $mwMsg->matchStartAndRemove( $part1 );
                }

                # Check if it is an internal message
                $mwInt =& MagicWord::get( MAG_INT );
                if ( $mwInt->matchStartAndRemove( $part1 ) ) {
                        if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
                                $text = $linestart . wfMsgReal( $part1, $args, true );
                                $found = true;
                        }
                }
        }

        # NS
        if ( !$found ) {
                # Check for NS: (namespace expansion)
                $mwNs = MagicWord::get( MAG_NS );
                if ( $mwNs->matchStartAndRemove( $part1 ) ) {
                        if ( intval( $part1 ) ) {
                                # Non-zero numeric index
                                $text = $linestart . $wgContLang->getNsText( intval( $part1 ) );
                                $found = true;
                        } else {
                                # Anything else is looked up as a canonical name
                                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                                if ( !is_null( $index ) ) {
                                        $text = $linestart . $wgContLang->getNsText( $index );
                                        $found = true;
                                }
                        }
                }
        }

        # LOCALURL and LOCALURLE
        if ( !$found ) {
                $mwLocal = MagicWord::get( MAG_LOCALURL );
                $mwLocalE = MagicWord::get( MAG_LOCALURLE );

                if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
                        $func = 'getLocalURL';
                } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
                        $func = 'escapeLocalURL';
                } else {
                        $func = '';
                }

                if ( $func !== '' ) {
                        $title = Title::newFromText( $part1 );
                        if ( !is_null( $title ) ) {
                                # The first argument, if present, is passed on to the URL method
                                if ( $argc > 0 ) {
                                        $text = $linestart . $title->$func( $args[0] );
                                } else {
                                        $text = $linestart . $title->$func();
                                }
                                $found = true;
                        }
                }
        }

        # GRAMMAR
        if ( !$found && $argc == 1 ) {
                $mwGrammar =& MagicWord::get( MAG_GRAMMAR );
                if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
                        $text = $linestart . $wgContLang->convertGrammar( $args[0], $part1 );
                        $found = true;
                }
        }

        # Template table test

        # Did we encounter this template already? If yes, it is in the cache
        # and we need to check for loops.
        if ( !$found && isset( $this->mTemplates[$part1] ) ) {
                # set $text to cached message.
                $text = $linestart . $this->mTemplates[$part1];
                $found = true;

                # Infinite loop test
                if ( isset( $this->mTemplatePath[$part1] ) ) {
                        $noparse = true;
                        $text .= '<!-- WARNING: template loop detected -->';
                }
        }

        # Load from database
        $itcamefromthedatabase = false;
        if ( !$found ) {
                $ns = NS_TEMPLATE;
                # $subpage is assigned first so that a real variable is handed to
                # maybeDoSubpageLink(), which presumably fills it in by reference
                # (its signature is not visible here -- TODO confirm).
                $subpage = '';
                $part1 = $this->maybeDoSubpageLink( $part1, $subpage );
                if ($subpage !== '') {
                        $ns = $this->mTitle->getNamespace();
                }
                $title = Title::newFromText( $part1, $ns );
                if ( !is_null( $title ) && !$title->isExternal() ) {
                        # Check for excessive inclusion
                        $dbk = $title->getPrefixedDBkey();
                        if ( $this->incrementIncludeCount( $dbk ) ) {
                                # Inclusion limit not exceeded: fetch the page text
                                $article = new Article( $title );
                                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                                if ( $articleContent !== false ) {
                                        $found = true;
                                        $text = $articleContent;
                                        $itcamefromthedatabase = true;
                                }
                        }

                        # If the title is valid but undisplayable, make a link to it
                        if ( $this->mOutputType == OT_HTML && !$found ) {
                                $text = '[['.$title->getPrefixedText().']]';
                                $found = true;
                        }

                        # Template cache array insertion.
                        # The cached text must not contain $linestart, because the
                        # cache lookup above prepends the $linestart of whichever
                        # call is being served.
                        if ( $found ) {
                                $this->mTemplates[$part1] = $text;
                                $text = $linestart . $text;
                        }
                }
        }

        # Recursive parsing, escaping and link table handling
        # Only for HTML output
        if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
                $text = wfEscapeWikiText( $text );
        } elseif ( ($this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI) && $found && !$noparse) {
                # Clean up argument array: "name=value" becomes a named
                # argument, anything else gets the next positional index
                $assocArgs = array();
                $index = 1;
                foreach( $args as $arg ) {
                        $eqpos = strpos( $arg, '=' );
                        if ( $eqpos === false ) {
                                $assocArgs[$index++] = $arg;
                        } else {
                                $name = trim( substr( $arg, 0, $eqpos ) );
                                $value = trim( substr( $arg, $eqpos+1 ) );
                                $assocArgs[$name] = $value;
                        }
                }

                # Add a new element to the template recursion path
                $this->mTemplatePath[$part1] = 1;

                $text = $this->strip( $text, $this->mStripState );
                $text = $this->removeHTMLtags( $text );
                $text = $this->replaceVariables( $text, $assocArgs );

                # Resume the link cache and register the inclusion as a link
                if ( $this->mOutputType == OT_HTML && !is_null( $title ) ) {
                        $wgLinkCache->addLinkObj( $title );
                }

                # If the template begins with a table or block-level
                # element, it should be treated as beginning a new line.
                # The comparison needs a real newline, hence double quotes.
                if ($linestart !== "\n" && preg_match('/^({\\||:|;|#|\*)/', $text)) {
                        $text = "\n" . $text;
                }
        }

        # Empties the template path
        $this->mTemplatePath = array();

        if ( !$found ) {
                return $matches[0];
        }

        # replace ==section headers==
        # XXX this needs to go away once we have a better parser.
        if ( $this->mOutputType != OT_WIKI && $itcamefromthedatabase ) {
                if( !is_null( $title ) )
                        $encodedname = base64_encode($title->getPrefixedDBkey());
                else
                        $encodedname = base64_encode("");
                # With PREG_SPLIT_DELIM_CAPTURE the even entries are body text
                # and the odd entries are the heading lines themselves
                $m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
                        PREG_SPLIT_DELIM_CAPTURE);
                $text = '';
                $nsec = 0;
                $pieceCount = count( $m );
                for( $i = 0; $i < $pieceCount; $i += 2 ) {
                        $text .= $m[$i];
                        if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
                        $hl = $m[$i + 1];
                        # Headings that already carry a marker are left alone
                        if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
                                $text .= $hl;
                                continue;
                        }
                        # Insert a marker holding the encoded title and section number
                        preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
                        $text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
                                . $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];

                        $nsec++;
                }
        }

        return $text;
}
2119
/**
 * Triple brace replacement -- used for template arguments.
 *
 * Used by replaceVariables() as the callback for the {{{...}}} pattern.
 * The argument set consulted is the last one on $this->mArgStack.
 *
 * @param array $matches $matches[0] is the whole match, $matches[1] the
 *  argument name between the braces
 * @return string the argument's value, or the original match if no such
 *  argument was supplied
 * @access private
 */
function argSubstitution( $matches ) {
        $key = trim( $matches[1] );
        $currentArgs = end( $this->mArgStack );

        return array_key_exists( $key, $currentArgs )
                ? $currentArgs[$key]
                : $matches[0];
}
2135
/**
 * Record one more inclusion of an entity and report whether it is
 * still allowed.
 *
 * The counter is incremented on every call, including calls that
 * return false.
 *
 * @param string $dbk Key identifying the included entity
 * @return bool true while the entity has been counted no more than
 *  MAX_INCLUDE_REPEAT times
 * @access private
 */
function incrementIncludeCount( $dbk ) {
        $seen = array_key_exists( $dbk, $this->mIncludeCount )
                ? $this->mIncludeCount[$dbk]
                : 0;
        $this->mIncludeCount[$dbk] = $seen + 1;

        return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
2150
2151
/**
 * Cleans up HTML, removes dangerous tags and attributes, and
 * removes HTML comments.
 *
 * The text is split on '<'. A fragment that parses as an allowed tag is
 * re-emitted with its attributes passed through fixTagAttributes();
 * every other fragment has its '<' and '>' escaped. Unless $wgUseTidy is
 * set, tag nesting is also tracked and tags left open are closed at the
 * end of the text. If $wgUserHtml is off, no tags are allowed.
 *
 * @param string $text The text to clean
 * @return string the cleaned text
 * @access private
 */
function removeHTMLtags( $text ) {
        global $wgUseTidy, $wgUserHtml;
        $fname = 'Parser::removeHTMLtags';
        wfProfileIn( $fname );

        if( $wgUserHtml ) {
                $htmlpairs = array( # Tags that must be closed
                        'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
                        'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
                        'strike', 'strong', 'tt', 'var', 'div', 'center',
                        'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
                        'ruby', 'rt' , 'rb' , 'rp', 'p'
                );
                $htmlsingle = array(
                        'br', 'hr', 'li', 'dt', 'dd'
                );
                $htmlnest = array( # Tags that can be nested--??
                        'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
                        'dl', 'font', 'big', 'small', 'sub', 'sup'
                );
                $tabletags = array( # Can only appear inside table
                        'td', 'th', 'tr'
                );
        } else {
                $htmlpairs = array();
                $htmlsingle = array();
                $htmlnest = array();
                $tabletags = array();
        }

        # Table cells and rows are treated like single tags: they need no closing tag
        $htmlsingle = array_merge( $tabletags, $htmlsingle );
        $htmlelements = array_merge( $htmlsingle, $htmlpairs );

        # NOTE(review): the result is not used in this function; the call is
        # kept in case getHTMLattrs() has side effects -- confirm and remove.
        $htmlattrs = $this->getHTMLattrs () ;

        # Remove HTML comments
        $text = $this->removeHTMLcomments( $text );

        # Pieces of a tag fragment (the text following a '<'):
        # 1: optional '/', 2: tag name, 3: attributes, 4: '>' or '/>',
        # 5: text up to the next '<'
        $tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';

        $bits = explode( '<', $text );
        $text = array_shift( $bits );
        if(!$wgUseTidy) {
                $tagstack = array(); $tablestack = array();
                foreach ( $bits as $x ) {
                        if ( !preg_match( $tagPattern, $x, $regs ) ) {
                                # Not a tag at all: escape it
                                $text .= '&lt;' . str_replace( '>', '&gt;', $x);
                                continue;
                        }
                        $slash = $regs[1];
                        $t = strtolower( $regs[2] );
                        $params = $regs[3];
                        $brace = $regs[4];
                        $rest = $regs[5];

                        $badtag = 0 ;
                        $newparams = '';
                        if ( in_array( $t, $htmlelements ) ) {
                                # Check our stack
                                if ( $slash ) {
                                        # Closing a tag...
                                        if ( ! in_array( $t, $htmlsingle ) ) {
                                                $ot = array_pop( $tagstack );
                                                if ( $ot != $t ) {
                                                        # Does not match the innermost open tag:
                                                        # put that tag back and reject this one
                                                        if ( !is_null( $ot ) ) {
                                                                array_push( $tagstack, $ot );
                                                        }
                                                        $badtag = 1;
                                                } elseif ( $t == 'table' ) {
                                                        # Leaving a table: restore the enclosing stack
                                                        $tagstack = array_pop( $tablestack );
                                                }
                                        }
                                } else {
                                        # Keep track for later
                                        if ( in_array( $t, $tabletags ) &&
                                        ! in_array( 'table', $tagstack ) ) {
                                                $badtag = 1;
                                        } else if ( in_array( $t, $tagstack ) &&
                                        ! in_array ( $t , $htmlnest ) ) {
                                                $badtag = 1 ;
                                        } else if ( ! in_array( $t, $htmlsingle ) ) {
                                                if ( $t == 'table' ) {
                                                        # Each table gets a fresh tag stack
                                                        array_push( $tablestack, $tagstack );
                                                        $tagstack = array();
                                                }
                                                array_push( $tagstack, $t );
                                        }
                                        # Strip non-approved attributes from the tag
                                        $newparams = $this->fixTagAttributes($params);

                                }
                                if ( ! $badtag ) {
                                        $rest = str_replace( '>', '&gt;', $rest );
                                        $text .= "<$slash$t $newparams$brace$rest";
                                        continue;
                                }
                        }
                        $text .= '&lt;' . str_replace( '>', '&gt;', $x);
                }
                # Close off any remaining tags
                while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
                        $text .= "</$t>\n";
                        if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
                }
        } else {
                # this might be possible using tidy itself
                foreach ( $bits as $x ) {
                        if ( !preg_match( $tagPattern, $x, $regs ) ) {
                                # Not a tag at all: escape it
                                $text .= '&lt;' . str_replace( '>', '&gt;', $x);
                                continue;
                        }
                        $slash = $regs[1];
                        $t = strtolower( $regs[2] );
                        $params = $regs[3];
                        $brace = $regs[4];
                        $rest = $regs[5];

                        if ( in_array( $t, $htmlelements ) ) {
                                $newparams = $this->fixTagAttributes($params);
                                $rest = str_replace( '>', '&gt;', $rest );
                                $text .= "<$slash$t $newparams$brace$rest";
                        } else {
                                $text .= '&lt;' . str_replace( '>', '&gt;', $x);
                        }
                }
        }
        wfProfileOut( $fname );
        return $text;
}
2271
/**
 * Remove '<!--', '-->', and everything between.
 * To avoid leaving blank lines, when a comment is both preceded
 * and followed by a newline (ignoring spaces), trim leading and
 * trailing spaces and one of the newlines.
 *
 * An unterminated comment stops processing; it and everything after it
 * are left in the text.
 *
 * @param string $text The text to strip comments from
 * @return string the text without complete comments
 * @access private
 */
function removeHTMLcomments( $text ) {
        $fname='Parser::removeHTMLcomments';
        wfProfileIn( $fname );

        for ( ;; ) {
                $start = strpos( $text, '<!--' );
                if ( $start === false ) {
                        # No more comments
                        break;
                }
                $close = strpos( $text, '-->', $start + 4 );
                if ( $close === false ) {
                        # Unterminated comment; bail out
                        break;
                }
                # First position after the comment
                $end = $close + 3;

                # $left: walk back from the character before the comment
                # over any spaces (never below position 0)
                $left = ( $start > 0 ) ? $start - 1 : 0;
                while ( $left > 0 && substr( $text, $left, 1 ) === ' ' ) {
                        $left--;
                }

                # $right: walk forward from the end of the comment over any spaces
                $right = $end;
                while ( substr( $text, $right, 1 ) === ' ' ) {
                        $right++;
                }

                $newlineBefore = ( substr( $text, $left, 1 ) === "\n" );
                $newlineAfter = ( substr( $text, $right, 1 ) === "\n" );

                if ( $newlineBefore && $newlineAfter ) {
                        # Remove the comment, leading and trailing
                        # spaces, and leave only one newline.
                        $text = substr_replace( $text, "\n", $left, $right - $left + 1 );
                } else {
                        # Remove just the comment.
                        $text = substr_replace( $text, '', $start, $end - $start );
                }
        }

        wfProfileOut( $fname );
        return $text;
}
2315
2316         /**
2317          * This function accomplishes several tasks:
2318          * 1) Auto-number headings if that option is enabled
2319          * 2) Add an [edit] link to sections for logged in users who have enabled the option
2320          * 3) Add a Table of contents on the top for users who have enabled the option
2321          * 4) Auto-anchor headings
2322          *
2323          * It loops through all headlines, collects the necessary data, then splits up the
2324          * string and re-inserts the newly formatted headlines.
2325          * @access private
2326          */
2327         /* private */ function formatHeadings( $text, $isMain=true ) {
2328                 global $wgInputEncoding, $wgMaxTocLevel, $wgContLang, $wgLinkHolders;
2329
2330                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
2331                 $doShowToc = $this->mOptions->getShowToc();
2332                 $forceTocHere = false;
2333                 if( !$this->mTitle->userCanEdit() ) {
2334                         $showEditLink = 0;
2335                         $rightClickHack = 0;
2336                 } else {
2337                         $showEditLink = $this->mOptions->getEditSection();
2338                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
2339                 }
2340
2341                 # Inhibit editsection links if requested in the page
2342                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
2343                 if( $esw->matchAndRemove( $text ) ) {
2344                         $showEditLink = 0;
2345                 }
2346                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
2347                 # do not add TOC
2348                 $mw =& MagicWord::get( MAG_NOTOC );
2349                 if( $mw->matchAndRemove( $text ) ) {
2350                         $doShowToc = 0;
2351                 }
2352
2353                 # never add the TOC to the Main Page. This is an entry page that should not
2354                 # be more than 1-2 screens large anyway
2355                 if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
2356                         $doShowToc = 0;
2357                 }
2358
2359                 # Get all headlines for numbering them and adding funky stuff like [edit]
2360                 # links - this is for later, but we need the number of headlines right now
2361                 $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );
2362
2363                 # if there are fewer than 4 headlines in the article, do not show TOC
2364                 if( $numMatches < 4 ) {
2365                         $doShowToc = 0;
2366                 }
2367
2368                 # if the string __TOC__ (not case-sensitive) occurs in the HTML,
2369                 # override above conditions and always show TOC at that place
2370                 $mw =& MagicWord::get( MAG_TOC );
2371                 if ($mw->match( $text ) ) {
2372                         $doShowToc = 1;
2373                         $forceTocHere = true;
2374                 } else {
2375                         # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
2376                         # override above conditions and always show TOC above first header
2377                         $mw =& MagicWord::get( MAG_FORCETOC );
2378                         if ($mw->matchAndRemove( $text ) ) {
2379                                 $doShowToc = 1;
2380                         }
2381                 }
2382
2383
2384
2385                 # We need this to perform operations on the HTML
2386                 $sk =& $this->mOptions->getSkin();
2387
2388                 # headline counter
2389                 $headlineCount = 0;
2390                 $sectionCount = 0; # headlineCount excluding template sections
2391
2392                 # Ugh .. the TOC should have neat indentation levels which can be
2393                 # passed to the skin functions. These are determined here
2394                 $toclevel = 0;
2395                 $toc = '';
2396                 $full = '';
2397                 $head = array();
2398                 $sublevelCount = array();
2399                 $level = 0;
2400                 $prevlevel = 0;
2401                 foreach( $matches[3] as $headline ) {
2402                         $istemplate = 0;
2403                         $templatetitle = "";
2404                         $templatesection = 0;
2405
2406                         if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) {
2407                                 $istemplate = 1;
2408                                 $templatetitle = base64_decode($mat[1]);
2409                                 $templatesection = 1 + (int)base64_decode($mat[2]);
2410                                 $headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline);
2411                         }
2412
2413                         $numbering = '';
2414                         if( $level ) {
2415                                 $prevlevel = $level;
2416                         }
2417                         $level = $matches[1][$headlineCount];
2418                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
2419                                 # reset when we enter a new level
2420                                 $sublevelCount[$level] = 0;
2421                                 $toc .= $sk->tocIndent( $level - $prevlevel );
2422                                 $toclevel += $level - $prevlevel;
2423                         }
2424                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
2425                                 # reset when we step back a level
2426                                 $sublevelCount[$level+1]=0;
2427                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
2428                                 $toclevel -= $prevlevel - $level;
2429                         }
2430                         # count number of headlines for each level
2431                         @$sublevelCount[$level]++;
2432                         if( $doNumberHeadings || $doShowToc ) {
2433                                 $dot = 0;
2434                                 for( $i = 1; $i <= $level; $i++ ) {
2435                                         if( !empty( $sublevelCount[$i] ) ) {
2436                                                 if( $dot ) {
2437                                                         $numbering .= '.';
2438                                                 }
2439                                                 $numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
2440                                                 $dot = 1;
2441                                         }
2442                                 }
2443                         }
2444
2445                         # The canonized header is a version of the header text safe to use for links
2446                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
2447                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
2448                         $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState );
2449
2450                         # Remove link placeholders by the link text.
2451                         #     <!--LINK number-->
2452                         # turns into
2453                         #     link text with suffix
2454                         $canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
2455                                                             "\$wgLinkHolders['texts'][\$1]",
2456                                                             $canonized_headline );
2457
2458                         # strip out HTML
2459                         $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
2460                         $tocline = trim( $canonized_headline );
2461                         $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
2462                         $replacearray = array(
2463                                 '%3A' => ':',
2464                                 '%' => '.'
2465                         );
2466                         $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
2467                         $refer[$headlineCount] = $canonized_headline;
2468
2469                         # count how many in assoc. array so we can track dupes in anchors
2470                         @$refers[$canonized_headline]++;
2471                         $refcount[$headlineCount]=$refers[$canonized_headline];
2472
2473                         # Prepend the number to the heading text
2474
2475                         if( $doNumberHeadings || $doShowToc ) {
2476                                 $tocline = $numbering . ' ' . $tocline;
2477
2478                                 # Don't number the heading if it is the only one (looks silly)
2479                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
2480                                         # the two are different if the line contains a link
2481                                         $headline=$numbering . ' ' . $headline;
2482                                 }
2483                         }
2484
2485                         # Create the anchor for linking from the TOC to the section
2486                         $anchor = $canonized_headline;
2487                         if($refcount[$headlineCount] > 1 ) {
2488                                 $anchor .= '_' . $refcount[$headlineCount];
2489                         }
2490                         if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
2491                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
2492                         }
2493                         if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
2494                                 if ( empty( $head[$headlineCount] ) ) {
2495                                         $head[$headlineCount] = '';
2496                                 }
2497                                 if( $istemplate )
2498                                         $head[$headlineCount] .= $sk->editSectionLinkForOther($templatetitle, $templatesection);
2499                                 else
2500                                         $head[$headlineCount] .= $sk->editSectionLink($this->mTitle, $sectionCount+1);
2501                         }
2502
2503                         # Add the edit section span
2504                         if( $rightClickHack ) {
2505                                 if( $istemplate )
2506                                         $headline = $sk->editSectionScriptForOther($templatetitle, $templatesection, $headline);
2507                                 else
2508                                         $headline = $sk->editSectionScript($this->mTitle, $sectionCount+1,$headline);
2509                         }
2510
2511                         # give headline the correct <h#> tag
2512                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';
2513
2514                         $headlineCount++;
2515                         if( !$istemplate )
2516                                 $sectionCount++;
2517                 }
2518
2519                 if( $doShowToc ) {
2520                         $toclines = $headlineCount;
2521                         $toc .= $sk->tocUnindent( $toclevel );
2522                         $toc = $sk->tocTable( $toc );
2523                 }
2524
2525                 # split up and insert constructed headlines
2526
2527                 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
2528                 $i = 0;
2529
2530                 foreach( $blocks as $block ) {
2531                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
2532                                 # This is the [edit] link that appears for the top block of text when
2533                                 # section editing is enabled
2534
2535                                 # Disabled because it broke block formatting
2536                                 # For example, a bullet point in the top line
2537                                 # $full .= $sk->editSectionLink(0);
2538                         }
2539                         $full .= $block;
2540                         if( $doShowToc && !$i && $isMain && !$forceTocHere) {
2541                         # Top anchor now in skin
2542                                 $full = $full.$toc;
2543                         }
2544
2545                         if( !empty( $head[$i] ) ) {
2546                                 $full .= $head[$i];
2547                         }
2548                         $i++;
2549                 }
2550                 if($forceTocHere) {
2551                         $mw =& MagicWord::get( MAG_TOC );
2552                         return $mw->replace( $toc, $full );
2553                 } else {
2554                         return $full;
2555                 }
2556         }
2557
/**
 * Turn every "ISBN <number>" occurrence in the text into an HTML link to
 * the Booksources special page.
 *
 * The number is the longest run of digits, dashes and upper-case ASCII
 * letters that follows the keyword (after optional extra spaces). Dashes
 * are removed for the query string but kept in the link label. An
 * occurrence with no usable number is copied to the output unchanged.
 *
 * @param string $text text to be processed
 * @return string the text with ISBN references replaced by links
 * @access private
 */
function magicISBN( $text ) {
    $fname = 'Parser::magicISBN';
    wfProfileIn( $fname );

    # The keyword is a literal string, so a plain explode() is enough.
    # The leading space guarantees a first chunk even when the text starts
    # with the keyword; it is cut off again right below.
    $a = explode( 'ISBN ', ' ' . $text );
    if ( count( $a ) < 2 ) {
        wfProfileOut( $fname );
        return $text;
    }
    # substr() may return false for an empty result on older PHP, hence the casts
    $text = (string)substr( array_shift( $a ), 1 );
    $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

    foreach ( $a as $x ) {
        # Extra spaces between the keyword and the number
        $len = strspn( $x, ' ' );
        $blank = str_repeat( ' ', $len );
        $x = (string)substr( $x, $len );
        if ( $x === '' ) { # blank isbn
            $text .= "ISBN $blank";
            continue;
        }

        # Longest prefix made of valid ISBN characters; strspn() is safe at
        # the end of the string, unlike looking at $x[0] in a loop
        $len = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $len );
        $x = (string)substr( $x, $len );
        $num = str_replace( '-', '', $isbn );

        if ( $num === '' ) {
            # Nothing but (possibly) dashes: put back everything we consumed
            $text .= "ISBN $blank$isbn$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
            $text .= '<a href="' .
                $titleObj->escapeLocalUrl( 'isbn=' . $num ) .
                "\" class=\"internal\">ISBN $isbn</a>";
            $text .= $x;
        }
    }
    wfProfileOut( $fname );
    return $text;
}
2605
/**
 * Turn every "GEO <coordinates>" occurrence in the text into an HTML link
 * to the Geo special page.
 *
 * Prose coordinates of the form  D&deg;M'S" North, D&deg;M'S" West  are
 * first rewritten to "(GEO +D.M.S:-D.M.S)"; South and West get a minus
 * sign, North and East a plus sign. After the keyword, the coordinate
 * string is the longest run of digits and the characters ". + - :".
 * Plus signs are removed for the query string. A run that does not
 * contain a colon is not linked and is copied to the output unchanged.
 *
 * @param string $text text to be processed
 * @return string the text with GEO references replaced by links
 * @access private
 */
function magicGEO( $text ) {
    $fname = 'Parser::magicGEO';
    wfProfileIn( $fname );

    # This rewrite is only for the ~35000 U.S. Census Rambot pages...
    $latitudes = array( 'North' => '+', 'South' => '-' );
    $longitudes = array( 'East' => '+', 'West' => '-' );
    $dms = '(\d+)&deg;(\d+)\'(\d+)"';
    foreach ( $latitudes as $latName => $latSign ) {
        foreach ( $longitudes as $lonName => $lonSign ) {
            $text = preg_replace(
                "/$dms $latName, $dms $lonName/",
                "(GEO {$latSign}\$1.\$2.\$3:{$lonSign}\$4.\$5.\$6)",
                $text );
        }
    }

    # The leading space guarantees a first chunk even when the text starts
    # with the keyword; it is cut off again right below.
    $a = explode( 'GEO ', ' ' . $text );
    if ( count( $a ) < 2 ) {
        wfProfileOut( $fname );
        return $text;
    }
    # substr() may return false for an empty result on older PHP, hence the casts
    $text = (string)substr( array_shift( $a ), 1 );
    $valid = '0123456789.+-:';

    foreach ( $a as $x ) {
        # Extra spaces between the keyword and the coordinates
        $len = strspn( $x, ' ' );
        $blank = str_repeat( ' ', $len );
        $x = (string)substr( $x, $len );

        # Longest prefix made of coordinate characters; strspn() is safe
        # when the chunk is empty or consists of valid characters only
        $len = strspn( $x, $valid );
        $geo = (string)substr( $x, 0, $len );
        $x = (string)substr( $x, $len );
        $num = str_replace( '+', '', $geo );

        if ( $num === '' || count( explode( ':', $num, 3 ) ) < 2 ) {
            # Not a latitude:longitude pair: put back everything we consumed
            $text .= "GEO $blank$geo$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
            $text .= '<a href="' .
                $titleObj->escapeLocalUrl( 'coordinates=' . $num ) .
                "\" class=\"internal\">GEO $geo</a>";
            $text .= $x;
        }
    }
    wfProfileOut( $fname );
    return $text;
}
2656
/**
 * Return an HTML link for the "RFC 1234" text (or any other
 * "<keyword><digits>" reference).
 *
 * Every occurrence of $keyword followed by optional spaces and a run of
 * digits becomes an external link whose URL is the $urlmsg message with
 * '$1' replaced by the digits. An occurrence directly preceded by "[["
 * is left as plain text so that it stays an ordinary wiki link, and an
 * occurrence without digits is copied to the output unchanged.
 *
 * @access private
 * @param string $text text to be processed
 * @param string $keyword literal keyword to look for, including its trailing space
 * @param string $urlmsg name of the message holding the URL pattern
 * @return string the processed text
 */
function magicRFC( $text, $keyword='RFC ', $urlmsg='rfcurl'  ) {
    $valid = '0123456789';

    # The keyword is matched literally; an empty one can never match
    if ( $keyword === '' ) {
        return $text;
    }

    # The leading space guarantees a first chunk even when the text starts
    # with the keyword; it is cut off again right below.
    $a = explode( $keyword, ' ' . $text );
    if ( count( $a ) < 2 ) {
        return $text;
    }
    # substr() may return false for an empty result on older PHP, hence the casts
    $text = (string)substr( array_shift( $a ), 1 );

    /* Check if the keyword is preceded by [[.
     * This test is made here because the array_shift above
     * prevents it from being done inside the foreach.
     */
    $internal = ( substr( $text, -2 ) == '[[' );

    foreach ( $a as $x ) {
        /* token might be empty if we have RFC RFC 1234 */
        if ( $x == '' ) {
            $text .= $keyword;
            continue;
        }

        /* remove and save whitespace in $blank */
        $len = strspn( $x, ' ' );
        $blank = str_repeat( ' ', $len );
        $x = (string)substr( $x, $len );

        /* remove and save the number in $id; strspn() is safe even when
         * nothing is left of the token */
        $len = strspn( $x, $valid );
        $id = (string)substr( $x, 0, $len );
        $x = (string)substr( $x, $len );

        if ( $id === '' ) {
            /* no number: put the stripped spaces back */
            $text .= $keyword.$blank.$x;
        } elseif( $internal ) {
            /* inside [[...]]: leave it to the normal link handling */
            $text .= $keyword.$id.$x;
        } else {
            /* build the external link */
            $url = wfmsg( $urlmsg );
            $url = str_replace( '$1', $id, $url);
            $sk =& $this->mOptions->getSkin();
            $la = $sk->getExternalLinkAttributes( $url, $keyword.$id );
            $text .= "<a href='{$url}'{$la}>{$keyword}{$id}</a>{$x}";
        }

        /* Check if the next keyword is preceded by [[ */
        $internal = ( substr( $x, -2 ) == '[[' );
    }
    return $text;
}
2723
/**
 * Transform wiki markup when saving a page: convert \r\n line endings to
 * \n, then run the pre-save pass (pstPass2) on everything outside the
 * stripped sections.
 *
 * @param string $text the text to transform
 * @param Title &$title the Title object for the current article
 * @param User &$user the User object describing the current user
 * @param ParserOptions $options parsing options
 * @param bool $clearState whether to clear the parser state first
 * @return string the altered wiki markup
 * @access public
 */
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
    $this->mOutputType = OT_WIKI;
    $this->mTitle =& $title;
    $this->mOptions = $options;

    if ( $clearState ) {
        $this->clearState();
    }

    # Normalise line endings
    $text = str_replace( "\r\n", "\n", $text );

    # Strip, transform what is left, then restore the stripped parts
    $state = false;
    $text = $this->strip( $text, $state, false );
    $text = $this->pstPass2( $text, $user );
    $text = $this->unstrip( $text, $state );
    $text = $this->unstripNoWiki( $text, $state );

    return $text;
}
2756
/**
 * Pre-save transform helper function.
 *
 * Runs variable replacement, expands the signature markers (three, four
 * and six tildes), completes the "pipe trick" link shortcuts
 * ([[|name]], [[name (context)|]], [[ns:name|]]) and trims trailing
 * whitespace.
 *
 * @param string $text wiki markup to transform
 * @param User &$user user whose name and 'nickname' option form the signature
 * @return string the transformed markup
 * @access private
 */
function pstPass2( $text, &$user ) {
    global $wgContLang, $wgLocaltimezone;

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( 'nickname' );
    if ( '' == $k ) { $k = $n; }
    if ( isset( $wgLocaltimezone ) ) {
        $oldtz = getenv( 'TZ' );
        putenv( 'TZ='.$wgLocaltimezone );
    }
    /* Note: this is an ugly timezone hack for the European wikis */
    $d = $wgContLang->timeanddate( date( 'YmdHis' ), false ) .
      ' (' . date( 'T' ) . ')';
    if ( isset( $wgLocaltimezone ) ) {
        # Put the previous time zone back
        putenv( 'TZ='.$oldtz );
    }

    # The markers are literal strings, so use str_replace(): a nickname
    # containing "$1" or "\1" must not be read as a regex backreference.
    # Longest marker first, because the shorter ones are its prefixes.
    $userLink = '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]]";
    $text = str_replace( '~~~~~~', $d, $text );
    $text = str_replace( '~~~~', "$userLink $d", $text );
    $text = str_replace( '~~~', $userLink, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
    $p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/";                # [[namespace:page|]] and [[:namespace:page|]]
    $p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]

    # The context is the parenthesised suffix of the current page title, if any
    $context = '';
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }
    $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
    $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
    $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

    if ( '' == $context ) {
        $text = preg_replace( $p2, '[[\\1]]', $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
2823
/**
 * Prime the title, options and output type that parse() would normally
 * set, so that outside code can call individual class members.
 *
 * @param Title &$title title to store in mTitle (kept by reference)
 * @param ParserOptions $options options to store in mOptions
 * @param int $outputType value to store in mOutputType
 * @param bool $clearState whether to call clearState() afterwards
 * @access public
 */
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
    $this->mOutputType = $outputType;
    $this->mOptions = $options;
    $this->mTitle =& $title;

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
2837
/**
 * Transform a MediaWiki message by replacing magic variables.
 *
 * A call made while another transformMsg() call is still running returns
 * its input untouched; this guards against infinite recursion.
 *
 * @param string $text the text to transform
 * @param ParserOptions $options  options
 * @return string the text with variables substituted
 * @access public
 */
function transformMsg( $text, $options ) {
    global $wgTitle;
    static $busy = false;

    if ( !$busy ) {
        $busy = true;

        # Set up the parser for message output and start from a clean state
        $this->mTitle = $wgTitle;
        $this->mOptions = $options;
        $this->mOutputType = OT_MSG;
        $this->clearState();
        $text = $this->replaceVariables( $text );

        $busy = false;
    }

    return $text;
}
2865
/**
 * Register the callback for an HTML-style tag, e.g.
 * <yourtag>special text</yourtag>. The callback is stored in
 * mTagHooks under the tag name.
 *
 * @param string $tag tag name
 * @param mixed $callback callback to store for the tag
 * @return mixed the callback previously stored for the tag, or null if none
 * @access public
 */
function setHook( $tag, $callback ) {
    $previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
    $this->mTagHooks[$tag] = $callback;
    return $previous;
}
2877
/**
 * Replace <!--LINK--> link placeholders with actual links, in the buffer.
 * Placeholders created in Skin::makeLinkObj()
 *
 * Titles not already known to the link cache are looked up in the cur
 * table with one query. Interwiki placeholders (<!--IWLINK-->) are
 * replaced from $wgInterwikiLinkHolders. Does nothing and returns an
 * empty array when $wgUseOldExistenceCheck is set.
 *
 * @param string &$text the buffer; modified in place
 * @param int $options bit field; RLH_FOR_UPDATE appends FOR UPDATE to the query
 * @return array link colours indexed by prefixed DB key:
 *  0 - broken
 *  1 - normal link
 *  2 - stub
 */
function replaceLinkHolders( &$text, $options = 0 ) {
    global $wgUser, $wgLinkCache, $wgUseOldExistenceCheck, $wgLinkHolders;
    global $wgInterwikiLinkHolders;
    global $outputReplace;

    if ( $wgUseOldExistenceCheck ) {
        return array();
    }

    $fname = 'Parser::replaceLinkHolders';
    wfProfileIn( $fname );

    $pdbks = array();
    $colours = array();

    #if ( !empty( $tmpLinks[0] ) ) { #TODO
    if ( !empty( $wgLinkHolders['namespaces'] ) ) {
        wfProfileIn( $fname.'-check' );
        $dbr =& wfGetDB( DB_SLAVE );
        $cur = $dbr->tableName( 'cur' );
        $sk = $wgUser->getSkin();
        $threshold = $wgUser->getOption('stubthreshold');

        # Sort by namespace, so that titles of one namespace end up in
        # one IN(...) list of the query
        asort( $wgLinkHolders['namespaces'] );

        # Generate query
        $query = false;
        foreach ( $wgLinkHolders['namespaces'] as $key => $val ) {
            # Make title object
            $title = $wgLinkHolders['titles'][$key];

            # Skip invalid entries.
            # Result will be ugly, but prevents crash.
            if ( is_null( $title ) ) {
                continue;
            }
            $pdbk = $pdbks[$key] = $title->getPrefixedDBkey();

            # Check if it's in the link cache already
            if ( $wgLinkCache->getGoodLinkID( $pdbk ) ) {
                $colours[$pdbk] = 1;
            } elseif ( $wgLinkCache->isBadLink( $pdbk ) ) {
                $colours[$pdbk] = 0;
            } else {
                # Not in the link cache, add it to the query
                if ( !isset( $current ) ) {
                    $current = $val;
                    $query =  "SELECT cur_id, cur_namespace, cur_title";
                    if ( $threshold > 0 ) {
                        $query .= ", LENGTH(cur_text) AS cur_len, cur_is_redirect";
                    }
                    $query .= " FROM $cur WHERE (cur_namespace=$val AND cur_title IN(";
                } elseif ( $current != $val ) {
                    $current = $val;
                    $query .= ")) OR (cur_namespace=$val AND cur_title IN(";
                } else {
                    $query .= ', ';
                }

                $query .= $dbr->addQuotes( $wgLinkHolders['dbkeys'][$key] );
            }
        }
        if ( $query ) {
            $query .= '))';
            if ( $options & RLH_FOR_UPDATE ) {
                $query .= ' FOR UPDATE';
            }

            $res = $dbr->query( $query, $fname );

            # Fetch data and form into an associative array
            # non-existent = broken
            # 1 = known
            # 2 = stub
            while ( $s = $dbr->fetchObject($res) ) {
                $title = Title::makeTitle( $s->cur_namespace, $s->cur_title );
                $pdbk = $title->getPrefixedDBkey();
                $wgLinkCache->addGoodLink( $s->cur_id, $pdbk );

                if ( $threshold >  0 ) {
                    # Only main-namespace non-redirects shorter than the
                    # threshold count as stubs
                    $size = $s->cur_len;
                    if ( $s->cur_is_redirect || $s->cur_namespace != 0 || $size >= $threshold ) {
                        $colours[$pdbk] = 1;
                    } else {
                        $colours[$pdbk] = 2;
                    }
                } else {
                    $colours[$pdbk] = 1;
                }
            }
        }
        wfProfileOut( $fname.'-check' );

        # Construct search and replace arrays
        wfProfileIn( $fname.'-construct' );
        $outputReplace = array();
        foreach ( $wgLinkHolders['namespaces'] as $key => $ns ) {
            # Entries skipped above as invalid have no DB key and no
            # usable title object, so no link can be built for them
            if ( !isset( $pdbks[$key] ) ) {
                continue;
            }
            $pdbk = $pdbks[$key];
            $searchkey = '<!--LINK '.$key.'-->';
            $title = $wgLinkHolders['titles'][$key];
            if ( empty( $colours[$pdbk] ) ) {
                # Neither cached as good nor found by the query
                $wgLinkCache->addBadLink( $pdbk );
                $colours[$pdbk] = 0;
                $outputReplace[$searchkey] = $sk->makeBrokenLinkObj( $title,
                                $wgLinkHolders['texts'][$key],
                                $wgLinkHolders['queries'][$key] );
            } elseif ( $colours[$pdbk] == 1 ) {
                $outputReplace[$searchkey] = $sk->makeKnownLinkObj( $title,
                                $wgLinkHolders['texts'][$key],
                                $wgLinkHolders['queries'][$key] );
            } elseif ( $colours[$pdbk] == 2 ) {
                $outputReplace[$searchkey] = $sk->makeStubLinkObj( $title,
                                $wgLinkHolders['texts'][$key],
                                $wgLinkHolders['queries'][$key] );
            }
        }
        wfProfileOut( $fname.'-construct' );

        # Do the thing
        # (the callback is defined elsewhere; presumably it looks the
        # match up in the global $outputReplace)
        wfProfileIn( $fname.'-replace' );

        $text = preg_replace_callback(
            '/(<!--LINK .*?-->)/',
            "outputReplaceMatches",
            $text);
        wfProfileOut( $fname.'-replace' );
    }

    if ( !empty( $wgInterwikiLinkHolders ) ) {
        wfProfileIn( $fname.'-interwiki' );
        $outputReplace = $wgInterwikiLinkHolders;
        $text = preg_replace_callback(
            '/<!--IWLINK (.*?)-->/',
            "outputReplaceMatches",
            $text );
        wfProfileOut( $fname.'-interwiki' );
    }

    wfProfileOut( $fname );
    return $colours;
}
3029
/**
 * Renders an image gallery from a text with one line per image.
 * Text labels may be given by using |-style alternative text. E.g.
 *   Image:one.jpg|The number "1"
 *   Image:tree.jpg|A tree
 * given as text will return the HTML of a gallery with two images,
 * labeled 'The number "1"' and 'A tree'.
 *
 * Empty lines are skipped, and so are lines whose title part cannot be
 * turned into a Title object. Every image that is added to the gallery is
 * also registered with $wgLinkCache through addImageLinkObj().
 *
 * @param string $text gallery source, one image per line
 * @return string whatever ImageGallery::toHTML() produces for the collected images
 */
function renderImageGallery( $text ) {
	global $wgLinkCache;
	$ig = new ImageGallery();
	$ig->setShowBytes( false );
	$ig->setShowFilename( false );
	$lines = explode( "\n", $text );

	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}
		$nt = Title::newFromURL( $matches[1] );
		if ( !is_object( $nt ) ) {
			# Bogus title. Ignore the line instead of handing a non-object
			# to Image::newFromTitle() and the link cache below.
			continue;
		}
		if ( isset( $matches[3] ) ) {
			$label = $matches[3];
		} else {
			# No "|label" part on this line
			$label = '';
		}
		$ig->add( Image::newFromTitle( $nt ), $label );
		$wgLinkCache->addImageLinkObj( $nt );
	}
	return $ig->toHTML();
}
3065 }
3066
/**
 * Container for the result of a parse: the output text plus the data
 * gathered alongside it (language links, category links and the
 * "contains old magic" flag), together with the cache timestamp and the
 * parser version that expired() uses to decide whether a cached copy is
 * still usable.
 *
 * @package MediaWiki
 */
class ParserOutput
{
	# $mCategoryLinks is used as a set: the categories are the array keys
	# (see addCategoryLink() and getCategoryLinks()).
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache
	var $mVersion;   # Compatibility check

	/**
	 * Constructor. The cache time starts out empty and the version is
	 * stamped with the current MW_PARSER_VERSION.
	 *
	 * @param string $text
	 * @param array $languageLinks
	 * @param array $categoryLinks array keyed by category
	 * @param bool $containsOldMagic
	 */
	function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = '';
		$this->mVersion = MW_PARSER_VERSION;
	}

	# Accessors. getCategoryLinks() returns the categories themselves,
	# i.e. the keys of $mCategoryLinks.
	function getText()                   { return $this->mText; }
	function getLanguageLinks()          { return $this->mLanguageLinks; }
	function getCategoryLinks()          { return array_keys( $this->mCategoryLinks ); }
	function getCacheTime()              { return $this->mCacheTime; }
	function containsOldMagic()          { return $this->mContainsOldMagic; }

	# Mutators. Each one hands back the result of wfSetVar().
	function setText( $text )            { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll )     { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl )     { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t )          { return wfSetVar( $this->mCacheTime, $t ); }
	function addCategoryLink( $c )       { $this->mCategoryLinks[$c] = 1; }

	/**
	 * Fold the link data of another ParserOutput into this one.
	 * Language links are appended, category links are united, and the
	 * old-magic flag becomes true if it is set on either object. The
	 * text, cache time and version of this object are left untouched.
	 *
	 * @param ParserOutput $other
	 */
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Take the categories from $other (not our own language links).
		# The categories live in the array keys, so unite by key with "+":
		# array_merge() would renumber purely numeric category names
		# such as "2005".
		$this->mCategoryLinks = $this->mCategoryLinks + $other->mCategoryLinks;
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

	/**
	 * Return true if this cached output object predates the global or
	 * per-article cache invalidation timestamps, or if it comes from
	 * an incompatible older version.
	 *
	 * @param string $touched the affected article's last touched timestamp
	 * @return bool
	 * @access public
	 */
	function expired( $touched ) {
		global $wgCacheEpoch;
		# Objects stored before $mVersion existed have no version at all
		# and are treated as expired.
		return $this->getCacheTime() <= $touched ||
		       $this->getCacheTime() <= $wgCacheEpoch ||
		       !isset( $this->mVersion ) ||
		       version_compare( $this->mVersion, MW_PARSER_VERSION, "lt" );
	}
}
3123
/**
 * Bundle of settings the Parser consults while rendering.
 * The values can be set one by one through the setters, or filled in a
 * single step from site globals and a user's preferences with
 * initialiseFromUser() / newFromUser().
 *
 * @package MediaWiki
 */
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                    # Use texvc to expand <math> tags
	var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;       # Allow external images inline
	var $mSkin;                      # Reference to the preferred skin
	var $mDateFormat;                # Date format index
	var $mEditSection;               # Create "edit section" links
	var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
	var $mNumberHeadings;            # Automatically number headings
	var $mShowToc;                   # Show table of contents

	# Accessor pairs. Every setter returns the result of wfSetVar().
	function getUseTeX()                        { return $this->mUseTeX; }
	function setUseTeX( $x )                    { return wfSetVar( $this->mUseTeX, $x ); }

	function getUseDynamicDates()               { return $this->mUseDynamicDates; }
	function setUseDynamicDates( $x )           { return wfSetVar( $this->mUseDynamicDates, $x ); }

	function getInterwikiMagic()                { return $this->mInterwikiMagic; }
	function setInterwikiMagic( $x )            { return wfSetVar( $this->mInterwikiMagic, $x ); }

	function getAllowExternalImages()           { return $this->mAllowExternalImages; }
	function setAllowExternalImages( $x )       { return wfSetVar( $this->mAllowExternalImages, $x ); }

	function getDateFormat()                    { return $this->mDateFormat; }
	function setDateFormat( $x )                { return wfSetVar( $this->mDateFormat, $x ); }

	function getEditSection()                   { return $this->mEditSection; }
	function setEditSection( $x )               { return wfSetVar( $this->mEditSection, $x ); }

	function getEditSectionOnRightClick()       { return $this->mEditSectionOnRightClick; }
	function setEditSectionOnRightClick( $x )   { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

	function getNumberHeadings()                { return $this->mNumberHeadings; }
	function setNumberHeadings( $x )            { return wfSetVar( $this->mNumberHeadings, $x ); }

	function getShowToc()                       { return $this->mShowToc; }
	function setShowToc( $x )                   { return wfSetVar( $this->mShowToc, $x ); }

	# The skin is kept by reference, so it is bound directly instead of
	# going through wfSetVar().
	function getSkin()                          { return $this->mSkin; }
	function setSkin( &$x ) { $this->mSkin =& $x; }

	/**
	 * Build a ParserOptions object initialised from the given user.
	 *
	 * @param User $user passed on to initialiseFromUser()
	 * @return ParserOptions
	 */
	/* static */ function newFromUser( &$user ) {
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	/**
	 * Fill in every option: the site-wide switches come from globals,
	 * the skin and the remaining settings from the user's options.
	 * When $userInput evaluates to false, a fresh User object marked as
	 * loaded is used in its place.
	 *
	 * @param User $userInput
	 */
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
		$profileSection = 'ParserOptions::initialiseFromUser';
		wfProfileIn( $profileSection );

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide configuration
		$this->mUseTeX = $wgUseTeX;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Fetching the skin is profiled as a section of its own
		wfProfileIn( $profileSection.'-skin' );
		$this->mSkin =& $user->getSkin();
		wfProfileOut( $profileSection.'-skin' );

		# Per-user preferences
		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );

		wfProfileOut( $profileSection );
	}
}
3202
/**
 * preg_replace_callback() handler used by Parser::replaceLinkHolders()
 * to substitute link placeholders.
 *
 * The first captured group of the match is used as the key into the
 * global $outputReplace table; the entry stored under that key is
 * returned (by reference) as the replacement.
 *
 * @param array $matches match array supplied by preg_replace_callback()
 * @return mixed the $outputReplace entry for $matches[1]
 */
function &outputReplaceMatches( $matches ) {
	global $outputReplace;
	$placeholderKey = $matches[1];
	return $outputReplace[$placeholderKey];
}
3211
/**
 * Return the total number of articles.
 *
 * Calls wfLoadSiteStats() first and then hands back the value of the
 * global $wgNumberOfArticles.
 *
 * @return mixed current value of $wgNumberOfArticles
 */
function wfNumberOfArticles() {
	wfLoadSiteStats();
	return $GLOBALS['wgNumberOfArticles'];
}
3221
/**
 * Get various statistics from the database.
 *
 * Reads row 1 of the site_stats table and copies its counters into
 * $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles. Nothing is
 * queried when $wgNumberOfArticles already differs from -1, and the
 * globals are left alone when the row lookup returns false.
 *
 * @private
 */
function wfLoadSiteStats() {
	global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

	# Any value other than -1 short-circuits the lookup
	if ( $wgNumberOfArticles != -1 ) {
		return;
	}

	$caller = 'wfLoadSiteStats';
	$dbr =& wfGetDB( DB_SLAVE );
	$fields = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
	$conds = array( 'ss_row_id' => 1 );
	$row = $dbr->selectRow( 'site_stats', $fields, $conds, $caller );

	if ( $row === false ) {
		return;
	}

	$wgTotalViews = $row->ss_total_views;
	$wgTotalEdits = $row->ss_total_edits;
	$wgNumberOfArticles = $row->ss_good_articles;
}
3245
/**
 * Escape only the characters that delimit HTML tags and attribute values:
 * the double quote, '>' and '<' are replaced by &quot;, &gt; and &lt;.
 * Everything else, including '&', is passed through unchanged.
 *
 * @param string $in text to escape
 * @return string the escaped text
 */
function wfEscapeHTMLTagsOnly( $in ) {
	$entities = array(
		'"' => '&quot;',
		'>' => '&gt;',
		'<' => '&lt;'
	);
	return str_replace( array_keys( $entities ), array_values( $entities ), $in );
}
3252
3253 ?>