includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 /**
   6  * File for Parser and related classes
   7  *
   8  * @package MediaWiki
   9  * @version $Id$
  10  */
  11
  12 /**
  13  * Variable substitution O(N^2) attack
  14  *
  15  * Without countermeasures, it would be possible to attack the parser by saving
  16  * a page filled with a large number of inclusions of large pages. The size of
  17  * the generated page would be proportional to the square of the input size.
  18  * Hence, we limit the number of inclusions of any given page, thus bringing any
  19  * attack back to O(N).
  20  */
  21 define( 'MAX_INCLUDE_REPEAT', 100 );
  22 define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million
  23
  24 # Allowed values for $mOutputType
  25 define( 'OT_HTML', 1 );
  26 define( 'OT_WIKI', 2 );
  27 define( 'OT_MSG' , 3 );
  28
  29 # string parameter for extractTags which will cause it
  30 # to strip HTML comments in addition to regular
  31 # <XML>-style tags. This should not be anything we
  32 # may want to use in wikisyntax
  33 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
  34
  35 # prefix for escaping, used in two functions at least
  36 define( 'UNIQ_PREFIX', 'NaodW29');
  37
  38 # Constants needed for external link processing
  39 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
  40 define( 'HTTP_PROTOCOLS', 'http|https' );
  41 # Everything except bracket, space, or control characters
  42 define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
  43 define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
  44 # Including space
  45 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
  46 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
  47 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
  48 define( 'EXT_LINK_BRACKETED',  '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
  49 define( 'EXT_IMAGE_REGEX',
  50         '/^('.HTTP_PROTOCOLS.':)'.  # Protocol
  51         '('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
  52         '('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
  53 );
  54
  55 $wgCurrentSectionNumber = 0; # XXX
  56
  57 /**
  58  * PHP Parser
  59  *
  60  * Processes wiki markup
  61  *
  62  * <pre>
  63  * There are three main entry points into the Parser class:
  64  * parse()
  65  *   produces HTML output
 * preSaveTransform()
  67  *   produces altered wiki markup.
  68  * transformMsg()
  69  *   performs brace substitution on MediaWiki messages
  70  *
  71  * Globals used:
  72  *    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  73  *
  74  * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  75  *
  76  * settings:
  77  *  $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  78  *  $wgNamespacesWithSubpages, $wgAllowExternalImages*,
  79  *  $wgLocaltimezone
  80  *
  81  *  * only within ParserOptions
  82  * </pre>
  83  *
  84  * @package MediaWiki
  85  */
  86 class Parser
  87 {
  88         /**#@+
  89          * @access private
  90          */
  91         # Persistent:
  92         var $mTagHooks;
  93
  94         # Cleared with clearState():
  95         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  96         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  97
  98         # Temporary:
  99         var $mOptions, $mTitle, $mOutputType,
 100             $mTemplates,        // cache of already loaded templates, avoids
 101                                 // multiple SQL queries for the same string
 102             $mTemplatePath;     // stores an unsorted hash of all the templates already loaded
 103                                 // in this path. Used for loop detection.
 104
 105         /**#@-*/
 106
 107         /**
 108          * Constructor
 109          *
 110          * @access public
 111          */
 112         function Parser() {
 113                 $this->mTemplates = array();
 114                 $this->mTemplatePath = array();
 115                 $this->mTagHooks = array();
 116                 $this->clearState();
 117         }
 118
 119         /**
 120          * Clear Parser state
 121          *
 122          * @access private
 123          */
 124         function clearState() {
 125                 $this->mOutput = new ParserOutput;
 126                 $this->mAutonumber = 0;
 127                 $this->mLastSection = "";
 128                 $this->mDTopen = false;
 129                 $this->mVariables = false;
 130                 $this->mIncludeCount = array();
 131                 $this->mStripState = array();
 132                 $this->mArgStack = array();
 133                 $this->mInPre = false;
 134         }
 135
 136         /**
 137          * First pass--just handle <nowiki> sections, pass the rest off
 138          * to internalParse() which does all the real work.
 139          *
 140          * @access private
 141          * @return ParserOutput a ParserOutput
 142          */
 143         function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
 144                 global $wgUseTidy;
 145                 $fname = 'Parser::parse';
 146                 wfProfileIn( $fname );
 147
 148                 if ( $clearState ) {
 149                         $this->clearState();
 150                 }
 151
 152                 $this->mOptions = $options;
 153                 $this->mTitle =& $title;
 154                 $this->mOutputType = OT_HTML;
 155
 156                 $stripState = NULL;
 157                 $text = $this->strip( $text, $this->mStripState );
 158                 $text = $this->internalParse( $text, $linestart );
 159                 $text = $this->unstrip( $text, $this->mStripState );
 160                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 161                 if(!$wgUseTidy) {
 162                         $fixtags = array(
 163                                 # french spaces, last one Guillemet-left
 164                                 # only if there is something before the space
 165                                 '/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;\\2',
 166                                 # french spaces, Guillemet-right
 167                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 168                                 '/<hr *>/i' => '<hr />',
 169                                 '/<br *>/i' => '<br />',
 170                                 '/<center *>/i' => '<div class="center">',
 171                                 '/<\\/center *>/i' => '</div>',
 172                                 # Clean up spare ampersands; note that we probably ought to be
 173                                 # more careful about named entities.
 174                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 175                         );
 176                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 177                 } else {
 178                         $fixtags = array(
 179                                 # french spaces, last one Guillemet-left
 180                                 '/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
 181                                 # french spaces, Guillemet-right
 182                                 '/(\\302\\253) /i' => '\\1&nbsp;',
 183                                 '/<center *>/i' => '<div class="center">',
 184                                 '/<\\/center *>/i' => '</div>'
 185                         );
 186                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 187                 }
 188                 # only once and last
 189                 $text = $this->doBlockLevels( $text, $linestart );
 190                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 191                 if($wgUseTidy) {
 192                         $text = $this->tidy($text);
 193                 }
 194                 $this->mOutput->setText( $text );
 195                 wfProfileOut( $fname );
 196                 return $this->mOutput;
 197         }
 198
 199         /**
 200          * Get a random string
 201          *
 202          * @access private
 203          * @static
 204          */
 205         function getRandomString() {
 206                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 207         }
 208
 209         /**
 210          * Replaces all occurrences of <$tag>content</$tag> in the text
 211          * with a random marker and returns the new text. the output parameter
 212          * $content will be an associative array filled with data on the form
 213          * $unique_marker => content.
 214          *
 215          * If $content is already set, the additional entries will be appended
 216          * If $tag is set to STRIP_COMMENTS, the function will extract
 217          * <!-- HTML comments -->
 218          *
 219          * @access private
 220          * @static
 221          */
 222         function extractTags($tag, $text, &$content, $uniq_prefix = ''){
 223                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 224                 if ( !$content ) {
 225                         $content = array( );
 226                 }
 227                 $n = 1;
 228                 $stripped = '';
 229
 230                 while ( '' != $text ) {
 231                         if($tag==STRIP_COMMENTS) {
 232                                 $p = preg_split( '/<!--/i', $text, 2 );
 233                         } else {
 234                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 235                         }
 236                         $stripped .= $p[0];
 237                         if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
 238                                 $text = '';
 239                         } else {
 240                                 if($tag==STRIP_COMMENTS) {
 241                                         $q = preg_split( '/-->/i', $p[1], 2 );
 242                                 } else {
 243                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 244                                 }
 245                                 $marker = $rnd . sprintf('%08X', $n++);
 246                                 $content[$marker] = $q[0];
 247                                 $stripped .= $marker;
 248                                 $text = $q[1];
 249                         }
 250                 }
 251                 return $stripped;
 252         }
 253
 254         /**
 255          * Strips and renders nowiki, pre, math, hiero
         * When the output type is OT_HTML, performs necessary rendering operations on plugins
 257          * Returns the text, and fills an array with data needed in unstrip()
 258          * If the $state is already a valid strip state, it adds to the state
 259          *
 260          * @param bool $stripcomments when set, HTML comments <!-- like this -->
 261          *  will be stripped in addition to other tags. This is important
 262          *  for section editing, where these comments cause confusion when
 263          *  counting the sections in the wikisource
 264          *
 265          * @access private
 266          */
 267         function strip( $text, &$state, $stripcomments = false ) {
 268                 $render = ($this->mOutputType == OT_HTML);
 269                 $html_content = array();
 270                 $nowiki_content = array();
 271                 $math_content = array();
 272                 $pre_content = array();
 273                 $comment_content = array();
 274                 $ext_content = array();
 275
 276                 # Replace any instances of the placeholders
 277                 $uniq_prefix = UNIQ_PREFIX;
 278                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 279
 280                 # html
 281                 global $wgRawHtml, $wgWhitelistEdit;
 282                 if( $wgRawHtml && $wgWhitelistEdit ) {
 283                         $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
 284                         foreach( $html_content as $marker => $content ) {
 285                                 if ($render ) {
 286                                         # Raw and unchecked for validity.
 287                                         $html_content[$marker] = $content;
 288                                 } else {
 289                                         $html_content[$marker] = '<html>'.$content.'</html>';
 290                                 }
 291                         }
 292                 }
 293
 294                 # nowiki
 295                 $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
 296                 foreach( $nowiki_content as $marker => $content ) {
 297                         if( $render ){
 298                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 299                         } else {
 300                                 $nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
 301                         }
 302                 }
 303
 304                 # math
 305                 $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
 306                 foreach( $math_content as $marker => $content ){
 307                         if( $render ) {
 308                                 if( $this->mOptions->getUseTeX() ) {
 309                                         $math_content[$marker] = renderMath( $content );
 310                                 } else {
 311                                         $math_content[$marker] = '&lt;math&gt;'.$content.'&lt;math&gt;';
 312                                 }
 313                         } else {
 314                                 $math_content[$marker] = '<math>'.$content.'</math>';
 315                         }
 316                 }
 317
 318                 # pre
 319                 $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
 320                 foreach( $pre_content as $marker => $content ){
 321                         if( $render ){
 322                                 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
 323                         } else {
 324                                 $pre_content[$marker] = '<pre>'.$content.'</pre>';
 325                         }
 326                 }
 327
 328                 # Comments
 329                 if($stripcomments) {
 330                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 331                         foreach( $comment_content as $marker => $content ){
 332                                 $comment_content[$marker] = '<!--'.$content.'-->';
 333                         }
 334                 }
 335
 336                 # Extensions
 337                 foreach ( $this->mTagHooks as $tag => $callback ) {
 338                         $ext_contents[$tag] = array();
 339                         $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
 340                         foreach( $ext_content[$tag] as $marker => $content ) {
 341                                 if ( $render ) {
 342                                         $ext_content[$tag][$marker] = $callback( $content );
 343                                 } else {
 344                                         $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
 345                                 }
 346                         }
 347                 }
 348
 349                 # Merge state with the pre-existing state, if there is one
 350                 if ( $state ) {
 351                         $state['html'] = $state['html'] + $html_content;
 352                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 353                         $state['math'] = $state['math'] + $math_content;
 354                         $state['pre'] = $state['pre'] + $pre_content;
 355                         $state['comment'] = $state['comment'] + $comment_content;
 356
 357                         foreach( $ext_content as $tag => $array ) {
 358                                 if ( array_key_exists( $tag, $state ) ) {
 359                                         $state[$tag] = $state[$tag] + $array;
 360                                 }
 361                         }
 362                 } else {
 363                         $state = array(
 364                           'html' => $html_content,
 365                           'nowiki' => $nowiki_content,
 366                           'math' => $math_content,
 367                           'pre' => $pre_content,
 368                           'comment' => $comment_content,
 369                         ) + $ext_content;
 370                 }
 371                 return $text;
 372         }
 373
 374         /**
         * restores pre, math, and hiero removed by strip()
 376          *
 377          * always call unstripNoWiki() after this one
 378          * @access private
 379          */
 380         function unstrip( $text, &$state ) {
 381                 # Must expand in reverse order, otherwise nested tags will be corrupted
 382                 $contentDict = end( $state );
 383                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 384                         if( key($state) != 'nowiki' && key($state) != 'html') {
 385                                 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 386                                         $text = str_replace( key( $contentDict ), $content, $text );
 387                                 }
 388                         }
 389                 }
 390
 391                 return $text;
 392         }
 393
 394         /**
 395          * always call this after unstrip() to preserve the order
 396          *
 397          * @access private
 398          */
 399         function unstripNoWiki( $text, &$state ) {
 400                 # Must expand in reverse order, otherwise nested tags will be corrupted
 401                 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
 402                         $text = str_replace( key( $state['nowiki'] ), $content, $text );
 403                 }
 404
 405                 global $wgRawHtml;
 406                 if ($wgRawHtml) {
 407                         for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
 408                                 $text = str_replace( key( $state['html'] ), $content, $text );
 409                         }
 410                 }
 411
 412                 return $text;
 413         }
 414
 415         /**
 416          * Add an item to the strip state
 417          * Returns the unique tag which must be inserted into the stripped text
 418          * The tag will be replaced with the original text in unstrip()
 419          *
 420          * @access private
 421          */
 422         function insertStripItem( $text, &$state ) {
 423                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 424                 if ( !$state ) {
 425                         $state = array(
 426                           'html' => array(),
 427                           'nowiki' => array(),
 428                           'math' => array(),
 429                           'pre' => array()
 430                         );
 431                 }
 432                 $state['item'][$rnd] = $text;
 433                 return $rnd;
 434         }
 435
 436         /**
 437          * Return allowed HTML attributes
 438          *
 439          * @access private
 440          */
 441         function getHTMLattrs () {
 442                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 443                                 'title', 'align', 'lang', 'dir', 'width', 'height',
 444                                 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
 445                                 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
 446                                 /* FONT */ 'type', 'start', 'value', 'compact',
 447                                 /* For various lists, mostly deprecated but safe */
 448                                 'summary', 'width', 'border', 'frame', 'rules',
 449                                 'cellspacing', 'cellpadding', 'valign', 'char',
 450                                 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
 451                                 'headers', 'scope', 'rowspan', 'colspan', /* Tables */
 452                                 'id', 'class', 'name', 'style' /* For CSS */
 453                                 );
 454                 return $htmlattrs ;
 455         }
 456
 457         /**
 458          * Remove non approved attributes and javascript in css
 459          *
 460          * @access private
 461          */
 462         function fixTagAttributes ( $t ) {
 463                 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
 464                 $htmlattrs = $this->getHTMLattrs() ;
 465
 466                 # Strip non-approved attributes from the tag
 467                 $t = preg_replace(
 468                         '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
 469                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 470                         $t);
 471
 472                 $t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557
 473
 474                 # Strip javascript "expression" from stylesheets. Brute force approach:
 475                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 476
 477                 if( preg_match(
 478                         '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
 479                         wfMungeToUtf8( $t ) ) )
 480                 {
 481                         $t='';
 482                 }
 483
 484                 return trim ( $t ) ;
 485         }
 486
 487         /**
 488          * interface with html tidy, used if $wgUseTidy = true
 489          *
 490          * @access private
 491          */
 492         function tidy ( $text ) {
 493                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 494                 global $wgInputEncoding, $wgOutputEncoding;
 495                 $fname = 'Parser::tidy';
 496                 wfProfileIn( $fname );
 497
 498                 $cleansource = '';
 499                 switch(strtoupper($wgOutputEncoding)) {
 500                         case 'ISO-8859-1':
 501                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 502                                 break;
 503                         case 'UTF-8':
 504                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 505                                 break;
 506                         default:
 507                                 $wgTidyOpts .= ' -raw';
 508                         }
 509
 510                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 511 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 512 '<head><title>test</title></head><body>'.$text.'</body></html>';
 513                 $descriptorspec = array(
 514                         0 => array('pipe', 'r'),
 515                         1 => array('pipe', 'w'),
 516                         2 => array('file', '/dev/null', 'a')
 517                 );
 518                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 519                 if (is_resource($process)) {
 520                         fwrite($pipes[0], $wrappedtext);
 521                         fclose($pipes[0]);
 522                         while (!feof($pipes[1])) {
 523                                 $cleansource .= fgets($pipes[1], 1024);
 524                         }
 525                         fclose($pipes[1]);
 526                         $return_value = proc_close($process);
 527                 }
 528
 529                 wfProfileOut( $fname );
 530
 531                 if( $cleansource == '' && $text != '') {
 532                         wfDebug( "Tidy error detected!\n" );
 533                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 534                 } else {
 535                         return $cleansource;
 536                 }
 537         }
 538
 539         /**
 540          * parse the wiki syntax used to render tables
 541          *
 542          * @access private
 543          */
 544         function doTableStuff ( $t ) {
 545                 $fname = 'Parser::doTableStuff';
 546                 wfProfileIn( $fname );
 547
 548                 $t = explode ( "\n" , $t ) ;
 549                 $td = array () ; # Is currently a td tag open?
 550                 $ltd = array () ; # Was it TD or TH?
 551                 $tr = array () ; # Is currently a tr tag open?
 552                 $ltr = array () ; # tr attributes
 553                 $indent_level = 0; # indent level of the table
 554                 foreach ( $t AS $k => $x )
 555                 {
 556                         $x = trim ( $x ) ;
 557                         $fc = substr ( $x , 0 , 1 ) ;
 558                         if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
 559                                 $indent_level = strlen( $matches[1] );
 560                                 $t[$k] = "\n" .
 561                                         str_repeat( '<dl><dd>', $indent_level ) .
 562                                         '<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
 563                                 array_push ( $td , false ) ;
 564                                 array_push ( $ltd , '' ) ;
 565                                 array_push ( $tr , false ) ;
 566                                 array_push ( $ltr , '' ) ;
 567                         }
 568                         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 569                         else if ( '|}' == substr ( $x , 0 , 2 ) ) {
 570                                 $z = "</table>\n" ;
 571                                 $l = array_pop ( $ltd ) ;
 572                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 573                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 574                                 array_pop ( $ltr ) ;
 575                                 $t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
 576                         }
 577                         else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
 578                                 $x = substr ( $x , 1 ) ;
 579                                 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 580                                 $z = '' ;
 581                                 $l = array_pop ( $ltd ) ;
 582                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 583                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 584                                 array_pop ( $ltr ) ;
 585                                 $t[$k] = $z ;
 586                                 array_push ( $tr , false ) ;
 587                                 array_push ( $td , false ) ;
 588                                 array_push ( $ltd , '' ) ;
 589                                 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 590                         }
 591                         else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
 592                                 if ( '|+' == substr ( $x , 0 , 2 ) ) {
 593                                         $fc = '+' ;
 594                                         $x = substr ( $x , 1 ) ;
 595                                 }
 596                                 $after = substr ( $x , 1 ) ;
 597                                 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
 598                                 $after = explode ( '||' , $after ) ;
 599                                 $t[$k] = '' ;
 600                                 foreach ( $after AS $theline )
 601                                 {
 602                                         $z = '' ;
 603                                         if ( $fc != '+' )
 604                                         {
 605                                                 $tra = array_pop ( $ltr ) ;
 606                                                 if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
 607                                                 array_push ( $tr , true ) ;
 608                                                 array_push ( $ltr , '' ) ;
 609                                         }
 610
 611                                         $l = array_pop ( $ltd ) ;
 612                                         if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 613                                         if ( $fc == '|' ) $l = 'td' ;
 614                                         else if ( $fc == '!' ) $l = 'th' ;
 615                                         else if ( $fc == '+' ) $l = 'caption' ;
 616                                         else $l = '' ;
 617                                         array_push ( $ltd , $l ) ;
 618                                         $y = explode ( '|' , $theline , 2 ) ;
 619                                         if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 620                                         else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 621                                         $t[$k] .= $y ;
 622                                         array_push ( $td , true ) ;
 623                                 }
 624                         }
 625                 }
 626
 627                 # Closing open td, tr && table
 628                 while ( count ( $td ) > 0 )
 629                 {
 630                         if ( array_pop ( $td ) ) $t[] = '</td>' ;
 631                         if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
 632                         $t[] = '</table>' ;
 633                 }
 634
 635                 $t = implode ( "\n" , $t ) ;
 636                 #               $t = $this->removeHTMLtags( $t );
 637                 wfProfileOut( $fname );
 638                 return $t ;
 639         }
 640
 641         /**
 642          * Helper function for parse() that transforms wiki markup into
 643          * HTML. Only called for $mOutputType == OT_HTML.
 644          *
 645          * @access private
 646          */
 647         function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
 648         global $wgLang;
 649
 650                 $fname = 'Parser::internalParse';
 651                 wfProfileIn( $fname );
 652
 653                 $text = $this->removeHTMLtags( $text );
 654                 $text = $this->replaceVariables( $text, $args );
 655
 656                 $text = $wgLang->convert($text);
 657
 658                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
 659
 660                 $text = $this->doHeadings( $text );
 661                 if($this->mOptions->getUseDynamicDates()) {
 662                         global $wgDateFormatter;
 663                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 664                 }
 665                 $text = $this->doAllQuotes( $text );
 666                 $text = $this->replaceExternalLinks( $text );
 667                 $text = $this->doMagicLinks( $text );
 668                 $text = $this->replaceInternalLinks ( $text );
 669                 # Another call to replace links and images inside captions of images
 670                 $text = $this->replaceInternalLinks ( $text );
 671
 672                 $text = $this->unstrip( $text, $this->mStripState );
 673                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 674
 675                 $text = $this->doTableStuff( $text );
 676                 $text = $this->formatHeadings( $text, $isMain );
 677                 $sk =& $this->mOptions->getSkin();
 678                 $text = $sk->transformContent( $text );
 679
 680                 wfProfileOut( $fname );
 681                 return $text;
 682         }
 683
 684         /**
 685          * Replace special strings like "ISBN xxx" and "RFC xxx" with
 686          * magic external links.
 687          *
 688          * @access private
 689          */
 690         function &doMagicLinks( &$text ) {
 691                 global $wgUseGeoMode;
 692                 $text = $this->magicISBN( $text );
 693                 if ( isset( $wgUseGeoMode ) && $wgUseGeoMode ) {
 694                         $text = $this->magicGEO( $text );
 695                 }
 696                 $text = $this->magicRFC( $text );
 697                 return $text;
 698         }
 699
 700         /**
 701          * Parse ^^ tokens and return html
 702          *
 703          * @access private
 704          */
 705         function doExponent ( $text ) {
 706                 $fname = 'Parser::doExponent';
 707                 wfProfileIn( $fname);
 708                 $text = preg_replace('/\^\^(.*)\^\^/','<small><sup>\\1</sup></small>', $text);
 709                 wfProfileOut( $fname);
 710                 return $text;
 711         }
 712
 713         /**
 714          * Parse headers and return html
 715          *
 716          * @access private
 717          */
 718         function doHeadings( $text ) {
 719                 $fname = 'Parser::doHeadings';
 720                 wfProfileIn( $fname );
 721                 for ( $i = 6; $i >= 1; --$i ) {
 722                         $h = substr( '======', 0, $i );
 723                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 724                           "<h{$i}>\\1</h{$i}>\\2", $text );
 725                 }
 726                 wfProfileOut( $fname );
 727                 return $text;
 728         }
 729
 730         /**
 731          * Replace single quotes with HTML markup
 732          * @access private
 733          * @return string the altered text
 734          */
 735         function doAllQuotes( $text ) {
 736                 $fname = 'Parser::doAllQuotes';
 737                 wfProfileIn( $fname );
 738                 $outtext = '';
 739                 $lines = explode( "\n", $text );
 740                 foreach ( $lines as $line ) {
 741                         $outtext .= $this->doQuotes ( $line ) . "\n";
 742                 }
 743                 $outtext = substr($outtext, 0,-1);
 744                 wfProfileOut( $fname );
 745                 return $outtext;
 746         }
 747
 748         /**
 749          * Helper function for doAllQuotes()
 750          * @access private
 751          */
 752         function doQuotes( $text ) {
 753                 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 754                 if (count ($arr) == 1)
 755                         return $text;
 756                 else
 757                 {
 758                         # First, do some preliminary work. This may shift some apostrophes from
 759                         # being mark-up to being text. It also counts the number of occurrences
 760                         # of bold and italics mark-ups.
 761                         $i = 0;
 762                         $numbold = 0;
 763                         $numitalics = 0;
 764                         foreach ($arr as $r)
 765                         {
 766                                 if (($i % 2) == 1)
 767                                 {
 768                                         # If there are ever four apostrophes, assume the first is supposed to
 769                                         # be text, and the remaining three constitute mark-up for bold text.
 770                                         if (strlen ($arr[$i]) == 4)
 771                                         {
 772                                                 $arr[$i-1] .= "'";
 773                                                 $arr[$i] = "'''";
 774                                         }
 775                                         # If there are more than 5 apostrophes in a row, assume they're all
 776                                         # text except for the last 5.
 777                                         else if (strlen ($arr[$i]) > 5)
 778                                         {
 779                                                 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
 780                                                 $arr[$i] = "'''''";
 781                                         }
 782                                         # Count the number of occurrences of bold and italics mark-ups.
 783                                         # We are not counting sequences of five apostrophes.
 784                                         if (strlen ($arr[$i]) == 2) $numitalics++;  else
 785                                         if (strlen ($arr[$i]) == 3) $numbold++;     else
 786                                         if (strlen ($arr[$i]) == 5) { $numitalics++; $numbold++; }
 787                                 }
 788                                 $i++;
 789                         }
 790
 791                         # If there is an odd number of both bold and italics, it is likely
 792                         # that one of the bold ones was meant to be an apostrophe followed
 793                         # by italics. Which one we cannot know for certain, but it is more
 794                         # likely to be one that has a single-letter word before it.
 795                         if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
 796                         {
 797                                 $i = 0;
 798                                 $firstsingleletterword = -1;
 799                                 $firstmultiletterword = -1;
 800                                 $firstspace = -1;
 801                                 foreach ($arr as $r)
 802                                 {
 803                                         if (($i % 2 == 1) and (strlen ($r) == 3))
 804                                         {
 805                                                 $x1 = substr ($arr[$i-1], -1);
 806                                                 $x2 = substr ($arr[$i-1], -2, 1);
 807                                                 if ($x1 == ' ') {
 808                                                         if ($firstspace == -1) $firstspace = $i;
 809                                                 } else if ($x2 == ' ') {
 810                                                         if ($firstsingleletterword == -1) $firstsingleletterword = $i;
 811                                                 } else {
 812                                                         if ($firstmultiletterword == -1) $firstmultiletterword = $i;
 813                                                 }
 814                                         }
 815                                         $i++;
 816                                 }
 817
 818                                 # If there is a single-letter word, use it!
 819                                 if ($firstsingleletterword > -1)
 820                                 {
 821                                         $arr [ $firstsingleletterword ] = "''";
 822                                         $arr [ $firstsingleletterword-1 ] .= "'";
 823                                 }
 824                                 # If not, but there's a multi-letter word, use that one.
 825                                 else if ($firstmultiletterword > -1)
 826                                 {
 827                                         $arr [ $firstmultiletterword ] = "''";
 828                                         $arr [ $firstmultiletterword-1 ] .= "'";
 829                                 }
 830                                 # ... otherwise use the first one that has neither.
 831                                 # (notice that it is possible for all three to be -1 if, for example,
 832                                 # there is only one pentuple-apostrophe in the line)
 833                                 else if ($firstspace > -1)
 834                                 {
 835                                         $arr [ $firstspace ] = "''";
 836                                         $arr [ $firstspace-1 ] .= "'";
 837                                 }
 838                         }
 839
 840                         # Now let's actually convert our apostrophic mush to HTML!
 841                         $output = '';
 842                         $buffer = '';
 843                         $state = '';
 844                         $i = 0;
 845                         foreach ($arr as $r)
 846                         {
 847                                 if (($i % 2) == 0)
 848                                 {
 849                                         if ($state == 'both')
 850                                                 $buffer .= $r;
 851                                         else
 852                                                 $output .= $r;
 853                                 }
 854                                 else
 855                                 {
 856                                         if (strlen ($r) == 2)
 857                                         {
 858                                                 if ($state == 'i')
 859                                                 { $output .= '</i>'; $state = ''; }
 860                                                 else if ($state == 'bi')
 861                                                 { $output .= '</i>'; $state = 'b'; }
 862                                                 else if ($state == 'ib')
 863                                                 { $output .= '</b></i><b>'; $state = 'b'; }
 864                                                 else if ($state == 'both')
 865                                                 { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
 866                                                 else # $state can be 'b' or ''
 867                                                 { $output .= '<i>'; $state .= 'i'; }
 868                                         }
 869                                         else if (strlen ($r) == 3)
 870                                         {
 871                                                 if ($state == 'b')
 872                                                 { $output .= '</b>'; $state = ''; }
 873                                                 else if ($state == 'bi')
 874                                                 { $output .= '</i></b><i>'; $state = 'i'; }
 875                                                 else if ($state == 'ib')
 876                                                 { $output .= '</b>'; $state = 'i'; }
 877                                                 else if ($state == 'both')
 878                                                 { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
 879                                                 else # $state can be 'i' or ''
 880                                                 { $output .= '<b>'; $state .= 'b'; }
 881                                         }
 882                                         else if (strlen ($r) == 5)
 883                                         {
 884                                                 if ($state == 'b')
 885                                                 { $output .= '</b><i>'; $state = 'i'; }
 886                                                 else if ($state == 'i')
 887                                                 { $output .= '</i><b>'; $state = 'b'; }
 888                                                 else if ($state == 'bi')
 889                                                 { $output .= '</i></b>'; $state = ''; }
 890                                                 else if ($state == 'ib')
 891                                                 { $output .= '</b></i>'; $state = ''; }
 892                                                 else if ($state == 'both')
 893                                                 { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
 894                                                 else # ($state == '')
 895                                                 { $buffer = ''; $state = 'both'; }
 896                                         }
 897                                 }
 898                                 $i++;
 899                         }
 900                         # Now close all remaining tags.  Notice that the order is important.
 901                         if ($state == 'b' || $state == 'ib')
 902                                 $output .= '</b>';
 903                         if ($state == 'i' || $state == 'bi' || $state == 'ib')
 904                                 $output .= '</i>';
 905                         if ($state == 'bi')
 906                                 $output .= '</b>';
 907                         if ($state == 'both')
 908                                 $output .= '<b><i>'.$buffer.'</i></b>';
 909                         return $output;
 910                 }
 911         }
 912
 913         /**
 914          * Replace external links
 915          *
 916          * Note: we have to do external links before the internal ones,
 917          * and otherwise take great care in the order of things here, so
 918          * that we don't end up interpreting some URLs twice.
 919          *
 920          * @access private
 921          */
 922         function replaceExternalLinks( $text ) {
 923                 $fname = 'Parser::replaceExternalLinks';
 924                 wfProfileIn( $fname );
 925
 926                 $sk =& $this->mOptions->getSkin();
 927                 $linktrail = wfMsg('linktrail');
 928                 $bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 929
 930                 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
 931
 932                 $i = 0;
 933                 while ( $i<count( $bits ) ) {
 934                         $url = $bits[$i++];
 935                         $protocol = $bits[$i++];
 936                         $text = $bits[$i++];
 937                         $trail = $bits[$i++];
 938
 939                         # If the link text is an image URL, replace it with an <img> tag
 940                         # This happened by accident in the original parser, but some people used it extensively
 941                         $img = $this->maybeMakeImageLink( $text );
 942                         if ( $img !== false ) {
 943                                 $text = $img;
 944                         }
 945
 946                         $dtrail = '';
 947
 948                         # No link text, e.g. [http://domain.tld/some.link]
 949                         if ( $text == '' ) {
 950                                 # Autonumber if allowed
 951                                 if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
 952                                         $text = '[' . ++$this->mAutonumber . ']';
 953                                 } else {
 954                                         # Otherwise just use the URL
 955                                         $text = htmlspecialchars( $url );
 956                                 }
 957                         } else {
 958                                 # Have link text, e.g. [http://domain.tld/some.link text]s
 959                                 # Check for trail
 960                                 if ( preg_match( $linktrail, $trail, $m2 ) ) {
 961                                         $dtrail = $m2[1];
 962                                         $trail = $m2[2];
 963                                 }
 964                         }
 965
 966                         $encUrl = htmlspecialchars( $url );
 967                         # Bit in parentheses showing the URL for the printable version
 968                         if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, '/' ) . "/?$!", $url ) ) {
 969                                 $paren = '';
 970                         } else {
 971                                 # Expand the URL for printable version
 972                                 if ( ! $sk->suppressUrlExpansion() ) {
 973                                         $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
 974                                 } else {
 975                                         $paren = '';
 976                                 }
 977                         }
 978
 979                         # Process the trail (i.e. everything after this link up until start of the next link),
 980                         # replacing any non-bracketed links
 981                         $trail = $this->replaceFreeExternalLinks( $trail );
 982
 983                         $la = $sk->getExternalLinkAttributes( $url, $text );
 984
 985                         # Use the encoded URL
 986                         # This means that users can paste URLs directly into the text
 987                         # Funny characters like &ouml; aren't valid in URLs anyway
 988                         # This was changed in August 2004
 989                         $s .= "<a href=\"{$url}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
 990                 }
 991
 992                 wfProfileOut( $fname );
 993                 return $s;
 994         }
 995
 996         /**
 997          * Replace anything that looks like a URL with a link
 998          * @access private
 999          */
1000         function replaceFreeExternalLinks( $text ) {
1001                 $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1002                 $s = array_shift( $bits );
1003                 $i = 0;
1004
1005                 $sk =& $this->mOptions->getSkin();
1006
1007                 while ( $i < count( $bits ) ){
1008                         $protocol = $bits[$i++];
1009                         $remainder = $bits[$i++];
1010
1011                         if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
1012                                 # Found some characters after the protocol that look promising
1013                                 $url = $protocol . $m[1];
1014                                 $trail = $m[2];
1015
1016                                 # Move trailing punctuation to $trail
1017                                 $sep = ',;\.:!?';
1018                                 # If there is no left bracket, then consider right brackets fair game too
1019                                 if ( strpos( $url, '(' ) === false ) {
1020                                         $sep .= ')';
1021                                 }
1022
1023                                 $numSepChars = strspn( strrev( $url ), $sep );
1024                                 if ( $numSepChars ) {
1025                                         $trail = substr( $url, -$numSepChars ) . $trail;
1026                                         $url = substr( $url, 0, -$numSepChars );
1027                                 }
1028
1029                                 # Replace &amp; from obsolete syntax with &
1030                                 $url = str_replace( '&amp;', '&', $url );
1031
1032                                 # Is this an external image?
1033                                 $text = $this->maybeMakeImageLink( $url );
1034                                 if ( $text === false ) {
1035                                         # Not an image, make a link
1036                                         $text = $sk->makeExternalLink( $url, $url );
1037                                 }
1038                                 $s .= $text . $trail;
1039                         } else {
1040                                 $s .= $protocol . $remainder;
1041                         }
1042                 }
1043                 return $s;
1044         }
1045
1046         /**
1047          * make an image if it's allowed
1048          * @access private
1049          */
1050         function maybeMakeImageLink( $url ) {
1051                 $sk =& $this->mOptions->getSkin();
1052                 $text = false;
1053                 if ( $this->mOptions->getAllowExternalImages() ) {
1054                         if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
1055                                 # Image found
1056                                 $text = $sk->makeImage( htmlspecialchars( $url ) );
1057                         }
1058                 }
1059                 return $text;
1060         }
1061
1062         /**
1063          * Process [[ ]] wikilinks
1064          *
1065          * @access private
1066          */
1067         function replaceInternalLinks( $s ) {
1068                 global $wgLang, $wgLinkCache;
1069                 global $wgNamespacesWithSubpages;
1070                 static $fname = 'Parser::replaceInternalLinks' ;
1071                 wfProfileIn( $fname );
1072
1073                 wfProfileIn( $fname.'-setup' );
1074                 static $tc = FALSE;
1075                 # the % is needed to support urlencoded titles as well
1076                 if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
1077                 $sk =& $this->mOptions->getSkin();
1078
1079                 $redirect = MagicWord::get ( MAG_REDIRECT ) ;
1080
1081                 $a = explode( '[[', ' ' . $s );
1082                 $s = array_shift( $a );
1083                 $s = substr( $s, 1 );
1084
1085                 # Match a link having the form [[namespace:link|alternate]]trail
1086                 static $e1 = FALSE;
1087                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
1088                 # Match the end of a line for a word that's not followed by whitespace,
1089                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
1090                 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
1091
1092                 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
1093                 # Special and Media are pseudo-namespaces; no pages actually exist in them
1094
1095                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
1096
1097                 if ( $useLinkPrefixExtension ) {
1098                         if ( preg_match( $e2, $s, $m ) ) {
1099                                 $first_prefix = $m[2];
1100                                 $s = $m[1];
1101                         } else {
1102                                 $first_prefix = false;
1103                         }
1104                 } else {
1105                         $prefix = '';
1106                 }
1107
1108                 wfProfileOut( $fname.'-setup' );
1109
1110                 # start procedeeding each line
1111                 foreach ( $a as $line ) {
1112                         wfProfileIn( $fname.'-prefixhandling' );
1113                         if ( $useLinkPrefixExtension ) {
1114                                 if ( preg_match( $e2, $s, $m ) ) {
1115                                         $prefix = $m[2];
1116                                         $s = $m[1];
1117                                 } else {
1118                                         $prefix='';
1119                                 }
1120                                 # first link
1121                                 if($first_prefix) {
1122                                         $prefix = $first_prefix;
1123                                         $first_prefix = false;
1124                                 }
1125                         }
1126                         wfProfileOut( $fname.'-prefixhandling' );
1127
1128                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1129                                 $text = $m[2];
1130                                 # fix up urlencoded title texts
1131                                 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1132                                 $trail = $m[3];
1133                         } else { # Invalid form; output directly
1134                                 $s .= $prefix . '[[' . $line ;
1135                                 continue;
1136                         }
1137
1138                         # Valid link forms:
1139                         # Foobar -- normal
1140                         # :Foobar -- override special treatment of prefix (images, language links)
1141                         # /Foobar -- convert to CurrentPage/Foobar
1142                         # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1143
1144                         # Look at the first character
1145                         $c = substr($m[1],0,1);
1146                         $noforce = ($c != ':');
1147
1148                         # subpage
1149                         if( $c == '/' ) {
1150                                 # / at end means we don't want the slash to be shown
1151                                 if(substr($m[1],-1,1)=='/') {
1152                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
1153                                         $noslash=$m[1];
1154                                 } else {
1155                                         $noslash=substr($m[1],1);
1156                                 }
1157
1158                                 # Some namespaces don't allow subpages
1159                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
1160                                         # subpages allowed here
1161                                         $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
1162                                         if( '' == $text ) {
1163                                                 $text= $m[1];
1164                                         } # this might be changed for ugliness reasons
1165                                 } else {
1166                                         # no subpage allowed, use standard link
1167                                         $link = $noslash;
1168                                 }
1169
1170                         } elseif( $noforce ) { # no subpage
1171                                 $link = $m[1];
1172                         } else {
1173                                 # We don't want to keep the first character
1174                                 $link = substr( $m[1], 1 );
1175                         }
1176
1177                         $wasblank = ( '' == $text );
1178                         if( $wasblank ) $text = $link;
1179
1180                         $nt = Title::newFromText( $link );
1181                         if( !$nt ) {
1182                                 $s .= $prefix . '[[' . $line;
1183                                 continue;
1184                         }
1185
1186                         $ns = $nt->getNamespace();
1187                         $iw = $nt->getInterWiki();
1188
1189                         # Link not escaped by : , create the various objects
1190                         if( $noforce ) {
1191
1192                                 # Interwikis
1193                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1194                                         array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
1195                                         $tmp = $prefix . $trail ;
1196                                         $s .= (trim($tmp) == '')? '': $tmp;
1197                                         continue;
1198                                 }
1199
1200                                 if ( $ns == NS_IMAGE ) {
1201                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1202                                         $wgLinkCache->addImageLinkObj( $nt );
1203                                         continue;
1204                                 }
1205
1206                                 if ( $ns == NS_CATEGORY ) {
1207                                         $t = $nt->getText() ;
1208                                         $nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).':'.$t ) ;
1209
1210                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1211                                         $pPLC=$sk->postParseLinkColour();
1212                                         $sk->postParseLinkColour( false );
1213                                         $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1214                                         $sk->postParseLinkColour( $pPLC );
1215                                         $wgLinkCache->resume();
1216
1217                                         if ( $wasblank ) {
1218                                                 if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
1219                                                         $sortkey = $this->mTitle->getText();
1220                                                 } else {
1221                                                         $sortkey = $this->mTitle->getPrefixedText();
1222                                                 }
1223                                         } else {
1224                                                 $sortkey = $text;
1225                                         }
1226                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1227                                         $this->mOutput->mCategoryLinks[] = $t ;
1228                                         $s .= $prefix . $trail ;
1229                                         continue;
1230                                 }
1231                         }
1232
1233                         if( ( $nt->getPrefixedText() === $this->mTitle->getPrefixedText() ) &&
1234                             ( strpos( $link, '#' ) === FALSE ) ) {
1235                                 # Self-links are handled specially; generally de-link and change to bold.
1236                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1237                                 continue;
1238                         }
1239
1240                         if( $ns == NS_MEDIA ) {
1241                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1242                                 $wgLinkCache->addImageLinkObj( $nt );
1243                                 continue;
1244                         } elseif( $ns == NS_SPECIAL ) {
1245                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1246                                 continue;
1247                         }
1248                         $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1249                 }
1250                 wfProfileOut( $fname );
1251                 return $s;
1252         }
1253
1254         /**#@+
1255          * Used by doBlockLevels()
1256          * @access private
1257          */
1258         /* private */ function closeParagraph() {
1259                 $result = '';
1260                 if ( '' != $this->mLastSection ) {
1261                         $result = '</' . $this->mLastSection  . ">\n";
1262                 }
1263                 $this->mInPre = false;
1264                 $this->mLastSection = '';
1265                 return $result;
1266         }
        # getCommon() returns the length of the longest common prefix
        # of both arguments, i.e. the number of leading characters they share.
1269         #
1270         /* private */ function getCommon( $st1, $st2 ) {
1271                 $fl = strlen( $st1 );
1272                 $shorter = strlen( $st2 );
1273                 if ( $fl < $shorter ) { $shorter = $fl; }
1274
1275                 for ( $i = 0; $i < $shorter; ++$i ) {
1276                         if ( $st1{$i} != $st2{$i} ) { break; }
1277                 }
1278                 return $i;
1279         }
1280         # These next three functions open, continue, and close the list
1281         # element appropriate to the prefix character passed into them.
1282         #
1283         /* private */ function openList( $char ) {
1284                 $result = $this->closeParagraph();
1285
1286                 if ( '*' == $char ) { $result .= '<ul><li>'; }
1287                 else if ( '#' == $char ) { $result .= '<ol><li>'; }
1288                 else if ( ':' == $char ) { $result .= '<dl><dd>'; }
1289                 else if ( ';' == $char ) {
1290                         $result .= '<dl><dt>';
1291                         $this->mDTopen = true;
1292                 }
1293                 else { $result = '<!-- ERR 1 -->'; }
1294
1295                 return $result;
1296         }
1297
1298         /* private */ function nextItem( $char ) {
1299                 if ( '*' == $char || '#' == $char ) { return '</li><li>'; }
1300                 else if ( ':' == $char || ';' == $char ) {
1301                         $close = '</dd>';
1302                         if ( $this->mDTopen ) { $close = '</dt>'; }
1303                         if ( ';' == $char ) {
1304                                 $this->mDTopen = true;
1305                                 return $close . '<dt>';
1306                         } else {
1307                                 $this->mDTopen = false;
1308                                 return $close . '<dd>';
1309                         }
1310                 }
1311                 return '<!-- ERR 2 -->';
1312         }
1313
1314         /* private */ function closeList( $char ) {
1315                 if ( '*' == $char ) { $text = '</li></ul>'; }
1316                 else if ( '#' == $char ) { $text = '</li></ol>'; }
1317                 else if ( ':' == $char ) {
1318                         if ( $this->mDTopen ) {
1319                                 $this->mDTopen = false;
1320                                 $text = '</dt></dl>';
1321                         } else {
1322                                 $text = '</dd></dl>';
1323                         }
1324                 }
1325                 else {  return '<!-- ERR 3 -->'; }
1326                 return $text."\n";
1327         }
1328         /**#@-*/
1329
1330         /**
1331          * Make lists from lines starting with ':', '*', '#', etc.
1332          *
1333          * @access private
1334          * @return string the lists rendered as HTML
1335          */
1336         function doBlockLevels( $text, $linestart ) {
1337                 $fname = 'Parser::doBlockLevels';
1338                 wfProfileIn( $fname );
1339
1340                 # Parsing through the text line by line.  The main thing
1341                 # happening here is handling of block-level elements p, pre,
1342                 # and making lists from lines starting with * # : etc.
1343                 #
1344                 $textLines = explode( "\n", $text );
1345
1346                 $lastPrefix = $output = $lastLine = '';
1347                 $this->mDTopen = $inBlockElem = false;
1348                 $prefixLength = 0;
1349                 $paragraphStack = false;
1350
1351                 if ( !$linestart ) {
1352                         $output .= array_shift( $textLines );
1353                 }
1354                 foreach ( $textLines as $oLine ) {
1355                         $lastPrefixLength = strlen( $lastPrefix );
1356                         $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
1357                         $preOpenMatch = preg_match('/<pre/i', $oLine );
1358                         if ( !$this->mInPre ) {
1359                                 # Multiple prefixes may abut each other for nested lists.
1360                                 $prefixLength = strspn( $oLine, '*#:;' );
1361                                 $pref = substr( $oLine, 0, $prefixLength );
1362
1363                                 # eh?
1364                                 $pref2 = str_replace( ';', ':', $pref );
1365                                 $t = substr( $oLine, $prefixLength );
1366                                 $this->mInPre = !empty($preOpenMatch);
1367                         } else {
1368                                 # Don't interpret any other prefixes in preformatted text
1369                                 $prefixLength = 0;
1370                                 $pref = $pref2 = '';
1371                                 $t = $oLine;
1372                         }
1373
1374                         # List generation
1375                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1376                                 # Same as the last item, so no need to deal with nesting or opening stuff
1377                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1378                                 $paragraphStack = false;
1379
1380                                 if ( substr( $pref, -1 ) == ';') {
1381                                         # The one nasty exception: definition lists work like this:
1382                                         # ; title : definition text
1383                                         # So we check for : in the remainder text to split up the
1384                                         # title and definition, without b0rking links.
1385                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1386                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1387                                                 $term = $match[1];
1388                                                 $output .= $term . $this->nextItem( ':' );
1389                                                 $t = $match[2];
1390                                         }
1391                                 }
1392                         } elseif( $prefixLength || $lastPrefixLength ) {
1393                                 # Either open or close a level...
1394                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1395                                 $paragraphStack = false;
1396
1397                                 while( $commonPrefixLength < $lastPrefixLength ) {
1398                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1399                                         --$lastPrefixLength;
1400                                 }
1401                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1402                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1403                                 }
1404                                 while ( $prefixLength > $commonPrefixLength ) {
1405                                         $char = substr( $pref, $commonPrefixLength, 1 );
1406                                         $output .= $this->openList( $char );
1407
1408                                         if ( ';' == $char ) {
1409                                                 # FIXME: This is dupe of code above
1410                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1411                                                         $term = $match[1];
1412                                                         $output .= $term . $this->nextItem( ':' );
1413                                                         $t = $match[2];
1414                                                 }
1415                                         }
1416                                         ++$commonPrefixLength;
1417                                 }
1418                                 $lastPrefix = $pref2;
1419                         }
1420                         if( 0 == $prefixLength ) {
1421                                 # No prefix (not in list)--go to paragraph mode
1422                                 $uniq_prefix = UNIQ_PREFIX;
1423                                 // XXX: use a stack for nestable elements like span, table and div
1424                                 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1425                                 $closematch = preg_match(
1426                                         '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1427                                         '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
1428                                 if ( $openmatch or $closematch ) {
1429                                         $paragraphStack = false;
1430                                         $output .= $this->closeParagraph();
1431                                         if($preOpenMatch and !$preCloseMatch) {
1432                                                 $this->mInPre = true;
1433                                         }
1434                                         if ( $closematch ) {
1435                                                 $inBlockElem = false;
1436                                         } else {
1437                                                 $inBlockElem = true;
1438                                         }
1439                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1440                                         if ( ' ' == $t{0} and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
1441                                                 // pre
1442                                                 if ($this->mLastSection != 'pre') {
1443                                                         $paragraphStack = false;
1444                                                         $output .= $this->closeParagraph().'<pre>';
1445                                                         $this->mLastSection = 'pre';
1446                                                 }
1447                                                 $t = substr( $t, 1 );
1448                                         } else {
1449                                                 // paragraph
1450                                                 if ( '' == trim($t) ) {
1451                                                         if ( $paragraphStack ) {
1452                                                                 $output .= $paragraphStack.'<br />';
1453                                                                 $paragraphStack = false;
1454                                                                 $this->mLastSection = 'p';
1455                                                         } else {
1456                                                                 if ($this->mLastSection != 'p' ) {
1457                                                                         $output .= $this->closeParagraph();
1458                                                                         $this->mLastSection = '';
1459                                                                         $paragraphStack = '<p>';
1460                                                                 } else {
1461                                                                         $paragraphStack = '</p><p>';
1462                                                                 }
1463                                                         }
1464                                                 } else {
1465                                                         if ( $paragraphStack ) {
1466                                                                 $output .= $paragraphStack;
1467                                                                 $paragraphStack = false;
1468                                                                 $this->mLastSection = 'p';
1469                                                         } else if ($this->mLastSection != 'p') {
1470                                                                 $output .= $this->closeParagraph().'<p>';
1471                                                                 $this->mLastSection = 'p';
1472                                                         }
1473                                                 }
1474                                         }
1475                                 }
1476                         }
1477                         if ($paragraphStack === false) {
1478                                 $output .= $t."\n";
1479                         }
1480                 }
1481                 while ( $prefixLength ) {
1482                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1483                         --$prefixLength;
1484                 }
1485                 if ( '' != $this->mLastSection ) {
1486                         $output .= '</' . $this->mLastSection . '>';
1487                         $this->mLastSection = '';
1488                 }
1489
1490                 wfProfileOut( $fname );
1491                 return $output;
1492         }
1493
1494         /**
1495          * Return value of a magic variable (like PAGENAME)
1496          *
1497          * @access private
1498          */
1499         function getVariableValue( $index ) {
1500                 global $wgLang, $wgSitename, $wgServer;
1501
1502                 switch ( $index ) {
1503                         case MAG_CURRENTMONTH:
1504                                 return $wgLang->formatNum( date( 'm' ) );
1505                         case MAG_CURRENTMONTHNAME:
1506                                 return $wgLang->getMonthName( date('n') );
1507                         case MAG_CURRENTMONTHNAMEGEN:
1508                                 return $wgLang->getMonthNameGen( date('n') );
1509                         case MAG_CURRENTDAY:
1510                                 return $wgLang->formatNum( date('j') );
1511                         case MAG_PAGENAME:
1512                                 return $this->mTitle->getText();
1513                         case MAG_PAGENAMEE:
1514                                 return $this->mTitle->getPartialURL();
1515                         case MAG_NAMESPACE:
1516                                 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1517                                 return $wgLang->getNsText($this->mTitle->getNamespace()); # Patch by Dori
1518                         case MAG_CURRENTDAYNAME:
1519                                 return $wgLang->getWeekdayName( date('w')+1 );
1520                         case MAG_CURRENTYEAR:
1521                                 return $wgLang->formatNum( date( 'Y' ) );
1522                         case MAG_CURRENTTIME:
1523                                 return $wgLang->time( wfTimestampNow(), false );
1524                         case MAG_NUMBEROFARTICLES:
1525                                 return $wgLang->formatNum( wfNumberOfArticles() );
1526                         case MAG_SITENAME:
1527                                 return $wgSitename;
1528                         case MAG_SERVER:
1529                                 return $wgServer;
1530                         default:
1531                                 return NULL;
1532                 }
1533         }
1534
1535         /**
1536          * initialise the magic variables (like CURRENTMONTHNAME)
1537          *
1538          * @access private
1539          */
1540         function initialiseVariables() {
1541                 global $wgVariableIDs;
1542                 $this->mVariables = array();
1543                 foreach ( $wgVariableIDs as $id ) {
1544                         $mw =& MagicWord::get( $id );
1545                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1546                 }
1547         }
1548
1549         /**
1550          * Replace magic variables, templates, and template arguments
1551          * with the appropriate text. Templates are substituted recursively,
1552          * taking care to avoid infinite loops.
1553          *
1554          * Note that the substitution depends on value of $mOutputType:
1555          *  OT_WIKI: only {{subst:}} templates
1556          *  OT_MSG: only magic variables
1557          *  OT_HTML: all templates and magic variables
1558          *
         * @param string $text The text to transform
1560          * @param array $args Key-value pairs representing template parameters to substitute
1561          * @access private
1562          */
1563         function replaceVariables( $text, $args = array() ) {
1564                 global $wgLang, $wgScript, $wgArticlePath;
1565
1566                 # Prevent too big inclusions
1567                 if(strlen($text)> MAX_INCLUDE_SIZE)
1568                 return $text;
1569
1570                 $fname = 'Parser::replaceVariables';
1571                 wfProfileIn( $fname );
1572
1573                 $titleChars = Title::legalChars();
1574
1575                 # This function is called recursively. To keep track of arguments we need a stack:
1576                 array_push( $this->mArgStack, $args );
1577
1578                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1579                 $GLOBALS['wgCurParser'] =& $this;
1580
1581                 if ( $this->mOutputType == OT_HTML ) {
1582                         # Argument substitution
1583                         $text = preg_replace_callback( "/{{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
1584                 }
1585                 # Template substitution
1586                 $regex = '/{{(['.$titleChars.']*)(\\|.*?|)}}/s';
1587                 $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );
1588
1589                 array_pop( $this->mArgStack );
1590
1591                 wfProfileOut( $fname );
1592                 return $text;
1593         }
1594
1595         # Split template arguments
1596         function getTemplateArgs( $argsString ) {
1597                 if ( $argsString === '' ) {
1598                         return array();
1599                 }
1600
1601                 $args = explode( '|', substr( $argsString, 1 ) );
1602
1603                 # If any of the arguments contains a '[[' but no ']]', it needs to be
1604                 # merged with the next arg because the '|' character between belongs
1605                 # to the link syntax and not the template parameter syntax.
1606                 $argc = count($args);
1607                 $i = 0;
1608                 for ( $i = 0; $i < $argc-1; $i++ ) {
1609                         if ( substr_count ( $args[$i], '[[' ) != substr_count ( $args[$i], ']]' ) ) {
1610                                 $args[$i] .= '|'.$args[$i+1];
1611                                 array_splice($args, $i+1, 1);
1612                                 $i--;
1613                                 $argc--;
1614                         }
1615                 }
1616
1617                 return $args;
1618         }
1619
1620         /**
1621          * Return the text of a template, after recursively
1622          * replacing any variables or templates within the template.
1623          *
1624          * @param array $matches The parts of the template
1625          *  $matches[1]: the title, i.e. the part before the |
1626          *  $matches[2]: the parameters (including a leading |), if  any
1627          * @return string the text of the template
1628          * @access private
1629          */
1630         function braceSubstitution( $matches ) {
1631                 global $wgLinkCache, $wgLang;
1632                 $fname = 'Parser::braceSubstitution';
1633                 $found = false;
1634                 $nowiki = false;
1635                 $noparse = false;
1636
1637                 $title = NULL;
1638
1639                 # $part1 is the bit before the first |, and must contain only title characters
1640                 # $args is a list of arguments, starting from index 0, not including $part1
1641
1642                 $part1 = $matches[1];
1643                 # If the second subpattern matched anything, it will start with |
1644
1645                 $args = $this->getTemplateArgs($matches[2]);
1646                 $argc = count( $args );
1647
1648                 # {{{}}}
1649                 if ( strpos( $matches[0], '{{{' ) !== false ) {
1650                         $text = $matches[0];
1651                         $found = true;
1652                         $noparse = true;
1653                 }
1654
1655                 # SUBST
1656                 if ( !$found ) {
1657                         $mwSubst =& MagicWord::get( MAG_SUBST );
1658                         if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1659                                 if ( $this->mOutputType != OT_WIKI ) {
1660                                         # Invalid SUBST not replaced at PST time
1661                                         # Return without further processing
1662                                         $text = $matches[0];
1663                                         $found = true;
1664                                         $noparse= true;
1665                                 }
1666                         } elseif ( $this->mOutputType == OT_WIKI ) {
1667                                 # SUBST not found in PST pass, do nothing
1668                                 $text = $matches[0];
1669                                 $found = true;
1670                         }
1671                 }
1672
1673                 # MSG, MSGNW and INT
1674                 if ( !$found ) {
1675                         # Check for MSGNW:
1676                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1677                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1678                                 $nowiki = true;
1679                         } else {
1680                                 # Remove obsolete MSG:
1681                                 $mwMsg =& MagicWord::get( MAG_MSG );
1682                                 $mwMsg->matchStartAndRemove( $part1 );
1683                         }
1684
1685                         # Check if it is an internal message
1686                         $mwInt =& MagicWord::get( MAG_INT );
1687                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1688                                 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1689                                         $text = wfMsgReal( $part1, $args, true );
1690                                         $found = true;
1691                                 }
1692                         }
1693                 }
1694
1695                 # NS
1696                 if ( !$found ) {
1697                         # Check for NS: (namespace expansion)
1698                         $mwNs = MagicWord::get( MAG_NS );
1699                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1700                                 if ( intval( $part1 ) ) {
1701                                         $text = $wgLang->getNsText( intval( $part1 ) );
1702                                         $found = true;
1703                                 } else {
1704                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1705                                         if ( !is_null( $index ) ) {
1706                                                 $text = $wgLang->getNsText( $index );
1707                                                 $found = true;
1708                                         }
1709                                 }
1710                         }
1711                 }
1712
1713                 # LOCALURL and LOCALURLE
1714                 if ( !$found ) {
1715                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1716                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1717
1718                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1719                                 $func = 'getLocalURL';
1720                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1721                                 $func = 'escapeLocalURL';
1722                         } else {
1723                                 $func = '';
1724                         }
1725
1726                         if ( $func !== '' ) {
1727                                 $title = Title::newFromText( $part1 );
1728                                 if ( !is_null( $title ) ) {
1729                                         if ( $argc > 0 ) {
1730                                                 $text = $title->$func( $args[0] );
1731                                         } else {
1732                                                 $text = $title->$func();
1733                                         }
1734                                         $found = true;
1735                                 }
1736                         }
1737                 }
1738
1739                 # Internal variables
1740                 if ( !$this->mVariables ) {
1741                         $this->initialiseVariables();
1742                 }
1743                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1744                         $text = $this->mVariables[$part1];
1745                         $found = true;
1746                         $this->mOutput->mContainsOldMagic = true;
1747                 }
1748
1749                 # GRAMMAR
1750                 if ( !$found && $argc == 1 ) {
1751                         $mwGrammar =& MagicWord::get( MAG_GRAMMAR );
1752                         if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
1753                                 $text = $wgLang->convertGrammar( $args[0], $part1 );
1754                                 $found = true;
1755                         }
1756                 }
1757
1758                 # Template table test
1759
1760                 # Did we encounter this template already? If yes, it is in the cache
1761                 # and we need to check for loops.
1762                 if ( isset( $this->mTemplates[$part1] ) ) {
1763                         # Infinite loop test
1764                         if ( isset( $this->mTemplatePath[$part1] ) ) {
1765                                 $noparse = true;
1766                                 $found = true;
1767                         }
1768                         # set $text to cached message.
1769                         $text = $this->mTemplates[$part1];
1770                         $found = true;
1771                 }
1772
1773                 # Load from database
1774                 if ( !$found ) {
1775                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1776                         if ( !is_null( $title ) && !$title->isExternal() ) {
1777                                 # Check for excessive inclusion
1778                                 $dbk = $title->getPrefixedDBkey();
1779                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1780                                         # This should never be reached.
1781                                         $article = new Article( $title );
1782                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1783                                         if ( $articleContent !== false ) {
1784                                                 $found = true;
1785                                                 $text = $articleContent;
1786                                         }
1787                                 }
1788
1789                                 # If the title is valid but undisplayable, make a link to it
1790                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1791                                         $text = '[['.$title->getPrefixedText().']]';
1792                                         $found = true;
1793                                 }
1794
1795                                 # Template cache array insertion
1796                                 $this->mTemplates[$part1] = $text;
1797                         }
1798                 }
1799
1800                 # Recursive parsing, escaping and link table handling
1801                 # Only for HTML output
1802                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1803                         $text = wfEscapeWikiText( $text );
1804                 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1805                         # Clean up argument array
1806                         $assocArgs = array();
1807                         $index = 1;
1808                         foreach( $args as $arg ) {
1809                                 $eqpos = strpos( $arg, '=' );
1810                                 if ( $eqpos === false ) {
1811                                         $assocArgs[$index++] = $arg;
1812                                 } else {
1813                                         $name = trim( substr( $arg, 0, $eqpos ) );
1814                                         $value = trim( substr( $arg, $eqpos+1 ) );
1815                                         if ( $value === false ) {
1816                                                 $value = '';
1817                                         }
1818                                         if ( $name !== false ) {
1819                                                 $assocArgs[$name] = $value;
1820                                         }
1821                                 }
1822                         }
1823
1824                         # Do not enter included links in link table
1825                         if ( !is_null( $title ) ) {
1826                                 $wgLinkCache->suspend();
1827                         }
1828
1829                         # Add a new element to the templace recursion path
1830                         $this->mTemplatePath[$part1] = 1;
1831
1832                         $text = $this->strip( $text, $this->mStripState );
1833                         $text = $this->removeHTMLtags( $text );
1834                         $text = $this->replaceVariables( $text, $assocArgs );
1835
1836                         # Resume the link cache and register the inclusion as a link
1837                         if ( !is_null( $title ) ) {
1838                                 $wgLinkCache->resume();
1839                                 $wgLinkCache->addLinkObj( $title );
1840                         }
1841                 }
1842
1843                 # Empties the template path
1844                 $this->mTemplatePath = array();
1845
1846                 if ( !$found ) {
1847                         return $matches[0];
1848                 } else {
1849                         # replace ==section headers==
1850                         # XXX this needs to go away once we have a better parser.
1851                         if ( $this->mOutputType != OT_WIKI ) {
1852                                 $encodedname = base64_encode($title->getPrefixedDBkey());
1853                                 $wfCurrentSectionNumber = 0;
1854                                 for ( $i = 1; $i <= 6; ++$i ) {
1855                                         $h = substr( '======', 0, $i );
1856                                         $text = preg_replace_callback( "/^({$h})([^=].*){$h}\\s?$/m",
1857                                         create_function('$matches',
1858 'return "${matches[1]}$matches[2] __MWTEMPLATESECTION='.$encodedname.
1859 '&" . wfGetSectionNumber() . "__${matches[1]}";'
1860                                         ), $text);
1861                                 }
1862                         }
1863                         return $text;
1864                 }
1865         }
1866
1867         /**
1868          * Triple brace replacement -- expands one {{{name}}} template argument.
1869          * $matches[1] is the argument name and $matches[0] the whole match, which is
1870          * returned as-is when the last entry of mArgStack has no such argument.
1871          * @access private
1872          */
1873         function argSubstitution( $matches ) {
1874                 $frameArgs = end( $this->mArgStack );
1875                 $key = trim( $matches[1] );
1876                 if ( !array_key_exists( $key, $frameArgs ) ) {
1877                         return $matches[0];
1878                 }
1879                 # A supplied value is stripped, sanitized and expanded before use
1880                 $value = $this->strip( $frameArgs[$key], $this->mStripState );
1881                 $value = $this->removeHTMLtags( $value );
1882                 return $this->replaceVariables( $value, array() );
1883         }
1884
1885         /**
1886          * Count one more inclusion of $dbk and tell whether it is still allowed.
1887          * The counter kept in mIncludeCount grows on every call, allowed or not.
1888          * @param string $dbk key of the included entity (presumably a DB key)
1889          * @return bool true while the updated count is within MAX_INCLUDE_REPEAT
1890          * @access private
1891          */
1892         function incrementIncludeCount( $dbk ) {
1893                 # First sighting of a key counts up from zero
1894                 $previous = array_key_exists( $dbk, $this->mIncludeCount )
1895                         ? $this->mIncludeCount[$dbk] : 0;
1896                 $this->mIncludeCount[$dbk] = $previous + 1;
1897                 return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
1898         }
1899
1900
1901         /**
1902          * Cleans up HTML: removes comments, keeps whitelisted tags (their attributes
1903          * run through fixTagAttributes()) and escapes every other '<' ... '>' piece.
1904          * The whitelist is empty unless $wgUserHtml is set. Without $wgUseTidy kept
1905          * tags are also balanced: bad closers, table cells outside a table and illegal
1906          * nesting are escaped, and tags still open at the end are closed.
1907          * @access private
1908          */
1909         function removeHTMLtags( $text ) {
1910                 global $wgUseTidy, $wgUserHtml;
1911                 $fname = 'Parser::removeHTMLtags';
1912                 wfProfileIn( $fname );
1913
1914                 if( $wgUserHtml ) {
1915                         $htmlpairs = array( # Tags that must be closed
1916                                 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
1917                                 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
1918                                 'strike', 'strong', 'tt', 'var', 'div', 'center',
1919                                 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
1920                                 'ruby', 'rt' , 'rb' , 'rp', 'p'
1921                         );
1922                         $htmlsingle = array( 'br', 'hr', 'li', 'dt', 'dd' );
1923                         $htmlnest = array( # Tags that can be nested--??
1924                                 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
1925                                 'dl', 'font', 'big', 'small', 'sub', 'sup'
1926                         );
1927                         $tabletags = array( 'td', 'th', 'tr' ); # Can only appear inside table
1928                 } else {
1929                         $htmlpairs = array();
1930                         $htmlsingle = array();
1931                         $htmlnest = array();
1932                         $tabletags = array();
1933                 }
1934
1935                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1936                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1937
1938                 $htmlattrs = $this->getHTMLattrs () ;
1939
1940                 # Remove HTML comments
1941                 $text = $this->removeHTMLcomments( $text );
1942
1943                 # Shape of one piece after a '<': 1 = optional '/', 2 = tag name,
1944                 # 3 = attributes, 4 = closing '>', 5 = the text following the tag
1945                 $tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';
1946
1947                 $bits = explode( '<', $text );
1948                 $text = array_shift( $bits );
1949                 if(!$wgUseTidy) {
1950                         $tagstack = array(); $tablestack = array();
1951                         foreach ( $bits as $x ) {
1952                                 if ( !preg_match( $tagPattern, $x, $regs ) ) {
1953                                         # Not tag-shaped: escape it here rather than unpack an empty match
1954                                         $text .= '&lt;' . str_replace( '>', '&gt;', $x);
1955                                         continue;
1956                                 }
1957                                 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1958
1959                                 $badtag = 0 ;
1960                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1961                                         # Check our stack
1962                                         if ( $slash ) {
1963                                                 # Closing a tag...
1964                                                 if ( ! in_array( $t, $htmlsingle ) &&
1965                                                 ( $ot = array_pop( $tagstack ) ) != $t ) {
1966                                                         array_push( $tagstack, $ot );
1967                                                         $badtag = 1;
1968                                                 } else {
1969                                                         if ( $t == 'table' ) {
1970                                                                 $tagstack = array_pop( $tablestack );
1971                                                         }
1972                                                         $newparams = '';
1973                                                 }
1974                                         } else {
1975                                                 # Keep track for later
1976                                                 if ( in_array( $t, $tabletags ) &&
1977                                                 ! in_array( 'table', $tagstack ) ) {
1978                                                         $badtag = 1;
1979                                                 } else if ( in_array( $t, $tagstack ) &&
1980                                                 ! in_array ( $t , $htmlnest ) ) {
1981                                                         $badtag = 1 ;
1982                                                 } else if ( ! in_array( $t, $htmlsingle ) ) {
1983                                                         if ( $t == 'table' ) {
1984                                                                 array_push( $tablestack, $tagstack );
1985                                                                 $tagstack = array();
1986                                                         }
1987                                                         array_push( $tagstack, $t );
1988                                                 }
1989                                                 # Strip non-approved attributes from the tag
1990                                                 $newparams = $this->fixTagAttributes($params);
1991                                         }
1992                                         if ( ! $badtag ) {
1993                                                 $rest = str_replace( '>', '&gt;', $rest );
1994                                                 $text .= "<$slash$t $newparams$brace$rest";
1995                                                 continue;
1996                                         }
1997                                 }
1998                                 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
1999                         }
2000                         # Close off any remaining tags
2001                         while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
2002                                 $text .= "</$t>\n";
2003                                 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
2004                         }
2005                 } else {
2006                         # this might be possible using tidy itself
2007                         foreach ( $bits as $x ) {
2008                                 if ( preg_match( $tagPattern, $x, $regs ) && in_array( $t = strtolower( $regs[2] ), $htmlelements ) ) {
2009                                         $newparams = $this->fixTagAttributes( $regs[3] );
2010                                         $rest = str_replace( '>', '&gt;', $regs[5] );
2011                                         $text .= "<{$regs[1]}$t $newparams{$regs[4]}$rest";
2012                                 } else {
2013                                         $text .= '&lt;' . str_replace( '>', '&gt;', $x);
2014                                 }
2015                         }
2016                 }
2017                 wfProfileOut( $fname );
2018                 return $text;
2019         }
2020
2021         /**
2022          * Remove '<!--', '-->', and everything between.
2023          * When a comment, together with the spaces around it, sits between two
2024          * newlines, the spaces and one of the newlines go as well, so that no
2025          * blank line is left behind. An unterminated comment ends the scan and
2026          * is left in the text.
2027          *
2028          * @access private
2029          */
2030         function removeHTMLcomments( $text ) {
2031                 $fname='Parser::removeHTMLcomments';
2032                 wfProfileIn( $fname );
2033                 for ( ;; ) {
2034                         $open = strpos( $text, '<!--' );
2035                         if ( $open === false ) {
2036                                 break;
2037                         }
2038                         $close = strpos( $text, '-->', $open + 4 );
2039                         if ( $close === false ) {
2040                                 # Unterminated comment; bail out
2041                                 break;
2042                         }
2043                         $close += 3; # now the offset just past '-->'
2044                         # Widen the span over the spaces on either side of the comment
2045                         $left = max( $open - 1, 0 );
2046                         while ( $left > 0 && substr( $text, $left, 1 ) === ' ' ) {
2047                                 $left--;
2048                         }
2049                         $right = $close;
2050                         while ( substr( $text, $right, 1 ) === ' ' ) {
2051                                 $right++;
2052                         }
2053                         if ( substr( $text, $left, 1 ) === "\n" && substr( $text, $right, 1 ) === "\n" ) {
2054                                 # Comment alone on its line: collapse the whole span to one newline
2055                                 $text = substr_replace( $text, "\n", $left, $right - $left + 1 );
2056                         } else {
2057                                 # Remove just the comment.
2058                                 $text = substr_replace( $text, '', $open, $close - $open );
2059                         }
2060                 }
2061                 wfProfileOut( $fname );
2062                 return $text;
2063         }
2064
2065         /**
2066          * This function accomplishes several tasks:
2067          * 1) Auto-number headings if that option is enabled
2068          * 2) Add an [edit] link to sections when the title is editable and the option is on
2069          * 3) Add a table of contents after the first block of text, or in place of __TOC__
2070          * 4) Auto-anchor headings
2071          * It collects the data of all headlines, then splits up the string and re-inserts them formatted.
2072          * @param string $text HTML whose <h1>..<h6> headings are to be formatted
2073          * @param bool $isMain if false, no TOC is inserted after the first block of text
2074          * @access private
2075          */
2076         /* private */ function formatHeadings( $text, $isMain=true ) {
2077                 global $wgInputEncoding, $wgMaxTocLevel, $wgLang, $wgLinkHolders;
2078
2079                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
2080                 $doShowToc = $this->mOptions->getShowToc();
2081                 $forceTocHere = false;
2082                 if( !$this->mTitle->userCanEdit() ) {
2083                         $showEditLink = 0;
2084                         $rightClickHack = 0;
2085                 } else {
2086                         $showEditLink = $this->mOptions->getEditSection();
2087                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
2088                 }
2089
2090                 # Inhibit editsection links if requested in the page
2091                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
2092                 if( $esw->matchAndRemove( $text ) ) {
2093                         $showEditLink = 0;
2094                 }
2095                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
2096                 # do not add TOC
2097                 $mw =& MagicWord::get( MAG_NOTOC );
2098                 if( $mw->matchAndRemove( $text ) ) {
2099                         $doShowToc = 0;
2100                 }
2101
2102                 # never add the TOC to the Main Page. This is an entry page that should not
2103                 # be more than 1-2 screens large anyway
2104                 if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
2105                         $doShowToc = 0;
2106                 }
2107
2108                 # Get all headlines for numbering them and adding funky stuff like [edit]
2109                 # links - this is for later, but we need the number of headlines right now
2110                 $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );
2111
2112                 # if there are fewer than 4 headlines in the article, do not show TOC
2113                 if( $numMatches < 4 ) {
2114                         $doShowToc = 0;
2115                 }
2116
2117                 # if the string __TOC__ (not case-sensitive) occurs in the HTML,
2118                 # override above conditions and always show TOC at that place
2119                 $mw =& MagicWord::get( MAG_TOC );
2120                 if ($mw->match( $text ) ) {
2121                         $doShowToc = 1;
2122                         $forceTocHere = true;
2123                 } else {
2124                         # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
2125                         # override above conditions and always show TOC above first header
2126                         $mw =& MagicWord::get( MAG_FORCETOC );
2127                         if ($mw->matchAndRemove( $text ) ) {
2128                                 $doShowToc = 1;
2129                         }
2130                 }
2131                 # Anchor bookkeeping, filled in per headline below
2132                 $refer = array(); $refers = array(); $refcount = array();
2133
2134                 # We need this to perform operations on the HTML
2135                 $sk =& $this->mOptions->getSkin();
2136
2137                 # headline counter
2138                 $headlineCount = 0;
2139                 $sectionCount = 0; # headlineCount excluding template sections
2140
2141                 # Ugh .. the TOC should have neat indentation levels which can be
2142                 # passed to the skin functions. These are determined here
2143                 $toclevel = 0;
2144                 $toc = '';
2145                 $full = '';
2146                 $head = array();
2147                 $sublevelCount = array();
2148                 $level = 0;
2149                 $prevlevel = 0;
2150                 foreach( $matches[3] as $headline ) {
2151                         $istemplate = 0;
2152                         $templatetitle = "";
2153                         $templatesection = 0;
2154
2155                         if (preg_match("/__MWTEMPLATESECTION=([^&]+)&([^_]+)__/", $headline, $mat)) {
2156                                 $istemplate = 1;
2157                                 $templatetitle = base64_decode($mat[1]);
2158                                 $templatesection = 1 + (int)base64_decode($mat[2]);
2159                                 $headline = preg_replace("/__MWTEMPLATESECTION=([^&]+)&([^_]+)__/", "", $headline);
2160                         }
2161
2162                         $numbering = '';
2163                         if( $level ) {
2164                                 $prevlevel = $level;
2165                         }
2166                         $level = $matches[1][$headlineCount];
2167                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
2168                                 # reset when we enter a new level
2169                                 $sublevelCount[$level] = 0;
2170                                 $toc .= $sk->tocIndent( $level - $prevlevel );
2171                                 $toclevel += $level - $prevlevel;
2172                         }
2173                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
2174                                 # reset when we step back a level
2175                                 $sublevelCount[$level+1]=0;
2176                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
2177                                 $toclevel -= $prevlevel - $level;
2178                         }
2179                         # count number of headlines for each level
2180                         @$sublevelCount[$level]++;
2181                         if( $doNumberHeadings || $doShowToc ) {
2182                                 $dot = 0;
2183                                 for( $i = 1; $i <= $level; $i++ ) {
2184                                         if( !empty( $sublevelCount[$i] ) ) {
2185                                                 if( $dot ) {
2186                                                         $numbering .= '.';
2187                                                 }
2188                                                 $numbering .= $wgLang->formatNum( $sublevelCount[$i] );
2189                                                 $dot = 1;
2190                                         }
2191                                 }
2192                         }
2193
2194                         # The canonized header is a version of the header text safe to use for links
2195                         # Expand stripped sections like <math>; the second pass works on the result of the first
2196                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
2197                         $canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );
2198
2199                         # Remove link placeholders by the link text.
2200                         #     <!--LINK number-->
2201                         # turns into
2202                         #     link text with suffix
2203                         $canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
2204                                                             "\$wgLinkHolders['texts'][\$1]",
2205                                                             $canonized_headline );
2206
2207                         # strip out HTML
2208                         $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
2209                         $tocline = trim( $canonized_headline );
2210                         $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
2211                         $replacearray = array(
2212                                 '%3A' => ':',
2213                                 '%' => '.'
2214                         );
2215                         $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
2216                         $refer[$headlineCount] = $canonized_headline;
2217
2218                         # count how many in assoc. array so we can track dupes in anchors
2219                         @$refers[$canonized_headline]++;
2220                         $refcount[$headlineCount]=$refers[$canonized_headline];
2221
2222                         # Prepend the number to the heading text
2223
2224                         if( $doNumberHeadings || $doShowToc ) {
2225                                 $tocline = $numbering . ' ' . $tocline;
2226
2227                                 # Don't number the heading if it is the only one (looks silly)
2228                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
2229                                         # the two are different if the line contains a link
2230                                         $headline=$numbering . ' ' . $headline;
2231                                 }
2232                         }
2233
2234                         # Create the anchor for linking from the TOC to the section
2235                         $anchor = $canonized_headline;
2236                         if($refcount[$headlineCount] > 1 ) {
2237                                 $anchor .= '_' . $refcount[$headlineCount];
2238                         }
2239                         if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
2240                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
2241                         }
2242                         if( $showEditLink ) {
2243                                 if ( empty( $head[$headlineCount] ) ) {
2244                                         $head[$headlineCount] = '';
2245                                 }
2246                                 if( $istemplate )
2247                                         $head[$headlineCount] .= $sk->editSectionLinkForOther($templatetitle, $templatesection);
2248                                 else
2249                                         $head[$headlineCount] .= $sk->editSectionLink($sectionCount+1);
2250                         }
2251
2252                         # Add the edit section span
2253                         if( $rightClickHack ) {
2254                                 $headline = $sk->editSectionScript($sectionCount+1,$headline);
2255                         }
2256
2257                         # give headline the correct <h#> tag
2258                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';
2259
2260                         $headlineCount++;
2261                         if( !$istemplate )
2262                                 $sectionCount++;
2263                 }
2264
2265                 if( $doShowToc ) {
2266                         # Close the indentation levels still open, then hand the lines to tocTable()
2267                         $toc .= $sk->tocUnindent( $toclevel );
2268                         $toc = $sk->tocTable( $toc );
2269                 }
2270
2271                 # split up and insert constructed headlines
2272
2273                 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
2274                 $i = 0;
2275
2276                 foreach( $blocks as $block ) {
2277                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
2278                                 # This is the [edit] link that appears for the top block of text when
2279                                 # section editing is enabled
2280
2281                                 # Disabled because it broke block formatting
2282                                 # For example, a bullet point in the top line
2283                                 # $full .= $sk->editSectionLink(0);
2284                         }
2285                         $full .= $block;
2286                         if( $doShowToc && !$i && $isMain && !$forceTocHere) {
2287                         # Top anchor now in skin
2288                                 $full = $full.$toc;
2289                         }
2290
2291                         if( !empty( $head[$i] ) ) {
2292                                 $full .= $head[$i];
2293                         }
2294                         $i++;
2295                 }
2296                 if($forceTocHere) {
2297                         $mw =& MagicWord::get( MAG_TOC );
2298                         return $mw->replace( $toc, $full );
2299                 } else {
2300                         return $full;
2301                 }
2302         }
2303
2304         /**
2305          * Return an HTML link for the "ISBN 123456" text
2306          * "ISBN " followed by a run of digits, dashes or capital letters becomes a link to
2307          * Special:Booksources; any other occurrence is put back as it was.
2308          * @param string $text text to be processed
2309          * @access private
2310          */
2311         function magicISBN( $text ) {
2312                 $fname = 'Parser::magicISBN';
2313                 wfProfileIn( $fname );
2314                 # The delimiter is a literal, so explode() does the job of the regex-based split()
2315                 $a = explode( 'ISBN ', ' '.$text );
2316                 if ( count ( $a ) < 2 ) {
2317                         wfProfileOut( $fname );
2318                         return $text;
2319                 }
2320                 $text = (string)substr( array_shift( $a ), 1);
2321                 $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';
2322
2323                 foreach ( $a as $x ) {
2324                         # Blanks after the keyword are set aside so they can be put back
2325                         $len = strspn( $x, ' ' );
2326                         $blank = str_repeat( ' ', $len );
2327                         $x = (string)substr( $x, $len );
2328                         if ( $x == '' ) { # blank isbn
2329                                 $text .= "ISBN $blank";
2330                                 continue;
2331                         }
2332                         # strspn() measures the run of valid characters, also when it ends the text
2333                         $len = strspn( $x, $valid );
2334                         $isbn = (string)substr( $x, 0, $len );
2335                         $x = (string)substr( $x, $len );
2336                         $num = str_replace( '-', '', $isbn );
2337                         if ( '' == $num ) {
2338                                 # No number after all: put the text back, any dashes included
2339                                 $text .= "ISBN $blank$isbn$x";
2340                         } else {
2341                                 $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
2342                                 $text .= '<a href="' .
2343                                 $titleObj->escapeLocalUrl( 'isbn='.$num ) .
2344                                         "\" class=\"internal\">ISBN $isbn</a>";
2345                                 $text .= $x;
2346                         }
2347                 }
2348                 wfProfileOut( $fname );
2349                 return $text;
2350         }
2351
2352         /**
2353          * Return an HTML link for the "GEO ..." text
2354          * "GEO " followed by a run of digits and . + - : that contains a colon becomes a
2355          * link to Special:Geo; any other occurrence is put back as it was.
2356          * @access private
2357          */
2358         function magicGEO( $text ) {
2359                 $fname = 'Parser::magicGEO';
2360                 wfProfileIn( $fname );
2361                 # Only for the ~35000 U.S. Census Rambot pages: North/East turn into +, South/West into -
2362                 $latSigns = array( 'North' => '+', 'South' => '-' );
2363                 $lonSigns = array( 'East' => '+', 'West' => '-' );
2364                 foreach ( $latSigns as $ns => $latSign ) {
2365                         foreach ( $lonSigns as $ew => $lonSign ) {
2366                                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$ns}, (\d+)&deg;(\d+)'(\d+)\" {$ew}/" ,
2367                                         "(GEO {$latSign}\$1.\$2.\$3:{$lonSign}\$4.\$5.\$6)" , $text ) ;
2368                         }
2369                 }
2370
2371                 $a = explode( 'GEO ', ' '.$text );
2372                 if ( count ( $a ) < 2 ) {
2373                         wfProfileOut( $fname );
2374                         return $text;
2375                 }
2376                 $text = (string)substr( array_shift( $a ), 1);
2377                 $valid = '0123456789.+-:';
2378                 foreach ( $a as $x ) {
2379                         # Blanks after the keyword are set aside so they can be put back
2380                         $len = strspn( $x, ' ' );
2381                         $blank = str_repeat( ' ', $len );
2382                         $x = (string)substr( $x, $len );
2383                         # strspn() measures the run of valid characters, also when it ends the text
2384                         $len = strspn( $x, $valid );
2385                         $geo = (string)substr( $x, 0, $len );
2386                         $x = (string)substr( $x, $len );
2387                         $num = str_replace( '+', '', $geo );
2388                         if ( '' == $num || count ( explode ( ':' , $num , 3 ) ) < 2 ) {
2389                                 # Not a coordinate pair: put the text back, consumed characters included
2390                                 $text .= "GEO $blank$geo$x";
2391                         } else {
2392                                 $titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
2393                                 $text .= '<a href="' .
2394                                 $titleObj->escapeLocalUrl( 'coordinates='.$num ) .
2395                                         "\" class=\"internal\">GEO $geo</a>";
2396                                 $text .= $x;
2397                         }
2398                 }
2399                 wfProfileOut( $fname );
2400                 return $text;
2401         }
2402
2403         /**
2404          * Return an HTML link for the "RFC 1234" text
2405          * @access private
2406          * @param string $text text to be processed
2407          */
2408         function magicRFC( $text ) {
2409                 global $wgLang;
2410
2411                 $valid = '0123456789';
2412                 $internal = false;
2413
2414                 $a = split( 'RFC ', ' '.$text );
2415                 if ( count ( $a ) < 2 ) return $text;
2416                 $text = substr( array_shift( $a ), 1);
2417
2418                 /* Check if RFC keyword is preceed by [[.
2419                  * This test is made here cause of the array_shift above
2420                  * that prevent the test to be done in the foreach.
2421                  */
2422                 if(substr($text, -2) == '[[') { $internal = true; }
2423
2424                 foreach ( $a as $x ) {
2425                         /* token might be empty if we have RFC RFC 1234 */
2426                         if($x=='') {
2427                                 $text.='RFC ';
2428                                 continue;
2429                                 }
2430
2431                         $rfc = $blank = '' ;
2432
2433                         /** remove and save whitespaces in $blank */
2434                         while ( $x{0} == ' ' ) {
2435                                 $blank .= ' ';
2436                                 $x = substr( $x, 1 );
2437                         }
2438
2439                         /** remove and save the rfc number in $rfc */
2440                         while ( strstr( $valid, $x{0} ) != false ) {
2441                                 $rfc .= $x{0};
2442                                 $x = substr( $x, 1 );
2443                         }
2444
2445                         if ( $rfc == '') {
2446                                 /* call back stripped spaces*/
2447                                 $text .= "RFC $blank$x";
2448                         } elseif( $internal) {
2449                                 /* normal link */
2450                                 $text .= "RFC $rfc$x";
2451                         } else {
2452                                 /* build the external link*/
2453                                 $url = wfmsg( 'rfcurl' );
2454                                 $url = str_replace( '$1', $rfc, $url);
2455                                 $sk =& $this->mOptions->getSkin();
2456                                 $la = $sk->getExternalLinkAttributes( $url, 'RFC '.$rfc );
2457                                 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
2458                         }
2459
2460                         /* Check if the next RFC keyword is preceed by [[ */
2461                         $internal = (substr($x,-2) == '[[');
2462                 }
2463                 return $text;
2464         }
2465
2466         /**
2467          * Transform wiki markup when saving a page by doing \r\n -> \n
2468          * conversion, substitting signatures, {{subst:}} templates, etc.
2469          *
2470          * @param string $text the text to transform
2471          * @param Title &$title the Title object for the current article
2472          * @param User &$user the User object describing the current user
2473          * @param ParserOptions $options parsing options
2474          * @param bool $clearState whether to clear the parser state first
2475          * @return string the altered wiki markup
2476          * @access public
2477          */
2478         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
2479                 $this->mOptions = $options;
2480                 $this->mTitle =& $title;
2481                 $this->mOutputType = OT_WIKI;
2482
2483                 if ( $clearState ) {
2484                         $this->clearState();
2485                 }
2486
2487                 $stripState = false;
2488                 $pairs = array(
2489                         "\r\n" => "\n",
2490                         );
2491                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
2492                 // now with regexes
2493                 /*
2494                 $pairs = array(
2495                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
2496                         "/<br *?>/i" => "<br />",
2497                 );
2498                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
2499                 */
2500                 $text = $this->strip( $text, $stripState, false );
2501                 $text = $this->pstPass2( $text, $user );
2502                 $text = $this->unstrip( $text, $stripState );
2503                 $text = $this->unstripNoWiki( $text, $stripState );
2504                 return $text;
2505         }
2506
2507         /**
2508          * Pre-save transform helper function
2509          * @access private
2510          */
2511         function pstPass2( $text, &$user ) {
2512                 global $wgLang, $wgLocaltimezone, $wgCurParser;
2513
2514                 # Variable replacement
2515                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
2516                 $text = $this->replaceVariables( $text );
2517
2518                 # Signatures
2519                 #
2520                 $n = $user->getName();
2521                 $k = $user->getOption( 'nickname' );
2522                 if ( '' == $k ) { $k = $n; }
2523                 if(isset($wgLocaltimezone)) {
2524                         $oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone);
2525                 }
2526                 /* Note: this is an ugly timezone hack for the European wikis */
2527                 $d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
2528                   ' (' . date( 'T' ) . ')';
2529                 if(isset($wgLocaltimezone)) putenv('TZ='.$oldtzs);
2530
2531                 $text = preg_replace( '/~~~~~/', $d, $text );
2532                 $text = preg_replace( '/~~~~/', '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]] $d", $text );
2533                 $text = preg_replace( '/~~~/', '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]", $text );
2534
2535                 # Context links: [[|name]] and [[name (context)|]]
2536                 #
2537                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
2538                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
2539                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
2540                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
2541
2542                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
2543                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
2544                 $p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/";                # [[namespace:page|]] and [[:namespace:page|]]
2545                 $p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]
2546                 $context = '';
2547                 $t = $this->mTitle->getText();
2548                 if ( preg_match( $conpat, $t, $m ) ) {
2549                         $context = $m[2];
2550                 }
2551                 $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
2552                 $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
2553                 $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );
2554
2555                 if ( '' == $context ) {
2556                         $text = preg_replace( $p2, '[[\\1]]', $text );
2557                 } else {
2558                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
2559                 }
2560
2561                 /*
2562                 $mw =& MagicWord::get( MAG_SUBST );
2563                 $wgCurParser = $this->fork();
2564                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
2565                 $this->merge( $wgCurParser );
2566                 */
2567
2568                 # Trim trailing whitespace
2569                 # MAG_END (__END__) tag allows for trailing
2570                 # whitespace to be deliberately included
2571                 $text = rtrim( $text );
2572                 $mw =& MagicWord::get( MAG_END );
2573                 $mw->matchAndRemove( $text );
2574
2575                 return $text;
2576         }
2577
2578         /**
2579          * Set up some variables which are usually set up in parse()
2580          * so that an external function can call some class members with confidence
2581          * @access public
2582          */
2583         function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
2584                 $this->mTitle =& $title;
2585                 $this->mOptions = $options;
2586                 $this->mOutputType = $outputType;
2587                 if ( $clearState ) {
2588                         $this->clearState();
2589                 }
2590         }
2591
2592         /**
2593          * Transform a MediaWiki message by replacing magic variables.
2594          *
2595          * @param string $text the text to transform
2596          * @param ParserOptions $options  options
2597          * @return string the text with variables substituted
2598          * @access public
2599          */
2600         function transformMsg( $text, $options ) {
2601                 global $wgTitle;
2602                 static $executing = false;
2603
2604                 # Guard against infinite recursion
2605                 if ( $executing ) {
2606                         return $text;
2607                 }
2608                 $executing = true;
2609
2610                 $this->mTitle = $wgTitle;
2611                 $this->mOptions = $options;
2612                 $this->mOutputType = OT_MSG;
2613                 $this->clearState();
2614                 $text = $this->replaceVariables( $text );
2615
2616                 $executing = false;
2617                 return $text;
2618         }
2619
2620         /**
2621          * Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2622          * Callback will be called with the text within
2623          * Transform and return the text within
2624          * @access public
2625          */
2626         function setHook( $tag, $callback ) {
2627                 $oldVal = @$this->mTagHooks[$tag];
2628                 $this->mTagHooks[$tag] = $callback;
2629                 return $oldVal;
2630         }
2631 }
2632
/**
 * Value object holding what a parse produced: the output text, the language
 * links, the category links, a "contains old magic" flag and a cache time.
 *
 * The setters return whatever wfSetVar() returns (presumably the previous
 * value of the member; confirm against wfSetVar()).
 *
 * @package MediaWiki
 */
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        /**
         * Constructor. mCacheTime always starts out as the empty string.
         *
         * @param string $text output text
         * @param array $languageLinks language links
         * @param array $categoryLinks category links
         * @param bool $containsOldMagic initial value of the old-magic flag
         */
        function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = '';
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators, all delegating to wfSetVar()
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        /**
         * Fold another ParserOutput into this one.
         *
         * Language links and category links of $other are appended to the
         * corresponding lists of this object with array_merge(), and the
         * old-magic flags are OR-ed together. The text and the cache time are
         * left untouched.
         *
         * @param ParserOutput $other the output to merge in
         */
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Categories must come from $other's category list, not from our
                # own language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2670
/**
 * Set options of the Parser
 *
 * Plain holder for the switches the parser consults. Every option has a
 * getter; every option except the skin has a setter that delegates to
 * wfSetVar(), while the skin is stored by reference through setSkin().
 * initialiseFromUser() fills all options from global configuration and from
 * a user's preferences.
 *
 * @package MediaWiki
 */
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # --- texvc for <math> tags ---
        function getUseTeX() {
                return $this->mUseTeX;
        }
        function setUseTeX( $x ) {
                return wfSetVar( $this->mUseTeX, $x );
        }

        # --- dynamic date formatting ---
        function getUseDynamicDates() {
                return $this->mUseDynamicDates;
        }
        function setUseDynamicDates( $x ) {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        # --- interlanguage link handling ---
        function getInterwikiMagic() {
                return $this->mInterwikiMagic;
        }
        function setInterwikiMagic( $x ) {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        # --- inline external images ---
        function getAllowExternalImages() {
                return $this->mAllowExternalImages;
        }
        function setAllowExternalImages( $x ) {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        # --- skin (kept as a reference to the caller's variable) ---
        function getSkin() {
                return $this->mSkin;
        }
        function setSkin( &$x ) {
                $this->mSkin =& $x;
        }

        # --- date format index ---
        function getDateFormat() {
                return $this->mDateFormat;
        }
        function setDateFormat( $x ) {
                return wfSetVar( $this->mDateFormat, $x );
        }

        # --- "edit section" links ---
        function getEditSection() {
                return $this->mEditSection;
        }
        function setEditSection( $x ) {
                return wfSetVar( $this->mEditSection, $x );
        }

        # --- edit section on right click ---
        function getEditSectionOnRightClick() {
                return $this->mEditSectionOnRightClick;
        }
        function setEditSectionOnRightClick( $x ) {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        # --- automatic heading numbers ---
        function getNumberHeadings() {
                return $this->mNumberHeadings;
        }
        function setNumberHeadings( $x ) {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        # --- table of contents ---
        function getShowToc() {
                return $this->mShowToc;
        }
        function setShowToc( $x ) {
                return wfSetVar( $this->mShowToc, $x );
        }

        /**
         * Get parser options: build a new ParserOptions object and initialise
         * it from the given user.
         *
         * @param User &$user user to take the preferences from
         * @return ParserOptions
         */
        /* static */ function newFromUser( &$user ) {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        /**
         * Get user options: fill every option of this object.
         *
         * TeX, dynamic dates, interwiki magic and external images come from the
         * corresponding $wg* globals; the skin and the remaining options come
         * from the user. When $userInput is empty a fresh User object, marked
         * as loaded, is used in its place.
         *
         * @param User &$userInput user to read from, or an empty value
         */
        function initialiseFromUser( &$userInput ) {
                global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                $fname = 'ParserOptions::initialiseFromUser';
                wfProfileIn( $fname );

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Skin lookup gets its own profiling section
                $skinSection = $fname.'-skin';
                wfProfileIn( $skinSection );
                $this->mSkin =& $user->getSkin();
                wfProfileOut( $skinSection );

                # Per-user preferences
                $this->mDateFormat = $user->getOption( 'date' );
                $this->mEditSection = $user->getOption( 'editsection' );
                $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
                $this->mNumberHeadings = $user->getOption( 'numberheadings' );
                $this->mShowToc = $user->getOption( 'showtoc' );

                wfProfileOut( $fname );
        }

}
2750
# Regex callbacks, used in Parser::replaceVariables

/**
 * Forward a regex match to braceSubstitution() of the parser stored in the
 * global $wgCurParser.
 *
 * @param array $matches the match array handed over by the regex function
 * @return mixed whatever braceSubstitution() returns
 */
function wfBraceSubstitution( $matches ) {
        global $wgCurParser;

        $replacement = $wgCurParser->braceSubstitution( $matches );
        return $replacement;
}
2756
/**
 * Forward a regex match to argSubstitution() of the parser stored in the
 * global $wgCurParser.
 *
 * @param array $matches the match array handed over by the regex function
 * @return mixed whatever argSubstitution() returns
 */
function wfArgSubstitution( $matches ) {
        global $wgCurParser;

        $replacement = $wgCurParser->argSubstitution( $matches );
        return $replacement;
}
2761
/**
 * Return the total number of articles.
 *
 * Calls wfLoadSiteStats() first and then hands back the value of the global
 * $wgNumberOfArticles.
 *
 * @return mixed the current value of $wgNumberOfArticles
 */
function wfNumberOfArticles() {
        wfLoadSiteStats();

        global $wgNumberOfArticles;
        return $wgNumberOfArticles;
}
2771
/**
 * Get various statistics from the database.
 *
 * Does nothing unless $wgNumberOfArticles equals -1 (presumably the "not
 * loaded yet" marker set elsewhere). Otherwise row 1 of the site_stats table
 * is read from the slave connection and, if the read succeeds, copied into
 * $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles.
 *
 * @private
 */
function wfLoadSiteStats() {
        global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

        # Any value other than -1 means there is nothing to do
        if ( $wgNumberOfArticles != -1 ) {
                return;
        }

        $fname = 'wfLoadSiteStats';
        $dbr =& wfGetDB( DB_SLAVE );

        $fields = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
        $conds = array( 'ss_row_id' => 1 );
        $s = $dbr->getArray( 'site_stats', $fields, $conds, $fname );

        # A failed read leaves the globals as they were
        if ( $s !== false ) {
                $wgTotalViews = $s->ss_total_views;
                $wgTotalEdits = $s->ss_total_edits;
                $wgNumberOfArticles = $s->ss_good_articles;
        }
}
2795
/**
 * Escape only the characters that delimit HTML tags and attribute values:
 * the double quote, ">" and "<". Ampersands are left untouched.
 *
 * @param mixed $in text to escape (passed straight to str_replace())
 * @return mixed the escaped text
 */
function wfEscapeHTMLTagsOnly( $in ) {
        static $entityFor = array(
                '"' => '&quot;',
                '>' => '&gt;',
                '<' => '&lt;',
        );

        return str_replace( array_keys( $entityFor ), array_values( $entityFor ), $in );
}
2802
/**
 * Return the base64 encoding of the current value of the global
 * $wgCurrentSectionNumber (as a string) and then increment that global.
 *
 * @return string base64-encoded section number as it was before the increment
 */
function wfGetSectionNumber() {
        global $wgCurrentSectionNumber;

        $encoded = base64_encode( (string)$wgCurrentSectionNumber );
        ++$wgCurrentSectionNumber;

        return $encoded;
}
2809 ?>