includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 /**
   6  * PHP Parser
   7  *
   8  * Processes wiki markup
   9  *
 * There are two main entry points into the Parser class:
 * parse()
 *   produces HTML output
 * preSaveTransform()
 *   produces altered wiki markup.
  15  *
  16  * Globals used:
  17  *    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18  *
  19  * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20  *
  21  * settings:
  22  *  $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  23  *  $wgNamespacesWithSubpages, $wgAllowExternalImages*,
  24  *  $wgLocaltimezone
  25  *
  26  *  * only within ParserOptions
  27  *
  28  * @package MediaWiki
  29  */
  30
  31 /**
  32  * Variable substitution O(N^2) attack
  33  *
  34 * Without countermeasures, it would be possible to attack the parser by saving
  35 * a page filled with a large number of inclusions of large pages. The size of
  36 * the generated page would be proportional to the square of the input size.
  37 * Hence, we limit the number of inclusions of any given page, thus bringing any
  38 * attack back to O(N).
  39 */
  40 define( 'MAX_INCLUDE_REPEAT', 100 );
  41 define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million
  42
  43 # Allowed values for $mOutputType
  44 define( 'OT_HTML', 1 );
  45 define( 'OT_WIKI', 2 );
  46 define( 'OT_MSG' , 3 );
  47
  48 # string parameter for extractTags which will cause it
  49 # to strip HTML comments in addition to regular
  50 # <XML>-style tags. This should not be anything we
  51 # may want to use in wikisyntax
  52 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
  53
  54 # prefix for escaping, used in two functions at least
  55 define( 'UNIQ_PREFIX', 'NaodW29');
  56
  57 # Constants needed for external link processing
  58 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
  59 define( 'HTTP_PROTOCOLS', 'http|https' );
  60 # Everything except bracket, space, or control characters
  61 define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
  62 define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
  63 # Including space
  64 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
  65 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
  66 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
  67 define( 'EXT_LINK_BRACKETED',  '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
  68 define( 'EXT_IMAGE_REGEX',
  69         '/^('.HTTP_PROTOCOLS.':)'.  # Protocol
  70         '('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
  71         '('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
  72 );
  73
  74 /**
  75  * @todo document
  76  * @package MediaWiki
  77  */
  78 class Parser
  79 {
  80         # Persistent:
  81         var $mTagHooks;
  82
  83         # Cleared with clearState():
  84         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  85         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  86
  87         # Temporary:
  88         var $mOptions, $mTitle, $mOutputType,
  89             $mTemplates,        // cache of already loaded templates, avoids
  90                                 // multiple SQL queries for the same string
  91             $mTemplatePath;     // stores an unsorted hash of all the templates already loaded
  92                                 // in this path. Used for loop detection.
  93
  94         function Parser() {
  95                 $this->mTemplates = array();
  96                 $this->mTemplatePath = array();
  97                 $this->mTagHooks = array();
  98                 $this->clearState();
  99         }
 100
 101         function clearState() {
 102                 $this->mOutput = new ParserOutput;
 103                 $this->mAutonumber = 0;
 104                 $this->mLastSection = "";
 105                 $this->mDTopen = false;
 106                 $this->mVariables = false;
 107                 $this->mIncludeCount = array();
 108                 $this->mStripState = array();
 109                 $this->mArgStack = array();
 110                 $this->mInPre = false;
 111         }
 112
 113         # First pass--just handle <nowiki> sections, pass the rest off
 114         # to internalParse() which does all the real work.
 115         #
 116         # Returns a ParserOutput
 117         #
 118         function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
 119                 global $wgUseTidy;
 120                 $fname = 'Parser::parse';
 121                 wfProfileIn( $fname );
 122
 123                 if ( $clearState ) {
 124                         $this->clearState();
 125                 }
 126
 127                 $this->mOptions = $options;
 128                 $this->mTitle =& $title;
 129                 $this->mOutputType = OT_HTML;
 130
 131                 $stripState = NULL;
 132                 $text = $this->strip( $text, $this->mStripState );
 133                 $text = $this->internalParse( $text, $linestart );
 134                 $text = $this->unstrip( $text, $this->mStripState );
 135                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 136                 if(!$wgUseTidy) {
 137                         $fixtags = array(
 138                                 # french spaces, last one Guillemet-left
 139                                 # only if there is something before the space
 140                                 '/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;\\2',
 141                                 # french spaces, Guillemet-right
 142                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 143                                 '/<hr *>/i' => '<hr />',
 144                                 '/<br *>/i' => '<br />',
 145                                 '/<center *>/i' => '<div class="center">',
 146                                 '/<\\/center *>/i' => '</div>',
 147                                 # Clean up spare ampersands; note that we probably ought to be
 148                                 # more careful about named entities.
 149                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 150                         );
 151                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 152                 } else {
 153                         $fixtags = array(
 154                                 # french spaces, last one Guillemet-left
 155                                 '/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
 156                                 # french spaces, Guillemet-right
 157                                 '/(\\302\\253) /i' => '\\1&nbsp;',
 158                                 '/<center *>/i' => '<div class="center">',
 159                                 '/<\\/center *>/i' => '</div>'
 160                         );
 161                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 162                 }
 163                 # only once and last
 164                 $text = $this->doBlockLevels( $text, $linestart );
 165                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 166                 if($wgUseTidy) {
 167                         $text = $this->tidy($text);
 168                 }
 169                 $this->mOutput->setText( $text );
 170                 wfProfileOut( $fname );
 171                 return $this->mOutput;
 172         }
 173
 174         /* static */ function getRandomString() {
 175                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 176         }
 177
 178         # Replaces all occurrences of <$tag>content</$tag> in the text
 179         # with a random marker and returns the new text. the output parameter
 180         # $content will be an associative array filled with data on the form
 181         # $unique_marker => content.
 182
 183         # If $content is already set, the additional entries will be appended
 184
 185         # If $tag is set to STRIP_COMMENTS, the function will extract
 186         # <!-- HTML comments -->
 187
 188         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ''){
 189                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 190                 if ( !$content ) {
 191                         $content = array( );
 192                 }
 193                 $n = 1;
 194                 $stripped = '';
 195
 196                 while ( '' != $text ) {
 197                         if($tag==STRIP_COMMENTS) {
 198                                 $p = preg_split( '/<!--/i', $text, 2 );
 199                         } else {
 200                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 201                         }
 202                         $stripped .= $p[0];
 203                         if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
 204                                 $text = '';
 205                         } else {
 206                                 if($tag==STRIP_COMMENTS) {
 207                                         $q = preg_split( '/-->/i', $p[1], 2 );
 208                                 } else {
 209                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 210                                 }
 211                                 $marker = $rnd . sprintf('%08X', $n++);
 212                                 $content[$marker] = $q[0];
 213                                 $stripped .= $marker;
 214                                 $text = $q[1];
 215                         }
 216                 }
 217                 return $stripped;
 218         }
 219
 220         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 221         # If $render is set, performs necessary rendering operations on plugins
 222         # Returns the text, and fills an array with data needed in unstrip()
 223         # If the $state is already a valid strip state, it adds to the state
 224
 225         # When $stripcomments is set, HTML comments <!-- like this -->
 226         # will be stripped in addition to other tags. This is important
 227         # for section editing, where these comments cause confusion when
 228         # counting the sections in the wikisource
 229         function strip( $text, &$state, $stripcomments = false ) {
 230                 $render = ($this->mOutputType == OT_HTML);
 231                 $html_content = array();
 232                 $nowiki_content = array();
 233                 $math_content = array();
 234                 $pre_content = array();
 235                 $comment_content = array();
 236                 $ext_content = array();
 237
 238                 # Replace any instances of the placeholders
 239                 $uniq_prefix = UNIQ_PREFIX;
 240                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 241
 242                 # html
 243                 global $wgRawHtml, $wgWhitelistEdit;
 244                 if( $wgRawHtml && $wgWhitelistEdit ) {
 245                         $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
 246                         foreach( $html_content as $marker => $content ) {
 247                                 if ($render ) {
 248                                         # Raw and unchecked for validity.
 249                                         $html_content[$marker] = $content;
 250                                 } else {
 251                                         $html_content[$marker] = '<html>'.$content.'</html>';
 252                                 }
 253                         }
 254                 }
 255
 256                 # nowiki
 257                 $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
 258                 foreach( $nowiki_content as $marker => $content ) {
 259                         if( $render ){
 260                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 261                         } else {
 262                                 $nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
 263                         }
 264                 }
 265
 266                 # math
 267                 $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
 268                 foreach( $math_content as $marker => $content ){
 269                         if( $render ) {
 270                                 if( $this->mOptions->getUseTeX() ) {
 271                                         $math_content[$marker] = renderMath( $content );
 272                                 } else {
 273                                         $math_content[$marker] = '&lt;math&gt;'.$content.'&lt;math&gt;';
 274                                 }
 275                         } else {
 276                                 $math_content[$marker] = '<math>'.$content.'</math>';
 277                         }
 278                 }
 279
 280                 # pre
 281                 $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
 282                 foreach( $pre_content as $marker => $content ){
 283                         if( $render ){
 284                                 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
 285                         } else {
 286                                 $pre_content[$marker] = '<pre>'.$content.'</pre>';
 287                         }
 288                 }
 289
 290                 # Comments
 291                 if($stripcomments) {
 292                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 293                         foreach( $comment_content as $marker => $content ){
 294                                 $comment_content[$marker] = '<!--'.$content.'-->';
 295                         }
 296                 }
 297
 298                 # Extensions
 299                 foreach ( $this->mTagHooks as $tag => $callback ) {
 300                         $ext_contents[$tag] = array();
 301                         $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
 302                         foreach( $ext_content[$tag] as $marker => $content ) {
 303                                 if ( $render ) {
 304                                         $ext_content[$tag][$marker] = $callback( $content );
 305                                 } else {
 306                                         $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
 307                                 }
 308                         }
 309                 }
 310
 311                 # Merge state with the pre-existing state, if there is one
 312                 if ( $state ) {
 313                         $state['html'] = $state['html'] + $html_content;
 314                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 315                         $state['math'] = $state['math'] + $math_content;
 316                         $state['pre'] = $state['pre'] + $pre_content;
 317                         $state['comment'] = $state['comment'] + $comment_content;
 318
 319                         foreach( $ext_content as $tag => $array ) {
 320                                 if ( array_key_exists( $tag, $state ) ) {
 321                                         $state[$tag] = $state[$tag] + $array;
 322                                 }
 323                         }
 324                 } else {
 325                         $state = array(
 326                           'html' => $html_content,
 327                           'nowiki' => $nowiki_content,
 328                           'math' => $math_content,
 329                           'pre' => $pre_content,
 330                           'comment' => $comment_content,
 331                         ) + $ext_content;
 332                 }
 333                 return $text;
 334         }
 335
 336         # always call unstripNoWiki() after this one
 337         function unstrip( $text, &$state ) {
 338                 # Must expand in reverse order, otherwise nested tags will be corrupted
 339                 $contentDict = end( $state );
 340                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 341                         if( key($state) != 'nowiki' && key($state) != 'html') {
 342                                 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 343                                         $text = str_replace( key( $contentDict ), $content, $text );
 344                                 }
 345                         }
 346                 }
 347
 348                 return $text;
 349         }
 350         # always call this after unstrip() to preserve the order
 351         function unstripNoWiki( $text, &$state ) {
 352                 # Must expand in reverse order, otherwise nested tags will be corrupted
 353                 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
 354                         $text = str_replace( key( $state['nowiki'] ), $content, $text );
 355                 }
 356
 357                 global $wgRawHtml;
 358                 if ($wgRawHtml) {
 359                         for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
 360                                 $text = str_replace( key( $state['html'] ), $content, $text );
 361                         }
 362                 }
 363
 364                 return $text;
 365         }
 366
 367         # Add an item to the strip state
 368         # Returns the unique tag which must be inserted into the stripped text
 369         # The tag will be replaced with the original text in unstrip()
 370         function insertStripItem( $text, &$state ) {
 371                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 372                 if ( !$state ) {
 373                         $state = array(
 374                           'html' => array(),
 375                           'nowiki' => array(),
 376                           'math' => array(),
 377                           'pre' => array()
 378                         );
 379                 }
 380                 $state['item'][$rnd] = $text;
 381                 return $rnd;
 382         }
 383
 384         # Return allowed HTML attributes
 385         function getHTMLattrs () {
 386                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 387                                 'title', 'align', 'lang', 'dir', 'width', 'height',
 388                                 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
 389                                 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
 390                                 /* FONT */ 'type', 'start', 'value', 'compact',
 391                                 /* For various lists, mostly deprecated but safe */
 392                                 'summary', 'width', 'border', 'frame', 'rules',
 393                                 'cellspacing', 'cellpadding', 'valign', 'char',
 394                                 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
 395                                 'headers', 'scope', 'rowspan', 'colspan', /* Tables */
 396                                 'id', 'class', 'name', 'style' /* For CSS */
 397                                 );
 398                 return $htmlattrs ;
 399         }
 400
 401         # Remove non approved attributes and javascript in css
 402         function fixTagAttributes ( $t ) {
 403                 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
 404                 $htmlattrs = $this->getHTMLattrs() ;
 405
 406                 # Strip non-approved attributes from the tag
 407                 $t = preg_replace(
 408                         '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
 409                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 410                         $t);
 411
 412                 $t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557
 413
 414                 # Strip javascript "expression" from stylesheets. Brute force approach:
 415                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 416
 417                 if( preg_match(
 418                         '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
 419                         wfMungeToUtf8( $t ) ) )
 420                 {
 421                         $t='';
 422                 }
 423
 424                 return trim ( $t ) ;
 425         }
 426
 427         # interface with html tidy, used if $wgUseTidy = true
 428         function tidy ( $text ) {
 429                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 430                 global $wgInputEncoding, $wgOutputEncoding;
 431                 $fname = 'Parser::tidy';
 432                 wfProfileIn( $fname );
 433
 434                 $cleansource = '';
 435                 switch(strtoupper($wgOutputEncoding)) {
 436                         case 'ISO-8859-1':
 437                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 438                                 break;
 439                         case 'UTF-8':
 440                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 441                                 break;
 442                         default:
 443                                 $wgTidyOpts .= ' -raw';
 444                         }
 445
 446                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 447 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 448 '<head><title>test</title></head><body>'.$text.'</body></html>';
 449                 $descriptorspec = array(
 450                         0 => array('pipe', 'r'),
 451                         1 => array('pipe', 'w'),
 452                         2 => array('file', '/dev/null', 'a')
 453                 );
 454                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 455                 if (is_resource($process)) {
 456                         fwrite($pipes[0], $wrappedtext);
 457                         fclose($pipes[0]);
 458                         while (!feof($pipes[1])) {
 459                                 $cleansource .= fgets($pipes[1], 1024);
 460                         }
 461                         fclose($pipes[1]);
 462                         $return_value = proc_close($process);
 463                 }
 464
 465                 wfProfileOut( $fname );
 466
 467                 if( $cleansource == '' && $text != '') {
 468                         wfDebug( "Tidy error detected!\n" );
 469                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 470                 } else {
 471                         return $cleansource;
 472                 }
 473         }
 474
 475         # parse the wiki syntax used to render tables
 476         function doTableStuff ( $t ) {
 477                 $fname = 'Parser::doTableStuff';
 478                 wfProfileIn( $fname );
 479
 480                 $t = explode ( "\n" , $t ) ;
 481                 $td = array () ; # Is currently a td tag open?
 482                 $ltd = array () ; # Was it TD or TH?
 483                 $tr = array () ; # Is currently a tr tag open?
 484                 $ltr = array () ; # tr attributes
 485                 $indent_level = 0; # indent level of the table
 486                 foreach ( $t AS $k => $x )
 487                 {
 488                         $x = trim ( $x ) ;
 489                         $fc = substr ( $x , 0 , 1 ) ;
 490                         if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
 491                                 $indent_level = strlen( $matches[1] );
 492                                 $t[$k] = "\n" .
 493                                         str_repeat( '<dl><dd>', $indent_level ) .
 494                                         '<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
 495                                 array_push ( $td , false ) ;
 496                                 array_push ( $ltd , '' ) ;
 497                                 array_push ( $tr , false ) ;
 498                                 array_push ( $ltr , '' ) ;
 499                         }
 500                         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 501                         else if ( '|}' == substr ( $x , 0 , 2 ) ) {
 502                                 $z = "</table>\n" ;
 503                                 $l = array_pop ( $ltd ) ;
 504                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 505                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 506                                 array_pop ( $ltr ) ;
 507                                 $t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
 508                         }
 509                         else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
 510                                 $x = substr ( $x , 1 ) ;
 511                                 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 512                                 $z = '' ;
 513                                 $l = array_pop ( $ltd ) ;
 514                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 515                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 516                                 array_pop ( $ltr ) ;
 517                                 $t[$k] = $z ;
 518                                 array_push ( $tr , false ) ;
 519                                 array_push ( $td , false ) ;
 520                                 array_push ( $ltd , '' ) ;
 521                                 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 522                         }
 523                         else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
 524                                 if ( '|+' == substr ( $x , 0 , 2 ) ) {
 525                                         $fc = '+' ;
 526                                         $x = substr ( $x , 1 ) ;
 527                                 }
 528                                 $after = substr ( $x , 1 ) ;
 529                                 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
 530                                 $after = explode ( '||' , $after ) ;
 531                                 $t[$k] = '' ;
 532                                 foreach ( $after AS $theline )
 533                                 {
 534                                         $z = '' ;
 535                                         if ( $fc != '+' )
 536                                         {
 537                                                 $tra = array_pop ( $ltr ) ;
 538                                                 if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
 539                                                 array_push ( $tr , true ) ;
 540                                                 array_push ( $ltr , '' ) ;
 541                                         }
 542
 543                                         $l = array_pop ( $ltd ) ;
 544                                         if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 545                                         if ( $fc == '|' ) $l = 'td' ;
 546                                         else if ( $fc == '!' ) $l = 'th' ;
 547                                         else if ( $fc == '+' ) $l = 'caption' ;
 548                                         else $l = '' ;
 549                                         array_push ( $ltd , $l ) ;
 550                                         $y = explode ( '|' , $theline , 2 ) ;
 551                                         if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 552                                         else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 553                                         $t[$k] .= $y ;
 554                                         array_push ( $td , true ) ;
 555                                 }
 556                         }
 557                 }
 558
 559                 # Closing open td, tr && table
 560                 while ( count ( $td ) > 0 )
 561                 {
 562                         if ( array_pop ( $td ) ) $t[] = '</td>' ;
 563                         if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
 564                         $t[] = '</table>' ;
 565                 }
 566
 567                 $t = implode ( "\n" , $t ) ;
 568                 #               $t = $this->removeHTMLtags( $t );
 569                 wfProfileOut( $fname );
 570                 return $t ;
 571         }
 572
 573         # Parses the text and adds the result to the strip state
 574         # Returns the strip tag
 575         function stripParse( $text, $newline, $args ) {
 576                 $text = $this->strip( $text, $this->mStripState );
 577                 $text = $this->internalParse( $text, (bool)$newline, $args, false );
 578                 return $newline.$this->insertStripItem( $text, $this->mStripState );
 579         }
 580
 581         function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
 582         global $wgLang;
 583
 584                 $fname = 'Parser::internalParse';
 585                 wfProfileIn( $fname );
 586
 587                 $text = $this->removeHTMLtags( $text );
 588                 $text = $this->replaceVariables( $text, $args );
 589
 590                 $text = $wgLang->convert($text);
 591
 592                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
 593
 594                 $text = $this->doHeadings( $text );
 595                 if($this->mOptions->getUseDynamicDates()) {
 596                         global $wgDateFormatter;
 597                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 598                 }
 599                 $text = $this->doAllQuotes( $text );
 600                 $text = $this->replaceExternalLinks( $text );
 601                 $text = $this->doMagicLinks( $text );
 602                 $text = $this->replaceInternalLinks ( $text );
 603                 $text = $this->replaceInternalLinks ( $text );
 604
 605                 $text = $this->unstrip( $text, $this->mStripState );
 606                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 607
 608                 $text = $this->doTableStuff( $text );
 609                 $text = $this->formatHeadings( $text, $isMain );
 610                 $sk =& $this->mOptions->getSkin();
 611                 $text = $sk->transformContent( $text );
 612
 613                 wfProfileOut( $fname );
 614                 return $text;
 615         }
 616
 617         /* private */ function &doMagicLinks( &$text ) {
                     # Applies magicISBN(), optionally magicGEO(), then magicRFC() to
                     # $text. $text is taken by reference, so the caller's variable is
                     # overwritten as well; the same text is also returned (by
                     # reference).
 618                 global $wgUseGeoMode;
 619                 $text = $this->magicISBN( $text );
                     # magicGEO() only runs when $wgUseGeoMode is set and truthy.
 620                 if ( isset( $wgUseGeoMode ) && $wgUseGeoMode ) {
 621                         $text = $this->magicGEO( $text );
 622                 }
 623                 $text = $this->magicRFC( $text );
 624                 return $text;
 625         }
 626
 627         # Parse ^^ tokens and return html
             #
             # Every ^^...^^ pair in $text is replaced by
             # <small><sup>...</sup></small>. The pattern has no "s" modifier, so a
             # pair never spans a newline.
             #   $text  - text to scan
             #   return - $text with each pair replaced
 628         /* private */ function doExponent ( $text ) {
 629                 $fname = 'Parser::doExponent';
 630                 wfProfileIn( $fname);
                     # The quantifier is lazy so that two pairs on the same line become
                     # two separate superscripts; a greedy match would swallow
                     # everything from the first ^^ to the last ^^ on the line.
 631                 $text = preg_replace('/\^\^(.*?)\^\^/','<small><sup>\\1</sup></small>', $text);
 632                 wfProfileOut( $fname);
 633                 return $text;
 634         }
 635
 636         # Parse headers and return html
             #
             # A line of the form "==Title==" (one to six "=" on each side) becomes
             # <hN>Title</hN>, where N is the number of "=" signs.
             #   $text  - text to scan
             #   return - $text with heading lines replaced
 637         /* private */ function doHeadings( $text ) {
 638                 $fname = 'Parser::doHeadings';
 639                 wfProfileIn( $fname );
                     # Work from six "=" down to one so that the longest marker is
                     # tried first.
 640                 for ( $i = 6; $i >= 1; --$i ) {
 641                         $h = substr( '======', 0, $i );
                             # "m" makes ^ match at the start of every line. The closing
                             # marker must be followed by whitespace or end of line; that
                             # character is captured and written back as \2.
 642                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 643                           "<h{$i}>\\1</h{$i}>\\2", $text );
 644                 }
 645                 wfProfileOut( $fname );
 646                 return $text;
 647         }
 648
 649         /* private */ function doAllQuotes( $text ) {
                     # Applies doQuotes() to every line of $text separately and glues
                     # the results back together with "\n".
                     #   $text  - text that may span several lines
                     #   return - the processed lines joined by "\n"
 650                 $fname = 'Parser::doAllQuotes';
 651                 wfProfileIn( $fname );
 652                 $outtext = '';
 653                 $lines = explode( "\n", $text );
 654                 foreach ( $lines as $line ) {
 655                         $outtext .= $this->doQuotes ( $line ) . "\n";
 656                 }
                     # The loop appended one "\n" too many; drop it.
 657                 $outtext = substr($outtext, 0,-1);
 658                 wfProfileOut( $fname );
 659                 return $outtext;
 660         }
 661
 662         /* private */ function doQuotes( $text ) {
                     # Converts the apostrophe mark-up in $text into <i> and <b> tags.
                     # doAllQuotes() calls this once per line.
                     #   $text  - text to convert
                     #   return - $text unchanged when it holds no run of two or more
                     #            apostrophes, otherwise the converted text
                     #
                     # The text is split on runs of two or more apostrophes. Because the
                     # delimiters are captured, even indices of $arr hold plain text and
                     # odd indices hold the apostrophe runs.
 663                 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 664                 if (count ($arr) == 1)
 665                         return $text;
 666                 else
 667                 {
 668                         # First, do some preliminary work. This may shift some apostrophes from
 669                         # being mark-up to being text. It also counts the number of occurrences
 670                         # of bold and italics mark-ups.
                             # After this loop every odd entry is 2, 3 or 5 apostrophes long.
 671                         $i = 0;
 672                         $numbold = 0;
 673                         $numitalics = 0;
 674                         foreach ($arr as $r)
 675                         {
 676                                 if (($i % 2) == 1)
 677                                 {
 678                                         # If there are ever four apostrophes, assume the first is supposed to
 679                                         # be text, and the remaining three constitute mark-up for bold text.
 680                                         if (strlen ($arr[$i]) == 4)
 681                                         {
 682                                                 $arr[$i-1] .= "'";
 683                                                 $arr[$i] = "'''";
 684                                         }
 685                                         # If there are more than 5 apostrophes in a row, assume they're all
 686                                         # text except for the last 5.
 687                                         else if (strlen ($arr[$i]) > 5)
 688                                         {
 689                                                 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
 690                                                 $arr[$i] = "'''''";
 691                                         }
 692                                         # Count the number of occurrences of bold and italics mark-ups.
 693                                         # We are not counting sequences of five apostrophes.
                                             # (A run of five does bump both counters, which leaves the
                                             # "both odd" test below unaffected relative to skipping it.)
 694                                         if (strlen ($arr[$i]) == 2) $numitalics++;  else
 695                                         if (strlen ($arr[$i]) == 3) $numbold++;     else
 696                                         if (strlen ($arr[$i]) == 5) { $numitalics++; $numbold++; }
 697                                 }
 698                                 $i++;
 699                         }
 700
 701                         # If there is an odd number of both bold and italics, it is likely
 702                         # that one of the bold ones was meant to be an apostrophe followed
 703                         # by italics. Which one we cannot know for certain, but it is more
 704                         # likely to be one that has a single-letter word before it.
 705                         if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
 706                         {
                                     # For each of the three categories remember the index of the
                                     # first bold run that falls into it; -1 means "none seen".
 707                                 $i = 0;
 708                                 $firstsingleletterword = -1;
 709                                 $firstmultiletterword = -1;
 710                                 $firstspace = -1;
 711                                 foreach ($arr as $r)
 712                                 {
 713                                         if (($i % 2 == 1) and (strlen ($r) == 3))
 714                                         {
                                                     # $x1 is the last character of the text before the
                                                     # run, $x2 the one before that.
 715                                                 $x1 = substr ($arr[$i-1], -1);
 716                                                 $x2 = substr ($arr[$i-1], -2, 1);
 717                                                 if ($x1 == ' ') {
 718                                                         if ($firstspace == -1) $firstspace = $i;
 719                                                 } else if ($x2 == ' ') {
 720                                                         if ($firstsingleletterword == -1) $firstsingleletterword = $i;
 721                                                 } else {
 722                                                         if ($firstmultiletterword == -1) $firstmultiletterword = $i;
 723                                                 }
 724                                         }
 725                                         $i++;
 726                                 }
 727
                                     # The chosen bold run is turned into italics and its first
                                     # apostrophe is appended to the preceding text.
 728                                 # If there is a single-letter word, use it!
 729                                 if ($firstsingleletterword > -1)
 730                                 {
 731                                         $arr [ $firstsingleletterword ] = "''";
 732                                         $arr [ $firstsingleletterword-1 ] .= "'";
 733                                 }
 734                                 # If not, but there's a multi-letter word, use that one.
 735                                 else if ($firstmultiletterword > -1)
 736                                 {
 737                                         $arr [ $firstmultiletterword ] = "''";
 738                                         $arr [ $firstmultiletterword-1 ] .= "'";
 739                                 }
 740                                 # ... otherwise use the first one that has neither.
 741                                 # (notice that it is possible for all three to be -1 if, for example,
 742                                 # there is only one pentuple-apostrophe in the line)
 743                                 else if ($firstspace > -1)
 744                                 {
 745                                         $arr [ $firstspace ] = "''";
 746                                         $arr [ $firstspace-1 ] .= "'";
 747                                 }
 748                         }
 749
 750                         # Now let's actually convert our apostrophic mush to HTML!
                             # $state names the tags that are currently open, in the order
                             # they were opened: '', 'i', 'b', 'bi' (<b> then <i>), 'ib'
                             # (<i> then <b>) or 'both'. 'both' is entered when a run of five
                             # is met with nothing open: the following text is held back in
                             # $buffer until the next run shows which tag must be the outer
                             # one.
 751                         $output = '';
 752                         $buffer = '';
 753                         $state = '';
 754                         $i = 0;
 755                         foreach ($arr as $r)
 756                         {
 757                                 if (($i % 2) == 0)
 758                                 {
                                             # Plain text: buffered while in 'both', emitted otherwise.
 759                                         if ($state == 'both')
 760                                                 $buffer .= $r;
 761                                         else
 762                                                 $output .= $r;
 763                                 }
 764                                 else
 765                                 {
                                             # Two apostrophes: toggle italics.
 766                                         if (strlen ($r) == 2)
 767                                         {
 768                                                 if ($state == 'i')
 769                                                 { $output .= '</i>'; $state = ''; }
 770                                                 else if ($state == 'bi')
 771                                                 { $output .= '</i>'; $state = 'b'; }
 772                                                 else if ($state == 'ib')
 773                                                 { $output .= '</b></i><b>'; $state = 'b'; }
 774                                                 else if ($state == 'both')
 775                                                 { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
 776                                                 else # $state can be 'b' or ''
 777                                                 { $output .= '<i>'; $state .= 'i'; }
 778                                         }
                                             # Three apostrophes: toggle bold.
 779                                         else if (strlen ($r) == 3)
 780                                         {
 781                                                 if ($state == 'b')
 782                                                 { $output .= '</b>'; $state = ''; }
 783                                                 else if ($state == 'bi')
 784                                                 { $output .= '</i></b><i>'; $state = 'i'; }
 785                                                 else if ($state == 'ib')
 786                                                 { $output .= '</b>'; $state = 'i'; }
 787                                                 else if ($state == 'both')
 788                                                 { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
 789                                                 else # $state can be 'i' or ''
 790                                                 { $output .= '<b>'; $state .= 'b'; }
 791                                         }
                                             # Five apostrophes: toggle bold and italics together.
 792                                         else if (strlen ($r) == 5)
 793                                         {
 794                                                 if ($state == 'b')
 795                                                 { $output .= '</b><i>'; $state = 'i'; }
 796                                                 else if ($state == 'i')
 797                                                 { $output .= '</i><b>'; $state = 'b'; }
 798                                                 else if ($state == 'bi')
 799                                                 { $output .= '</i></b>'; $state = ''; }
 800                                                 else if ($state == 'ib')
 801                                                 { $output .= '</b></i>'; $state = ''; }
 802                                                 else if ($state == 'both')
 803                                                 { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
 804                                                 else # ($state == '')
 805                                                 { $buffer = ''; $state = 'both'; }
 806                                         }
 807                                 }
 808                                 $i++;
 809                         }
 810                         # Now close all remaining tags.  Notice that the order is important.
                             # ('ib' closes </b> then </i>; 'bi' closes </i> then </b>.)
 811                         if ($state == 'b' || $state == 'ib')
 812                                 $output .= '</b>';
 813                         if ($state == 'i' || $state == 'bi' || $state == 'ib')
 814                                 $output .= '</i>';
 815                         if ($state == 'bi')
 816                                 $output .= '</b>';
                             # Still undecided at the end of the text: flush the buffer as
                             # bold italics.
 817                         if ($state == 'both')
 818                                 $output .= '<b><i>'.$buffer.'</i></b>';
 819                         return $output;
 820                 }
 821         }
 822
 823         # Note: we have to do external links before the internal ones,
 824         # and otherwise take great care in the order of things here, so
 825         # that we don't end up interpreting some URLs twice.
 826
 827         /* private */ function replaceExternalLinks( $text ) {
                     # Replaces bracketed external links in $text with <a> elements and
                     # hands every stretch of text between them to
                     # replaceFreeExternalLinks().
                     #   $text  - text to scan; note that the variable is reused inside
                     #            the loop to hold the text of the current link
                     #   return - the text with external links replaced
 828                 $fname = 'Parser::replaceExternalLinks';
 829                 wfProfileIn( $fname );
 830
 831                 $sk =& $this->mOptions->getSkin();
 832                 $linktrail = wfMsg('linktrail');
                     # The loop below reads $bits in groups of four (url, protocol, link
                     # text, trailing text), after the leading chunk has been shifted
                     # off. This assumes EXT_LINK_BRACKETED, defined outside this
                     # section, captures exactly three groups - TODO confirm.
 833                 $bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 834
 835                 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
 836
 837                 $i = 0;
 838                 while ( $i<count( $bits ) ) {
 839                         $url = $bits[$i++];
 840                         $protocol = $bits[$i++];
 841                         $text = $bits[$i++];
 842                         $trail = $bits[$i++];
 843
 844                         # If the link text is an image URL, replace it with an <img> tag
 845                         # This happened by accident in the original parser, but some people used it extensively
 846                         $img = $this->maybeMakeImageLink( $text );
 847                         if ( $img !== false ) {
 848                                 $text = $img;
 849                         }
 850
 851                         $dtrail = '';
 852
 853                         # No link text, e.g. [http://domain.tld/some.link]
 854                         if ( $text == '' ) {
 855                                 # Autonumber if allowed
                                     # (i.e. when $protocol occurs as a substring of HTTP_PROTOCOLS)
 856                                 if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
 857                                         $text = '[' . ++$this->mAutonumber . ']';
 858                                 } else {
 859                                         # Otherwise just use the URL
 860                                         $text = htmlspecialchars( $url );
 861                                 }
 862                         } else {
 863                                 # Have link text, e.g. [http://domain.tld/some.link text]s
 864                                 # Check for trail
                                     # $linktrail is used as a pattern: its group 1 is rendered
                                     # right after </a>, group 2 is what remains of the trail.
 865                                 if ( preg_match( $linktrail, $trail, $m2 ) ) {
 866                                         $dtrail = $m2[1];
 867                                         $trail = $m2[2];
 868                                 }
 869                         }
 870
 871                         $encUrl = htmlspecialchars( $url );
 872                         # Bit in parentheses showing the URL for the printable version
                             # Left out when the link text is the URL itself, or the URL
                             # without its "protocol://" prefix and optional trailing slash.
 873                         if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, '/' ) . "/?$!", $url ) ) {
 874                                 $paren = '';
 875                         } else {
 876                                 # Expand the URL for printable version
                                     # NOTE(review): $encUrl has already been through
                                     # htmlspecialchars() and is escaped a second time here -
                                     # confirm that this is intended.
 877                                 if ( ! $sk->suppressUrlExpansion() ) {
 878                                         $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
 879                                 } else {
 880                                         $paren = '';
 881                                 }
 882                         }
 883
 884                         # Process the trail (i.e. everything after this link up until start of the next link),
 885                         # replacing any non-bracketed links
 886                         $trail = $this->replaceFreeExternalLinks( $trail );
 887
 888                         $la = $sk->getExternalLinkAttributes( $url, $text );
 889
 890                         # Use the encoded URL
 891                         # This means that users can paste URLs directly into the text
 892                         # Funny characters like &ouml; aren't valid in URLs anyway
 893                         # This was changed in August 2004
                             # NOTE(review): the href below is built from $url, not from
                             # $encUrl; check which of the two the comment above refers to.
 894                         $s .= "<a href=\"{$url}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
 895                 }
 896
 897                 wfProfileOut( $fname );
 898                 return $s;
 899         }
 900
 901         # Replace anything that looks like a URL with a link
             #
             #   $text  - text to scan for bare URLs
             #   return - the text with each recognised URL replaced by a link or,
             #            where maybeMakeImageLink() produces one, an image
 902         function replaceFreeExternalLinks( $text ) {
                     # Split on "<protocol>:". After the leading chunk has been shifted
                     # off, $bits alternates between a protocol delimiter and the text
                     # that follows it.
 903                 $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 904                 $s = array_shift( $bits );
 905                 $i = 0;
 906
 907                 $sk =& $this->mOptions->getSkin();
 908
 909                 while ( $i < count( $bits ) ){
 910                         $protocol = $bits[$i++];
 911                         $remainder = $bits[$i++];
 912
                             # $m[1] is the leading run of characters accepted by
                             # EXT_LINK_URL_CLASS, $m[2] everything after it.
 913                         if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
 914                                 # Found some characters after the protocol that look promising
 915                                 $url = $protocol . $m[1];
 916                                 $trail = $m[2];
 917
 918                                 # Move trailing punctuation to $trail
                                     # NOTE(review): $sep is used as a strspn() character list,
                                     # not as a regex, so the backslash in it is itself treated
                                     # as a separator character - confirm that this is intended.
 919                                 $sep = ',;\.:!?';
 920                                 # If there is no left bracket, then consider right brackets fair game too
 921                                 if ( strpos( $url, '(' ) === false ) {
 922                                         $sep .= ')';
 923                                 }
 924
                                     # Reversing the URL lets strspn() count the separator
                                     # characters at its end.
 925                                 $numSepChars = strspn( strrev( $url ), $sep );
 926                                 if ( $numSepChars ) {
 927                                         $trail = substr( $url, -$numSepChars ) . $trail;
 928                                         $url = substr( $url, 0, -$numSepChars );
 929                                 }
 930
 931                                 # Replace &amp; from obsolete syntax with &
 932                                 $url = str_replace( '&amp;', '&', $url );
 933
 934                                 # Is this an external image?
 935                                 $text = $this->maybeMakeImageLink( $url );
 936                                 if ( $text === false ) {
 937                                         # Not an image, make a link
 938                                         $text = $sk->makeExternalLink( $url, $url );
 939                                 }
 940                                 $s .= $text . $trail;
 941                         } else {
                                     # Nothing URL-like follows the protocol: emit it untouched.
 942                                 $s .= $protocol . $remainder;
 943                         }
 944                 }
 945                 return $s;
 946         }
 947
 948         # make an image if it's allowed
             #
             #   $url   - candidate image URL
             #   return - the result of the skin's makeImage() when external images
             #            are allowed by the parser options and $url matches
             #            EXT_IMAGE_REGEX; false otherwise
 949         function maybeMakeImageLink( $url ) {
 950                 $sk =& $this->mOptions->getSkin();
 951                 $text = false;
 952                 if ( $this->mOptions->getAllowExternalImages() ) {
 953                         if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
 954                                 # Image found
                                     # The URL is HTML-escaped before it is passed to the skin.
 955                                 $text = $sk->makeImage( htmlspecialchars( $url ) );
 956                         }
 957                 }
 958                 return $text;
 959         }
 960
 961         # The wikilinks [[ ]] are processed here.
             #
             #   $s     - text to scan
             #   return - the text with every recognised [[...]] link replaced by the
             #            skin's rendering of it. Pieces that do not form a valid link
             #            are written back unchanged, with the "[[" restored.
             # As a side effect, language links and category links are collected on
             # $this->mOutput and image/category links are reported to $wgLinkCache.
 962         /* private */ function replaceInternalLinks( $s ) {
 963                 global $wgLang, $wgLinkCache;
 964                 global $wgNamespacesWithSubpages;
 965                 static $fname = 'Parser::replaceInternalLinks' ;
 966                 wfProfileIn( $fname );
 967
 968                 wfProfileIn( $fname.'-setup' );
 969                 static $tc = FALSE;
 970                 # the % is needed to support urlencoded titles as well
 971                 if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
 972                 $sk =& $this->mOptions->getSkin();
 973
                     # NOTE(review): $redirect is assigned here but not read again in
                     # this function.
 974                 $redirect = MagicWord::get ( MAG_REDIRECT ) ;
 975
                     # Split on "[[". A space is prepended before the split and removed
                     # from the first piece afterwards. $s keeps the text before the
                     # first "[[" and then serves as the output accumulator; each
                     # element of $a starts right after a "[[".
 976                 $a = explode( '[[', ' ' . $s );
 977                 $s = array_shift( $a );
 978                 $s = substr( $s, 1 );
 979
 980                 # Match a link having the form [[namespace:link|alternate]]trail
                     # (group 1: target, group 2: optional alternate text, group 3:
                     # everything after the closing "]]")
 981                 static $e1 = FALSE;
 982                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 983                 # Match the end of a line for a word that's not followed by whitespace,
 984                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 985                 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
 986
 987                 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
 988                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 989
 990                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 991
                     # With the link prefix extension on, the word glued to the first
                     # "[[" is cut off the leading text and kept for the first link.
 992                 if ( $useLinkPrefixExtension ) {
 993                         if ( preg_match( $e2, $s, $m ) ) {
 994                                 $first_prefix = $m[2];
 995                                 $s = $m[1];
 996                         } else {
 997                                 $first_prefix = false;
 998                         }
 999                 } else {
1000                         $prefix = '';
1001                 }
1002
1003                 wfProfileOut( $fname.'-setup' );
1004
1005                 # start processing each line
1006                 foreach ( $a as $line ) {
1007                         wfProfileIn( $fname.'-prefixhandling' );
1008                         if ( $useLinkPrefixExtension ) {
                                     # Cut the word directly before this "[[" off the end of the
                                     # output gathered so far.
1009                                 if ( preg_match( $e2, $s, $m ) ) {
1010                                         $prefix = $m[2];
1011                                         $s = $m[1];
1012                                 } else {
1013                                         $prefix='';
1014                                 }
1015                                 # first link
1016                                 if($first_prefix) {
1017                                         $prefix = $first_prefix;
1018                                         $first_prefix = false;
1019                                 }
1020                         }
1021                         wfProfileOut( $fname.'-prefixhandling' );
1022
1023                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1024                                 $text = $m[2];
1025                                 # fix up urlencoded title texts
1026                                 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1027                                 $trail = $m[3];
1028                         } else { # Invalid form; output directly
1029                                 $s .= $prefix . '[[' . $line ;
1030                                 continue;
1031                         }
1032
1033                         # Valid link forms:
1034                         # Foobar -- normal
1035                         # :Foobar -- override special treatment of prefix (images, language links)
1036                         # /Foobar -- convert to CurrentPage/Foobar
1037                         # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1038
1039                         # Look at the first character
                             # $noforce is false only when the target starts with ":".
1040                         $c = substr($m[1],0,1);
1041                         $noforce = ($c != ':');
1042
1043                         # subpage
1044                         if( $c == '/' ) {
1045                                 # / at end means we don't want the slash to be shown
1046                                 if(substr($m[1],-1,1)=='/') {
1047                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
1048                                         $noslash=$m[1];
1049                                 } else {
1050                                         $noslash=substr($m[1],1);
1051                                 }
1052
1053                                 # Some namespaces don't allow subpages
1054                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
1055                                         # subpages allowed here
1056                                         $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
1057                                         if( '' == $text ) {
1058                                                 $text= $m[1];
1059                                         } # this might be changed for ugliness reasons
1060                                 } else {
1061                                         # no subpage allowed, use standard link
1062                                         $link = $noslash;
1063                                 }
1064
1065                         } elseif( $noforce ) { # no subpage
1066                                 $link = $m[1];
1067                         } else {
1068                                 # We don't want to keep the first character
1069                                 $link = substr( $m[1], 1 );
1070                         }
1071
                             # Without alternate text the link target doubles as the text.
1072                         $wasblank = ( '' == $text );
1073                         if( $wasblank ) $text = $link;
1074
                             # A target that does not yield a Title object is written back
                             # unchanged.
1075                         $nt = Title::newFromText( $link );
1076                         if( !$nt ) {
1077                                 $s .= $prefix . '[[' . $line;
1078                                 continue;
1079                         }
1080
1081                         $ns = $nt->getNamespace();
1082                         $iw = $nt->getInterWiki();
1083
1084                         # Link not escaped by : , create the various objects
1085                         if( $noforce ) {
1086
1087                                 # Interwikis
                                     # Recorded as a language link instead of being rendered in
                                     # place; prefix and trail are dropped when they hold only
                                     # whitespace.
1088                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1089                                         array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
1090                                         $tmp = $prefix . $trail ;
1091                                         $s .= (trim($tmp) == '')? '': $tmp;
1092                                         continue;
1093                                 }
1094
1095                                 if ( $ns == NS_IMAGE ) {
1096                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1097                                         $wgLinkCache->addImageLinkObj( $nt );
1098                                         continue;
1099                                 }
1100
1101                                 if ( $ns == NS_CATEGORY ) {
1102                                         $t = $nt->getText() ;
1103                                         $nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).':'.$t ) ;
1104
                                             # The link cache is suspended and post-parse link
                                             # colouring switched off while the category link is
                                             # built; both are restored right afterwards.
1105                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1106                                         $pPLC=$sk->postParseLinkColour();
1107                                         $sk->postParseLinkColour( false );
1108                                         $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1109                                         $sk->postParseLinkColour( $pPLC );
1110                                         $wgLinkCache->resume();
1111
                                             # Sort key: the alternate text when one was given,
                                             # otherwise the current page's title (without the
                                             # namespace prefix when the page is itself a category).
1112                                         if ( $wasblank ) {
1113                                                 if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
1114                                                         $sortkey = $this->mTitle->getText();
1115                                                 } else {
1116                                                         $sortkey = $this->mTitle->getPrefixedText();
1117                                                 }
1118                                         } else {
1119                                                 $sortkey = $text;
1120                                         }
1121                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1122                                         $this->mOutput->mCategoryLinks[] = $t ;
                                             # The category link itself is not rendered in place.
1123                                         $s .= $prefix . $trail ;
1124                                         continue;
1125                                 }
1126                         }
1127
1128                         if( ( $nt->getPrefixedText() === $this->mTitle->getPrefixedText() ) &&
1129                             ( strpos( $link, '#' ) === FALSE ) ) {
1130                                 # Self-links are handled specially; generally de-link and change to bold.
1131                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1132                                 continue;
1133                         }
1134
1135                         if( $ns == NS_MEDIA ) {
1136                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1137                                 $wgLinkCache->addImageLinkObj( $nt );
1138                                 continue;
1139                         } elseif( $ns == NS_SPECIAL ) {
1140                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1141                                 continue;
1142                         }
                             # Everything else is an ordinary page link.
1143                         $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1144                 }
1145                 wfProfileOut( $fname );
1146                 return $s;
1147         }
1148
1149         # Some functions here used by doBlockLevels()
1150         #
             # closeParagraph() returns the closing tag (followed by a newline) for
             # the element named in $this->mLastSection, or '' when none is recorded.
             # It always resets $this->mInPre to false and $this->mLastSection to ''.
1151         /* private */ function closeParagraph() {
1152                 $result = '';
1153                 if ( '' != $this->mLastSection ) {
1154                         $result = '</' . $this->mLastSection  . ">\n";
1155                 }
1156                 $this->mInPre = false;
1157                 $this->mLastSection = '';
1158                 return $result;
1159         }
1160         # getCommon() returns the length of the longest common substring
1161         # of both arguments, starting at the beginning of both.
1162         #
             #   $st1, $st2 - the two strings to compare, byte by byte
             #   return     - number of leading bytes the strings share; 0 when
             #                either string is empty
1163         /* private */ function getCommon( $st1, $st2 ) {
1164                 $fl = strlen( $st1 );
1165                 $shorter = strlen( $st2 );
                     # Only the length of the shorter string needs to be examined.
1166                 if ( $fl < $shorter ) { $shorter = $fl; }
1167
                     # Square brackets are used for the string offsets: the curly-brace
                     # form is deprecated as of PHP 7.4 and removed in PHP 8.0.
1168                 for ( $i = 0; $i < $shorter; ++$i ) {
1169                         if ( $st1[$i] !== $st2[$i] ) { break; }
1170                 }
                     # $i is the index of the first mismatch, or $shorter when the
                     # shorter string is a prefix of the other.
1171                 return $i;
1172         }
1173         # These next three functions open, continue, and close the list
1174         # element appropriate to the prefix character passed into them.
1175         #
             # openList() first closes any open paragraph, then returns the opening
             # tags for the list type selected by $char:
             #   '*' => <ul><li>   '#' => <ol><li>   ':' => <dl><dd>   ';' => <dl><dt>
             # ';' additionally sets $this->mDTopen.
1176         /* private */ function openList( $char ) {
1177                 $result = $this->closeParagraph();
1178
1179                 if ( '*' == $char ) { $result .= '<ul><li>'; }
1180                 else if ( '#' == $char ) { $result .= '<ol><li>'; }
1181                 else if ( ':' == $char ) { $result .= '<dl><dd>'; }
1182                 else if ( ';' == $char ) {
1183                         $result .= '<dl><dt>';
1184                         $this->mDTopen = true;
1185                 }
                     # NOTE(review): for an unknown $char the marker replaces (rather
                     # than extends) $result, so the closing tag that closeParagraph()
                     # returned is dropped - confirm that this is intended.
1186                 else { $result = '<!-- ERR 1 -->'; }
1187
1188                 return $result;
1189         }
1190
/**
 * Close the current list item and open the next one at the same level.
 *
 * For definition lists the closing tag depends on whether a term (<dt>)
 * or a definition (<dd>) is currently open, tracked in $this->mDTopen,
 * which is updated to reflect the item being opened.
 *
 * @param string $char prefix character of the new item
 * @return string item separator markup, or '<!-- ERR 2 -->' for an
 *                unknown prefix character
 */
/* private */ function nextItem( $char ) {
        switch ( $char ) {
                case '*':
                case '#':
                        return '</li><li>';
                case ':':
                case ';':
                        $closeTag = $this->mDTopen ? '</dt>' : '</dd>';
                        $startsTerm = ( ';' == $char );
                        $this->mDTopen = $startsTerm;
                        return $closeTag . ( $startsTerm ? '<dt>' : '<dd>' );
        }
        return '<!-- ERR 2 -->';
}
1206
/**
 * Close the last item of a list together with the list itself.
 *
 * ';' is not handled here: doBlockLevels() normalises ';' to ':' in the
 * prefixes it passes in, and the open term/definition is told apart via
 * $this->mDTopen (cleared when a term is closed).
 *
 * @param string $char '*', '#' or ':'
 * @return string closing markup followed by a newline, or the marker
 *                '<!-- ERR 3 -->' (without newline) for anything else
 */
/* private */ function closeList( $char ) {
        switch ( $char ) {
                case '*':
                        return '</li></ul>' . "\n";
                case '#':
                        return '</li></ol>' . "\n";
                case ':':
                        if ( !$this->mDTopen ) {
                                return '</dd></dl>' . "\n";
                        }
                        $this->mDTopen = false;
                        return '</dt></dl>' . "\n";
        }
        return '<!-- ERR 3 -->';
}
1221
/**
 * Convert line-oriented wiki markup into block-level HTML.
 *
 * The text is processed one line at a time:
 *  - leading runs of '*', '#', ':' and ';' open, continue and close
 *    (possibly nested) lists;
 *  - lines starting with a space become <pre> sections;
 *  - other non-blank lines are wrapped in <p> paragraphs;
 *  - lines containing block-level HTML tags close the current paragraph
 *    and are passed through as they are.
 *
 * Reads and updates $this->mInPre, $this->mLastSection and $this->mDTopen.
 *
 * @param string $text      markup to process
 * @param bool   $linestart false when $text does not begin at the start of
 *                          a line; its first line is then copied to the
 *                          output untouched and without a trailing newline
 * @return string generated markup
 */
/* private */ function doBlockLevels( $text, $linestart ) {
        $fname = 'Parser::doBlockLevels';
        wfProfileIn( $fname );

        # Parsing through the text line by line.  The main thing
        # happening here is handling of block-level elements p, pre,
        # and making lists from lines starting with * # : etc.
        #
        $textLines = explode( "\n", $text );

        $lastPrefix = $output = '';
        $this->mDTopen = $inBlockElem = false;
        $prefixLength = 0;
        # Paragraph markup ('<p>' or '</p><p>') held back after a blank line
        # until the following line shows what to do with it; false when
        # nothing is pending. While something is pending the current line
        # itself is not written to the output.
        $paragraphStack = false;

        if ( !$linestart ) {
                $output .= array_shift( $textLines );
        }
        foreach ( $textLines as $oLine ) {
                $lastPrefixLength = strlen( $lastPrefix );
                $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
                $preOpenMatch = preg_match('/<pre/i', $oLine );
                if ( !$this->mInPre ) {
                        # Multiple prefixes may abut each other for nested lists.
                        $prefixLength = strspn( $oLine, '*#:;' );
                        $pref = substr( $oLine, 0, $prefixLength );

                        # ';' (term) and ':' (definition) belong to the same <dl>,
                        # so prefixes are compared with ';' normalised to ':'.
                        $pref2 = str_replace( ';', ':', $pref );
                        $t = substr( $oLine, $prefixLength );
                        $this->mInPre = !empty($preOpenMatch);
                } else {
                        # Don't interpret any other prefixes in preformatted text
                        $prefixLength = 0;
                        $pref = $pref2 = '';
                        $t = $oLine;
                }

                # List generation
                if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
                        # Same as the last item, so no need to deal with nesting or opening stuff
                        $output .= $this->nextItem( substr( $pref, -1 ) );
                        $paragraphStack = false;

                        if ( substr( $pref, -1 ) == ';') {
                                # The one nasty exception: definition lists work like this:
                                # ; title : definition text
                                # So we check for : in the remainder text to split up the
                                # title and definition, without b0rking links.
                                # FIXME: This is not foolproof. Something better in Tokenizer might help.
                                if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                                        $term = $match[1];
                                        $output .= $term . $this->nextItem( ':' );
                                        $t = $match[2];
                                }
                        }
                } elseif( $prefixLength || $lastPrefixLength ) {
                        # Either open or close a level...
                        $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
                        $paragraphStack = false;

                        # Close every level of the previous line that is not shared
                        # with this one, innermost first.
                        while( $commonPrefixLength < $lastPrefixLength ) {
                                $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                                --$lastPrefixLength;
                        }
                        # This line is no deeper than the shared part: it is simply the
                        # next item of the innermost shared list.
                        if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                                $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
                        }
                        # Open every additional level this line introduces.
                        while ( $prefixLength > $commonPrefixLength ) {
                                $char = substr( $pref, $commonPrefixLength, 1 );
                                $output .= $this->openList( $char );

                                if ( ';' == $char ) {
                                        # FIXME: This is dupe of code above
                                        if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                                                $term = $match[1];
                                                $output .= $term . $this->nextItem( ':' );
                                                $t = $match[2];
                                        }
                                }
                                ++$commonPrefixLength;
                        }
                        $lastPrefix = $pref2;
                }
                if( 0 == $prefixLength ) {
                        # No prefix (not in list)--go to paragraph mode
                        $uniq_prefix = UNIQ_PREFIX;
                        // XXX: use a stack for nestable elements like span, table and div
                        $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
                        $closematch = preg_match(
                                '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
                                '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
                        if ( $openmatch or $closematch ) {
                                $paragraphStack = false;
                                $output .= $this->closeParagraph();
                                # closeParagraph() clears mInPre; re-enter pre mode when
                                # this line opens a <pre> that it does not close again.
                                if($preOpenMatch and !$preCloseMatch) {
                                        $this->mInPre = true;
                                }
                                if ( $closematch ) {
                                        $inBlockElem = false;
                                } else {
                                        $inBlockElem = true;
                                }
                        } else if ( !$inBlockElem && !$this->mInPre ) {
                                # substr() instead of a direct offset so that an empty line
                                # does not read a non-existent character.
                                if ( ' ' === substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
                                        // pre
                                        if ($this->mLastSection != 'pre') {
                                                $paragraphStack = false;
                                                $output .= $this->closeParagraph().'<pre>';
                                                $this->mLastSection = 'pre';
                                        }
                                        $t = substr( $t, 1 );
                                } else {
                                        // paragraph
                                        if ( '' == trim($t) ) {
                                                if ( $paragraphStack ) {
                                                        # Second blank line in a row: emit the pending
                                                        # paragraph markup plus an explicit line break.
                                                        $output .= $paragraphStack.'<br />';
                                                        $paragraphStack = false;
                                                        $this->mLastSection = 'p';
                                                } else {
                                                        if ($this->mLastSection != 'p' ) {
                                                                $output .= $this->closeParagraph();
                                                                $this->mLastSection = '';
                                                                $paragraphStack = '<p>';
                                                        } else {
                                                                $paragraphStack = '</p><p>';
                                                        }
                                                }
                                        } else {
                                                if ( $paragraphStack ) {
                                                        $output .= $paragraphStack;
                                                        $paragraphStack = false;
                                                        $this->mLastSection = 'p';
                                                } else if ($this->mLastSection != 'p') {
                                                        $output .= $this->closeParagraph().'<p>';
                                                        $this->mLastSection = 'p';
                                                }
                                        }
                                }
                        }
                }
                if ($paragraphStack === false) {
                        $output .= $t."\n";
                }
        }
        # Close the lists still open after the last line, innermost first.
        while ( $prefixLength ) {
                $output .= $this->closeList( $pref2[$prefixLength-1] );
                --$prefixLength;
        }
        if ( '' != $this->mLastSection ) {
                $output .= '</' . $this->mLastSection . '>';
                $this->mLastSection = '';
        }

        wfProfileOut( $fname );
        return $output;
}
1379
/**
 * Return the value of a magic variable (like PAGENAME).
 *
 * Date-based variables are computed with date(); page-based ones come
 * from $this->mTitle; the rest from site globals.
 *
 * @param mixed $index one of the MAG_* variable identifiers
 * @return mixed the variable's current value, or NULL when $index is not
 *               one of the identifiers handled here
 */
function getVariableValue( $index ) {
        global $wgLang, $wgSitename, $wgServer;

        $value = NULL;

        switch ( $index ) {
                case MAG_CURRENTMONTH:
                        $value = $wgLang->formatNum( date( 'm' ) );
                        break;
                case MAG_CURRENTMONTHNAME:
                        $value = $wgLang->getMonthName( date('n') );
                        break;
                case MAG_CURRENTMONTHNAMEGEN:
                        $value = $wgLang->getMonthNameGen( date('n') );
                        break;
                case MAG_CURRENTDAY:
                        $value = $wgLang->formatNum( date('j') );
                        break;
                case MAG_PAGENAME:
                        $value = $this->mTitle->getText();
                        break;
                case MAG_PAGENAMEE:
                        $value = $this->mTitle->getPartialURL();
                        break;
                case MAG_NAMESPACE:
                        # Localised namespace name rather than the canonical one
                        # (patch by Dori).
                        $value = $wgLang->getNsText( $this->mTitle->getNamespace() );
                        break;
                case MAG_CURRENTDAYNAME:
                        # date('w') is 0-based (0 = Sunday); getWeekdayName() is
                        # called with the 1-based number.
                        $value = $wgLang->getWeekdayName( date('w')+1 );
                        break;
                case MAG_CURRENTYEAR:
                        $value = $wgLang->formatNum( date( 'Y' ) );
                        break;
                case MAG_CURRENTTIME:
                        $value = $wgLang->time( wfTimestampNow(), false );
                        break;
                case MAG_NUMBEROFARTICLES:
                        $value = $wgLang->formatNum( wfNumberOfArticles() );
                        break;
                case MAG_SITENAME:
                        $value = $wgSitename;
                        break;
                case MAG_SERVER:
                        $value = $wgServer;
                        break;
        }

        return $value;
}
1416
/**
 * Initialise the magic variables (like CURRENTMONTHNAME).
 *
 * Rebuilds $this->mVariables from scratch: for every identifier listed in
 * $wgVariableIDs the matching MagicWord is asked to add itself, together
 * with the variable's current value, to the array.
 */
function initialiseVariables() {
        global $wgVariableIDs;

        $this->mVariables = array();

        foreach ( $wgVariableIDs as $variableId ) {
                $magicWord =& MagicWord::get( $variableId );
                $currentValue = $this->getVariableValue( $variableId );
                $magicWord->addToArray( $this->mVariables, $currentValue );
        }
}
1426
/**
 * Expand {{variables}}, {{{arguments}}} and {{templates}} in a text.
 *
 * Which passes run depends on $this->mOutputType: variables are replaced
 * for HTML and message output, arguments for HTML output only, and the
 * template pass always runs. Text longer than MAX_INCLUDE_SIZE is returned
 * unchanged.
 *
 * The replacement callbacks are global functions; this parser is published
 * in $GLOBALS['wgCurParser'] before they are invoked.
 *
 * @param string $text text to expand
 * @param array  $args template arguments in effect for this text
 * @return string expanded text
 */
/* private */ function replaceVariables( $text, $args = array() ) {
        global $wgLang, $wgScript, $wgArticlePath;

        # Prevent too big inclusions
        if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
                return $text;
        }

        $fname = 'Parser::replaceVariables';
        wfProfileIn( $fname );

        # Character classes for the patterns below: everything legal in a
        # title, and the same set without the brace characters.
        $titleChars = Title::legalChars();
        $nonBraceChars = str_replace( array( '{', '}' ), '', $titleChars );

        # This function is called recursively, so the arguments of the text
        # currently being expanded are kept on a stack.
        array_push( $this->mArgStack, $args );

        # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
        $GLOBALS['wgCurParser'] =& $this;

        # Variable substitution
        if ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_MSG ) {
                $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", 'wfVariableSubstitution', $text );
        }

        # Argument substitution
        if ( $this->mOutputType == OT_HTML ) {
                $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
        }

        # Template substitution
        $templatePattern = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
        $text = preg_replace_callback( $templatePattern, 'wfBraceSubstitution', $text );

        array_pop( $this->mArgStack );

        wfProfileOut( $fname );
        return $text;
}
1465
/**
 * Replace one {{...}} match with the value of the magic variable of that
 * name, if there is one.
 *
 * The variable table is built on first use. A successful replacement
 * flags the output as containing old-style magic.
 *
 * @param array $matches regex match: [0] whole match, [1] text between
 *                       the braces
 * @return string the variable's value, or the unchanged match when the
 *                name is not a known variable
 */
function variableSubstitution( $matches ) {
        if ( !$this->mVariables ) {
                $this->initialiseVariables();
        }

        $name = $matches[1];
        if ( !array_key_exists( $name, $this->mVariables ) ) {
                return $matches[0];
        }

        $this->mOutput->mContainsOldMagic = true;
        return $this->mVariables[$name];
}
1478
/**
 * Split the argument part of a template call into individual arguments.
 *
 * The string is cut at every '|'. A piece whose number of '[[' differs
 * from its number of ']]' is glued back to the piece after it, because
 * that '|' belongs to link syntax and not to the template call.
 *
 * @param string $argsString either '' or the arguments with their leading
 *                           '|' (e.g. '|a|b')
 * @return array list of argument strings, indexed from 0
 */
function getTemplateArgs( $argsString ) {
        if ( $argsString === '' ) {
                return array();
        }

        # Drop the leading '|' before splitting.
        $pieces = explode( '|', substr( $argsString, 1 ) );

        $merged = array();
        $current = array_shift( $pieces );
        foreach ( $pieces as $piece ) {
                if ( substr_count( $current, '[[' ) != substr_count( $current, ']]' ) ) {
                        # Unbalanced link brackets: the '|' was part of a link.
                        $current .= '|' . $piece;
                } else {
                        $merged[] = $current;
                        $current = $piece;
                }
        }
        # The final piece is kept as it is; there is nothing left to merge it with.
        $merged[] = $current;

        return $merged;
}
1503
/**
 * Expand one {{...}} construct matched by the template pattern of
 * replaceVariables().
 *
 * The name part is tried, in this order, as: a literal {{{ }}} (left
 * alone), SUBST, MSG/MSGNW/INT, NS, LOCALURL/LOCALURLE, a magic variable,
 * GRAMMAR, a template already in the per-parser cache and finally a
 * template loaded from the database. For HTML output the result is parsed
 * recursively with the call's arguments, unless parsing was disabled
 * (invalid SUBST, template loop) or MSGNW asked for escaped wikitext.
 *
 * @param array $matches regex match: [0] whole match, [1] optional newline
 *                       before the braces, [2] text before the first '|',
 *                       [3] '' or the arguments including their leading '|'
 * @return string replacement text, or the unchanged match when nothing
 *                applied
 */
function braceSubstitution( $matches ) {
        global $wgLinkCache, $wgLang;
        $fname = 'Parser::braceSubstitution';
        $found = false;
        $nowiki = false;
        $noparse = false;

        $title = NULL;

        # Remember the recursion path of the enclosing expansion. It is put
        # back on exit so that finishing a nested template does not wipe the
        # entries of the templates still being expanded around it.
        $lastPathLevel = $this->mTemplatePath;

        # $newline is an optional newline character before the braces
        # $part1 is the bit before the first |, and must contain only title characters
        # $args is a list of arguments, starting from index 0, not including $part1

        $newline = $matches[1];
        $part1 = $matches[2];
        # If the third subpattern matched anything, it will start with |

        $args = $this->getTemplateArgs($matches[3]);
        $argc = count( $args );

        # {{{}}}
        if ( strpos( $matches[0], '{{{' ) !== false ) {
                $text = $matches[0];
                $found = true;
                $noparse = true;
        }

        # SUBST
        if ( !$found ) {
                $mwSubst =& MagicWord::get( MAG_SUBST );
                if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
                        if ( $this->mOutputType != OT_WIKI ) {
                                # Invalid SUBST not replaced at PST time
                                # Return without further processing
                                $text = $matches[0];
                                $found = true;
                                $noparse= true;
                        }
                } elseif ( $this->mOutputType == OT_WIKI ) {
                        # SUBST not found in PST pass, do nothing
                        $text = $matches[0];
                        $found = true;
                }
        }

        # MSG, MSGNW and INT
        if ( !$found ) {
                # Check for MSGNW:
                $mwMsgnw =& MagicWord::get( MAG_MSGNW );
                if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
                        $nowiki = true;
                } else {
                        # Remove obsolete MSG:
                        $mwMsg =& MagicWord::get( MAG_MSG );
                        $mwMsg->matchStartAndRemove( $part1 );
                }

                # Check if it is an internal message
                $mwInt =& MagicWord::get( MAG_INT );
                if ( $mwInt->matchStartAndRemove( $part1 ) ) {
                        if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
                                $text = wfMsgReal( $part1, $args, true );
                                $found = true;
                        }
                }
        }

        # NS
        if ( !$found ) {
                # Check for NS: (namespace expansion)
                $mwNs = MagicWord::get( MAG_NS );
                if ( $mwNs->matchStartAndRemove( $part1 ) ) {
                        if ( intval( $part1 ) ) {
                                $text = $wgLang->getNsText( intval( $part1 ) );
                                $found = true;
                        } else {
                                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                                if ( !is_null( $index ) ) {
                                        $text = $wgLang->getNsText( $index );
                                        $found = true;
                                }
                        }
                }
        }

        # LOCALURL and LOCALURLE
        if ( !$found ) {
                $mwLocal = MagicWord::get( MAG_LOCALURL );
                $mwLocalE = MagicWord::get( MAG_LOCALURLE );

                if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
                        $func = 'getLocalURL';
                } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
                        $func = 'escapeLocalURL';
                } else {
                        $func = '';
                }

                if ( $func !== '' ) {
                        $title = Title::newFromText( $part1 );
                        if ( !is_null( $title ) ) {
                                # The first argument, if any, is handed to the URL method.
                                if ( $argc > 0 ) {
                                        $text = $title->$func( $args[0] );
                                } else {
                                        $text = $title->$func();
                                }
                                $found = true;
                        }
                }
        }

        # Internal variables
        if ( !$this->mVariables ) {
                $this->initialiseVariables();
        }
        if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
                $text = $this->mVariables[$part1];
                $found = true;
                $this->mOutput->mContainsOldMagic = true;
        }

        # GRAMMAR
        if ( !$found && $argc == 1 ) {
                $mwGrammar =& MagicWord::get( MAG_GRAMMAR );
                if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
                        $text = $wgLang->convertGrammar( $args[0], $part1 );
                        $found = true;
                }
        }

        # Template table test

        # Did we encounter this template already? If yes, it is in the cache
        # and we need to check for loops. Only consulted when nothing above
        # matched, so that an earlier result (e.g. a construct deliberately
        # left alone in the pre-save pass) is never overwritten by a cached
        # template of the same name.
        if ( !$found && isset( $this->mTemplates[$part1] ) ) {
                # Infinite loop test: the template is already being expanded
                # further up, so return its text without parsing it again.
                if ( isset( $this->mTemplatePath[$part1] ) ) {
                        $noparse = true;
                }
                # set $text to cached message.
                $text = $this->mTemplates[$part1];
                $found = true;
        }

        # Load from database
        if ( !$found ) {
                $title = Title::newFromText( $part1, NS_TEMPLATE );
                if ( !is_null( $title ) && !$title->isExternal() ) {
                        # Check for excessive inclusion
                        $dbk = $title->getPrefixedDBkey();
                        if ( $this->incrementIncludeCount( $dbk ) ) {
                                # Still within the inclusion limit: fetch the page text.
                                $article = new Article( $title );
                                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                                if ( $articleContent !== false ) {
                                        $found = true;
                                        $text = $articleContent;
                                }
                        }

                        # If the title is valid but undisplayable, make a link to it
                        if ( $this->mOutputType == OT_HTML && !$found ) {
                                $text = '[['.$title->getPrefixedText().']]';
                                $found = true;
                        }

                        # Template cache array insertion. Nothing is stored when no
                        # text was produced ($text would be undefined here).
                        if ( $found ) {
                                $this->mTemplates[$part1] = $text;
                        }
                }
        }

        # Recursive parsing, escaping and link table handling
        # Only for HTML output
        if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
                $text = wfEscapeWikiText( $text );
        } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
                # Clean up argument array: unnamed arguments are numbered from 1,
                # 'name=value' arguments are stored under their trimmed name.
                $assocArgs = array();
                $index = 1;
                foreach( $args as $arg ) {
                        $eqpos = strpos( $arg, '=' );
                        if ( $eqpos === false ) {
                                $assocArgs[$index++] = $arg;
                        } else {
                                $name = trim( substr( $arg, 0, $eqpos ) );
                                $value = trim( substr( $arg, $eqpos+1 ) );
                                if ( $value === false ) {
                                        $value = '';
                                }
                                if ( $name !== false ) {
                                        $assocArgs[$name] = $value;
                                }
                        }
                }

                # Do not enter included links in link table
                if ( !is_null( $title ) ) {
                        $wgLinkCache->suspend();
                }

                # Add a new element to the template recursion path
                $this->mTemplatePath[$part1] = 1;

                $text = $this->stripParse( $text, $newline, $assocArgs );

                # Resume the link cache and register the inclusion as a link
                if ( !is_null( $title ) ) {
                        $wgLinkCache->resume();
                        $wgLinkCache->addLinkObj( $title );
                }
        }

        # Prune this level (and anything below it) off the recursion path,
        # keeping the entries of the enclosing expansions.
        $this->mTemplatePath = $lastPathLevel;

        if ( !$found ) {
                return $matches[0];
        } else {
                return $text;
        }
}
1726
/**
 * Triple brace replacement -- used for template arguments.
 *
 * Looks the argument name up in the arguments on top of the argument
 * stack and, when present, returns its parsed value.
 *
 * @param array $matches regex match: [0] whole match, [1] optional newline
 *                       before the braces, [2] argument name
 * @return string parsed argument value, or the unchanged match when the
 *                current arguments do not contain that name
 */
function argSubstitution( $matches ) {
        $argName = trim( $matches[2] );
        $currentArgs = end( $this->mArgStack );

        if ( !array_key_exists( $argName, $currentArgs ) ) {
                return $matches[0];
        }

        # The value is parsed without arguments of its own.
        return $this->stripParse( $currentArgs[$argName], $matches[1], array() );
}
1740
/**
 * Count one more inclusion of an entity and report whether it is still
 * allowed.
 *
 * The counter is incremented on every call, including calls that return
 * false.
 *
 * @param string $dbk key identifying the included entity
 * @return bool true while the entity has been included at most
 *              MAX_INCLUDE_REPEAT times, false afterwards
 */
function incrementIncludeCount( $dbk ) {
        if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
                $this->mIncludeCount[$dbk] = 0;
        }
        $this->mIncludeCount[$dbk]++;

        return ( $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT );
}
1752
1753
1754         # Cleans up HTML, removes dangerous tags and attributes
/**
 * Cleans up HTML, removing dangerous tags and attributes.
 *
 * HTML comments are removed first. The text is then split on '<' and every
 * piece is inspected: a piece that starts with a whitelisted tag is emitted
 * again with its attributes run through fixTagAttributes(); every other
 * piece gets its '<' and '>' escaped as entities. When $wgUserHtml is off
 * the whitelist is empty, so all tags are escaped.
 *
 * Unless $wgUseTidy is set, nesting is checked as well: closing tags must
 * match the innermost open tag, td/th/tr are only accepted inside a table,
 * only the tags in $htmlnest may be nested in themselves, and tags still
 * open at the end of the text are closed.
 *
 * @param string $text  Text to clean
 * @return string  Cleaned text
 * @access private
 */
/* private */ function removeHTMLtags( $text ) {
    global $wgUseTidy, $wgUserHtml;
    $fname = 'Parser::removeHTMLtags';
    wfProfileIn( $fname );

    if ( $wgUserHtml ) {
        $htmlpairs = array( # Tags that must be closed
            'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
            'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
            'strike', 'strong', 'tt', 'var', 'div', 'center',
            'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
            'ruby', 'rt' , 'rb' , 'rp', 'p'
        );
        $htmlsingle = array(
            'br', 'hr', 'li', 'dt', 'dd'
        );
        $htmlnest = array( # Tags that can be nested--??
            'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
            'dl', 'font', 'big', 'small', 'sub', 'sup'
        );
        $tabletags = array( # Can only appear inside table
            'td', 'th', 'tr'
        );
    } else {
        $htmlpairs = array();
        $htmlsingle = array();
        $htmlnest = array();
        $tabletags = array();
    }

    # Table cells/rows are treated like single tags: they are never pushed
    # on the stack and need no matching close
    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # NOTE(review): the result is not used in this method; the call is kept
    # in case getHTMLattrs() has side effects - confirm and drop if it has none.
    $htmlattrs = $this->getHTMLattrs();

    # Remove HTML comments. Each comment is matched lazily up to its own
    # "-->" so that text between two comments survives. (Combining the U
    # modifier with ".*?" would make that quantifier greedy and swallow
    # everything from the first "<!--" to the last "-->".)
    # A comment that starts a line also takes the preceding newline and the
    # spaces before it along.
    $text = preg_replace( '/(\\n *<!--.*?-->|<!--.*?-->)/s', '', $text );

    # Groups: 1 = optional closing slash, 2 = tag name, 3 = attributes,
    # 4 = end of the tag, 5 = text following the tag
    $tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';

    $bits = explode( '<', $text );
    $text = array_shift( $bits );
    if ( !$wgUseTidy ) {
        $tagstack = array();
        $tablestack = array();
        foreach ( $bits as $x ) {
            if ( !preg_match( $tagPattern, $x, $regs ) ) {
                # Does not look like a tag at all: escape it
                $text .= '&lt;' . str_replace( '>', '&gt;', $x );
                continue;
            }
            $slash = $regs[1];
            $t = strtolower( $regs[2] );
            $params = $regs[3];
            $brace = $regs[4];
            $rest = $regs[5];

            $badtag = 0;
            if ( in_array( $t, $htmlelements ) ) {
                # Check our stack
                if ( $slash ) {
                    # Closing a tag...
                    if ( !in_array( $t, $htmlsingle ) &&
                        ( $ot = array_pop( $tagstack ) ) != $t ) {
                        # Not the innermost open tag: undo the pop (nothing
                        # to restore when the stack was empty) and reject
                        if ( $ot !== null ) {
                            array_push( $tagstack, $ot );
                        }
                        $badtag = 1;
                    } else {
                        if ( $t == 'table' ) {
                            # Back to the stack that was active outside the table
                            $tagstack = array_pop( $tablestack );
                        }
                        $newparams = '';
                    }
                } else {
                    # Keep track for later
                    if ( in_array( $t, $tabletags ) &&
                        !in_array( 'table', $tagstack ) ) {
                        $badtag = 1;
                    } else if ( in_array( $t, $tagstack ) &&
                        !in_array( $t, $htmlnest ) ) {
                        $badtag = 1;
                    } else if ( !in_array( $t, $htmlsingle ) ) {
                        if ( $t == 'table' ) {
                            # A table starts with a fresh stack; the outer
                            # one is saved until the table is closed
                            array_push( $tablestack, $tagstack );
                            $tagstack = array();
                        }
                        array_push( $tagstack, $t );
                    }
                    # Strip non-approved attributes from the tag
                    $newparams = $this->fixTagAttributes( $params );
                }
                if ( !$badtag ) {
                    $rest = str_replace( '>', '&gt;', $rest );
                    $text .= "<$slash$t $newparams$brace$rest";
                    continue;
                }
            }
            # Unknown or rejected tag: show it as literal text
            $text .= '&lt;' . str_replace( '>', '&gt;', $x );
        }
        # Close off any remaining tags
        while ( is_array( $tagstack ) && ( $t = array_pop( $tagstack ) ) ) {
            $text .= "</$t>\n";
            if ( $t == 'table' ) {
                $tagstack = array_pop( $tablestack );
            }
        }
    } else {
        # this might be possible using tidy itself
        foreach ( $bits as $x ) {
            if ( preg_match( $tagPattern, $x, $regs ) &&
                in_array( $t = strtolower( $regs[2] ), $htmlelements ) ) {
                $slash = $regs[1];
                $brace = $regs[4];
                $newparams = $this->fixTagAttributes( $regs[3] );
                $rest = str_replace( '>', '&gt;', $regs[5] );
                $text .= "<$slash$t $newparams$brace$rest";
            } else {
                $text .= '&lt;' . str_replace( '>', '&gt;', $x );
            }
        }
    }
    wfProfileOut( $fname );
    return $text;
}
1869
1870
1871         # This function accomplishes several tasks:
1872         # 1) Auto-number headings if that option is enabled
1873         # 2) Add an [edit] link to sections for logged in users who have enabled the option
1874         # 3) Add a Table of contents on the top for users who have enabled the option
1875         # 4) Auto-anchor headings
1876         #
1877         # It loops through all headlines, collects the necessary data, then splits up the
1878         # string and re-inserts the newly formatted headlines.
/**
 * Post-processes the <h1>..<h6> headings found in rendered text.
 *
 * For every heading an anchor (<a name="...">) is generated in front of it;
 * depending on the parser options and magic words found in the text the
 * heading is also numbered, given a section edit link and/or listed in a
 * table of contents. The text is then split at the headings and put back
 * together with the newly built headings.
 *
 * The table of contents is suppressed by the NOTOC magic word, on the page
 * named by the 'mainpage' message and when there are fewer than four
 * headings; the TOC / FORCETOC magic words override this. With TOC the table
 * replaces the magic word, otherwise it is placed after the first block of
 * text (only when $isMain is true).
 *
 * @param string $text    Text containing the headings
 * @param bool   $isMain  Whether the table of contents may be inserted after
 *                        the first block
 * @return string  Text with processed headings
 * @access private
 */
/* private */ function formatHeadings( $text, $isMain=true ) {
    global $wgInputEncoding, $wgMaxTocLevel, $wgLang;

    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    $doShowToc = $this->mOptions->getShowToc();
    $forceTocHere = false;
    if ( !$this->mTitle->userCanEdit() ) {
        $showEditLink = 0;
        $rightClickHack = 0;
    } else {
        $showEditLink = $this->mOptions->getEditSection();
        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
    }

    # Inhibit editsection links if requested in the page
    $esw =& MagicWord::get( MAG_NOEDITSECTION );
    if ( $esw->matchAndRemove( $text ) ) {
        $showEditLink = 0;
    }
    # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
    # do not add TOC
    $mw =& MagicWord::get( MAG_NOTOC );
    if ( $mw->matchAndRemove( $text ) ) {
        $doShowToc = 0;
    }

    # never add the TOC to the Main Page. This is an entry page that should not
    # be more than 1-2 screens large anyway
    if ( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
        $doShowToc = 0;
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now
    # $matches[1] = level digit, [2] = rest of the opening tag, [3] = heading text
    $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

    # if there are fewer than 4 headlines in the article, do not show TOC
    if ( $numMatches < 4 ) {
        $doShowToc = 0;
    }

    # if the string __TOC__ (not case-sensitive) occurs in the HTML,
    # override above conditions and always show TOC at that place
    $mw =& MagicWord::get( MAG_TOC );
    if ( $mw->match( $text ) ) {
        $doShowToc = 1;
        $forceTocHere = true;
    } else {
        # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
        # override above conditions and always show TOC above first header
        $mw =& MagicWord::get( MAG_FORCETOC );
        if ( $mw->matchAndRemove( $text ) ) {
            $doShowToc = 1;
        }
    }

    # We need this to perform operations on the HTML
    $sk =& $this->mOptions->getSkin();

    # headline counter
    $headlineCount = 0;

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    $toclevel = 0;
    $toc = '';
    $full = '';
    $head = array();          # rebuilt heading HTML, indexed by headline number
    $sublevelCount = array(); # heading level => headings counted at that level
    $refers = array();        # canonical anchor => number of headings using it so far
    $level = 0;
    $prevlevel = 0;
    foreach ( $matches[3] as $headline ) {
        $numbering = '';
        if ( $level ) {
            $prevlevel = $level;
        }
        $level = $matches[1][$headlineCount];
        if ( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
            # reset when we enter a new level
            $sublevelCount[$level] = 0;
            $toc .= $sk->tocIndent( $level - $prevlevel );
            $toclevel += $level - $prevlevel;
        }
        if ( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
            # reset when we step back a level
            $sublevelCount[$level+1] = 0;
            $toc .= $sk->tocUnindent( $prevlevel - $level );
            $toclevel -= $prevlevel - $level;
        }
        # count number of headlines for each level
        if ( !isset( $sublevelCount[$level] ) ) {
            $sublevelCount[$level] = 0;
        }
        $sublevelCount[$level]++;
        if ( $doNumberHeadings || $doShowToc ) {
            # Build the dotted number from the counters of all levels up to
            # this one, skipping levels that have no headings
            $dot = 0;
            for ( $i = 1; $i <= $level; $i++ ) {
                if ( !empty( $sublevelCount[$i] ) ) {
                    if ( $dot ) {
                        $numbering .= '.';
                    }
                    $numbering .= $wgLang->formatNum( $sublevelCount[$i] );
                    $dot = 1;
                }
            }
        }

        # The canonized header is a version of the header text safe to use for links
        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        # (the second call must work on the result of the first one)
        $canonized_headline = $this->unstrip( $headline, $this->mStripState );
        $canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

        # Remove link placeholders by the link text.
        #     <!--LINK namespace page_title link text with suffix-->
        # turns into
        #     link text with suffix
        $canonized_headline = preg_replace( '/<!--LINK [0-9]* [^ ]* *(.*?)-->/','$1', $canonized_headline );
        # strip out HTML
        $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
        $tocline = trim( $canonized_headline );
        $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
        # Turn the %XX escapes into .XX, but keep colons readable
        $replacearray = array(
            '%3A' => ':',
            '%' => '.'
        );
        $canonized_headline = str_replace( array_keys( $replacearray ), array_values( $replacearray ), $canonized_headline );

        # count how many in assoc. array so we can track dupes in anchors
        if ( !isset( $refers[$canonized_headline] ) ) {
            $refers[$canonized_headline] = 0;
        }
        $refers[$canonized_headline]++;
        $dupeCount = $refers[$canonized_headline];

        # Prepend the number to the heading text
        if ( $doNumberHeadings || $doShowToc ) {
            $tocline = $numbering . ' ' . $tocline;

            # Don't number the heading if it is the only one (looks silly)
            if ( $doNumberHeadings && count( $matches[3] ) > 1 ) {
                # the two are different if the line contains a link
                $headline = $numbering . ' ' . $headline;
            }
        }

        # Create the anchor for linking from the TOC to the section;
        # repeated headings get _2, _3, ... appended
        $anchor = $canonized_headline;
        if ( $dupeCount > 1 ) {
            $anchor .= '_' . $dupeCount;
        }
        if ( $doShowToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
            $toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
        }

        # The edit link, if any, goes in front of the anchor and heading
        $head[$headlineCount] = '';
        if ( $showEditLink ) {
            $head[$headlineCount] .= $sk->editSectionLink( $headlineCount + 1 );
        }

        # Add the edit section span
        if ( $rightClickHack ) {
            $headline = $sk->editSectionScript( $headlineCount + 1, $headline );
        }

        # give headline the correct <h#> tag
        $head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';

        $headlineCount++;
    }

    if ( $doShowToc ) {
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    # split up and insert constructed headlines

    $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
    $i = 0;

    foreach ( $blocks as $block ) {
        # An [edit] link for the top block of text used to be emitted here
        # via $sk->editSectionLink(0); it is disabled because it broke block
        # formatting (for example, a bullet point in the top line).
        $full .= $block;
        if ( $doShowToc && !$i && $isMain && !$forceTocHere ) {
            # Top anchor now in skin
            $full = $full.$toc;
        }

        if ( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }
    if ( $forceTocHere ) {
        $mw =& MagicWord::get( MAG_TOC );
        return $mw->replace( $toc, $full );
    } else {
        return $full;
    }
}
2086
2087         # Return an HTML link for the "ISBN 123456" text
/**
 * Turns every "ISBN 123456" in the text into a link to Special:Booksources.
 *
 * After "ISBN " and any further spaces, the longest run of digits, dashes
 * and upper-case letters is taken as the number. The link target receives
 * the number without dashes. An "ISBN " that is not followed by such a
 * number is left in the text exactly as it was.
 *
 * @param string $text
 * @return string  Text with the ISBN links inserted
 * @access private
 */
/* private */ function magicISBN( $text ) {
    $fname = 'Parser::magicISBN';
    wfProfileIn( $fname );

    # The marker is a literal string, so explode() is all that is needed.
    # The blank in front makes sure the first piece is never a hit.
    $a = explode( 'ISBN ', ' ' . $text );
    if ( count( $a ) < 2 ) {
        wfProfileOut( $fname );
        return $text;
    }
    $text = (string)substr( array_shift( $a ), 1 );
    $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

    foreach ( $a as $x ) {
        # strspn() measures the leading run and copes with an empty or
        # fully consumed string, unlike looking at $x[0] in a loop
        $blankLen = strspn( $x, ' ' );
        $blank = str_repeat( ' ', $blankLen );
        $x = (string)substr( $x, $blankLen );
        if ( $x === '' ) { # blank isbn
            $text .= "ISBN $blank";
            continue;
        }

        $isbnLen = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $isbnLen );
        $x = (string)substr( $x, $isbnLen );
        $num = str_replace( '-', '', $isbn );

        if ( $num === '' ) {
            # Nothing usable (at most dashes): put back everything consumed
            $text .= "ISBN $blank$isbn$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
            $text .= '<a href="' .
                $titleObj->escapeLocalUrl( 'isbn=' . $num ) .
                "\" class=\"internal\">ISBN $isbn</a>";
            $text .= $x;
        }
    }
    wfProfileOut( $fname );
    return $text;
}
2131
2132         # Return an HTML link for the "GEO ..." text
/**
 * Turns every "GEO <coordinates>" in the text into a link to Special:Geo.
 *
 * Coordinates written as  12&deg;34'56" North, 12&deg;34'56" West  (any
 * North/South and East/West combination) are first rewritten to the form
 * "(GEO +12.34.56:-12.34.56)"; North and East get a plus sign, South and
 * West a minus sign.
 *
 * After "GEO " and any further spaces, the longest run of the characters
 * 0-9 . + - : is taken as the coordinates. A link is only made when they
 * contain a colon; the link target receives them without plus signs.
 * Anything else is left in the text exactly as it was.
 *
 * @param string $text
 * @return string  Text with the GEO links inserted
 * @access private
 */
/* private */ function magicGEO( $text ) {
    $fname = 'Parser::magicGEO';
    wfProfileIn( $fname );

    # This conversion is only for the ~35000 U.S. Census Rambot pages...
    $latitudeSigns = array( 'North' => '+', 'South' => '-' );
    $longitudeSigns = array( 'West' => '-', 'East' => '+' );
    foreach ( $latitudeSigns as $latName => $latSign ) {
        foreach ( $longitudeSigns as $lonName => $lonSign ) {
            $text = preg_replace(
                "/(\\d+)&deg;(\\d+)'(\\d+)\" {$latName}, (\\d+)&deg;(\\d+)'(\\d+)\" {$lonName}/",
                "(GEO {$latSign}\$1.\$2.\$3:{$lonSign}\$4.\$5.\$6)",
                $text );
        }
    }

    # The marker is a literal string, so explode() is all that is needed.
    # The blank in front makes sure the first piece is never a hit.
    $a = explode( 'GEO ', ' ' . $text );
    if ( count( $a ) < 2 ) {
        wfProfileOut( $fname );
        return $text;
    }
    $text = (string)substr( array_shift( $a ), 1 );
    $valid = '0123456789.+-:';

    foreach ( $a as $x ) {
        # strspn() measures the leading run and copes with an empty or
        # fully consumed string, unlike looking at $x[0] in a loop
        $blankLen = strspn( $x, ' ' );
        $blank = str_repeat( ' ', $blankLen );
        $x = (string)substr( $x, $blankLen );

        $geoLen = strspn( $x, $valid );
        $geo = (string)substr( $x, 0, $geoLen );
        $x = (string)substr( $x, $geoLen );
        $num = str_replace( '+', '', $geo );

        if ( strpos( $num, ':' ) === false ) {
            # Not a latitude:longitude pair: put back everything consumed
            $text .= "GEO $blank$geo$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
            $text .= '<a href="' .
                $titleObj->escapeLocalUrl( 'coordinates=' . $num ) .
                "\" class=\"internal\">GEO $geo</a>";
            $text .= $x;
        }
    }
    wfProfileOut( $fname );
    return $text;
}
2179
2180         # Return an HTML link for the "RFC 1234" text
/**
 * Turns every "RFC 1234" in the text into an external link.
 *
 * After "RFC " and any further spaces, the leading run of digits is taken
 * as the RFC number. The link URL is the 'rfcurl' message with $1 replaced
 * by that number. An "RFC " that is not followed by a number is left in the
 * text exactly as it was.
 *
 * @param string $text
 * @return string  Text with the RFC links inserted
 * @access private
 */
/* private */ function magicRFC( $text ) {
    # The marker is a literal string, so explode() is all that is needed.
    # The blank in front makes sure the first piece is never a hit.
    $a = explode( 'RFC ', ' ' . $text );
    if ( count( $a ) < 2 ) {
        return $text;
    }
    $text = (string)substr( array_shift( $a ), 1 );
    $valid = '0123456789';

    foreach ( $a as $x ) {
        # strspn() measures the leading run and copes with an empty or
        # fully consumed string, unlike looking at $x[0] in a loop
        $blankLen = strspn( $x, ' ' );
        $blank = str_repeat( ' ', $blankLen );
        $x = (string)substr( $x, $blankLen );

        $rfcLen = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $rfcLen );
        $x = (string)substr( $x, $rfcLen );

        if ( $rfc === '' ) {
            $text .= "RFC $blank$x";
        } else {
            $url = wfMsg( 'rfcurl' );
            $url = str_replace( '$1', $rfc, $url );
            $sk =& $this->mOptions->getSkin();
            $la = $sk->getExternalLinkAttributes( $url, 'RFC ' . $rfc );
            $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
        }
    }
    return $text;
}
2212
/**
 * Transforms wiki markup before it is saved.
 *
 * Sets the parser up for OT_WIKI output, converts CRLF line ends to LF and
 * runs pstPass2() on the text while the sections taken out by strip() are
 * out of the way; those are put back afterwards.
 *
 * @param string $text        Wiki markup as submitted
 * @param object $title       Stored by reference in $this->mTitle
 * @param object $user        Handed on to pstPass2()
 * @param object $options     Stored in $this->mOptions
 * @param bool   $clearState  Whether to call clearState() first
 * @return string  The altered wiki markup
 */
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
    $this->mTitle =& $title;
    $this->mOptions = $options;
    $this->mOutputType = OT_WIKI;

    if ( $clearState ) {
        $this->clearState();
    }

    # Normalise line ends
    $text = str_replace( "\r\n", "\n", $text );

    # A regex pass that normalised <br> tags was left disabled at this point.

    $state = false;
    $stripped = $this->strip( $text, $state, false );
    $transformed = $this->pstPass2( $stripped, $user );
    $restored = $this->unstrip( $transformed, $state );
    $restored = $this->unstripNoWiki( $restored, $state );

    return $restored;
}
2241
/**
 * Second pass of the pre-save transform.
 *
 * In this order: runs replaceVariables(), expands the signature shortcuts
 * (~~~~~ = date, ~~~~ = user link and date, ~~~ = user link), completes
 * context links such as [[page (context)|]], [[namespace:page|]] and
 * [[|page]], trims trailing whitespace and removes the MAG_END magic word.
 *
 * @param string $text  Wiki markup
 * @param object $user  Supplies the name and the 'nickname' option used in
 *                      signatures
 * @return string  The transformed markup
 * @access private
 */
/* private */ function pstPass2( $text, &$user ) {
    global $wgLang, $wgLocaltimezone;

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( 'nickname' );
    if ( '' == $k ) {
        $k = $n;
    }
    if ( isset( $wgLocaltimezone ) ) {
        $oldtz = getenv( 'TZ' );
        putenv( 'TZ=' . $wgLocaltimezone );
    }
    /* Note: this is an ugly timezone hack for the European wikis */
    $d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
        ' (' . date( 'T' ) . ')';
    if ( isset( $wgLocaltimezone ) ) {
        # Put back the timezone that was in effect before
        putenv( 'TZ=' . $oldtz );
    }

    # Plain string replacement: the tildes are literal, and the name and
    # nickname must be inserted verbatim. In a regex replacement string a
    # "$1" or "\1" inside a nickname would be read as a backreference.
    $userLink = '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]";
    $text = str_replace( '~~~~~', $d, $text );
    $text = str_replace( '~~~~', "$userLink $d", $text );
    $text = str_replace( '~~~', $userLink, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
    $p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/";                # [[namespace:page|]] and [[:namespace:page|]]
    $p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]

    # The context of the page being saved is the parenthesised end of its title
    $context = '';
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }
    $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
    $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
    $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

    if ( '' == $context ) {
        $text = preg_replace( $p2, '[[\\1]]', $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    /*
    $mw =& MagicWord::get( MAG_SUBST );
    $wgCurParser = $this->fork();
    $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
    $this->merge( $wgCurParser );
    */

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
2308
2309         # Set up some variables which are usually set up in parse()
2310         # so that an external function can call some class members with confidence
/**
 * Stores the title, options and output type on the parser.
 *
 * @param object $title       Stored by reference in $this->mTitle
 * @param object $options     Stored in $this->mOptions
 * @param int    $outputType  Stored in $this->mOutputType
 * @param bool   $clearState  Whether to call clearState() afterwards
 */
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
    $this->mOutputType = $outputType;
    $this->mOptions = $options;
    $this->mTitle =& $title;

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
2319
/**
 * Runs replaceVariables() on a message text in OT_MSG mode.
 *
 * The parser is pointed at the global $wgTitle and its state is cleared.
 * A call made while another call is still in progress returns its input
 * untouched, which guards against infinite recursion.
 *
 * @param string $text     Message text
 * @param object $options  Stored in $this->mOptions
 * @return string
 */
function transformMsg( $text, $options ) {
    global $wgTitle;
    static $inProgress = false;

    if ( !$inProgress ) {
        $inProgress = true;

        $this->mTitle = $wgTitle;
        $this->mOptions = $options;
        $this->mOutputType = OT_MSG;
        $this->clearState();
        $text = $this->replaceVariables( $text );

        $inProgress = false;
    }

    return $text;
}
2339
2340         # Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2341         # Callback will be called with the text within
2342         # Transform and return the text within
/**
 * Registers the callback for an HTML-style tag in $this->mTagHooks.
 *
 * @param string $tag       Tag name, e.g. "yourtag" for <yourtag>...</yourtag>
 * @param mixed  $callback  Callback to store for that tag
 * @return mixed  The callback stored for the tag before, or null if none
 */
function setHook( $tag, $callback ) {
    $previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
    $this->mTagHooks[$tag] = $callback;

    return $previous;
}
2348 }
2349
/**
 * Holds the result of a parse: the output text, the language and category
 * links that go with it, the "contains old magic" flag and a cache time.
 *
 * @package MediaWiki
 */
class ParserOutput
{
    var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
    var $mCacheTime; # Used in ParserCache

    # Constructor. The cache time always starts out as an empty string.
    function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
        $containsOldMagic = false )
    {
        $this->mText = $text;
        $this->mLanguageLinks = $languageLinks;
        $this->mCategoryLinks = $categoryLinks;
        $this->mContainsOldMagic = $containsOldMagic;
        $this->mCacheTime = '';
    }

    # Accessors
    function getText() { return $this->mText; }
    function getLanguageLinks() { return $this->mLanguageLinks; }
    function getCategoryLinks() { return $this->mCategoryLinks; }
    function getCacheTime() { return $this->mCacheTime; }
    function containsOldMagic() { return $this->mContainsOldMagic; }

    # Mutators; each one returns whatever wfSetVar() returns
    function setText( $text ) { return wfSetVar( $this->mText, $text ); }
    function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
    function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
    function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
    function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

    # Folds the links and the old-magic flag of another ParserOutput into
    # this one. Language links are appended to language links and category
    # links to category links; the flag is set if either object has it set.
    # Text and cache time are left alone.
    function merge( $other ) {
        $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
        $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
        $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
    }

}
2387
/**
 * Bundle of settings consulted by the Parser.
 *
 * The values are either copied from site-wide globals or read from a user
 * object; see initialiseFromUser().
 *
 * @package MediaWiki
 */
class ParserOptions
{
        # All variables are private

        # Use texvc to expand <math> tags
        var $mUseTeX;
        # Use $wgDateFormatter to format dates
        var $mUseDynamicDates;
        # Interlanguage links are removed and returned in an array
        var $mInterwikiMagic;
        # Allow external images inline
        var $mAllowExternalImages;
        # Reference to the preferred skin
        var $mSkin;
        # Date format index
        var $mDateFormat;
        # Create "edit section" links
        var $mEditSection;
        # Generate JavaScript to edit section on right click
        var $mEditSectionOnRightClick;
        # Automatically number headings
        var $mNumberHeadings;
        # Show table of contents
        var $mShowToc;

        # ---- Getters ----

        function getUseTeX() {
                return $this->mUseTeX;
        }

        function getUseDynamicDates() {
                return $this->mUseDynamicDates;
        }

        function getInterwikiMagic() {
                return $this->mInterwikiMagic;
        }

        function getAllowExternalImages() {
                return $this->mAllowExternalImages;
        }

        function getSkin() {
                return $this->mSkin;
        }

        function getDateFormat() {
                return $this->mDateFormat;
        }

        function getEditSection() {
                return $this->mEditSection;
        }

        function getEditSectionOnRightClick() {
                return $this->mEditSectionOnRightClick;
        }

        function getNumberHeadings() {
                return $this->mNumberHeadings;
        }

        function getShowToc() {
                return $this->mShowToc;
        }

        # ---- Setters; each returns the result of wfSetVar() ----

        function setUseTeX( $x ) {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function setUseDynamicDates( $x ) {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function setInterwikiMagic( $x ) {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function setAllowExternalImages( $x ) {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function setDateFormat( $x ) {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function setEditSection( $x ) {
                return wfSetVar( $this->mEditSection, $x );
        }

        function setEditSectionOnRightClick( $x ) {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function setNumberHeadings( $x ) {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function setShowToc( $x ) {
                return wfSetVar( $this->mShowToc, $x );
        }

        /**
         * Bind the skin by reference; unlike the other setters this
         * returns nothing.
         */
        function setSkin( &$x ) {
                $this->mSkin =& $x;
        }

        /**
         * Build a new ParserOptions object and fill it in from $user.
         *
         * @param mixed &$user  passed straight on to initialiseFromUser()
         * @return ParserOptions
         */
        /* static */ function newFromUser( &$user ) {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        /**
         * Fill in every option: the site-wide ones from globals, the rest
         * from the user object.
         *
         * @param mixed &$userInput  user object to read from; when it is
         *                           empty a fresh User marked as loaded is
         *                           used instead
         */
        function initialiseFromUser( &$userInput ) {
                global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                $profileKey = 'ParserOptions::initialiseFromUser';
                $skinProfileKey = $profileKey . '-skin';
                wfProfileIn( $profileKey );

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        # No user supplied: fall back to a blank User object
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Fetching the skin is profiled separately
                wfProfileIn( $skinProfileKey );
                $this->mSkin =& $user->getSkin();
                wfProfileOut( $skinProfileKey );

                # Per-user preferences
                $this->mDateFormat = $user->getOption( 'date' );
                $this->mEditSection = $user->getOption( 'editsection' );
                $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
                $this->mNumberHeadings = $user->getOption( 'numberheadings' );
                $this->mShowToc = $user->getOption( 'showtoc' );

                wfProfileOut( $profileKey );
        }

}
2467
2468 # Regex callbacks, used in Parser::replaceVariables
/**
 * Forward a regex match to braceSubstitution() on the global $wgCurParser.
 *
 * @param array $matches  match array handed over by the regex function
 * @return mixed whatever $wgCurParser->braceSubstitution() returns
 */
function wfBraceSubstitution( $matches ) {
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2473
/**
 * Forward a regex match to argSubstitution() on the global $wgCurParser.
 *
 * @param array $matches  match array handed over by the regex function
 * @return mixed whatever $wgCurParser->argSubstitution() returns
 */
function wfArgSubstitution( $matches ) {
        return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2478
/**
 * Forward a regex match to variableSubstitution() on the global $wgCurParser.
 *
 * @param array $matches  match array handed over by the regex function
 * @return mixed whatever $wgCurParser->variableSubstitution() returns
 */
function wfVariableSubstitution( $matches ) {
        return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2483
/**
 * Return the total number of articles.
 *
 * Calls wfLoadSiteStats() first, then hands back the value of the global
 * $wgNumberOfArticles.
 *
 * @return mixed current value of $wgNumberOfArticles
 */
function wfNumberOfArticles() {
        wfLoadSiteStats();

        return $GLOBALS['wgNumberOfArticles'];
}
2493
/**
 * Get various statistics from the database.
 *
 * Reads row 1 of the site_stats table and copies its counters into the
 * globals $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles. Nothing is
 * done when $wgNumberOfArticles already differs from -1, and the globals are
 * left alone when the query returns false.
 *
 * @private
 */
function wfLoadSiteStats() {
        global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

        # -1 is the "not loaded yet" marker; any other value skips the query
        if ( -1 == $wgNumberOfArticles ) {
                $dbr =& wfGetDB( DB_SLAVE );
                $row = $dbr->getArray(
                        'site_stats',
                        array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' ),
                        array( 'ss_row_id' => 1 ),
                        'wfLoadSiteStats'
                );

                if ( $row !== false ) {
                        $wgTotalViews = $row->ss_total_views;
                        $wgTotalEdits = $row->ss_total_edits;
                        $wgNumberOfArticles = $row->ss_good_articles;
                }
        }
}
2517
/**
 * Escape only the characters that delimit HTML tags and attribute values:
 * double quote, greater-than and less-than. Ampersands are left as they are.
 *
 * @param string $in  text to escape
 * @return string text with ", > and < replaced by their entities
 */
function wfEscapeHTMLTagsOnly( $in ) {
        $entities = array(
                '"' => '&quot;',
                '>' => '&gt;',
                '<' => '&lt;',
        );

        return str_replace( array_keys( $entities ), array_values( $entities ), $in );
}
2524
2525
2526 ?>