includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Processes wiki markup
   8 #
   9 # There are two main entry points into the Parser class:
  10 # parse()
  11 #   produces HTML output
  12 # preSaveTransform().
  13 #   produces altered wiki markup.
  14 #
  15 # Globals used:
  16 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  17 #
  18 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  19 #
  20 # settings:
  21 #   $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #   $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #   $wgLocaltimezone
  24 #
  25 #   * only within ParserOptions
  26 #
  27 #----------------------------------------
  28 #    Variable substitution O(N^2) attack
  29 #-----------------------------------------
  30 # Without countermeasures, it would be possible to attack the parser by saving
  31 # a page filled with a large number of inclusions of large pages. The size of
  32 # the generated page would be proportional to the square of the input size.
  33 # Hence, we limit the number of inclusions of any given page, thus bringing any
  34 # attack back to O(N).
  35 define( "MAX_INCLUDE_REPEAT", 100 );
  36 define( "MAX_INCLUDE_SIZE", 1000000 ); // 1 Million
  37
  38 # Allowed values for $mOutputType
  39 define( "OT_HTML", 1 );
  40 define( "OT_WIKI", 2 );
  41 define( "OT_MSG" , 3 );
  42
  43 # string parameter for extractTags which will cause it
  44 # to strip HTML comments in addition to regular
  45 # <XML>-style tags. This should not be anything we
  46 # may want to use in wikisyntax
  47 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
  48
  49 # prefix for escaping, used in two functions at least
  50 define( 'UNIQ_PREFIX', 'NaodW29');
  51
  52 # Constants needed for external link processing
  53 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
  54 define( 'HTTP_PROTOCOLS', 'http|https' );
  55 # Everything except bracket, space, or control characters
  56 define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
  57 define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
  58 # Including space
  59 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
  60 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
  61 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
  62 define( 'EXT_LINK_BRACKETED',  '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
  63 define( 'EXT_IMAGE_REGEX',
  64         '/^('.HTTP_PROTOCOLS.':)'.  # Protocol
  65         '('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
  66         '('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
  67 );
  68
  69 class Parser
  70 {
  71         # Persistent:
  72         var $mTagHooks;
  73
  74         # Cleared with clearState():
  75         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  76         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  77
  78         # Temporary:
  79         var $mOptions, $mTitle, $mOutputType,
  80             $mTemplates,        // cache of already loaded templates, avoids
  81                                 // multiple SQL queries for the same string
  82             $mTemplatePath;     // stores an unsorted hash of all the templates already loaded
  83                                 // in this path. Used for loop detection.
  84
  85         function Parser() {
  86                 $this->mTemplates = array();
  87                 $this->mTemplatePath = array();
  88                 $this->mTagHooks = array();
  89                 $this->clearState();
  90         }
  91
  92         function clearState() {
  93                 $this->mOutput = new ParserOutput;
  94                 $this->mAutonumber = 0;
  95                 $this->mLastSection = "";
  96                 $this->mDTopen = false;
  97                 $this->mVariables = false;
  98                 $this->mIncludeCount = array();
  99                 $this->mStripState = array();
 100                 $this->mArgStack = array();
 101                 $this->mInPre = false;
 102         }
 103
 104         # First pass--just handle <nowiki> sections, pass the rest off
 105         # to internalParse() which does all the real work.
 106         #
 107         # Returns a ParserOutput
 108         #
 109         function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
 110                 global $wgUseTidy;
 111                 $fname = "Parser::parse";
 112                 wfProfileIn( $fname );
 113
 114                 if ( $clearState ) {
 115                         $this->clearState();
 116                 }
 117
 118                 $this->mOptions = $options;
 119                 $this->mTitle =& $title;
 120                 $this->mOutputType = OT_HTML;
 121
 122                 $stripState = NULL;
 123                 $text = $this->strip( $text, $this->mStripState );
 124                 $text = $this->internalParse( $text, $linestart );
 125                 $text = $this->unstrip( $text, $this->mStripState );
 126                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 127                 if(!$wgUseTidy) {
 128                         $fixtags = array(
 129                                 # french spaces, last one Guillemet-left
 130                                 # only if there is something before the space
 131                                 '/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;\\2',
 132                                 # french spaces, Guillemet-right
 133                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 134                                 '/<hr *>/i' => '<hr />',
 135                                 '/<br *>/i' => '<br />',
 136                                 '/<center *>/i' => '<div class="center">',
 137                                 '/<\\/center *>/i' => '</div>',
 138                                 # Clean up spare ampersands; note that we probably ought to be
 139                                 # more careful about named entities.
 140                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 141                         );
 142                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 143                 } else {
 144                         $fixtags = array(
 145                                 # french spaces, last one Guillemet-left
 146                                 '/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
 147                                 # french spaces, Guillemet-right
 148                                 '/(\\302\\253) /i' => '\\1&nbsp;',
 149                                 '/([^> ]+(&#x30(1|3|9);)[^< ]*)/i' => '<span class="diacrit">\\1</span>',
 150                                 '/<center *>/i' => '<div class="center">',
 151                                 '/<\\/center *>/i' => '</div>'
 152                         );
 153                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 154                 }
 155                 # only once and last
 156                 $text = $this->doBlockLevels( $text, $linestart );
 157                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 158                 if($wgUseTidy) {
 159                         $text = $this->tidy($text);
 160                 }
 161                 $this->mOutput->setText( $text );
 162                 wfProfileOut( $fname );
 163                 return $this->mOutput;
 164         }
 165
 166         /* static */ function getRandomString() {
 167                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 168         }
 169
 170         # Replaces all occurrences of <$tag>content</$tag> in the text
 171         # with a random marker and returns the new text. the output parameter
 172         # $content will be an associative array filled with data on the form
 173         # $unique_marker => content.
 174
 175         # If $content is already set, the additional entries will be appended
 176
 177         # If $tag is set to STRIP_COMMENTS, the function will extract
 178         # <!-- HTML comments -->
 179
 180         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 181                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 182                 if ( !$content ) {
 183                         $content = array( );
 184                 }
 185                 $n = 1;
 186                 $stripped = '';
 187
 188                 while ( '' != $text ) {
 189                         if($tag==STRIP_COMMENTS) {
 190                                 $p = preg_split( '/<!--/i', $text, 2 );
 191                         } else {
 192                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 193                         }
 194                         $stripped .= $p[0];
 195                         if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
 196                                 $text = '';
 197                         } else {
 198                                 if($tag==STRIP_COMMENTS) {
 199                                         $q = preg_split( '/-->/i', $p[1], 2 );
 200                                 } else {
 201                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 202                                 }
 203                                 $marker = $rnd . sprintf('%08X', $n++);
 204                                 $content[$marker] = $q[0];
 205                                 $stripped .= $marker;
 206                                 $text = $q[1];
 207                         }
 208                 }
 209                 return $stripped;
 210         }
 211
 212         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 213         # If $render is set, performs necessary rendering operations on plugins
 214         # Returns the text, and fills an array with data needed in unstrip()
 215         # If the $state is already a valid strip state, it adds to the state
 216
 217         # When $stripcomments is set, HTML comments <!-- like this -->
 218         # will be stripped in addition to other tags. This is important
 219         # for section editing, where these comments cause confusion when
 220         # counting the sections in the wikisource
 221         function strip( $text, &$state, $stripcomments = false ) {
 222                 $render = ($this->mOutputType == OT_HTML);
 223                 $html_content = array();
 224                 $nowiki_content = array();
 225                 $math_content = array();
 226                 $pre_content = array();
 227                 $comment_content = array();
 228                 $ext_content = array();
 229
 230                 # Replace any instances of the placeholders
 231                 $uniq_prefix = UNIQ_PREFIX;
 232                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 233
 234                 # html
 235                 global $wgRawHtml;
 236                 if( $wgRawHtml ) {
 237                         $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
 238                         foreach( $html_content as $marker => $content ) {
 239                                 if ($render ) {
 240                                         # Raw and unchecked for validity.
 241                                         $html_content[$marker] = $content;
 242                                 } else {
 243                                         $html_content[$marker] = "<html>$content</html>";
 244                                 }
 245                         }
 246                 }
 247
 248                 # nowiki
 249                 $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
 250                 foreach( $nowiki_content as $marker => $content ) {
 251                         if( $render ){
 252                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 253                         } else {
 254                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 255                         }
 256                 }
 257
 258                 # math
 259                 $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
 260                 foreach( $math_content as $marker => $content ){
 261                         if( $render ) {
 262                                 if( $this->mOptions->getUseTeX() ) {
 263                                         $math_content[$marker] = renderMath( $content );
 264                                 } else {
 265                                         $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
 266                                 }
 267                         } else {
 268                                 $math_content[$marker] = "<math>$content</math>";
 269                         }
 270                 }
 271
 272                 # pre
 273                 $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
 274                 foreach( $pre_content as $marker => $content ){
 275                         if( $render ){
 276                                 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
 277                         } else {
 278                                 $pre_content[$marker] = "<pre>$content</pre>";
 279                         }
 280                 }
 281
 282                 # Comments
 283                 if($stripcomments) {
 284                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 285                         foreach( $comment_content as $marker => $content ){
 286                                 $comment_content[$marker] = "<!--$content-->";
 287                         }
 288                 }
 289
 290                 # Extensions
 291                 foreach ( $this->mTagHooks as $tag => $callback ) {
 292                         $ext_contents[$tag] = array();
 293                         $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
 294                         foreach( $ext_content[$tag] as $marker => $content ) {
 295                                 if ( $render ) {
 296                                         $ext_content[$tag][$marker] = $callback( $content );
 297                                 } else {
 298                                         $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
 299                                 }
 300                         }
 301                 }
 302
 303                 # Merge state with the pre-existing state, if there is one
 304                 if ( $state ) {
 305                         $state['html'] = $state['html'] + $html_content;
 306                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 307                         $state['math'] = $state['math'] + $math_content;
 308                         $state['pre'] = $state['pre'] + $pre_content;
 309                         $state['comment'] = $state['comment'] + $comment_content;
 310
 311                         foreach( $ext_content as $tag => $array ) {
 312                                 if ( array_key_exists( $tag, $state ) ) {
 313                                         $state[$tag] = $state[$tag] + $array;
 314                                 }
 315                         }
 316                 } else {
 317                         $state = array(
 318                           'html' => $html_content,
 319                           'nowiki' => $nowiki_content,
 320                           'math' => $math_content,
 321                           'pre' => $pre_content,
 322                           'comment' => $comment_content,
 323                         ) + $ext_content;
 324                 }
 325                 return $text;
 326         }
 327
 328         # always call unstripNoWiki() after this one
 329         function unstrip( $text, &$state ) {
 330                 # Must expand in reverse order, otherwise nested tags will be corrupted
 331                 $contentDict = end( $state );
 332                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 333                         if( key($state) != 'nowiki' && key($state) != 'html') {
 334                                 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 335                                         $text = str_replace( key( $contentDict ), $content, $text );
 336                                 }
 337                         }
 338                 }
 339
 340                 return $text;
 341         }
 342         # always call this after unstrip() to preserve the order
 343         function unstripNoWiki( $text, &$state ) {
 344                 # Must expand in reverse order, otherwise nested tags will be corrupted
 345                 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
 346                         $text = str_replace( key( $state['nowiki'] ), $content, $text );
 347                 }
 348
 349                 global $wgRawHtml;
 350                 if ($wgRawHtml) {
 351                         for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
 352                                 $text = str_replace( key( $state['html'] ), $content, $text );
 353                         }
 354                 }
 355
 356                 return $text;
 357         }
 358
 359         # Add an item to the strip state
 360         # Returns the unique tag which must be inserted into the stripped text
 361         # The tag will be replaced with the original text in unstrip()
 362         function insertStripItem( $text, &$state ) {
 363                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 364                 if ( !$state ) {
 365                         $state = array(
 366                           'html' => array(),
 367                           'nowiki' => array(),
 368                           'math' => array(),
 369                           'pre' => array()
 370                         );
 371                 }
 372                 $state['item'][$rnd] = $text;
 373                 return $rnd;
 374         }
 375
 376         # Return allowed HTML attributes
 377         function getHTMLattrs () {
 378                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 379                                 'title', 'align', 'lang', 'dir', 'width', 'height',
 380                                 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
 381                                 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
 382                                 /* FONT */ 'type', 'start', 'value', 'compact',
 383                                 /* For various lists, mostly deprecated but safe */
 384                                 'summary', 'width', 'border', 'frame', 'rules',
 385                                 'cellspacing', 'cellpadding', 'valign', 'char',
 386                                 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
 387                                 'headers', 'scope', 'rowspan', 'colspan', /* Tables */
 388                                 'id', 'class', 'name', 'style' /* For CSS */
 389                                 );
 390                 return $htmlattrs ;
 391         }
 392
 393         # Remove non approved attributes and javascript in css
 394         function fixTagAttributes ( $t ) {
 395                 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
 396                 $htmlattrs = $this->getHTMLattrs() ;
 397
 398                 # Strip non-approved attributes from the tag
 399                 $t = preg_replace(
 400                         '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
 401                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 402                         $t);
 403
 404                 $t = str_replace ( "<></>" , "" , $t ) ; # This should fix bug 980557
 405
 406                 # Strip javascript "expression" from stylesheets. Brute force approach:
 407                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 408
 409                 if( preg_match(
 410                         '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
 411                         wfMungeToUtf8( $t ) ) )
 412                 {
 413                         $t='';
 414                 }
 415
 416                 return trim ( $t ) ;
 417         }
 418
 419         # interface with html tidy, used if $wgUseTidy = true
 420         function tidy ( $text ) {
 421                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 422                 global $wgInputEncoding, $wgOutputEncoding;
 423                 $fname = 'Parser::tidy';
 424                 wfProfileIn( $fname );
 425
 426                 $cleansource = '';
 427                 switch(strtoupper($wgOutputEncoding)) {
 428                         case 'ISO-8859-1':
 429                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 430                                 break;
 431                         case 'UTF-8':
 432                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 433                                 break;
 434                         default:
 435                                 $wgTidyOpts .= ' -raw';
 436                         }
 437
 438                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 439 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 440 '<head><title>test</title></head><body>'.$text.'</body></html>';
 441                 $descriptorspec = array(
 442                         0 => array('pipe', 'r'),
 443                         1 => array('pipe', 'w'),
 444                         2 => array('file', '/dev/null', 'a')
 445                 );
 446                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 447                 if (is_resource($process)) {
 448                         fwrite($pipes[0], $wrappedtext);
 449                         fclose($pipes[0]);
 450                         while (!feof($pipes[1])) {
 451                                 $cleansource .= fgets($pipes[1], 1024);
 452                         }
 453                         fclose($pipes[1]);
 454                         $return_value = proc_close($process);
 455                 }
 456
 457                 wfProfileOut( $fname );
 458
 459                 if( $cleansource == '' && $text != '') {
 460                         wfDebug( "Tidy error detected!\n" );
 461                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 462                 } else {
 463                         return $cleansource;
 464                 }
 465         }
 466
 467         # parse the wiki syntax used to render tables
 468         function doTableStuff ( $t ) {
 469                 $fname = 'Parser::doTableStuff';
 470                 wfProfileIn( $fname );
 471
 472                 $t = explode ( "\n" , $t ) ;
 473                 $td = array () ; # Is currently a td tag open?
 474                 $ltd = array () ; # Was it TD or TH?
 475                 $tr = array () ; # Is currently a tr tag open?
 476                 $ltr = array () ; # tr attributes
 477                 $indent_level = 0; # indent level of the table
 478                 foreach ( $t AS $k => $x )
 479                 {
 480                         $x = trim ( $x ) ;
 481                         $fc = substr ( $x , 0 , 1 ) ;
 482                         if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
 483                                 $indent_level = strlen( $matches[1] );
 484                                 $t[$k] = "\n" .
 485                                         str_repeat( "<dl><dd>", $indent_level ) .
 486                                         "<table " . $this->fixTagAttributes ( $matches[2] ) . '>' ;
 487                                 array_push ( $td , false ) ;
 488                                 array_push ( $ltd , '' ) ;
 489                                 array_push ( $tr , false ) ;
 490                                 array_push ( $ltr , '' ) ;
 491                         }
 492                         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 493                         else if ( '|}' == substr ( $x , 0 , 2 ) ) {
 494                                 $z = "</table>\n" ;
 495                                 $l = array_pop ( $ltd ) ;
 496                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 497                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 498                                 array_pop ( $ltr ) ;
 499                                 $t[$k] = $z . str_repeat( "</dd></dl>", $indent_level );
 500                         }
 501                         else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
 502                                 $x = substr ( $x , 1 ) ;
 503                                 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 504                                 $z = '' ;
 505                                 $l = array_pop ( $ltd ) ;
 506                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 507                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 508                                 array_pop ( $ltr ) ;
 509                                 $t[$k] = $z ;
 510                                 array_push ( $tr , false ) ;
 511                                 array_push ( $td , false ) ;
 512                                 array_push ( $ltd , '' ) ;
 513                                 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 514                         }
 515                         else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
 516                                 if ( '|+' == substr ( $x , 0 , 2 ) ) {
 517                                         $fc = '+' ;
 518                                         $x = substr ( $x , 1 ) ;
 519                                 }
 520                                 $after = substr ( $x , 1 ) ;
 521                                 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
 522                                 $after = explode ( '||' , $after ) ;
 523                                 $t[$k] = '' ;
 524                                 foreach ( $after AS $theline )
 525                                 {
 526                                         $z = '' ;
 527                                         if ( $fc != '+' )
 528                                         {
 529                                                 $tra = array_pop ( $ltr ) ;
 530                                                 if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
 531                                                 array_push ( $tr , true ) ;
 532                                                 array_push ( $ltr , '' ) ;
 533                                         }
 534
 535                                         $l = array_pop ( $ltd ) ;
 536                                         if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 537                                         if ( $fc == '|' ) $l = 'td' ;
 538                                         else if ( $fc == '!' ) $l = 'th' ;
 539                                         else if ( $fc == '+' ) $l = 'caption' ;
 540                                         else $l = '' ;
 541                                         array_push ( $ltd , $l ) ;
 542                                         $y = explode ( '|' , $theline , 2 ) ;
 543                                         if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 544                                         else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 545                                         $t[$k] .= $y ;
 546                                         array_push ( $td , true ) ;
 547                                 }
 548                         }
 549                 }
 550
 551                 # Closing open td, tr && table
 552                 while ( count ( $td ) > 0 )
 553                 {
 554                         if ( array_pop ( $td ) ) $t[] = '</td>' ;
 555                         if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
 556                         $t[] = '</table>' ;
 557                 }
 558
 559                 $t = implode ( "\n" , $t ) ;
 560                 #               $t = $this->removeHTMLtags( $t );
 561                 wfProfileOut( $fname );
 562                 return $t ;
 563         }
 564
 565         # Parses the text and adds the result to the strip state
 566         # Returns the strip tag
 567         function stripParse( $text, $newline, $args ) {
 568                 $text = $this->strip( $text, $this->mStripState );
 569                 $text = $this->internalParse( $text, (bool)$newline, $args, false );
 570                 return $newline.$this->insertStripItem( $text, $this->mStripState );
 571         }
 572
 573         function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
 574                 $fname = 'Parser::internalParse';
 575                 wfProfileIn( $fname );
 576
 577                 $text = $this->removeHTMLtags( $text );
 578                 $text = $this->replaceVariables( $text, $args );
 579
 580                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
 581
 582                 $text = $this->doHeadings( $text );
 583                 if($this->mOptions->getUseDynamicDates()) {
 584                         global $wgDateFormatter;
 585                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 586                 }
 587                 $text = $this->doAllQuotes( $text );
 588                 $text = $this->replaceExternalLinks( $text );
 589                 $text = $this->doMagicLinks( $text );
 590                 $text = $this->replaceInternalLinks ( $text );
 591                 $text = $this->replaceInternalLinks ( $text );
 592
 593                 $text = $this->unstrip( $text, $this->mStripState );
 594                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 595
 596                 $text = $this->doTableStuff( $text );
 597                 $text = $this->formatHeadings( $text, $isMain );
 598                 $sk =& $this->mOptions->getSkin();
 599                 $text = $sk->transformContent( $text );
 600
 601                 // if ( $isMain && !isset ( $this->categoryMagicDone ) ) {
 602                         // $text .= $this->categoryMagic () ;
 603                         // $this->categoryMagicDone = true ;
 604                 // }
 605
 606                 wfProfileOut( $fname );
 607                 return $text;
 608         }
 609
 610         /* private */ function &doMagicLinks( &$text ) {
 611                 global $wgUseGeoMode;
 612                 $text = $this->magicISBN( $text );
 613                 if ( isset( $wgUseGeoMode ) && $wgUseGeoMode ) {
 614                         $text = $this->magicGEO( $text );
 615                 }
 616                 $text = $this->magicRFC( $text );
 617                 return $text;
 618         }
 619
 620         # Parse ^^ tokens and return html
 621         /* private */ function doExponent ( $text ) {
 622                 $fname = 'Parser::doExponent';
 623                 wfProfileIn( $fname);
 624                 $text = preg_replace('/\^\^(.*)\^\^/','<small><sup>\\1</sup></small>', $text);
 625                 wfProfileOut( $fname);
 626                 return $text;
 627         }
 628
 629         # Parse headers and return html
 630         /* private */ function doHeadings( $text ) {
 631                 $fname = 'Parser::doHeadings';
 632                 wfProfileIn( $fname );
 633                 for ( $i = 6; $i >= 1; --$i ) {
 634                         $h = substr( '======', 0, $i );
 635                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 636                           "<h{$i}>\\1</h{$i}>\\2", $text );
 637                 }
 638                 wfProfileOut( $fname );
 639                 return $text;
 640         }
 641
 642         /* private */ function doAllQuotes( $text ) {
 643                 $fname = 'Parser::doAllQuotes';
 644                 wfProfileIn( $fname );
 645                 $outtext = '';
 646                 $lines = explode( "\n", $text );
 647                 foreach ( $lines as $line ) {
 648                         $outtext .= $this->doQuotes ( $line ) . "\n";
 649                 }
 650                 $outtext = substr($outtext, 0,-1);
 651                 wfProfileOut( $fname );
 652                 return $outtext;
 653         }
 654
 655         /* private */ function doQuotes( $text ) {
 656                 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 657                 if (count ($arr) == 1)
 658                         return $text;
 659                 else
 660                 {
 661                         # First, do some preliminary work. This may shift some apostrophes from
 662                         # being mark-up to being text. It also counts the number of occurrences
 663                         # of bold and italics mark-ups.
 664                         $i = 0;
 665                         $numbold = 0;
 666                         $numitalics = 0;
 667                         foreach ($arr as $r)
 668                         {
 669                                 if (($i % 2) == 1)
 670                                 {
 671                                         # If there are ever four apostrophes, assume the first is supposed to
 672                                         # be text, and the remaining three constitute mark-up for bold text.
 673                                         if (strlen ($arr[$i]) == 4)
 674                                         {
 675                                                 $arr[$i-1] .= "'";
 676                                                 $arr[$i] = "'''";
 677                                         }
 678                                         # If there are more than 5 apostrophes in a row, assume they're all
 679                                         # text except for the last 5.
 680                                         else if (strlen ($arr[$i]) > 5)
 681                                         {
 682                                                 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
 683                                                 $arr[$i] = "'''''";
 684                                         }
 685                                         # Count the number of occurrences of bold and italics mark-ups.
 686                                         # We are not counting sequences of five apostrophes.
 687                                         if (strlen ($arr[$i]) == 2) $numitalics++;  else
 688                                         if (strlen ($arr[$i]) == 3) $numbold++;     else
 689                                         if (strlen ($arr[$i]) == 5) { $numitalics++; $numbold++; }
 690                                 }
 691                                 $i++;
 692                         }
 693
 694                         # If there is an odd number of both bold and italics, it is likely
 695                         # that one of the bold ones was meant to be an apostrophe followed
 696                         # by italics. Which one we cannot know for certain, but it is more
 697                         # likely to be one that has a single-letter word before it.
 698                         if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
 699                         {
 700                                 $i = 0;
 701                                 $firstsingleletterword = -1;
 702                                 $firstmultiletterword = -1;
 703                                 $firstspace = -1;
 704                                 foreach ($arr as $r)
 705                                 {
 706                                         if (($i % 2 == 1) and (strlen ($r) == 3))
 707                                         {
 708                                                 $x1 = substr ($arr[$i-1], -1);
 709                                                 $x2 = substr ($arr[$i-1], -2, 1);
 710                                                 if ($x1 == " ") {
 711                                                         if ($firstspace == -1) $firstspace = $i;
 712                                                 } else if ($x2 == " ") {
 713                                                         if ($firstsingleletterword == -1) $firstsingleletterword = $i;
 714                                                 } else {
 715                                                         if ($firstmultiletterword == -1) $firstmultiletterword = $i;
 716                                                 }
 717                                         }
 718                                         $i++;
 719                                 }
 720
 721                                 # If there is a single-letter word, use it!
 722                                 if ($firstsingleletterword > -1)
 723                                 {
 724                                         $arr [ $firstsingleletterword ] = "''";
 725                                         $arr [ $firstsingleletterword-1 ] .= "'";
 726                                 }
 727                                 # If not, but there's a multi-letter word, use that one.
 728                                 else if ($firstmultiletterword > -1)
 729                                 {
 730                                         $arr [ $firstmultiletterword ] = "''";
 731                                         $arr [ $firstmultiletterword-1 ] .= "'";
 732                                 }
 733                                 # ... otherwise use the first one that has neither.
 734                                 # (notice that it is possible for all three to be -1 if, for example,
 735                                 # there is only one pentuple-apostrophe in the line)
 736                                 else if ($firstspace > -1)
 737                                 {
 738                                         $arr [ $firstspace ] = "''";
 739                                         $arr [ $firstspace-1 ] .= "'";
 740                                 }
 741                         }
 742
 743                         # Now let's actually convert our apostrophic mush to HTML!
 744                         $output = '';
 745                         $buffer = '';
 746                         $state = '';
 747                         $i = 0;
 748                         foreach ($arr as $r)
 749                         {
 750                                 if (($i % 2) == 0)
 751                                 {
 752                                         if ($state == 'both')
 753                                                 $buffer .= $r;
 754                                         else
 755                                                 $output .= $r;
 756                                 }
 757                                 else
 758                                 {
 759                                         if (strlen ($r) == 2)
 760                                         {
 761                                                 if ($state == 'em')
 762                                                 { $output .= "</em>"; $state = ''; }
 763                                                 else if ($state == 'strongem')
 764                                                 { $output .= "</em>"; $state = 'strong'; }
 765                                                 else if ($state == 'emstrong')
 766                                                 { $output .= "</strong></em><strong>"; $state = 'strong'; }
 767                                                 else if ($state == 'both')
 768                                                 { $output .= "<strong><em>{$buffer}</em>"; $state = 'strong'; }
 769                                                 else # $state can be 'strong' or ''
 770                                                 { $output .= "<em>"; $state .= 'em'; }
 771                                         }
 772                                         else if (strlen ($r) == 3)
 773                                         {
 774                                                 if ($state == 'strong')
 775                                                 { $output .= "</strong>"; $state = ''; }
 776                                                 else if ($state == 'strongem')
 777                                                 { $output .= "</em></strong><em>"; $state = 'em'; }
 778                                                 else if ($state == 'emstrong')
 779                                                 { $output .= "</strong>"; $state = 'em'; }
 780                                                 else if ($state == 'both')
 781                                                 { $output .= "<em><strong>{$buffer}</strong>"; $state = 'em'; }
 782                                                 else # $state can be 'em' or ''
 783                                                 { $output .= "<strong>"; $state .= 'strong'; }
 784                                         }
 785                                         else if (strlen ($r) == 5)
 786                                         {
 787                                                 if ($state == 'strong')
 788                                                 { $output .= "</strong><em>"; $state = 'em'; }
 789                                                 else if ($state == 'em')
 790                                                 { $output .= "</em><strong>"; $state = 'strong'; }
 791                                                 else if ($state == 'strongem')
 792                                                 { $output .= "</em></strong>"; $state = ''; }
 793                                                 else if ($state == 'emstrong')
 794                                                 { $output .= "</strong></em>"; $state = ''; }
 795                                                 else if ($state == 'both')
 796                                                 { $output .= "<em><strong>{$buffer}</strong></em>"; $state = ''; }
 797                                                 else # ($state == '')
 798                                                 { $buffer = ''; $state = 'both'; }
 799                                         }
 800                                 }
 801                                 $i++;
 802                         }
 803                         # Now close all remaining tags.  Notice that the order is important.
 804                         if ($state == 'strong' || $state == 'emstrong')
 805                                 $output .= '</strong>';
 806                         if ($state == 'em' || $state == 'strongem' || $state == 'emstrong')
 807                                 $output .= '</em>';
 808                         if ($state == 'strongem')
 809                                 $output .= '</strong>';
 810                         if ($state == 'both')
 811                                 $output .= "<strong><em>{$buffer}</em></strong>";
 812                         return $output;
 813                 }
 814         }
 815
 816         # Note: we have to do external links before the internal ones,
 817         # and otherwise take great care in the order of things here, so
 818         # that we don't end up interpreting some URLs twice.
 819
 820         /* private */ function replaceExternalLinks( $text ) {
 821                 $fname = 'Parser::replaceExternalLinks';
 822                 wfProfileIn( $fname );
 823
 824                 $sk =& $this->mOptions->getSkin();
 825                 $linktrail = wfMsg('linktrail');
 826                 $bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 827
 828                 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
 829
 830                 $i = 0;
 831                 while ( $i<count( $bits ) ) {
 832                         $url = $bits[$i++];
 833                         $protocol = $bits[$i++];
 834                         $text = $bits[$i++];
 835                         $trail = $bits[$i++];
 836
 837                         # If the link text is an image URL, replace it with an <img> tag
 838                         # This happened by accident in the original parser, but some people used it extensively
 839                         $img = $this->maybeMakeImageLink( $text );
 840                         if ( $img !== false ) {
 841                                 $text = $img;
 842                         }
 843
 844                         $dtrail = '';
 845
 846                         # No link text, e.g. [http://domain.tld/some.link]
 847                         if ( $text == '' ) {
 848                                 # Autonumber if allowed
 849                                 if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
 850                                         $text = "[" . ++$this->mAutonumber . "]";
 851                                 } else {
 852                                         # Otherwise just use the URL
 853                                         $text = htmlspecialchars( $url );
 854                                 }
 855                         } else {
 856                                 # Have link text, e.g. [http://domain.tld/some.link text]s
 857                                 # Check for trail
 858                                 if ( preg_match( $linktrail, $trail, $m2 ) ) {
 859                                         $dtrail = $m2[1];
 860                                         $trail = $m2[2];
 861                                 }
 862                         }
 863
 864                         $encUrl = htmlspecialchars( $url );
 865                         # Bit in parentheses showing the URL for the printable version
 866                         if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $url ) ) {
 867                                 $paren = '';
 868                         } else {
 869                                 # Expand the URL for printable version
 870                                 if ( ! $sk->suppressUrlExpansion() ) {
 871                                         $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
 872                                 } else {
 873                                         $paren = '';
 874                                 }
 875                         }
 876
 877                         # Process the trail (i.e. everything after this link up until start of the next link),
 878                         # replacing any non-bracketed links
 879                         $trail = $this->replaceFreeExternalLinks( $trail );
 880
 881                         $la = $sk->getExternalLinkAttributes( $url, $text );
 882
 883                         # Use the encoded URL
 884                         # This means that users can paste URLs directly into the text
 885                         # Funny characters like &ouml; aren't valid in URLs anyway
 886                         # This was changed in August 2004
 887                         $s .= "<a href=\"{$url}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
 888                 }
 889
 890                 wfProfileOut( $fname );
 891                 return $s;
 892         }
 893
 894         # Replace anything that looks like a URL with a link
 895         function replaceFreeExternalLinks( $text ) {
 896                 $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 897                 $s = array_shift( $bits );
 898                 $i = 0;
 899
 900                 $sk =& $this->mOptions->getSkin();
 901
 902                 while ( $i < count( $bits ) ){
 903                         $protocol = $bits[$i++];
 904                         $remainder = $bits[$i++];
 905
 906                         if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
 907                                 # Found some characters after the protocol that look promising
 908                                 $url = $protocol . $m[1];
 909                                 $trail = $m[2];
 910
 911                                 # Move trailing punctuation to $trail
 912                                 $sep = ',;\.:!?';
 913                                 # If there is no left bracket, then consider right brackets fair game too
 914                                 if ( strpos( $url, '(' ) === false ) {
 915                                         $sep .= ')';
 916                                 }
 917
 918                                 $numSepChars = strspn( strrev( $url ), $sep );
 919                                 if ( $numSepChars ) {
 920                                         $trail = substr( $url, -$numSepChars ) . $trail;
 921                                         $url = substr( $url, 0, -$numSepChars );
 922                                 }
 923
 924                                 # Replace &amp; from obsolete syntax with &
 925                                 $url = str_replace( '&amp;', '&', $url );
 926
 927                                 # Is this an external image?
 928                                 $text = $this->maybeMakeImageLink( $url );
 929                                 if ( $text === false ) {
 930                                         # Not an image, make a link
 931                                         $text = $sk->makeExternalLink( $url, $url );
 932                                 }
 933                                 $s .= $text . $trail;
 934                         } else {
 935                                 $s .= $protocol . $remainder;
 936                         }
 937                 }
 938                 return $s;
 939         }
 940
 941         # make an image if it's allowed
 942         function maybeMakeImageLink( $url ) {
 943                 $sk =& $this->mOptions->getSkin();
 944                 $text = false;
 945                 if ( $this->mOptions->getAllowExternalImages() ) {
 946                         if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
 947                                 # Image found
 948                                 $text = $sk->makeImage( htmlspecialchars( $url ) );
 949                         }
 950                 }
 951                 return $text;
 952         }
 953
        # The wikilinks [[ ]] are processed here.
 955         /* private */ function replaceInternalLinks( $s ) {
 956                 global $wgLang, $wgLinkCache;
 957                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 958                 static $fname = 'Parser::replaceInternalLinks' ;
 959                 wfProfileIn( $fname );
 960
 961                 wfProfileIn( $fname.'-setup' );
 962                 static $tc = FALSE;
 963                 # the % is needed to support urlencoded titles as well
 964                 if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
 965                 $sk =& $this->mOptions->getSkin();
 966
 967                 $redirect = MagicWord::get ( MAG_REDIRECT ) ;
 968
 969                 $a = explode( '[[', ' ' . $s );
 970                 $s = array_shift( $a );
 971                 $s = substr( $s, 1 );
 972
 973                 # Match a link having the form [[namespace:link|alternate]]trail
 974                 static $e1 = FALSE;
 975                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 976                 # Match the end of a line for a word that's not followed by whitespace,
 977                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 978                 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
 979
 980                 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
 981                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 982
 983                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 984
 985                 if ( $useLinkPrefixExtension ) {
 986                         if ( preg_match( $e2, $s, $m ) ) {
 987                                 $first_prefix = $m[2];
 988                                 $s = $m[1];
 989                         } else {
 990                                 $first_prefix = false;
 991                         }
 992                 } else {
 993                         $prefix = '';
 994                 }
 995
 996                 wfProfileOut( $fname.'-setup' );
 997
 998                 # start procedeeding each line
 999                 foreach ( $a as $line ) {
1000                         wfProfileIn( $fname.'-prefixhandling' );
1001                         if ( $useLinkPrefixExtension ) {
1002                                 if ( preg_match( $e2, $s, $m ) ) {
1003                                         $prefix = $m[2];
1004                                         $s = $m[1];
1005                                 } else {
1006                                         $prefix='';
1007                                 }
1008                                 # first link
1009                                 if($first_prefix) {
1010                                         $prefix = $first_prefix;
1011                                         $first_prefix = false;
1012                                 }
1013                         }
1014                         wfProfileOut( $fname.'-prefixhandling' );
1015
1016                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1017                                 $text = $m[2];
1018                                 # fix up urlencoded title texts
1019                                 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1020                                 $trail = $m[3];
1021                         } else { # Invalid form; output directly
1022                                 $s .= $prefix . '[[' . $line ;
1023                                 continue;
1024                         }
1025
1026                         # Valid link forms:
1027                         # Foobar -- normal
1028                         # :Foobar -- override special treatment of prefix (images, language links)
1029                         # /Foobar -- convert to CurrentPage/Foobar
1030                         # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1031
1032                         # Look at the first character
1033                         $c = substr($m[1],0,1);
1034                         $noforce = ($c != ':');
1035
1036                         # subpage
1037                         if( $c == '/' ) {
1038                                 # / at end means we don't want the slash to be shown
1039                                 if(substr($m[1],-1,1)=='/') {
1040                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
1041                                         $noslash=$m[1];
1042                                 } else {
1043                                         $noslash=substr($m[1],1);
1044                                 }
1045
1046                                 # Some namespaces don't allow subpages
1047                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
1048                                         # subpages allowed here
1049                                         $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
1050                                         if( '' == $text ) {
1051                                                 $text= $m[1];
1052                                         } # this might be changed for ugliness reasons
1053                                 } else {
1054                                         # no subpage allowed, use standard link
1055                                         $link = $noslash;
1056                                 }
1057
1058                         } elseif( $noforce ) { # no subpage
1059                                 $link = $m[1];
1060                         } else {
1061                                 # We don't want to keep the first character
1062                                 $link = substr( $m[1], 1 );
1063                         }
1064
1065                         $wasblank = ( '' == $text );
1066                         if( $wasblank ) $text = $link;
1067
1068                         $nt = Title::newFromText( $link );
1069                         if( !$nt ) {
1070                                 $s .= $prefix . '[[' . $line;
1071                                 continue;
1072                         }
1073
1074                         $ns = $nt->getNamespace();
1075                         $iw = $nt->getInterWiki();
1076
1077                         # Link not escaped by : , create the various objects
1078                         if( $noforce ) {
1079
1080                                 # Interwikis
1081                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1082                                         array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
1083                                         $tmp = $prefix . $trail ;
1084                                         $s .= (trim($tmp) == '')? '': $tmp;
1085                                         continue;
1086                                 }
1087
1088                                 if ( $ns == NS_IMAGE ) {
1089                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1090                                         $wgLinkCache->addImageLinkObj( $nt );
1091                                         continue;
1092                                 }
1093
1094                                 if ( $ns == NS_CATEGORY ) {
1095                                         $t = $nt->getText() ;
1096                                         $nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).":".$t ) ;
1097
1098                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1099                                         $pPLC=$sk->postParseLinkColour();
1100                                         $sk->postParseLinkColour( false );
1101                                         $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1102                                         $sk->postParseLinkColour( $pPLC );
1103                                         $wgLinkCache->resume();
1104
1105                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
1106                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1107                                         $this->mOutput->mCategoryLinks[] = $t ;
1108                                         $s .= $prefix . $trail ;
1109                                         continue;
1110                                 }
1111                         }
1112
1113                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
1114                             ( strpos( $link, '#' ) == FALSE ) ) {
1115                                 # Self-links are handled specially; generally de-link and change to bold.
1116                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1117                                 continue;
1118                         }
1119
1120                         if( $ns == NS_MEDIA ) {
1121                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1122                                 $wgLinkCache->addImageLinkObj( $nt );
1123                                 continue;
1124                         } elseif( $ns == NS_SPECIAL ) {
1125                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1126                                 continue;
1127                         }
1128                         $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1129                 }
1130                 wfProfileOut( $fname );
1131                 return $s;
1132         }
1133
1134         # Some functions here used by doBlockLevels()
1135         #
1136         /* private */ function closeParagraph() {
1137                 $result = '';
1138                 if ( '' != $this->mLastSection ) {
1139                         $result = '</' . $this->mLastSection  . ">\n";
1140                 }
1141                 $this->mInPre = false;
1142                 $this->mLastSection = '';
1143                 return $result;
1144         }
        # getCommon() returns the length of the longest common prefix
        # of both arguments, i.e. the number of leading characters they share.
1147         #
1148         /* private */ function getCommon( $st1, $st2 ) {
1149                 $fl = strlen( $st1 );
1150                 $shorter = strlen( $st2 );
1151                 if ( $fl < $shorter ) { $shorter = $fl; }
1152
1153                 for ( $i = 0; $i < $shorter; ++$i ) {
1154                         if ( $st1{$i} != $st2{$i} ) { break; }
1155                 }
1156                 return $i;
1157         }
1158         # These next three functions open, continue, and close the list
1159         # element appropriate to the prefix character passed into them.
1160         #
1161         /* private */ function openList( $char ) {
1162                 $result = $this->closeParagraph();
1163
1164                 if ( '*' == $char ) { $result .= '<ul><li>'; }
1165                 else if ( '#' == $char ) { $result .= '<ol><li>'; }
1166                 else if ( ':' == $char ) { $result .= '<dl><dd>'; }
1167                 else if ( ';' == $char ) {
1168                         $result .= '<dl><dt>';
1169                         $this->mDTopen = true;
1170                 }
1171                 else { $result = '<!-- ERR 1 -->'; }
1172
1173                 return $result;
1174         }
1175
# Returns the markup that ends the current list item and starts the next
# one for the list type given by $char. For definition lists the closing
# tag depends on whether a <dt> is currently open (mDTopen), and mDTopen
# is updated to reflect the element being opened. Unknown characters
# yield an error comment.
/* private */ function nextItem( $char ) {
        switch ( $char ) {
                case '*':
                case '#':
                        return '</li><li>';

                case ':':
                case ';':
                        $closing = $this->mDTopen ? '</dt>' : '</dd>';
                        $startsTerm = ( ';' == $char );
                        $this->mDTopen = $startsTerm;
                        return $closing . ( $startsTerm ? '<dt>' : '<dd>' );
        }

        return '<!-- ERR 2 -->';
}
1191
# Returns the closing tags (plus a trailing newline) for the list type
# given by $char. A definition list is closed with </dt> when a term is
# still open, in which case mDTopen is cleared, and with </dd> otherwise.
# Any character other than '*', '#' or ':' yields an error comment
# without a trailing newline.
/* private */ function closeList( $char ) {
        switch ( $char ) {
                case '*':
                        return '</li></ul>' . "\n";

                case '#':
                        return '</li></ol>' . "\n";

                case ':':
                        if ( !$this->mDTopen ) {
                                return '</dd></dl>' . "\n";
                        }
                        $this->mDTopen = false;
                        return '</dt></dl>' . "\n";
        }

        return '<!-- ERR 3 -->';
}
1206
# Converts newline-separated text into block-level markup, one line at a
# time: lines starting with any run of * # : ; become (possibly nested)
# lists, lines starting with a space become <pre> sections, and other
# text is wrapped in <p> paragraphs.
#
#   $text       the text to process
#   $linestart  when false, the first line is copied to the output
#               untouched and processing starts with the second line
#
# Returns the generated markup. Uses mInPre, mLastSection and mDTopen as
# working state; mLastSection is '' again when the function returns.
/* private */ function doBlockLevels( $text, $linestart ) {
        $fname = 'Parser::doBlockLevels';
        wfProfileIn( $fname );

        $textLines = explode( "\n", $text );

        $lastPrefix = $output = '';
        # $pref2 is read after the loop, so give it a value even when the
        # loop body never runs.
        $pref2 = '';
        $this->mDTopen = $inBlockElem = false;
        $prefixLength = 0;
        # Either false, or a paragraph tag ('<p>' / '</p><p>') whose output
        # has been postponed by a blank line. While a tag is pending, the
        # current line's text is not emitted.
        $paragraphStack = false;

        # Patterns that do not change between lines.
        # "; term : definition" splitter: the colon must follow whitespace
        # or &nbsp;. Not foolproof; something better in Tokenizer might help.
        $dtSplitRegex = '/^(.*?(?:\s|&nbsp;)):(.*)$/';
        // XXX: use a stack for nestable elements like span, table and div
        $openRegex = '/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i';
        $closeRegex =
                '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
                '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.UNIQ_PREFIX.'-pre|<\\/li|<\\/ul)/i';

        if ( !$linestart ) {
                $output .= array_shift( $textLines );
        }
        foreach ( $textLines as $oLine ) {
                $lastPrefixLength = strlen( $lastPrefix );
                $preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
                $preOpenMatch = preg_match( '/<pre/i', $oLine );
                if ( !$this->mInPre ) {
                        # Multiple prefixes may abut each other for nested lists.
                        $prefixLength = strspn( $oLine, '*#:;' );
                        $pref = substr( $oLine, 0, $prefixLength );

                        # $pref2 treats ';' as ':' so that a term line and a
                        # definition line compare as the same list level.
                        $pref2 = str_replace( ';', ':', $pref );
                        $t = substr( $oLine, $prefixLength );
                        $this->mInPre = !empty( $preOpenMatch );
                } else {
                        # Don't interpret any other prefixes in preformatted text
                        $prefixLength = 0;
                        $pref = $pref2 = '';
                        $t = $oLine;
                }

                # List generation
                if ( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
                        # Same as the last item, so no need to deal with nesting or opening stuff
                        $output .= $this->nextItem( substr( $pref, -1 ) );
                        $paragraphStack = false;

                        if ( substr( $pref, -1 ) == ';' ) {
                                # The one nasty exception: definition lists work like this:
                                # ; title : definition text
                                # So we check for : in the remainder text to split up the
                                # title and definition, without b0rking links.
                                if ( preg_match( $dtSplitRegex, $t, $match ) ) {
                                        $term = $match[1];
                                        $output .= $term . $this->nextItem( ':' );
                                        $t = $match[2];
                                }
                        }
                } elseif ( $prefixLength || $lastPrefixLength ) {
                        # Either open or close a level...
                        $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
                        $paragraphStack = false;

                        # Close every level of the previous line that is not shared.
                        while ( $commonPrefixLength < $lastPrefixLength ) {
                                $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                                --$lastPrefixLength;
                        }
                        # The new line is a prefix of the old one: continue that list.
                        if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                                $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
                        }
                        # Open every level of the new line that is not shared.
                        while ( $prefixLength > $commonPrefixLength ) {
                                $char = substr( $pref, $commonPrefixLength, 1 );
                                $output .= $this->openList( $char );

                                if ( ';' == $char ) {
                                        # Same "; term : definition" split as above.
                                        if ( preg_match( $dtSplitRegex, $t, $match ) ) {
                                                $term = $match[1];
                                                $output .= $term . $this->nextItem( ":" );
                                                $t = $match[2];
                                        }
                                }
                                ++$commonPrefixLength;
                        }
                        $lastPrefix = $pref2;
                }
                if ( 0 == $prefixLength ) {
                        # No prefix (not in list)--go to paragraph mode
                        $openmatch = preg_match( $openRegex, $t );
                        $closematch = preg_match( $closeRegex, $t );
                        if ( $openmatch or $closematch ) {
                                $paragraphStack = false;
                                $output .= $this->closeParagraph();
                                # closeParagraph() cleared mInPre; restore it if this
                                # line opens a <pre> that it does not also close.
                                if ( $preOpenMatch and !$preCloseMatch ) {
                                        $this->mInPre = true;
                                }
                                # A closing match wins over an opening match.
                                $inBlockElem = !$closematch;
                        } else if ( !$inBlockElem && !$this->mInPre ) {
                                # substr() instead of a direct offset read: $t may be
                                # empty here, and reading offset 0 of an empty string
                                # triggers a notice.
                                if ( substr( $t, 0, 1 ) === ' ' and ( $this->mLastSection == 'pre' or trim( $t ) != '' ) ) {
                                        // pre
                                        if ( $this->mLastSection != 'pre' ) {
                                                $paragraphStack = false;
                                                $output .= $this->closeParagraph().'<pre>';
                                                $this->mLastSection = 'pre';
                                        }
                                } else {
                                        // paragraph
                                        if ( '' == trim( $t ) ) {
                                                if ( $paragraphStack ) {
                                                        # Second blank line in a row: emit the
                                                        # pending tag plus a line break.
                                                        $output .= $paragraphStack.'<br />';
                                                        $paragraphStack = false;
                                                        $this->mLastSection = 'p';
                                                } else {
                                                        # First blank line: postpone the tag.
                                                        if ( $this->mLastSection != 'p' ) {
                                                                $output .= $this->closeParagraph();
                                                                $this->mLastSection = '';
                                                                $paragraphStack = '<p>';
                                                        } else {
                                                                $paragraphStack = '</p><p>';
                                                        }
                                                }
                                        } else {
                                                if ( $paragraphStack ) {
                                                        $output .= $paragraphStack;
                                                        $paragraphStack = false;
                                                        $this->mLastSection = 'p';
                                                } else if ( $this->mLastSection != 'p' ) {
                                                        $output .= $this->closeParagraph().'<p>';
                                                        $this->mLastSection = 'p';
                                                }
                                        }
                                }
                        }
                }
                if ( $paragraphStack === false ) {
                        $output .= $t."\n";
                }
        }
        # Close whatever list levels the final line left open.
        while ( $prefixLength ) {
                $output .= $this->closeList( $pref2[$prefixLength-1] );
                --$prefixLength;
        }
        # Close a trailing paragraph or pre section.
        if ( '' != $this->mLastSection ) {
                $output .= '</' . $this->mLastSection . '>';
                $this->mLastSection = '';
        }

        wfProfileOut( $fname );
        return $output;
}
1363
1364         # Return value of a magic variable (like PAGENAME)
function getVariableValue( $index ) {
        global $wgLang, $wgSitename, $wgServer;

        # Maps a magic-variable id (one of the MAG_* constants below) to its
        # current value. Ids that are not handled here give NULL.
        $value = NULL;

        switch ( $index ) {
                case MAG_CURRENTMONTH:
                        $value = $wgLang->formatNum( date( 'm' ) );
                        break;
                case MAG_CURRENTMONTHNAME:
                        $value = $wgLang->getMonthName( date('n') );
                        break;
                case MAG_CURRENTMONTHNAMEGEN:
                        $value = $wgLang->getMonthNameGen( date('n') );
                        break;
                case MAG_CURRENTDAY:
                        $value = $wgLang->formatNum( date('j') );
                        break;
                case MAG_PAGENAME:
                        $value = $this->mTitle->getText();
                        break;
                case MAG_PAGENAMEE:
                        $value = $this->mTitle->getPartialURL();
                        break;
                case MAG_NAMESPACE:
                        # Localised namespace name, not the canonical one (patch by Dori)
                        $value = $wgLang->getNsText( $this->mTitle->getNamespace() );
                        break;
                case MAG_CURRENTDAYNAME:
                        # date('w') counts from 0; the call below is given that value plus one
                        $value = $wgLang->getWeekdayName( date('w')+1 );
                        break;
                case MAG_CURRENTYEAR:
                        $value = $wgLang->formatNum( date( 'Y' ) );
                        break;
                case MAG_CURRENTTIME:
                        $value = $wgLang->time( wfTimestampNow(), false );
                        break;
                case MAG_NUMBEROFARTICLES:
                        $value = $wgLang->formatNum( wfNumberOfArticles() );
                        break;
                case MAG_SITENAME:
                        $value = $wgSitename;
                        break;
                case MAG_SERVER:
                        $value = $wgServer;
                        break;
        }

        return $value;
}
1400
1401         # initialise the magic variables (like CURRENTMONTHNAME)
function initialiseVariables() {
        global $wgVariableIDs;

        # Rebuilds mVariables from scratch: for every id in $wgVariableIDs
        # the matching MagicWord object is asked to add the variable's
        # current value to the table.
        $this->mVariables = array();

        foreach ( $wgVariableIDs as $variableId ) {
                $magicWord =& MagicWord::get( $variableId );
                $currentValue = $this->getVariableValue( $variableId );
                $magicWord->addToArray( $this->mVariables, $currentValue );
        }
}
1410
# Expands {{...}} constructs in $text through regex callbacks.
#
#   $args  arguments of the enclosing template; they are pushed on
#          mArgStack for the duration of the call so that the callbacks
#          can look them up
#
# Text longer than MAX_INCLUDE_SIZE is returned unchanged. Variable and
# argument substitution only run for HTML output; template substitution
# runs for every output type.
/* private */ function replaceVariables( $text, $args = array() ) {
        global $wgLang, $wgScript, $wgArticlePath;

        # Prevent too big inclusions
        if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
                return $text;
        }

        $fname = 'Parser::replaceVariables';
        wfProfileIn( $fname );

        $titleChars = Title::legalChars();
        # Same character set with both brace characters taken out
        $nonBraceChars = str_replace( array( '{', '}' ), '', $titleChars );

        # This function is called recursively, hence a stack of argument lists
        $this->mArgStack[] = $args;

        # The callbacks locate the parser through this global. PHP's global
        # rebinding syntax is awkward, so go through the GLOBALS array.
        $GLOBALS['wgCurParser'] =& $this;

        if ( $this->mOutputType == OT_HTML ) {
                # Variable substitution
                $variablePattern = "/{{([$nonBraceChars]*?)}}/";
                $text = preg_replace_callback( $variablePattern, 'wfVariableSubstitution', $text );

                # Argument substitution
                $argumentPattern = "/(\\n?){{{([$titleChars]*?)}}}/";
                $text = preg_replace_callback( $argumentPattern, 'wfArgSubstitution', $text );
        }

        # Template substitution
        $templatePattern = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
        $text = preg_replace_callback( $templatePattern, 'wfBraceSubstitution', $text );

        array_pop( $this->mArgStack );

        wfProfileOut( $fname );
        return $text;
}
1448
# Callback body for {{VARIABLE}} matches: $matches[0] is the whole match,
# $matches[1] the text between the braces. Returns the variable's value
# when the name is known (and flags the output as containing old-style
# magic), otherwise the match unchanged.
function variableSubstitution( $matches ) {
        # Build the variable table on first use
        if ( !$this->mVariables ) {
                $this->initialiseVariables();
        }

        $name = $matches[1];
        if ( !array_key_exists( $name, $this->mVariables ) ) {
                return $matches[0];
        }

        $this->mOutput->mContainsOldMagic = true;
        return $this->mVariables[$name];
}
1461
1462         # Split template arguments
function getTemplateArgs( $argsString ) {
        # $argsString is either '' (no arguments) or a string whose first
        # character is dropped before the rest is split on '|'.
        if ( $argsString === '' ) {
                return array();
        }

        # Walk the pieces left to right. As long as the argument built so far
        # has a different number of '[[' and ']]', the '|' that followed it
        # belonged to link syntax, so the next piece is glued back on.
        $merged = array();
        $unbalanced = false;
        foreach ( explode( '|', substr( $argsString, 1 ) ) as $piece ) {
                if ( $unbalanced ) {
                        $merged[count( $merged ) - 1] .= "|" . $piece;
                } else {
                        $merged[] = $piece;
                }
                $current = $merged[count( $merged ) - 1];
                $unbalanced = ( substr_count( $current, "[[" ) != substr_count( $current, "]]" ) );
        }

        return $merged;
}
1486
# Callback body for {{...}} matches found by replaceVariables().
#
#   $matches[0]  the whole match
#   $matches[1]  an optional newline character before the braces
#   $matches[2]  the bit before the first |
#   $matches[3]  the argument string; if non-empty it starts with |
#
# The stages below are tried in order and the first one that produces a
# result wins: {{{...}}} passthrough, SUBST, MSG/MSGNW/INT, NS,
# LOCALURL/LOCALURLE, internal variables, the template cache and finally
# the database. Returns the replacement text, or the match unchanged
# when no stage produced one.
function braceSubstitution( $matches ) {
        global $wgLinkCache, $wgLang;
        $found = false;
        $nowiki = false;
        $noparse = false;

        $title = NULL;

        # $newline is an optional newline character before the braces
        # $part1 is the bit before the first |, and must contain only title characters
        # $args is a list of arguments, starting from index 0, not including $part1

        $newline = $matches[1];
        $part1 = $matches[2];
        # If the third subpattern matched anything, it will start with |

        $args = $this->getTemplateArgs( $matches[3] );
        $argc = count( $args );

        # {{{}}}: leave anything containing a triple brace alone
        if ( strpos( $matches[0], '{{{' ) !== false ) {
                $text = $matches[0];
                $found = true;
                $noparse = true;
        }

        # SUBST
        if ( !$found ) {
                $mwSubst =& MagicWord::get( MAG_SUBST );
                if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
                        if ( $this->mOutputType != OT_WIKI ) {
                                # Invalid SUBST not replaced at PST time
                                # Return without further processing
                                $text = $matches[0];
                                $found = true;
                                $noparse = true;
                        }
                } elseif ( $this->mOutputType == OT_WIKI ) {
                        # SUBST not found in PST pass, do nothing
                        $text = $matches[0];
                        $found = true;
                }
        }

        # MSG, MSGNW and INT
        if ( !$found ) {
                # Check for MSGNW:
                $mwMsgnw =& MagicWord::get( MAG_MSGNW );
                if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
                        $nowiki = true;
                } else {
                        # Remove obsolete MSG:
                        $mwMsg =& MagicWord::get( MAG_MSG );
                        $mwMsg->matchStartAndRemove( $part1 );
                }

                # Check if it is an internal message
                $mwInt =& MagicWord::get( MAG_INT );
                if ( $mwInt->matchStartAndRemove( $part1 ) ) {
                        if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
                                $text = wfMsgReal( $part1, $args, true );
                                $found = true;
                        }
                }
        }

        # NS
        if ( !$found ) {
                # Check for NS: (namespace expansion)
                $mwNs = MagicWord::get( MAG_NS );
                if ( $mwNs->matchStartAndRemove( $part1 ) ) {
                        if ( intval( $part1 ) ) {
                                $text = $wgLang->getNsText( intval( $part1 ) );
                                $found = true;
                        } else {
                                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                                if ( !is_null( $index ) ) {
                                        $text = $wgLang->getNsText( $index );
                                        $found = true;
                                }
                        }
                }
        }

        # LOCALURL and LOCALURLE
        if ( !$found ) {
                $mwLocal = MagicWord::get( MAG_LOCALURL );
                $mwLocalE = MagicWord::get( MAG_LOCALURLE );

                if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
                        $func = 'getLocalURL';
                } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
                        $func = 'escapeLocalURL';
                } else {
                        $func = '';
                }

                if ( $func !== '' ) {
                        $title = Title::newFromText( $part1 );
                        if ( !is_null( $title ) ) {
                                # The first argument, if any, is handed to the URL method
                                if ( $argc > 0 ) {
                                        $text = $title->$func( $args[0] );
                                } else {
                                        $text = $title->$func();
                                }
                                $found = true;
                        }
                }
        }

        # Internal variables
        if ( !$this->mVariables ) {
                $this->initialiseVariables();
        }
        if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
                $text = $this->mVariables[$part1];
                $found = true;
                $this->mOutput->mContainsOldMagic = true;
        }

        # Template table test

        # Did we encounter this template already? If yes, it is in the cache
        # and we need to check for loops.
        # Only consulted when no earlier stage produced a result; otherwise a
        # cached template that happens to share the name left in $part1 would
        # overwrite that result.
        if ( !$found && isset( $this->mTemplates[$part1] ) ) {
                # Infinite loop test: the template is already being expanded
                # further up the call chain, so do not parse it again.
                if ( isset( $this->mTemplatePath[$part1] ) ) {
                        $noparse = true;
                }
                # set $text to cached message.
                $text = $this->mTemplates[$part1];
                $found = true;
        }

        # Load from database
        if ( !$found ) {
                $title = Title::newFromText( $part1, NS_TEMPLATE );
                if ( !is_null( $title ) && !$title->isExternal() ) {
                        # Check for excessive inclusion
                        $dbk = $title->getPrefixedDBkey();
                        if ( $this->incrementIncludeCount( $dbk ) ) {
                                # Still within the per-page inclusion limit: fetch the text
                                $article = new Article( $title );
                                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                                if ( $articleContent !== false ) {
                                        $found = true;
                                        $text = $articleContent;
                                }
                        }

                        # If the title is valid but undisplayable, make a link to it
                        if ( $this->mOutputType == OT_HTML && !$found ) {
                                $text = '[['.$title->getPrefixedText().']]';
                                $found = true;
                        }

                        # Template cache array insertion. Skipped when nothing was
                        # found, because $text has no value in that case.
                        if ( $found ) {
                                $this->mTemplates[$part1] = $text;
                        }
                }
        }

        # Recursive parsing, escaping and link table handling
        # Only for HTML output
        if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
                $text = wfEscapeWikiText( $text );
        } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse ) {
                # Clean up argument array: "name=value" arguments are stored
                # under their trimmed name, all others under 1, 2, 3, ...
                $assocArgs = array();
                $index = 1;
                foreach ( $args as $arg ) {
                        $eqpos = strpos( $arg, '=' );
                        if ( $eqpos === false ) {
                                $assocArgs[$index++] = $arg;
                        } else {
                                $name = trim( substr( $arg, 0, $eqpos ) );
                                $value = trim( substr( $arg, $eqpos+1 ) );
                                $assocArgs[$name] = $value;
                        }
                }

                # Do not enter included links in link table
                if ( !is_null( $title ) ) {
                        $wgLinkCache->suspend();
                }

                # Add a new element to the template recursion path
                $this->mTemplatePath[$part1] = 1;

                $text = $this->stripParse( $text, $newline, $assocArgs );

                # Resume the link cache and register the inclusion as a link
                if ( !is_null( $title ) ) {
                        $wgLinkCache->resume();
                        $wgLinkCache->addLinkObj( $title );
                }
        }

        # Empties the template path
        $this->mTemplatePath = array();

        if ( !$found ) {
                return $matches[0];
        } else {
                return $text;
        }
}
1699
1700         # Triple brace replacement -- used for template arguments
function argSubstitution( $matches ) {
        # $matches[1] is an optional newline before the braces, $matches[2]
        # the argument name. The name is looked up in the argument list on
        # top of mArgStack; a known argument is run through stripParse(), an
        # unknown one leaves the match as it was.
        $argName = trim( $matches[2] );
        $currentArgs = end( $this->mArgStack );

        if ( !array_key_exists( $argName, $currentArgs ) ) {
                return $matches[0];
        }

        return $this->stripParse( $currentArgs[$argName], $matches[1], array() );
}
1713
1714         # Returns true if the function is allowed to include this entity
function incrementIncludeCount( $dbk ) {
        # Counts one more inclusion of the entity keyed by $dbk and reports
        # whether the running total is still within MAX_INCLUDE_REPEAT.
        if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
                $this->mIncludeCount[$dbk]++;
        } else {
                $this->mIncludeCount[$dbk] = 1;
        }

        return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1725
1726
1727         # Cleans up HTML, removes dangerous tags and attributes
# Whitelist-based HTML sanitiser.
#
# HTML comments are removed first. The text is then split on '<' and every
# fragment is examined: a tag named in the whitelists below is re-emitted with
# its attributes passed through fixTagAttributes(); anything else has its
# '<' and '>' turned into &lt; / &gt;.
#
# With $wgUseTidy off, a tag stack is kept so that unexpected or mis-nested
# tags are escaped and tags still open at the end of the text get closed.
# With $wgUseTidy on, no balancing is attempted here.
# NOTE(review): presumably tidy repairs the nesting later - confirm.
#
# If $wgUserHtml is false every whitelist is empty, so all tags are escaped.
#
# $text    string  text to clean
# returns  string  the cleaned text
/* private */ function removeHTMLtags( $text ) {
        global $wgUseTidy, $wgUserHtml;
        $fname = 'Parser::removeHTMLtags';
        wfProfileIn( $fname );

        if( $wgUserHtml ) {
                $htmlpairs = array( # Tags that must be closed
                        'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
                        'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
                        'strike', 'strong', 'tt', 'var', 'div', 'center',
                        'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
                        'ruby', 'rt' , 'rb' , 'rp', 'p'
                );
                $htmlsingle = array( # Tags that need no closing tag
                        'br', 'hr', 'li', 'dt', 'dd'
                );
                $htmlnest = array( # Tags that may appear inside themselves
                        'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
                        'dl', 'font', 'big', 'small', 'sub', 'sup'
                );
                $tabletags = array( # Can only appear inside table
                        'td', 'th', 'tr'
                );
        } else {
                $htmlpairs = array();
                $htmlsingle = array();
                $htmlnest = array();
                $tabletags = array();
        }

        # Table cell/row tags are never pushed on the tag stack either
        $htmlsingle = array_merge( $tabletags, $htmlsingle );
        $htmlelements = array_merge( $htmlsingle, $htmlpairs );

        # NOTE(review): the result is not used in this function; the call is kept
        # in case getHTMLattrs() has side effects - confirm and drop if it has none.
        $htmlattrs = $this->getHTMLattrs () ;

        # Remove HTML comments. A comment that occupies a whole line is removed
        # together with that line's leading newline.
        $text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '', $text );

        # Splits a fragment into: optional '/', tag name, attribute text,
        # closing bracket and the text that follows the tag.
        $tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';

        $bits = explode( '<', $text );
        $text = array_shift( $bits );
        if( !$wgUseTidy ) {
                $tagstack = array(); $tablestack = array();
                foreach ( $bits as $x ) {
                        # Fragments that do not look like a tag keep empty fields and
                        # therefore fall through to the escaping branch at the bottom.
                        $slash = $t = $params = $brace = $rest = '';
                        if ( preg_match( $tagPattern, $x, $regs ) ) {
                                list( , $slash, $t, $params, $brace, $rest ) = $regs;
                        }
                        $t = strtolower( $t );

                        $badtag = 0;
                        if ( in_array( $t, $htmlelements ) ) {
                                if ( $slash ) {
                                        # Closing a tag...
                                        if ( in_array( $t, $htmlsingle ) ) {
                                                # never on the stack, nothing to check
                                                $newparams = '';
                                        } else {
                                                $ot = array_pop( $tagstack );
                                                if ( $ot === $t ) {
                                                        if ( $t == 'table' ) {
                                                                # back to the stack saved when the table opened
                                                                $tagstack = array_pop( $tablestack );
                                                        }
                                                        $newparams = '';
                                                } else {
                                                        # Not the innermost open tag: put back what was
                                                        # popped (nothing if the stack was empty).
                                                        if ( $ot !== null ) {
                                                                array_push( $tagstack, $ot );
                                                        }
                                                        $badtag = 1;
                                                }
                                        }
                                } else {
                                        # Keep track for later
                                        if ( in_array( $t, $tabletags ) &&
                                        ! in_array( 'table', $tagstack ) ) {
                                                $badtag = 1;
                                        } else if ( in_array( $t, $tagstack ) &&
                                        ! in_array( $t, $htmlnest ) ) {
                                                $badtag = 1;
                                        } else if ( ! in_array( $t, $htmlsingle ) ) {
                                                if ( $t == 'table' ) {
                                                        # a table starts with a fresh stack of its own
                                                        array_push( $tablestack, $tagstack );
                                                        $tagstack = array();
                                                }
                                                array_push( $tagstack, $t );
                                        }
                                        # Strip non-approved attributes from the tag
                                        $newparams = $this->fixTagAttributes($params);
                                }
                                if ( ! $badtag ) {
                                        $rest = str_replace( '>', '&gt;', $rest );
                                        $text .= "<$slash$t $newparams$brace$rest";
                                        continue;
                                }
                        }
                        $text .= '&lt;' . str_replace( '>', '&gt;', $x);
                }
                # Close off any remaining tags
                while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
                        $text .= "</$t>\n";
                        if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
                }
        } else {
                # this might be possible using tidy itself
                foreach ( $bits as $x ) {
                        $slash = $t = $params = $brace = $rest = '';
                        if ( preg_match( $tagPattern, $x, $regs ) ) {
                                list( , $slash, $t, $params, $brace, $rest ) = $regs;
                        }
                        $t = strtolower( $t );

                        if ( in_array( $t, $htmlelements ) ) {
                                $newparams = $this->fixTagAttributes($params);
                                $rest = str_replace( '>', '&gt;', $rest );
                                $text .= "<$slash$t $newparams$brace$rest";
                        } else {
                                $text .= '&lt;' . str_replace( '>', '&gt;', $x);
                        }
                }
        }
        wfProfileOut( $fname );
        return $text;
}
1842
1843
1844         # This function accomplishes several tasks:
1845         # 1) Auto-number headings if that option is enabled
1846         # 2) Add an [edit] link to sections for logged in users who have enabled the option
1847         # 3) Add a Table of contents on the top for users who have enabled the option
1848         # 4) Auto-anchor headings
1849         #
1850         # It loops through all headlines, collects the necessary data, then splits up the
1851         # string and re-inserts the newly formatted headlines.
# $text    string  rendered HTML whose <h1>..<h6> elements are to be rebuilt
# $isMain  bool    when false, the TOC is not inserted before the first
#                  heading; a __TOC__ placement is still honoured
# returns  string  the text with rebuilt headings and, if enabled, the TOC
/* private */ function formatHeadings( $text, $isMain=true ) {
        global $wgInputEncoding, $wgMaxTocLevel;

        $doNumberHeadings = $this->mOptions->getNumberHeadings();
        $doShowToc = $this->mOptions->getShowToc();
        $forceTocHere = false;
        if( !$this->mTitle->userCanEdit() ) {
                $showEditLink = 0;
                $rightClickHack = 0;
        } else {
                $showEditLink = $this->mOptions->getEditSection();
                $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
        }

        # Inhibit editsection links if requested in the page
        $esw =& MagicWord::get( MAG_NOEDITSECTION );
        if( $esw->matchAndRemove( $text ) ) {
                $showEditLink = 0;
        }
        # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
        # do not add TOC
        $mw =& MagicWord::get( MAG_NOTOC );
        if( $mw->matchAndRemove( $text ) ) {
                $doShowToc = 0;
        }

        # never add the TOC to the Main Page. This is an entry page that should not
        # be more than 1-2 screens large anyway
        if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
                $doShowToc = 0;
        }

        # Get all headlines for numbering them and adding funky stuff like [edit]
        # links - this is for later, but we need the number of headlines right now.
        # $matches[1] = level digit, [2] = rest of the opening tag, [3] = heading text
        $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

        # if there are fewer than 4 headlines in the article, do not show TOC
        if( $numMatches < 4 ) {
                $doShowToc = 0;
        }

        # if the string __TOC__ (not case-sensitive) occurs in the HTML,
        # override above conditions and always show TOC at that place
        $mw =& MagicWord::get( MAG_TOC );
        if( $mw->match( $text ) ) {
                $doShowToc = 1;
                $forceTocHere = true;
        } else {
                # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
                # override above conditions and always show TOC above first header
                $mw =& MagicWord::get( MAG_FORCETOC );
                if( $mw->matchAndRemove( $text ) ) {
                        $doShowToc = 1;
                }
        }

        # We need this to perform operations on the HTML
        $sk =& $this->mOptions->getSkin();

        # Neither flag changes below, so decide once whether numbers are needed
        $needNumbering = ( $doNumberHeadings || $doShowToc );

        # headline counter
        $headlineCount = 0;

        # Ugh .. the TOC should have neat indentation levels which can be
        # passed to the skin functions. These are determined here
        $toclevel = 0;
        $toc = '';
        $full = '';
        $head = array();          # rebuilt heading HTML, indexed by heading number
        $sublevelCount = array(); # heading level => headings seen at that level
        $refers = array();        # anchor text => how many headings produced it
        $level = 0;
        $prevlevel = 0;
        foreach( $matches[3] as $headline ) {
                $numbering = '';
                if( $level ) {
                        $prevlevel = $level;
                }
                $level = $matches[1][$headlineCount];
                if( $needNumbering && $prevlevel && $level > $prevlevel ) {
                        # reset when we enter a new level
                        $sublevelCount[$level] = 0;
                        $toc .= $sk->tocIndent( $level - $prevlevel );
                        $toclevel += $level - $prevlevel;
                }
                if( $needNumbering && $level < $prevlevel ) {
                        # reset when we step back a level
                        $sublevelCount[$level+1] = 0;
                        $toc .= $sk->tocUnindent( $prevlevel - $level );
                        $toclevel -= $prevlevel - $level;
                }
                # count number of headlines for each level
                if( !isset( $sublevelCount[$level] ) ) {
                        $sublevelCount[$level] = 0;
                }
                $sublevelCount[$level]++;
                if( $needNumbering ) {
                        # join the non-empty counters of all levels up to this one with dots
                        $dot = 0;
                        for( $i = 1; $i <= $level; $i++ ) {
                                if( !empty( $sublevelCount[$i] ) ) {
                                        if( $dot ) {
                                                $numbering .= '.';
                                        }
                                        $numbering .= $sublevelCount[$i];
                                        $dot = 1;
                                }
                        }
                }

                # The canonized header is a version of the header text safe to use for links
                # Avoid insertion of weird stuff like <math> by expanding the relevant sections.
                # The second call must work on the result of the first one; it used to
                # restart from $headline and so threw the first expansion away.
                $canonized_headline = $this->unstrip( $headline, $this->mStripState );
                $canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

                # strip out HTML
                $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
                $tocline = trim( $canonized_headline );
                $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
                # keep ':' readable and turn the remaining %XX escapes into .XX
                $canonized_headline = str_replace( array( '%3A', '%' ), array( ':', '.' ), $canonized_headline );

                # count how many in assoc. array so we can track dupes in anchors
                if( !isset( $refers[$canonized_headline] ) ) {
                        $refers[$canonized_headline] = 0;
                }
                $refers[$canonized_headline]++;
                $refcount = $refers[$canonized_headline];

                # Prepend the number to the heading text
                if( $needNumbering ) {
                        $tocline = $numbering . ' ' . $tocline;

                        # Don't number the heading if it is the only one (looks silly)
                        if( $doNumberHeadings && count( $matches[3] ) > 1) {
                                # the two are different if the line contains a link
                                $headline = $numbering . ' ' . $headline;
                        }
                }

                # Create the anchor for linking from the TOC to the section;
                # repeated heading texts get a _2, _3, ... suffix
                $anchor = $canonized_headline;
                if( $refcount > 1 ) {
                        $anchor .= '_' . $refcount;
                }
                if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
                        $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                }

                $headHtml = '';
                if( $showEditLink ) {
                        $headHtml .= $sk->editSectionLink($headlineCount+1);
                }

                # Add the edit section span
                if( $rightClickHack ) {
                        $headline = $sk->editSectionScript($headlineCount+1,$headline);
                }

                # give headline the correct <h#> tag
                $head[$headlineCount] = $headHtml . "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

                $headlineCount++;
        }

        if( $doShowToc ) {
                $toc .= $sk->tocUnindent( $toclevel );
                $toc = $sk->tocTable( $toc );
        }

        # split up and insert constructed headlines
        $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
        $i = 0;

        foreach( $blocks as $block ) {
                # An [edit] link for the text above the first heading used to be
                # added here; it is disabled because it broke block formatting
                # (for example, a bullet point in the top line).
                $full .= $block;
                if( $doShowToc && !$i && $isMain && !$forceTocHere) {
                        # Top anchor now in skin
                        $full .= $toc;
                }

                if( !empty( $head[$i] ) ) {
                        $full .= $head[$i];
                }
                $i++;
        }
        if($forceTocHere) {
                $mw =& MagicWord::get( MAG_TOC );
                return $mw->replace( $toc, $full );
        } else {
                return $full;
        }
}
2054
2055         # Return an HTML link for the "ISBN 123456" text
# Every "ISBN " in $text that is followed (after optional blanks) by a run of
# digits, dashes and capital letters containing at least one non-dash
# character is replaced by a link to Special:Booksources. The link text keeps
# the dashes, the isbn= query parameter has them removed. Other occurrences
# are left as they were.
#
# $text    string  text to scan
# returns  string  text with the links inserted
/* private */ function magicISBN( $text ) {
        $fname = 'Parser::magicISBN';
        wfProfileIn( $fname );

        # The delimiter is a plain string, so explode() does what the removed
        # regex function split() used to do here. The leading blank guarantees
        # a first piece even when the text starts with "ISBN ".
        $a = explode( 'ISBN ', " $text" );
        if ( count( $a ) < 2 ) {
                wfProfileOut( $fname );
                return $text;
        }
        $text = (string)substr( array_shift( $a ), 1 );
        $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

        foreach ( $a as $x ) {
                # strspn() measures the leading blanks and the candidate number
                # without indexing past the end of the piece
                $blankLen = strspn( $x, ' ' );
                $isbnLen = strspn( $x, $valid, $blankLen );
                $blank = str_repeat( ' ', $blankLen );
                $isbn = (string)substr( $x, $blankLen, $isbnLen );
                $rest = (string)substr( $x, $blankLen + $isbnLen );

                $num = str_replace( array( '-', ' ' ), '', $isbn );

                if ( '' == $num ) {
                        # Not a number: put the piece back unchanged, including a
                        # run of dashes that used to be dropped here
                        $text .= "ISBN $blank$isbn$rest";
                } else {
                        $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
                        $text .= '<a href="' .
                                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                                "\" class=\"internal\">ISBN $isbn</a>";
                        $text .= $rest;
                }
        }
        wfProfileOut( $fname );
        return $text;
}
2095
2096         # Return an HTML link for the "GEO ..." text
# Two steps:
# 1) coordinates written as  D&deg;M'S" North|South, D&deg;M'S" East|West
#    are rewritten to the compact form "(GEO +D.M.S:-D.M.S)";
# 2) every "GEO " followed (after optional blanks) by a token made of digits
#    and ".+-:" that contains at least one ':' is replaced by a link to
#    Special:Geo. '+' signs are removed from the coordinates= query
#    parameter but kept in the link text.
#
# $text    string  text to scan
# returns  string  text with the links inserted
/* private */ function magicGEO( $text ) {
        $fname = 'Parser::magicGEO';
        wfProfileIn( $fname );

        # This rewrite is only for the ~35000 U.S. Census Rambot pages...
        # South and West get a minus sign, North and East a plus sign (southern
        # latitudes used to be emitted with '+').
        $signs = array( 'North' => '+', 'South' => '-', 'East' => '+', 'West' => '-' );
        $dms = "(\d+)&deg;(\d+)'(\d+)\"";
        foreach ( array( 'North', 'South' ) as $lat ) {
                foreach ( array( 'West', 'East' ) as $lon ) {
                        $text = preg_replace(
                                "/{$dms} {$lat}, {$dms} {$lon}/",
                                "(GEO {$signs[$lat]}\$1.\$2.\$3:{$signs[$lon]}\$4.\$5.\$6)",
                                $text );
                }
        }

        # The delimiter is a plain string, so explode() does what the removed
        # regex function split() used to do here.
        $a = explode( 'GEO ', " $text" );
        if ( count( $a ) < 2 ) {
                wfProfileOut( $fname );
                return $text;
        }
        $text = (string)substr( array_shift( $a ), 1 );
        $valid = '0123456789.+-:';

        foreach ( $a as $x ) {
                # strspn() measures the leading blanks and the candidate token
                # without indexing past the end of the piece
                $blankLen = strspn( $x, ' ' );
                $geoLen = strspn( $x, $valid, $blankLen );
                $blank = str_repeat( ' ', $blankLen );
                $geo = (string)substr( $x, $blankLen, $geoLen );
                $rest = (string)substr( $x, $blankLen + $geoLen );

                $num = str_replace( array( '+', ' ' ), '', $geo );

                if ( '' == $num || count( explode( ':', $num, 3 ) ) < 2 ) {
                        # Not a coordinate pair: put the piece back unchanged. The
                        # rejected token (e.g. the "2000" of "GEO 2000") used to be
                        # dropped from the text here.
                        $text .= "GEO $blank$geo$rest";
                } else {
                        $titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
                        $text .= '<a href="' .
                                $titleObj->escapeLocalUrl( "coordinates={$num}" ) .
                                "\" class=\"internal\">GEO $geo</a>";
                        $text .= $rest;
                }
        }
        wfProfileOut( $fname );
        return $text;
}
2143
2144         # Return an HTML link for the "RFC 1234" text
# Every "RFC " in $text that is followed (after optional blanks) by digits is
# replaced by an external link. The URL is the 'rfcurl' message with $1
# replaced by the number; extra link attributes come from the skin.
#
# $text    string  text to scan
# returns  string  text with the links inserted
/* private */ function magicRFC( $text ) {
        # The delimiter is a plain string, so explode() does what the removed
        # regex function split() used to do here.
        $a = explode( 'RFC ', ' '.$text );
        if ( count( $a ) < 2 ) return $text;
        $text = (string)substr( array_shift( $a ), 1 );
        $valid = '0123456789';

        foreach ( $a as $x ) {
                # strspn() measures the leading blanks and the number without
                # indexing past the end of the piece
                $blankLen = strspn( $x, ' ' );
                $rfcLen = strspn( $x, $valid, $blankLen );

                if ( 0 == $rfcLen ) {
                        # no number follows: put the piece back unchanged
                        $text .= "RFC $x";
                } else {
                        $rfc = substr( $x, $blankLen, $rfcLen );
                        $rest = (string)substr( $x, $blankLen + $rfcLen );

                        $url = wfMsg( 'rfcurl' );
                        $url = str_replace( '$1', $rfc, $url );
                        $sk =& $this->mOptions->getSkin();
                        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
                        $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$rest}";
                }
        }
        return $text;
}
2176
# Entry point that produces altered wiki markup (output type OT_WIKI).
#
# Steps: CRLF line ends become LF, the text goes through strip(), then
# pstPass2() is applied, and finally the text is run through unstrip() and
# unstripNoWiki() with the same strip state.
#
# $text        string  wiki markup to transform
# $title       object  stored by reference in mTitle
# $user        object  handed on to pstPass2()
# $options     object  stored in mOptions
# $clearState  bool    call clearState() first (default true)
# returns      string  the transformed markup
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
        $this->mOutputType = OT_WIKI;
        $this->mTitle =& $title;
        $this->mOptions = $options;
        if ( $clearState ) {
                $this->clearState();
        }

        # Normalise line ends. A regex pass that rewrote <br> variants used to
        # follow here; it is disabled.
        $text = str_replace( "\r\n", "\n", $text );

        $stripState = false;
        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $this->unstripNoWiki( $text, $stripState );
}
2205
# Second pass of the pre-save transform: runs replaceVariables(), expands
# ~~~ / ~~~~ / ~~~~~ signatures, completes the [[|name]] / [[name (context)|]]
# link shortcuts, trims trailing whitespace and removes the MAG_END magic word.
#
# $text    string  markup to transform
# $user    object  supplies the name and 'nickname' option used in signatures
# returns  string  the transformed markup
/* private */ function pstPass2( $text, &$user ) {
        global $wgLang, $wgLocaltimezone;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( 'nickname' );
        if ( '' == $k ) { $k = $n; }

        /* Note: this is an ugly timezone hack for the European wikis */
        $useLocalTz = isset( $wgLocaltimezone );
        if ( $useLocalTz ) {
                $oldtz = getenv( 'TZ' );
                putenv( 'TZ=' . $wgLocaltimezone );
        }
        $d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
          ' (' . date( 'T' ) . ')';
        if ( $useLocalTz ) {
                # Put back the value saved above (the old code read a misspelt,
                # undefined variable here and so always set TZ to '')
                putenv( 'TZ=' . $oldtz );
        }

        # Longest marker first. str_replace() inserts the text literally, so a
        # '$1' or '\1' in a nickname is not treated as a regex backreference.
        $userLink = '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]";
        $text = str_replace( '~~~~~', $d, $text );
        $text = str_replace( '~~~~', "$userLink $d", $text );
        $text = str_replace( '~~~', $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";                      # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                                     # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";                   # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";    # [[ns:page (cont)|]]

        # The context is the parenthesised suffix of the current title, if any
        $context = '';
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }
        $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
        $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
        $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

        if ( '' == $context ) {
                $text = preg_replace( $p2, '[[\\1]]', $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
2273
2274         # Set up some variables which are usually set up in parse()
2275         # so that an external function can call some class members with confidence
# $title       object  stored by reference in mTitle
# $options     object  stored in mOptions
# $outputType  mixed   stored in mOutputType
# $clearState  bool    call clearState() afterwards (default true)
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
2284
# Runs replaceVariables() over $text with the output type set to OT_MSG and
# the title taken from the global $wgTitle. A static flag makes a call that
# arrives while another one is still running return its input unchanged.
#
# $text     string  text to transform
# $options  object  stored in mOptions
# returns   string  the transformed text, or $text itself on a nested call
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $executing = false;

        # Guard against infinite recursion: only the outermost call does the work
        if ( !$executing ) {
                $executing = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $executing = false;
        }
        return $text;
}
2304
2305         # Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2306         # Callback will be called with the text within
2307         # Transform and return the text within
# Stores $callback in mTagHooks under $tag.
#
# $tag       string  key in mTagHooks
# $callback  mixed   value to store
# returns    mixed   the value stored for $tag before this call, or null
function setHook( $tag, $callback ) {
        $previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
        $this->mTagHooks[$tag] = $callback;
        return $previous;
}
2313 }
2314
# Plain container for a parse result: the text, the language and category
# links, the "contains old magic" flag and a cache timestamp.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # $text              string  stored in mText
        # $languageLinks     array   stored in mLanguageLinks
        # $categoryLinks     array   stored in mCategoryLinks
        # $containsOldMagic  bool    stored in mContainsOldMagic
        # The cache time starts out as an empty string.
        function __construct( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = "";
        }

        # Class-named constructor, kept for PHP versions that do not know
        # __construct(); newer versions call __construct() directly.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->__construct( $text, $languageLinks, $categoryLinks, $containsOldMagic );
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each one hands the member and the new value to wfSetVar()
        # and returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Folds another ParserOutput into this one: both link lists get the
        # other object's entries appended and the old-magic flags are OR-ed.
        # The text and the cache time are left alone.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # take the categories from $other (this line used to append this
                # object's own language links instead)
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2348
# Set of options controlling a parse. Site-wide switches are copied from the
# $wg* globals and per-user preferences are read from a User object by
# initialiseFromUser(); each value can also be read and changed individually.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors, grouped per option. Every setter hands the member and the
        # new value to wfSetVar() and returns its result.

        function getUseTeX() { return $this->mUseTeX; }
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

        function getDateFormat() { return $this->mDateFormat; }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

        function getEditSection() { return $this->mEditSection; }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

        function getNumberHeadings() { return $this->mNumberHeadings; }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

        function getShowToc() { return $this->mShowToc; }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # The skin is held by reference, so it is bound directly instead of
        # going through wfSetVar(); nothing is returned.
        function getSkin() { return $this->mSkin; }
        function setSkin( &$x ) { $this->mSkin =& $x; }

        # Build a new ParserOptions object initialised from the given user
        /* static */ function newFromUser( &$user ) {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fill in every option: site-wide switches come from the globals,
        # the skin and the remaining preferences come from the user.
        # If $userInput evaluates to false, a new User object with
        # setLoaded( true ) applied is used in its place.
        function initialiseFromUser( &$userInput ) {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                $profileKey = "ParserOptions::initialiseFromUser";
                $skinProfileKey = "ParserOptions::initialiseFromUser-skin";
                wfProfileIn( $profileKey );

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Fetching the skin is profiled as its own section
                wfProfileIn( $skinProfileKey );
                $this->mSkin =& $user->getSkin();
                wfProfileOut( $skinProfileKey );

                # Per-user preferences
                $this->mDateFormat = $user->getOption( 'date' );
                $this->mEditSection = $user->getOption( 'editsection' );
                $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
                $this->mNumberHeadings = $user->getOption( 'numberheadings' );
                $this->mShowToc = $user->getOption( 'showtoc' );

                wfProfileOut( $profileKey );
        }


}
2427
2428 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches ) {
        # Forward the regex match array to the parser held in $wgCurParser
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2433
function wfArgSubstitution( $matches ) {
        # Forward the regex match array to the parser held in $wgCurParser
        return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2438
function wfVariableSubstitution( $matches ) {
        # Forward the regex match array to the parser held in $wgCurParser
        return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2443
# Return the total number of articles, i.e. the value of the global
# $wgNumberOfArticles after wfLoadSiteStats() has been called.
function wfNumberOfArticles() {
        # Let the stats loader run before the counter is read
        wfLoadSiteStats();
        return $GLOBALS['wgNumberOfArticles'];
}
2451
# Get various statistics from the database.
#
# Reads row 1 of the site_stats table and copies its view, edit and article
# counts into $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles.
# Does nothing when $wgNumberOfArticles is already different from -1
# (presumably -1 is the "not loaded yet" marker -- confirm where it is set),
# or when the query returns false.
/* private */ function wfLoadSiteStats() {
        global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

        if ( $wgNumberOfArticles != -1 ) {
                return;
        }

        $dbr =& wfGetDB( DB_SLAVE );
        $fields = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
        $conds = array( 'ss_row_id' => 1 );
        $row = $dbr->getArray( 'site_stats', $fields, $conds, 'wfLoadSiteStats' );

        if ( $row === false ) {
                # No row available: leave the globals as they are
                return;
        }

        $wgTotalViews = $row->ss_total_views;
        $wgTotalEdits = $row->ss_total_edits;
        $wgNumberOfArticles = $row->ss_good_articles;
}
2472
# Replace double quotes and angle brackets in $in with their HTML entities.
# Ampersands are not touched.
function wfEscapeHTMLTagsOnly( $in ) {
        $entities = array(
                '"' => '&quot;',
                '>' => '&gt;',
                '<' => '&lt;'
        );
        return str_replace( array_keys( $entities ), array_values( $entities ), $in );
}
2479
2480
2481 ?>