includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Processes wiki markup
   8 #
   9 # There are two main entry points into the Parser class:
  10 # parse()
  11 #   produces HTML output
  12 # preSaveTransform()
  13 #   produces altered wiki markup
  14 #
  15 # Globals used:
  16 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  17 #
  18 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  19 #
  20 # settings:
  21 #   $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #   $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #   $wgLocaltimezone
  24 #
  25 #   * only within ParserOptions
  26 #
  27 #----------------------------------------
  28 #    Variable substitution O(N^2) attack
  29 #-----------------------------------------
  30 # Without countermeasures, it would be possible to attack the parser by saving
  31 # a page filled with a large number of inclusions of large pages. The size of
  32 # the generated page would be proportional to the square of the input size.
  33 # Hence, we limit the number of inclusions of any given page, thus bringing any
  34 # attack back to O(N).
  35 define( 'MAX_INCLUDE_REPEAT', 100 );
     # NOTE(review): neither MAX_INCLUDE_REPEAT nor MAX_INCLUDE_SIZE is used in the
     # part of the file reviewed here; presumably they cap how often one page may
     # be included and how large included text may get -- confirm against the
     # template expansion code.
  36 define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million
  37
  38 # Allowed values for $mOutputType
     # (parse() selects OT_HTML; strip() renders tag contents only for OT_HTML and
     # otherwise puts the original tags back around the content)
  39 define( 'OT_HTML', 1 );
  40 define( 'OT_WIKI', 2 );
  41 define( 'OT_MSG' , 3 );
  42
  43 # String parameter for extractTags() which causes it to strip
  44 # HTML comments (<!-- ... -->) instead of a regular
  45 # <XML>-style tag pair. This should not be anything we
  46 # may want to use as a tag name in wikisyntax.
  47 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
  48
  49 # Prefix of every strip marker; used by strip() and insertStripItem()
  50 define( 'UNIQ_PREFIX', 'NaodW29');
  51
  52 # Constants needed for external link processing
     # Both protocol lists are regex alternations without surrounding parentheses.
  53 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
  54 define( 'HTTP_PROTOCOLS', 'http|https' );
  55 # Everything except closing square bracket, space, or control characters
     # (\x00-\x20 and \x7F); INVERSE_EXT_LINK_URL_CLASS matches exactly the excluded set.
  56 define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
  57 define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
  58 # Same as the URL class but including space (only \x00-\x1F and \x7F are excluded)
  59 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
  60 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
  61 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
     # Bracketed external link "[url text]".
     # Captures: 1 = whole URL, 2 = protocol, 3 = link text (may be empty).
  62 define( 'EXT_LINK_BRACKETED',  '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
     # Whole-string match for an http(s) image URL.
     # Captures: 1 = protocol incl. colon, 2 = host and path, 3 = file name,
     # 4 = extension (matched case-insensitively via the inline (?i) flag).
  63 define( 'EXT_IMAGE_REGEX',
  64         '/^('.HTTP_PROTOCOLS.':)'.  # Protocol
  65         '('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
  66         '('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
  67 );
  68
  69 class Parser
  70 {
  71         # Persistent:
  72         var $mTagHooks;
  73
  74         # Cleared with clearState():
  75         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  76         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  77
  78         # Temporary:
  79         var $mOptions, $mTitle, $mOutputType,
  80             $mTemplates,        // cache of already loaded templates, avoids
  81                                 // multiple SQL queries for the same string
  82             $mTemplatePath;     // stores an unsorted hash of all the templates already loaded
  83                                 // in this path. Used for loop detection.
  84
  85         function Parser() {
                     # Constructor: begin with no registered tag hooks and empty template
                     # caches, then let clearState() initialise the per-parse members.
  86                 $this->mTagHooks = array();
  87                 $this->mTemplatePath = array();
  88                 $this->mTemplates = array();
  89                 $this->clearState();
  90         }
  91
  92         function clearState() {
                     # Reset every member that is documented as "Cleared with clearState()"
                     # so that the next parse starts from a clean slate.
  93                 $this->mOutput = new ParserOutput();
                     # Scalars: counter, last-section marker and open/closed flags
  94                 $this->mAutonumber = 0;
  95                 $this->mLastSection = '';
  96                 $this->mDTopen = false;
  97                 $this->mInPre = false;
  98                 $this->mVariables = false;
                     # Collections start out empty
  99                 $this->mIncludeCount = array();
 100                 $this->mArgStack = array();
 101                 $this->mStripState = array();
 102         }
 103
 104         # First pass--just handle <nowiki> sections, pass the rest off
 105         # to internalParse() which does all the real work.
 106         #
 107         # Returns a ParserOutput
 108         #
 109         function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
                     # Convert wiki markup to HTML.
                     #
                     #   $text        wiki markup to render
                     #   $title       kept by reference in $this->mTitle
                     #   $options     kept in $this->mOptions
                     #   $linestart   handed on to internalParse() and doBlockLevels()
                     #   $clearState  when true, clearState() runs before anything else
                     #
                     # Returns $this->mOutput after its text has been set to the result.
 110                 global $wgUseTidy;
 111                 $fname = 'Parser::parse';
 112                 wfProfileIn( $fname );
 113
 114                 if ( $clearState ) {
 115                         $this->clearState();
 116                 }
 117
 118                 $this->mOptions = $options;
 119                 $this->mTitle =& $title;
 120                 $this->mOutputType = OT_HTML;
 121
 123                 $text = $this->strip( $text, $this->mStripState );
 124                 $text = $this->internalParse( $text, $linestart );
 125                 $text = $this->unstrip( $text, $this->mStripState );
 126                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 127                 if(!$wgUseTidy) {
 128                         $fixtags = array(
 129                                 # french spaces, last one Guillemet-left
 130                                 # only if there is something before the space
                                     # (the pattern has a single capture group; the lookahead captures nothing)
 131                                 '/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;',
 132                                 # french spaces, Guillemet-right
 133                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 134                                 '/<hr *>/i' => '<hr />',
 135                                 '/<br *>/i' => '<br />',
 136                                 '/<center *>/i' => '<div class="center">',
 137                                 '/<\\/center *>/i' => '</div>',
 138                                 # Clean up spare ampersands; note that we probably ought to be
 139                                 # more careful about named entities.
                                     # An ampersand is left alone only when it starts a hexadecimal
                                     # (&#x1F;), decimal (&#123;) or named (&amp;) reference.
 140                                 '/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 141                         );
 142                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 143                 } else {
                             # With tidy enabled, <hr>/<br> and stray ampersands are not
                             # rewritten here; only french spaces and <center> are handled.
 144                         $fixtags = array(
 145                                 # french spaces, last one Guillemet-left
 146                                 '/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
 147                                 # french spaces, Guillemet-right
 148                                 '/(\\302\\253) /i' => '\\1&nbsp;',
 149                                 '/<center *>/i' => '<div class="center">',
 150                                 '/<\\/center *>/i' => '</div>'
 151                         );
 152                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 153                 }
 154                 # only once and last
 155                 $text = $this->doBlockLevels( $text, $linestart );
                     # <nowiki> (and raw html) content goes back in last
 156                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 157                 if($wgUseTidy) {
 158                         $text = $this->tidy($text);
 159                 }
 160                 $this->mOutput->setText( $text );
 161                 wfProfileOut( $fname );
 162                 return $this->mOutput;
 163         }
 164
 165         /* static */ function getRandomString() {
                     # Two separate draws from 0..0x7fffffff, each written as lowercase
                     # hexadecimal without padding, concatenated into one string.
                     $upper = 0x7fffffff;
                     $head = dechex( mt_rand( 0, $upper ) );
                     $tail = dechex( mt_rand( 0, $upper ) );
                     return $head . $tail;
 167         }
 168
 169         # Replaces all occurrences of <$tag>content</$tag> in the text
 170         # with a random marker and returns the new text. The output parameter
 171         # $content will be an associative array filled with data in the form
 172         # $unique_marker => content.
 173
 174         # If $content is already set, the additional entries will be appended
 175
 176         # If $tag is set to STRIP_COMMENTS, the function will extract
 177         # <!-- HTML comments -->
 178
 179         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ''){
                     # Cut every <$tag>...</$tag> section (or every HTML comment when $tag is
                     # STRIP_COMMENTS) out of $text and put a unique marker in its place.
                     #
                     #   $tag          tag name, matched case-insensitively, or STRIP_COMMENTS
                     #   $text         text to scan
                     #   $content      in/out: receives marker => inner text; existing
                     #                 entries are kept
                     #   $uniq_prefix  prefix for the generated markers
                     #
                     # Returns the text with each section replaced by its marker. A section
                     # with no closing delimiter extends to the end of the text.
 180                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 181                 if ( !$content ) {
 182                         $content = array( );
 183                 }
 184                 $n = 1;
 185                 $stripped = '';
 186
                     # Build both delimiter patterns once; the tag name is quoted so that
                     # it is always matched literally.
                     if ( $tag == STRIP_COMMENTS ) {
                             $startPattern = '/<!--/i';
                             $endPattern = '/-->/i';
                     } else {
                             $quotedTag = preg_quote( $tag, '/' );
                             $startPattern = "/<\\s*$quotedTag\\s*>/i";
                             $endPattern = "/<\\/\\s*$quotedTag\\s*>/i";
                     }

 187                 while ( '' != $text ) {
 189                         $p = preg_split( $startPattern, $text, 2 );
 193                         $stripped .= $p[0];
 194                         if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
                                     # No further opening delimiter, or nothing after it
 195                                 $text = '';
 196                         } else {
 198                                 $q = preg_split( $endPattern, $p[1], 2 );
 202                                 $marker = $rnd . sprintf('%08X', $n++);
 203                                 $content[$marker] = $q[0];
 204                                 $stripped .= $marker;
                                     # Without a closing delimiter preg_split() returns a single
                                     # element: nothing is left to scan.
 205                                 $text = isset( $q[1] ) ? $q[1] : '';
 206                         }
 207                 }
 208                 return $stripped;
 209         }
 210
 211         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 212         # If $render is set, performs necessary rendering operations on plugins
 213         # Returns the text, and fills an array with data needed in unstrip()
 214         # If the $state is already a valid strip state, it adds to the state
 215
 216         # When $stripcomments is set, HTML comments <!-- like this -->
 217         # will be stripped in addition to other tags. This is important
 218         # for section editing, where these comments cause confusion when
 219         # counting the sections in the wikisource
 220         function strip( $text, &$state, $stripcomments = false ) {
                     # Replace <html> (only if $wgRawHtml), <nowiki>, <math>, <pre>, optionally
                     # HTML comments, and every registered tag hook with unique markers.
                     #
                     #   $text           text to process
                     #   $state          in/out strip state: one marker => replacement array
                     #                   per tag type; new entries are merged into it
                     #   $stripcomments  when true, <!-- ... --> sections are stripped too
                     #
                     # When the output type is OT_HTML the stored replacement is the rendered
                     # form; otherwise it is the original content wrapped in its tags again.
                     # Returns the text with markers in place of the stripped sections.
 221                 $render = ($this->mOutputType == OT_HTML);
 222                 $html_content = array();
 223                 $nowiki_content = array();
 224                 $math_content = array();
 225                 $pre_content = array();
 226                 $comment_content = array();
 227                 $ext_content = array();
 228
 229                 # Prefix for all markers (escaping of pre-existing occurrences is disabled)
 230                 $uniq_prefix = UNIQ_PREFIX;
 231                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 232
 233                 # html
 234                 global $wgRawHtml;
 235                 if( $wgRawHtml ) {
 236                         $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
 237                         foreach( $html_content as $marker => $content ) {
 238                                 if ($render ) {
 239                                         # Raw and unchecked for validity.
 240                                         $html_content[$marker] = $content;
 241                                 } else {
 242                                         $html_content[$marker] = '<html>'.$content.'</html>';
 243                                 }
 244                         }
 245                 }
 246
 247                 # nowiki
 248                 $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
 249                 foreach( $nowiki_content as $marker => $content ) {
 250                         if( $render ){
 251                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 252                         } else {
 253                                 $nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
 254                         }
 255                 }
 256
 257                 # math
 258                 $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
 259                 foreach( $math_content as $marker => $content ){
 260                         if( $render ) {
 261                                 if( $this->mOptions->getUseTeX() ) {
 262                                         $math_content[$marker] = renderMath( $content );
 263                                 } else {
                                             # TeX disabled: show the tag pair as escaped text
 264                                         $math_content[$marker] = '&lt;math&gt;'.$content.'&lt;/math&gt;';
 265                                 }
 266                         } else {
 267                                 $math_content[$marker] = '<math>'.$content.'</math>';
 268                         }
 269                 }
 270
 271                 # pre
 272                 $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
 273                 foreach( $pre_content as $marker => $content ){
 274                         if( $render ){
 275                                 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
 276                         } else {
 277                                 $pre_content[$marker] = '<pre>'.$content.'</pre>';
 278                         }
 279                 }
 280
 281                 # Comments
 282                 if($stripcomments) {
 283                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 284                         foreach( $comment_content as $marker => $content ){
 285                                 $comment_content[$marker] = '<!--'.$content.'-->';
 286                         }
 287                 }
 288
 289                 # Extensions
 290                 foreach ( $this->mTagHooks as $tag => $callback ) {
 291                         $ext_content[$tag] = array();
 292                         $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
 293                         foreach( $ext_content[$tag] as $marker => $content ) {
 294                                 if ( $render ) {
 295                                         $ext_content[$tag][$marker] = $callback( $content );
 296                                 } else {
 297                                         $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
 298                                 }
 299                         }
 300                 }
 301
                     # Built-in types first; a hook named like a built-in type cannot override it
                     $stripped = array(
                       'html' => $html_content,
                       'nowiki' => $nowiki_content,
                       'math' => $math_content,
                       'pre' => $pre_content,
                       'comment' => $comment_content,
                     ) + $ext_content;

 302                 # Merge state with the pre-existing state, if there is one
 303                 if ( $state ) {
                             # A pre-existing state may lack some keys (for example one that was
                             # created by insertStripItem()), so missing keys are added rather
                             # than assumed to be present.
                             foreach ( $stripped as $type => $items ) {
                                     if ( isset( $state[$type] ) && is_array( $state[$type] ) ) {
                                             $state[$type] = $state[$type] + $items;
                                     } else {
                                             $state[$type] = $items;
                                     }
                             }
 315                 } else {
 316                         $state = $stripped;
 323                 }
 324                 return $text;
 325         }
 326
 327         # always call unstripNoWiki() after this one
 328         function unstrip( $text, &$state ) {
                     # Put the stored replacements back in place of their markers for every
                     # strip-state type except 'nowiki' and 'html' (unstripNoWiki() does those).
                     #
                     #   $text   text containing strip markers
                     #   $state  strip state: type => array( marker => replacement )
                     #
                     # Returns the text with the markers expanded. Iterating with foreach
                     # rather than end()/prev() means an item whose replacement is boolean
                     # false no longer ends the expansion early.
                     if ( !is_array( $state ) ) {
                             return $text;
                     }
 329                 # Must expand in reverse order, otherwise nested tags will be corrupted
                     foreach ( array_reverse( $state, true ) as $type => $contentDict ) {
                             if ( $type === 'nowiki' || $type === 'html' || !is_array( $contentDict ) ) {
                                     continue;
                             }
                             foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
                                     $text = str_replace( $marker, $content, $text );
                             }
                     }
 338
 339                 return $text;
 340         }
 341         # always call this after unstrip() to preserve the order
 342         function unstripNoWiki( $text, &$state ) {
                     # Expand the 'nowiki' markers and, when $wgRawHtml is set, the 'html'
                     # markers stored in the strip state.
                     #
                     #   $text   text containing strip markers
                     #   $state  strip state: type => array( marker => replacement )
                     #
                     # Returns the text with those markers expanded. Types missing from
                     # $state are skipped rather than accessed by reference.
 348                 global $wgRawHtml;
                     $types = array( 'nowiki' );
 349                 if ($wgRawHtml) {
                             $types[] = 'html';
 353                 }

                     foreach ( $types as $type ) {
                             if ( !isset( $state[$type] ) || !is_array( $state[$type] ) ) {
                                     continue;
                             }
 343                         # Must expand in reverse order, otherwise nested tags will be corrupted
                             foreach ( array_reverse( $state[$type], true ) as $marker => $content ) {
                                     $text = str_replace( $marker, $content, $text );
                             }
                     }
 354
 355                 return $text;
 356         }
 357
 358         # Add an item to the strip state
 359         # Returns the unique tag which must be inserted into the stripped text
 360         # The tag will be replaced with the original text in unstrip()
 361         function insertStripItem( $text, &$state ) {
                     # Store $text in the strip state under a freshly generated marker.
                     #
                     #   $text   text to store
                     #   $state  in/out strip state; created when empty
                     #
                     # Returns the marker, which the caller inserts into the stripped text.
 362                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 363                 if ( !$state ) {
                             # Same built-in keys, in the same order, as the state that
                             # strip() builds, so a later strip() can merge into it.
 364                         $state = array(
 365                           'html' => array(),
 366                           'nowiki' => array(),
 367                           'math' => array(),
 368                           'pre' => array(),
                               'comment' => array()
 369                         );
 370                 }
 371                 $state['item'][$rnd] = $text;
 372                 return $rnd;
 373         }
 374
 375         # Return allowed HTML attributes
 376         function getHTMLattrs () {
                     # Returns the list of attribute names that fixTagAttributes() lets
                     # through. Each name appears exactly once ('width' was listed twice).
 377                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 378                                 'title', 'align', 'lang', 'dir', 'width', 'height',
 379                                 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
 380                                 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
 381                                 /* FONT */ 'type', 'start', 'value', 'compact',
 382                                 /* For various lists, mostly deprecated but safe */
 383                                 'summary', 'border', 'frame', 'rules',
 384                                 'cellspacing', 'cellpadding', 'valign', 'char',
 385                                 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
 386                                 'headers', 'scope', 'rowspan', 'colspan', /* Tables */
 387                                 'id', 'class', 'name', 'style' /* For CSS */
 388                                 );
 389                 return $htmlattrs ;
 390         }
 391
 392         # Remove non approved attributes and javascript in css
 393         function fixTagAttributes ( $t ) {
                     # Filter the attribute text of an HTML tag.
                     #
                     #   $t  raw attribute text, e.g. 'border="1" onclick="x()"'
                     #
                     # Returns the trimmed text with every attribute that is not listed by
                     # getHTMLattrs() removed, or '' when the remaining text contains a style
                     # attribute followed by "expression", "tp://"/"tps://" or "url(".
 394                 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
 396
 397                 # Strip non-approved attributes from the tag.
                     # A callback replaces the former /e (eval) modifier, which evaluated the
                     # user-supplied attribute value inside a double-quoted PHP string.
 398                 $t = preg_replace_callback(
 399                         '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/',
 400                         array( $this, 'fixTagAttributesCallback' ),
 401                         $t);
 402
 403                 $t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557
 404
 405                 # Strip javascript "expression" from stylesheets. Brute force approach:
 406                 # If anything offensive is found, all attributes of the HTML tag are dropped
 407
 408                 if( preg_match(
 409                         '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
 410                         wfMungeToUtf8( $t ) ) )
 411                 {
 412                         $t='';
 413                 }
 414
 415                 return trim ( $t ) ;
 416         }

             # Callback for fixTagAttributes(): decides the fate of one attribute.
             #   $matches[1]  attribute name
             #   $matches[3]  attribute value including any double quotes; missing or
             #                empty for a bare attribute
             # Returns 'name', 'name=value', or '' when the name is not approved.
             /* private */ function fixTagAttributesCallback( $matches ) {
                     if ( !in_array( strtolower( $matches[1] ), $this->getHTMLattrs() ) ) {
                             return '';
                     }
                     $value = isset( $matches[3] ) ? $matches[3] : '';
                     if ( $value === '' ) {
                             return $matches[1];
                     }
                     return $matches[1] . '=' . $value;
             }
 417
 418         # interface with html tidy, used if $wgUseTidy = true
 419         function tidy ( $text ) {
                     # Run $text through the external HTML tidy program.
                     #
                     #   $text  HTML fragment; it is wrapped in a minimal XHTML document
                     #          before being written to tidy's standard input
                     #
                     # Returns whatever tidy wrote to standard output. If that is empty for
                     # a non-empty $text, returns $text followed by an HTML comment noting
                     # the failure.
                     #
                     # NOTE(review): $wgTidyBin, $wgTidyConf and $wgTidyOpts are placed on the
                     # command line unescaped; they are assumed to be trusted configuration.
 420                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 421                 global $wgInputEncoding, $wgOutputEncoding;
 422                 $fname = 'Parser::tidy';
 423                 wfProfileIn( $fname );
 424
 425                 $cleansource = '';
                     # Build the option string in a local copy: appending to the global
                     # would add one more encoding flag on every call.
                     $opts = $wgTidyOpts;
 426                 switch(strtoupper($wgOutputEncoding)) {
 427                         case 'ISO-8859-1':
 428                                 $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 429                                 break;
 430                         case 'UTF-8':
 431                                 $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 432                                 break;
 433                         default:
 434                                 $opts .= ' -raw';
 435                         }
 436
 437                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 438 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 439 '<head><title>test</title></head><body>'.$text.'</body></html>';
                     # stdin and stdout are pipes; stderr is discarded
 440                 $descriptorspec = array(
 441                         0 => array('pipe', 'r'),
 442                         1 => array('pipe', 'w'),
 443                         2 => array('file', '/dev/null', 'a')
 444                 );
 445                 $process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
 446                 if (is_resource($process)) {
 447                         fwrite($pipes[0], $wrappedtext);
 448                         fclose($pipes[0]);
 449                         while (!feof($pipes[1])) {
 450                                 $cleansource .= fgets($pipes[1], 1024);
 451                         }
 452                         fclose($pipes[1]);
                             # The exit status is not used; success is judged by the output below
 453                         proc_close($process);
 454                 }
 455
 456                 wfProfileOut( $fname );
 457
 458                 if( $cleansource == '' && $text != '') {
 459                         wfDebug( "Tidy error detected!\n" );
 460                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 461                 } else {
 462                         return $cleansource;
 463                 }
 464         }
 465
 466         # parse the wiki syntax used to render tables
 467         function doTableStuff ( $t ) {
                     # Translate wiki table markup into HTML, line by line.
                     #
                     # Recognised at the start of a (trimmed) line:
                     #   {|   open a table; leading colons indent it with <dl><dd> pairs
                     #   |}   close the current table
                     #   |-   start a new row (further dashes are ignored); the rest of
                     #        the line supplies the row attributes
                     #   |+   caption
                     #   |    data cell(s), several per line when separated by ||
                     #   !    header cell(s), several per line when separated by !! or ||
                     # Inside a cell, text before a single | is treated as attributes.
                     # Lines outside any table are left untouched.
                     #
                     #   $t  text to process
                     # Returns the text with the table markup replaced by HTML.
                     #
                     # NOTE(review): $indent_level is a single value, not a per-table stack,
                     # and tables still open at the end of the text do not get their
                     # </dd></dl> closed -- confirm whether that matters for nested tables.
 468                 $fname = 'Parser::doTableStuff';
 469                 wfProfileIn( $fname );
 470
 471                 $t = explode ( "\n" , $t ) ;
                     # The four arrays below are parallel stacks, one entry per open table
 472                 $td = array () ; # Is currently a td tag open?
 473                 $ltd = array () ; # Was it TD or TH?
 474                 $tr = array () ; # Is currently a tr tag open?
 475                 $ltr = array () ; # tr attributes
 476                 $indent_level = 0; # indent level of the table
 477                 foreach ( $t AS $k => $x )
 478                 {
 479                         $x = trim ( $x ) ;
 480                         $fc = substr ( $x , 0 , 1 ) ;
 481                         if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
 482                                 $indent_level = strlen( $matches[1] );
 483                                 $t[$k] = "\n" .
 484                                         str_repeat( '<dl><dd>', $indent_level ) .
 485                                         '<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
 486                                 array_push ( $td , false ) ;
 487                                 array_push ( $ltd , '' ) ;
 488                                 array_push ( $tr , false ) ;
 489                                 array_push ( $ltr , '' ) ;
 490                         }
 491                         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 492                         else if ( '|}' == substr ( $x , 0 , 2 ) ) {
 493                                 $z = "</table>\n" ;
 494                                 $l = array_pop ( $ltd ) ;
 495                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 496                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 497                                 array_pop ( $ltr ) ;
 498                                 $t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
 499                         }
 500                         else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
 501                                 $x = substr ( $x , 1 ) ;
 502                                 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 503                                 $z = '' ;
 504                                 $l = array_pop ( $ltd ) ;
 505                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 506                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 507                                 array_pop ( $ltr ) ;
 508                                 $t[$k] = $z ;
 509                                 array_push ( $tr , false ) ;
 510                                 array_push ( $td , false ) ;
 511                                 array_push ( $ltd , '' ) ;
                                     # The <tr> itself is emitted lazily by the first cell of the row
 512                                 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 513                         }
 514                         else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
 515                                 if ( '|+' == substr ( $x , 0 , 2 ) ) {
 516                                         $fc = '+' ;
 517                                         $x = substr ( $x , 1 ) ;
 518                                 }
 519                                 $after = substr ( $x , 1 ) ;
 520                                 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
 521                                 $after = explode ( '||' , $after ) ;
 522                                 $t[$k] = '' ;
 523                                 foreach ( $after AS $theline )
 524                                 {
 525                                         $z = '' ;
 526                                         if ( $fc != '+' )
 527                                         {
                                                     # Open the row if this is its first cell
 528                                                 $tra = array_pop ( $ltr ) ;
 529                                                 if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
 530                                                 array_push ( $tr , true ) ;
 531                                                 array_push ( $ltr , '' ) ;
 532                                         }
 533
                                             # Close the previous cell, then record the new cell type
 534                                         $l = array_pop ( $ltd ) ;
 535                                         if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 536                                         if ( $fc == '|' ) $l = 'td' ;
 537                                         else if ( $fc == '!' ) $l = 'th' ;
 538                                         else if ( $fc == '+' ) $l = 'caption' ;
 539                                         else $l = '' ;
 540                                         array_push ( $ltd , $l ) ;
 541                                         $y = explode ( '|' , $theline , 2 ) ;
 542                                         if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 543                                         else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 544                                         $t[$k] .= $y ;
 545                                         array_push ( $td , true ) ;
 546                                 }
 547                         }
 548                 }
 549
 550                 # Closing open td, tr && table
 551                 while ( count ( $td ) > 0 )
 552                 {
                             # Close the open cell with its own tag (td, th or caption)
                             $l = array_pop ( $ltd ) ;
 553                         if ( array_pop ( $td ) ) $t[] = '</'.$l.'>' ;
 554                         if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
 555                         $t[] = '</table>' ;
 556                 }
 557
 558                 $t = implode ( "\n" , $t ) ;
 559                 #               $t = $this->removeHTMLtags( $t );
 560                 wfProfileOut( $fname );
 561                 return $t ;
 562         }
 563
 564         # Parses the text and adds the result to the strip state
 565         # Returns the strip tag
 566         function stripParse( $text, $newline, $args ) {
                     # Strip $text, run it through internalParse() as a non-main fragment,
                     # store the result in the strip state and return $newline followed by
                     # the marker that stands for it.
                     $stripped = $this->strip( $text, $this->mStripState );
                     $atLineStart = (bool)$newline;
                     $parsed = $this->internalParse( $stripped, $atLineStart, $args, false );
                     $marker = $this->insertStripItem( $parsed, $this->mStripState );
                     return $newline . $marker;
 570         }
 571
 572         function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
                     # Run the main transformation pipeline over already-stripped text.
                     #
                     #   $text       text to transform
                     #   $linestart  not used within this function
                     #   $args       handed to replaceVariables()
                     #   $isMain     handed to formatHeadings()
                     #
                     # Returns the transformed text. The steps run strictly in the order
                     # written below.
 573                 $fname = 'Parser::internalParse';
 574                 wfProfileIn( $fname );
 575
 576                 $text = $this->removeHTMLtags( $text );
 577                 $text = $this->replaceVariables( $text, $args );
 578
                     # Four or more dashes at the start of the text or of a line become <hr />
 579                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
 580
 581                 $text = $this->doHeadings( $text );
 582                 if($this->mOptions->getUseDynamicDates()) {
 583                         global $wgDateFormatter;
 584                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 585                 }
 586                 $text = $this->doAllQuotes( $text );
 587                 $text = $this->replaceExternalLinks( $text );
 588                 $text = $this->doMagicLinks( $text );
                     # NOTE(review): replaceInternalLinks() is called twice in a row;
                     # presumably the second pass picks up links left over by the first --
                     # confirm before removing either call.
 589                 $text = $this->replaceInternalLinks ( $text );
 590                 $text = $this->replaceInternalLinks ( $text );
 591
                     # Stripped sections go back in before table and heading formatting
 592                 $text = $this->unstrip( $text, $this->mStripState );
 593                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 594
 595                 $text = $this->doTableStuff( $text );
 596                 $text = $this->formatHeadings( $text, $isMain );
                     # Final pass: the skin taken from the parser options transforms the content
 597                 $sk =& $this->mOptions->getSkin();
 598                 $text = $sk->transformContent( $text );
 599
 600                 wfProfileOut( $fname );
 601                 return $text;
 602         }
 603
 604         /* private */ function &doMagicLinks( &$text ) {
                     # Apply magicISBN(), then magicGEO() (only when $wgUseGeoMode is set and
                     # true), then magicRFC() to $text. $text is taken by reference, so the
                     # caller's variable is updated as well; the result is also returned.
 605                 global $wgUseGeoMode;
                     $geoEnabled = !empty( $wgUseGeoMode );

                     $text = $this->magicISBN( $text );
                     if ( $geoEnabled ) {
                             $text = $this->magicGEO( $text );
                     }
                     $text = $this->magicRFC( $text );

 611                 return $text;
 612         }
 613
 614         # Parse ^^ tokens and return html
 615         /* private */ function doExponent ( $text ) {
 616                 $fname = 'Parser::doExponent';
 617                 wfProfileIn( $fname);
 618                 $text = preg_replace('/\^\^(.*)\^\^/','<small><sup>\\1</sup></small>', $text);
 619                 wfProfileOut( $fname);
 620                 return $text;
 621         }
 622
 623         # Parse headers and return html
 624         /* private */ function doHeadings( $text ) {
 625                 $fname = 'Parser::doHeadings';
 626                 wfProfileIn( $fname );
 627                 for ( $i = 6; $i >= 1; --$i ) {
 628                         $h = substr( '======', 0, $i );
 629                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 630                           "<h{$i}>\\1</h{$i}>\\2", $text );
 631                 }
 632                 wfProfileOut( $fname );
 633                 return $text;
 634         }
 635
 636         /* private */ function doAllQuotes( $text ) {
 637                 $fname = 'Parser::doAllQuotes';
 638                 wfProfileIn( $fname );
 639                 $outtext = '';
 640                 $lines = explode( "\n", $text );
 641                 foreach ( $lines as $line ) {
 642                         $outtext .= $this->doQuotes ( $line ) . "\n";
 643                 }
 644                 $outtext = substr($outtext, 0,-1);
 645                 wfProfileOut( $fname );
 646                 return $outtext;
 647         }
 648
 649         /* private */ function doQuotes( $text ) {
 650                 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 651                 if (count ($arr) == 1)
 652                         return $text;
 653                 else
 654                 {
 655                         # First, do some preliminary work. This may shift some apostrophes from
 656                         # being mark-up to being text. It also counts the number of occurrences
 657                         # of bold and italics mark-ups.
 658                         $i = 0;
 659                         $numbold = 0;
 660                         $numitalics = 0;
 661                         foreach ($arr as $r)
 662                         {
 663                                 if (($i % 2) == 1)
 664                                 {
 665                                         # If there are ever four apostrophes, assume the first is supposed to
 666                                         # be text, and the remaining three constitute mark-up for bold text.
 667                                         if (strlen ($arr[$i]) == 4)
 668                                         {
 669                                                 $arr[$i-1] .= "'";
 670                                                 $arr[$i] = "'''";
 671                                         }
 672                                         # If there are more than 5 apostrophes in a row, assume they're all
 673                                         # text except for the last 5.
 674                                         else if (strlen ($arr[$i]) > 5)
 675                                         {
 676                                                 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
 677                                                 $arr[$i] = "'''''";
 678                                         }
 679                                         # Count the number of occurrences of bold and italics mark-ups.
 680                                         # We are not counting sequences of five apostrophes.
 681                                         if (strlen ($arr[$i]) == 2) $numitalics++;  else
 682                                         if (strlen ($arr[$i]) == 3) $numbold++;     else
 683                                         if (strlen ($arr[$i]) == 5) { $numitalics++; $numbold++; }
 684                                 }
 685                                 $i++;
 686                         }
 687
 688                         # If there is an odd number of both bold and italics, it is likely
 689                         # that one of the bold ones was meant to be an apostrophe followed
 690                         # by italics. Which one we cannot know for certain, but it is more
 691                         # likely to be one that has a single-letter word before it.
 692                         if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
 693                         {
 694                                 $i = 0;
 695                                 $firstsingleletterword = -1;
 696                                 $firstmultiletterword = -1;
 697                                 $firstspace = -1;
 698                                 foreach ($arr as $r)
 699                                 {
 700                                         if (($i % 2 == 1) and (strlen ($r) == 3))
 701                                         {
 702                                                 $x1 = substr ($arr[$i-1], -1);
 703                                                 $x2 = substr ($arr[$i-1], -2, 1);
 704                                                 if ($x1 == ' ') {
 705                                                         if ($firstspace == -1) $firstspace = $i;
 706                                                 } else if ($x2 == ' ') {
 707                                                         if ($firstsingleletterword == -1) $firstsingleletterword = $i;
 708                                                 } else {
 709                                                         if ($firstmultiletterword == -1) $firstmultiletterword = $i;
 710                                                 }
 711                                         }
 712                                         $i++;
 713                                 }
 714
 715                                 # If there is a single-letter word, use it!
 716                                 if ($firstsingleletterword > -1)
 717                                 {
 718                                         $arr [ $firstsingleletterword ] = "''";
 719                                         $arr [ $firstsingleletterword-1 ] .= "'";
 720                                 }
 721                                 # If not, but there's a multi-letter word, use that one.
 722                                 else if ($firstmultiletterword > -1)
 723                                 {
 724                                         $arr [ $firstmultiletterword ] = "''";
 725                                         $arr [ $firstmultiletterword-1 ] .= "'";
 726                                 }
 727                                 # ... otherwise use the first one that has neither.
 728                                 # (notice that it is possible for all three to be -1 if, for example,
 729                                 # there is only one pentuple-apostrophe in the line)
 730                                 else if ($firstspace > -1)
 731                                 {
 732                                         $arr [ $firstspace ] = "''";
 733                                         $arr [ $firstspace-1 ] .= "'";
 734                                 }
 735                         }
 736
 737                         # Now let's actually convert our apostrophic mush to HTML!
 738                         $output = '';
 739                         $buffer = '';
 740                         $state = '';
 741                         $i = 0;
 742                         foreach ($arr as $r)
 743                         {
 744                                 if (($i % 2) == 0)
 745                                 {
 746                                         if ($state == 'both')
 747                                                 $buffer .= $r;
 748                                         else
 749                                                 $output .= $r;
 750                                 }
 751                                 else
 752                                 {
 753                                         if (strlen ($r) == 2)
 754                                         {
 755                                                 if ($state == 'em')
 756                                                 { $output .= '</em>'; $state = ''; }
 757                                                 else if ($state == 'strongem')
 758                                                 { $output .= '</em>'; $state = 'strong'; }
 759                                                 else if ($state == 'emstrong')
 760                                                 { $output .= '</strong></em><strong>'; $state = 'strong'; }
 761                                                 else if ($state == 'both')
 762                                                 { $output .= '<strong><em>'.$buffer.'</em>'; $state = 'strong'; }
 763                                                 else # $state can be 'strong' or ''
 764                                                 { $output .= '<em>'; $state .= 'em'; }
 765                                         }
 766                                         else if (strlen ($r) == 3)
 767                                         {
 768                                                 if ($state == 'strong')
 769                                                 { $output .= '</strong>'; $state = ''; }
 770                                                 else if ($state == 'strongem')
 771                                                 { $output .= '</em></strong><em>'; $state = 'em'; }
 772                                                 else if ($state == 'emstrong')
 773                                                 { $output .= '</strong>'; $state = 'em'; }
 774                                                 else if ($state == 'both')
 775                                                 { $output .= '<em><strong>'.$buffer.'</strong>'; $state = 'em'; }
 776                                                 else # $state can be 'em' or ''
 777                                                 { $output .= '<strong>'; $state .= 'strong'; }
 778                                         }
 779                                         else if (strlen ($r) == 5)
 780                                         {
 781                                                 if ($state == 'strong')
 782                                                 { $output .= '</strong><em>'; $state = 'em'; }
 783                                                 else if ($state == 'em')
 784                                                 { $output .= '</em><strong>'; $state = 'strong'; }
 785                                                 else if ($state == 'strongem')
 786                                                 { $output .= '</em></strong>'; $state = ''; }
 787                                                 else if ($state == 'emstrong')
 788                                                 { $output .= '</strong></em>'; $state = ''; }
 789                                                 else if ($state == 'both')
 790                                                 { $output .= '<em><strong>'.$buffer.'</strong></em>'; $state = ''; }
 791                                                 else # ($state == '')
 792                                                 { $buffer = ''; $state = 'both'; }
 793                                         }
 794                                 }
 795                                 $i++;
 796                         }
 797                         # Now close all remaining tags.  Notice that the order is important.
 798                         if ($state == 'strong' || $state == 'emstrong')
 799                                 $output .= '</strong>';
 800                         if ($state == 'em' || $state == 'strongem' || $state == 'emstrong')
 801                                 $output .= '</em>';
 802                         if ($state == 'strongem')
 803                                 $output .= '</strong>';
 804                         if ($state == 'both')
 805                                 $output .= '<strong><em>'.$buffer.'</em></strong>';
 806                         return $output;
 807                 }
 808         }
 809
 810         # Note: we have to do external links before the internal ones,
 811         # and otherwise take great care in the order of things here, so
 812         # that we don't end up interpreting some URLs twice.
 813
 814         /* private */ function replaceExternalLinks( $text ) {
 815                 $fname = 'Parser::replaceExternalLinks';
 816                 wfProfileIn( $fname );
 817
 818                 $sk =& $this->mOptions->getSkin();
 819                 $linktrail = wfMsg('linktrail');
 820                 $bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 821
 822                 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
 823
 824                 $i = 0;
 825                 while ( $i<count( $bits ) ) {
 826                         $url = $bits[$i++];
 827                         $protocol = $bits[$i++];
 828                         $text = $bits[$i++];
 829                         $trail = $bits[$i++];
 830
 831                         # If the link text is an image URL, replace it with an <img> tag
 832                         # This happened by accident in the original parser, but some people used it extensively
 833                         $img = $this->maybeMakeImageLink( $text );
 834                         if ( $img !== false ) {
 835                                 $text = $img;
 836                         }
 837
 838                         $dtrail = '';
 839
 840                         # No link text, e.g. [http://domain.tld/some.link]
 841                         if ( $text == '' ) {
 842                                 # Autonumber if allowed
 843                                 if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
 844                                         $text = '[' . ++$this->mAutonumber . ']';
 845                                 } else {
 846                                         # Otherwise just use the URL
 847                                         $text = htmlspecialchars( $url );
 848                                 }
 849                         } else {
 850                                 # Have link text, e.g. [http://domain.tld/some.link text]s
 851                                 # Check for trail
 852                                 if ( preg_match( $linktrail, $trail, $m2 ) ) {
 853                                         $dtrail = $m2[1];
 854                                         $trail = $m2[2];
 855                                 }
 856                         }
 857
 858                         $encUrl = htmlspecialchars( $url );
 859                         # Bit in parentheses showing the URL for the printable version
 860                         if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, '/' ) . "/?$!", $url ) ) {
 861                                 $paren = '';
 862                         } else {
 863                                 # Expand the URL for printable version
 864                                 if ( ! $sk->suppressUrlExpansion() ) {
 865                                         $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
 866                                 } else {
 867                                         $paren = '';
 868                                 }
 869                         }
 870
 871                         # Process the trail (i.e. everything after this link up until start of the next link),
 872                         # replacing any non-bracketed links
 873                         $trail = $this->replaceFreeExternalLinks( $trail );
 874
 875                         $la = $sk->getExternalLinkAttributes( $url, $text );
 876
 877                         # Use the encoded URL
 878                         # This means that users can paste URLs directly into the text
 879                         # Funny characters like &ouml; aren't valid in URLs anyway
 880                         # This was changed in August 2004
 881                         $s .= "<a href=\"{$url}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
 882                 }
 883
 884                 wfProfileOut( $fname );
 885                 return $s;
 886         }
 887
 888         # Replace anything that looks like a URL with a link
 889         function replaceFreeExternalLinks( $text ) {
 890                 $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 891                 $s = array_shift( $bits );
 892                 $i = 0;
 893
 894                 $sk =& $this->mOptions->getSkin();
 895
 896                 while ( $i < count( $bits ) ){
 897                         $protocol = $bits[$i++];
 898                         $remainder = $bits[$i++];
 899
 900                         if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
 901                                 # Found some characters after the protocol that look promising
 902                                 $url = $protocol . $m[1];
 903                                 $trail = $m[2];
 904
 905                                 # Move trailing punctuation to $trail
 906                                 $sep = ',;\.:!?';
 907                                 # If there is no left bracket, then consider right brackets fair game too
 908                                 if ( strpos( $url, '(' ) === false ) {
 909                                         $sep .= ')';
 910                                 }
 911
 912                                 $numSepChars = strspn( strrev( $url ), $sep );
 913                                 if ( $numSepChars ) {
 914                                         $trail = substr( $url, -$numSepChars ) . $trail;
 915                                         $url = substr( $url, 0, -$numSepChars );
 916                                 }
 917
 918                                 # Replace &amp; from obsolete syntax with &
 919                                 $url = str_replace( '&amp;', '&', $url );
 920
 921                                 # Is this an external image?
 922                                 $text = $this->maybeMakeImageLink( $url );
 923                                 if ( $text === false ) {
 924                                         # Not an image, make a link
 925                                         $text = $sk->makeExternalLink( $url, $url );
 926                                 }
 927                                 $s .= $text . $trail;
 928                         } else {
 929                                 $s .= $protocol . $remainder;
 930                         }
 931                 }
 932                 return $s;
 933         }
 934
 935         # make an image if it's allowed
 936         function maybeMakeImageLink( $url ) {
 937                 $sk =& $this->mOptions->getSkin();
 938                 $text = false;
 939                 if ( $this->mOptions->getAllowExternalImages() ) {
 940                         if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
 941                                 # Image found
 942                                 $text = $sk->makeImage( htmlspecialchars( $url ) );
 943                         }
 944                 }
 945                 return $text;
 946         }
 947
        # The wikilinks [[ ]] are processed here.
 949         /* private */ function replaceInternalLinks( $s ) {
 950                 global $wgLang, $wgLinkCache;
 951                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 952                 static $fname = 'Parser::replaceInternalLinks' ;
 953                 wfProfileIn( $fname );
 954
 955                 wfProfileIn( $fname.'-setup' );
 956                 static $tc = FALSE;
 957                 # the % is needed to support urlencoded titles as well
 958                 if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
 959                 $sk =& $this->mOptions->getSkin();
 960
 961                 $redirect = MagicWord::get ( MAG_REDIRECT ) ;
 962
 963                 $a = explode( '[[', ' ' . $s );
 964                 $s = array_shift( $a );
 965                 $s = substr( $s, 1 );
 966
 967                 # Match a link having the form [[namespace:link|alternate]]trail
 968                 static $e1 = FALSE;
 969                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 970                 # Match the end of a line for a word that's not followed by whitespace,
 971                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 972                 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
 973
 974                 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
 975                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 976
 977                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 978
 979                 if ( $useLinkPrefixExtension ) {
 980                         if ( preg_match( $e2, $s, $m ) ) {
 981                                 $first_prefix = $m[2];
 982                                 $s = $m[1];
 983                         } else {
 984                                 $first_prefix = false;
 985                         }
 986                 } else {
 987                         $prefix = '';
 988                 }
 989
 990                 wfProfileOut( $fname.'-setup' );
 991
 992                 # start procedeeding each line
 993                 foreach ( $a as $line ) {
 994                         wfProfileIn( $fname.'-prefixhandling' );
 995                         if ( $useLinkPrefixExtension ) {
 996                                 if ( preg_match( $e2, $s, $m ) ) {
 997                                         $prefix = $m[2];
 998                                         $s = $m[1];
 999                                 } else {
1000                                         $prefix='';
1001                                 }
1002                                 # first link
1003                                 if($first_prefix) {
1004                                         $prefix = $first_prefix;
1005                                         $first_prefix = false;
1006                                 }
1007                         }
1008                         wfProfileOut( $fname.'-prefixhandling' );
1009
1010                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1011                                 $text = $m[2];
1012                                 # fix up urlencoded title texts
1013                                 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1014                                 $trail = $m[3];
1015                         } else { # Invalid form; output directly
1016                                 $s .= $prefix . '[[' . $line ;
1017                                 continue;
1018                         }
1019
1020                         # Valid link forms:
1021                         # Foobar -- normal
1022                         # :Foobar -- override special treatment of prefix (images, language links)
1023                         # /Foobar -- convert to CurrentPage/Foobar
1024                         # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1025
1026                         # Look at the first character
1027                         $c = substr($m[1],0,1);
1028                         $noforce = ($c != ':');
1029
1030                         # subpage
1031                         if( $c == '/' ) {
1032                                 # / at end means we don't want the slash to be shown
1033                                 if(substr($m[1],-1,1)=='/') {
1034                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
1035                                         $noslash=$m[1];
1036                                 } else {
1037                                         $noslash=substr($m[1],1);
1038                                 }
1039
1040                                 # Some namespaces don't allow subpages
1041                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
1042                                         # subpages allowed here
1043                                         $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
1044                                         if( '' == $text ) {
1045                                                 $text= $m[1];
1046                                         } # this might be changed for ugliness reasons
1047                                 } else {
1048                                         # no subpage allowed, use standard link
1049                                         $link = $noslash;
1050                                 }
1051
1052                         } elseif( $noforce ) { # no subpage
1053                                 $link = $m[1];
1054                         } else {
1055                                 # We don't want to keep the first character
1056                                 $link = substr( $m[1], 1 );
1057                         }
1058
1059                         $wasblank = ( '' == $text );
1060                         if( $wasblank ) $text = $link;
1061
1062                         $nt = Title::newFromText( $link );
1063                         if( !$nt ) {
1064                                 $s .= $prefix . '[[' . $line;
1065                                 continue;
1066                         }
1067
1068                         $ns = $nt->getNamespace();
1069                         $iw = $nt->getInterWiki();
1070
1071                         # Link not escaped by : , create the various objects
1072                         if( $noforce ) {
1073
1074                                 # Interwikis
1075                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1076                                         array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
1077                                         $tmp = $prefix . $trail ;
1078                                         $s .= (trim($tmp) == '')? '': $tmp;
1079                                         continue;
1080                                 }
1081
1082                                 if ( $ns == NS_IMAGE ) {
1083                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1084                                         $wgLinkCache->addImageLinkObj( $nt );
1085                                         continue;
1086                                 }
1087
1088                                 if ( $ns == NS_CATEGORY ) {
1089                                         $t = $nt->getText() ;
1090                                         $nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).':'.$t ) ;
1091
1092                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1093                                         $pPLC=$sk->postParseLinkColour();
1094                                         $sk->postParseLinkColour( false );
1095                                         $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1096                                         $sk->postParseLinkColour( $pPLC );
1097                                         $wgLinkCache->resume();
1098
1099                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
1100                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1101                                         $this->mOutput->mCategoryLinks[] = $t ;
1102                                         $s .= $prefix . $trail ;
1103                                         continue;
1104                                 }
1105                         }
1106
1107                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
1108                             ( strpos( $link, '#' ) == FALSE ) ) {
1109                                 # Self-links are handled specially; generally de-link and change to bold.
1110                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1111                                 continue;
1112                         }
1113
1114                         if( $ns == NS_MEDIA ) {
1115                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1116                                 $wgLinkCache->addImageLinkObj( $nt );
1117                                 continue;
1118                         } elseif( $ns == NS_SPECIAL ) {
1119                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1120                                 continue;
1121                         }
1122                         $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1123                 }
1124                 wfProfileOut( $fname );
1125                 return $s;
1126         }
1127
1128         # Some functions here used by doBlockLevels()
1129         #
1130         /* private */ function closeParagraph() {
1131                 $result = '';
1132                 if ( '' != $this->mLastSection ) {
1133                         $result = '</' . $this->mLastSection  . ">\n";
1134                 }
1135                 $this->mInPre = false;
1136                 $this->mLastSection = '';
1137                 return $result;
1138         }
        # getCommon() returns the length of the longest common prefix of
        # both arguments, i.e. the number of leading characters they share.
        #
1142         /* private */ function getCommon( $st1, $st2 ) {
1143                 $fl = strlen( $st1 );
1144                 $shorter = strlen( $st2 );
1145                 if ( $fl < $shorter ) { $shorter = $fl; }
1146
1147                 for ( $i = 0; $i < $shorter; ++$i ) {
1148                         if ( $st1{$i} != $st2{$i} ) { break; }
1149                 }
1150                 return $i;
1151         }
1152         # These next three functions open, continue, and close the list
1153         # element appropriate to the prefix character passed into them.
1154         #
1155         /* private */ function openList( $char ) {
1156                 $result = $this->closeParagraph();
1157
1158                 if ( '*' == $char ) { $result .= '<ul><li>'; }
1159                 else if ( '#' == $char ) { $result .= '<ol><li>'; }
1160                 else if ( ':' == $char ) { $result .= '<dl><dd>'; }
1161                 else if ( ';' == $char ) {
1162                         $result .= '<dl><dt>';
1163                         $this->mDTopen = true;
1164                 }
1165                 else { $result = '<!-- ERR 1 -->'; }
1166
1167                 return $result;
1168         }
1169
1170         /* private */ function nextItem( $char ) {
1171                 if ( '*' == $char || '#' == $char ) { return '</li><li>'; }
1172                 else if ( ':' == $char || ';' == $char ) {
1173                         $close = '</dd>';
1174                         if ( $this->mDTopen ) { $close = '</dt>'; }
1175                         if ( ';' == $char ) {
1176                                 $this->mDTopen = true;
1177                                 return $close . '<dt>';
1178                         } else {
1179                                 $this->mDTopen = false;
1180                                 return $close . '<dd>';
1181                         }
1182                 }
1183                 return '<!-- ERR 2 -->';
1184         }
1185
1186         /* private */ function closeList( $char ) {
1187                 if ( '*' == $char ) { $text = '</li></ul>'; }
1188                 else if ( '#' == $char ) { $text = '</li></ol>'; }
1189                 else if ( ':' == $char ) {
1190                         if ( $this->mDTopen ) {
1191                                 $this->mDTopen = false;
1192                                 $text = '</dt></dl>';
1193                         } else {
1194                                 $text = '</dd></dl>';
1195                         }
1196                 }
1197                 else {  return '<!-- ERR 3 -->'; }
1198                 return $text."\n";
1199         }
1200
1201         /* private */ function doBlockLevels( $text, $linestart ) {
1202                 $fname = 'Parser::doBlockLevels';
1203                 wfProfileIn( $fname );
1204
1205                 # Parsing through the text line by line.  The main thing
1206                 # happening here is handling of block-level elements p, pre,
1207                 # and making lists from lines starting with * # : etc.
1208                 #
1209                 $textLines = explode( "\n", $text );
1210
1211                 $lastPrefix = $output = $lastLine = '';
1212                 $this->mDTopen = $inBlockElem = false;
1213                 $prefixLength = 0;
1214                 $paragraphStack = false;
1215
1216                 if ( !$linestart ) {
1217                         $output .= array_shift( $textLines );
1218                 }
1219                 foreach ( $textLines as $oLine ) {
1220                         $lastPrefixLength = strlen( $lastPrefix );
1221                         $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
1222                         $preOpenMatch = preg_match('/<pre/i', $oLine );
1223                         if ( !$this->mInPre ) {
1224                                 # Multiple prefixes may abut each other for nested lists.
1225                                 $prefixLength = strspn( $oLine, '*#:;' );
1226                                 $pref = substr( $oLine, 0, $prefixLength );
1227
1228                                 # eh?
1229                                 $pref2 = str_replace( ';', ':', $pref );
1230                                 $t = substr( $oLine, $prefixLength );
1231                                 $this->mInPre = !empty($preOpenMatch);
1232                         } else {
1233                                 # Don't interpret any other prefixes in preformatted text
1234                                 $prefixLength = 0;
1235                                 $pref = $pref2 = '';
1236                                 $t = $oLine;
1237                         }
1238
1239                         # List generation
1240                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1241                                 # Same as the last item, so no need to deal with nesting or opening stuff
1242                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1243                                 $paragraphStack = false;
1244
1245                                 if ( substr( $pref, -1 ) == ';') {
1246                                         # The one nasty exception: definition lists work like this:
1247                                         # ; title : definition text
1248                                         # So we check for : in the remainder text to split up the
1249                                         # title and definition, without b0rking links.
1250                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1251                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1252                                                 $term = $match[1];
1253                                                 $output .= $term . $this->nextItem( ':' );
1254                                                 $t = $match[2];
1255                                         }
1256                                 }
1257                         } elseif( $prefixLength || $lastPrefixLength ) {
1258                                 # Either open or close a level...
1259                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1260                                 $paragraphStack = false;
1261
1262                                 while( $commonPrefixLength < $lastPrefixLength ) {
1263                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1264                                         --$lastPrefixLength;
1265                                 }
1266                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1267                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1268                                 }
1269                                 while ( $prefixLength > $commonPrefixLength ) {
1270                                         $char = substr( $pref, $commonPrefixLength, 1 );
1271                                         $output .= $this->openList( $char );
1272
1273                                         if ( ';' == $char ) {
1274                                                 # FIXME: This is dupe of code above
1275                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1276                                                         $term = $match[1];
1277                                                         $output .= $term . $this->nextItem( ':' );
1278                                                         $t = $match[2];
1279                                                 }
1280                                         }
1281                                         ++$commonPrefixLength;
1282                                 }
1283                                 $lastPrefix = $pref2;
1284                         }
1285                         if( 0 == $prefixLength ) {
1286                                 # No prefix (not in list)--go to paragraph mode
1287                                 $uniq_prefix = UNIQ_PREFIX;
1288                                 // XXX: use a stack for nestable elements like span, table and div
1289                                 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1290                                 $closematch = preg_match(
1291                                         '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1292                                         '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
1293                                 if ( $openmatch or $closematch ) {
1294                                         $paragraphStack = false;
1295                                         $output .= $this->closeParagraph();
1296                                         if($preOpenMatch and !$preCloseMatch) {
1297                                                 $this->mInPre = true;
1298                                         }
1299                                         if ( $closematch ) {
1300                                                 $inBlockElem = false;
1301                                         } else {
1302                                                 $inBlockElem = true;
1303                                         }
1304                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1305                                         if ( ' ' == $t{0} and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
1306                                                 // pre
1307                                                 if ($this->mLastSection != 'pre') {
1308                                                         $paragraphStack = false;
1309                                                         $output .= $this->closeParagraph().'<pre>';
1310                                                         $this->mLastSection = 'pre';
1311                                                 }
1312                                         } else {
1313                                                 // paragraph
1314                                                 if ( '' == trim($t) ) {
1315                                                         if ( $paragraphStack ) {
1316                                                                 $output .= $paragraphStack.'<br />';
1317                                                                 $paragraphStack = false;
1318                                                                 $this->mLastSection = 'p';
1319                                                         } else {
1320                                                                 if ($this->mLastSection != 'p' ) {
1321                                                                         $output .= $this->closeParagraph();
1322                                                                         $this->mLastSection = '';
1323                                                                         $paragraphStack = '<p>';
1324                                                                 } else {
1325                                                                         $paragraphStack = '</p><p>';
1326                                                                 }
1327                                                         }
1328                                                 } else {
1329                                                         if ( $paragraphStack ) {
1330                                                                 $output .= $paragraphStack;
1331                                                                 $paragraphStack = false;
1332                                                                 $this->mLastSection = 'p';
1333                                                         } else if ($this->mLastSection != 'p') {
1334                                                                 $output .= $this->closeParagraph().'<p>';
1335                                                                 $this->mLastSection = 'p';
1336                                                         }
1337                                                 }
1338                                         }
1339                                 }
1340                         }
1341                         if ($paragraphStack === false) {
1342                                 $output .= $t."\n";
1343                         }
1344                 }
1345                 while ( $prefixLength ) {
1346                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1347                         --$prefixLength;
1348                 }
1349                 if ( '' != $this->mLastSection ) {
1350                         $output .= '</' . $this->mLastSection . '>';
1351                         $this->mLastSection = '';
1352                 }
1353
1354                 wfProfileOut( $fname );
1355                 return $output;
1356         }
1357
1358         # Return value of a magic variable (like PAGENAME)
1359         function getVariableValue( $index ) {
1360                 global $wgLang, $wgSitename, $wgServer;
1361
1362                 switch ( $index ) {
1363                         case MAG_CURRENTMONTH:
1364                                 return $wgLang->formatNum( date( 'm' ) );
1365                         case MAG_CURRENTMONTHNAME:
1366                                 return $wgLang->getMonthName( date('n') );
1367                         case MAG_CURRENTMONTHNAMEGEN:
1368                                 return $wgLang->getMonthNameGen( date('n') );
1369                         case MAG_CURRENTDAY:
1370                                 return $wgLang->formatNum( date('j') );
1371                         case MAG_PAGENAME:
1372                                 return $this->mTitle->getText();
1373                         case MAG_PAGENAMEE:
1374                                 return $this->mTitle->getPartialURL();
1375                         case MAG_NAMESPACE:
1376                                 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1377                                 return $wgLang->getNsText($this->mTitle->getNamespace()); # Patch by Dori
1378                         case MAG_CURRENTDAYNAME:
1379                                 return $wgLang->getWeekdayName( date('w')+1 );
1380                         case MAG_CURRENTYEAR:
1381                                 return $wgLang->formatNum( date( 'Y' ) );
1382                         case MAG_CURRENTTIME:
1383                                 return $wgLang->time( wfTimestampNow(), false );
1384                         case MAG_NUMBEROFARTICLES:
1385                                 return $wgLang->formatNum( wfNumberOfArticles() );
1386                         case MAG_SITENAME:
1387                                 return $wgSitename;
1388                         case MAG_SERVER:
1389                                 return $wgServer;
1390                         default:
1391                                 return NULL;
1392                 }
1393         }
1394
1395         # initialise the magic variables (like CURRENTMONTHNAME)
1396         function initialiseVariables() {
1397                 global $wgVariableIDs;
1398                 $this->mVariables = array();
1399                 foreach ( $wgVariableIDs as $id ) {
1400                         $mw =& MagicWord::get( $id );
1401                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1402                 }
1403         }
1404
1405         /* private */ function replaceVariables( $text, $args = array() ) {
1406                 global $wgLang, $wgScript, $wgArticlePath;
1407
1408                 # Prevent too big inclusions
1409                 if(strlen($text)> MAX_INCLUDE_SIZE)
1410                 return $text;
1411
1412                 $fname = 'Parser::replaceVariables';
1413                 wfProfileIn( $fname );
1414
1415                 $bail = false;
1416                 $titleChars = Title::legalChars();
1417                 $nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );
1418
1419                 # This function is called recursively. To keep track of arguments we need a stack:
1420                 array_push( $this->mArgStack, $args );
1421
1422                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1423                 $GLOBALS['wgCurParser'] =& $this;
1424
1425                 if ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_MSG ) {
1426                         # Variable substitution
1427                         $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", 'wfVariableSubstitution', $text );
1428                 }
1429
1430                 if ( $this->mOutputType == OT_HTML ) {
1431                         # Argument substitution
1432                         $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
1433                 }
1434                 # Template substitution
1435                 $regex = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
1436                 $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );
1437
1438                 array_pop( $this->mArgStack );
1439
1440                 wfProfileOut( $fname );
1441                 return $text;
1442         }
1443
1444         function variableSubstitution( $matches ) {
1445                 if ( !$this->mVariables ) {
1446                         $this->initialiseVariables();
1447                 }
1448                 if ( array_key_exists( $matches[1], $this->mVariables ) ) {
1449                         $text = $this->mVariables[$matches[1]];
1450                         $this->mOutput->mContainsOldMagic = true;
1451                 } else {
1452                         $text = $matches[0];
1453                 }
1454                 return $text;
1455         }
1456
1457         # Split template arguments
1458         function getTemplateArgs( $argsString ) {
1459                 if ( $argsString === '' ) {
1460                         return array();
1461                 }
1462
1463                 $args = explode( '|', substr( $argsString, 1 ) );
1464
1465                 # If any of the arguments contains a '[[' but no ']]', it needs to be
1466                 # merged with the next arg because the '|' character between belongs
1467                 # to the link syntax and not the template parameter syntax.
1468                 $argc = count($args);
1469                 $i = 0;
1470                 for ( $i = 0; $i < $argc-1; $i++ ) {
1471                         if ( substr_count ( $args[$i], '[[' ) != substr_count ( $args[$i], ']]' ) ) {
1472                                 $args[$i] .= '|'.$args[$i+1];
1473                                 array_splice($args, $i+1, 1);
1474                                 $i--;
1475                                 $argc--;
1476                         }
1477                 }
1478
1479                 return $args;
1480         }
1481
1482         function braceSubstitution( $matches ) {
1483                 global $wgLinkCache, $wgLang;
1484                 $fname = 'Parser::braceSubstitution';
1485                 $found = false;
1486                 $nowiki = false;
1487                 $noparse = false;
1488
1489                 $title = NULL;
1490
1491                 # $newline is an optional newline character before the braces
1492                 # $part1 is the bit before the first |, and must contain only title characters
1493                 # $args is a list of arguments, starting from index 0, not including $part1
1494
1495                 $newline = $matches[1];
1496                 $part1 = $matches[2];
1497                 # If the third subpattern matched anything, it will start with |
1498
1499                 $args = $this->getTemplateArgs($matches[3]);
1500                 $argc = count( $args );
1501
1502                 # {{{}}}
1503                 if ( strpos( $matches[0], '{{{' ) !== false ) {
1504                         $text = $matches[0];
1505                         $found = true;
1506                         $noparse = true;
1507                 }
1508
1509                 # SUBST
1510                 if ( !$found ) {
1511                         $mwSubst =& MagicWord::get( MAG_SUBST );
1512                         if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1513                                 if ( $this->mOutputType != OT_WIKI ) {
1514                                         # Invalid SUBST not replaced at PST time
1515                                         # Return without further processing
1516                                         $text = $matches[0];
1517                                         $found = true;
1518                                         $noparse= true;
1519                                 }
1520                         } elseif ( $this->mOutputType == OT_WIKI ) {
1521                                 # SUBST not found in PST pass, do nothing
1522                                 $text = $matches[0];
1523                                 $found = true;
1524                         }
1525                 }
1526
1527                 # MSG, MSGNW and INT
1528                 if ( !$found ) {
1529                         # Check for MSGNW:
1530                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1531                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1532                                 $nowiki = true;
1533                         } else {
1534                                 # Remove obsolete MSG:
1535                                 $mwMsg =& MagicWord::get( MAG_MSG );
1536                                 $mwMsg->matchStartAndRemove( $part1 );
1537                         }
1538
1539                         # Check if it is an internal message
1540                         $mwInt =& MagicWord::get( MAG_INT );
1541                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1542                                 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1543                                         $text = wfMsgReal( $part1, $args, true );
1544                                         $found = true;
1545                                 }
1546                         }
1547                 }
1548
1549                 # NS
1550                 if ( !$found ) {
1551                         # Check for NS: (namespace expansion)
1552                         $mwNs = MagicWord::get( MAG_NS );
1553                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1554                                 if ( intval( $part1 ) ) {
1555                                         $text = $wgLang->getNsText( intval( $part1 ) );
1556                                         $found = true;
1557                                 } else {
1558                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1559                                         if ( !is_null( $index ) ) {
1560                                                 $text = $wgLang->getNsText( $index );
1561                                                 $found = true;
1562                                         }
1563                                 }
1564                         }
1565                 }
1566
1567                 # LOCALURL and LOCALURLE
1568                 if ( !$found ) {
1569                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1570                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1571
1572                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1573                                 $func = 'getLocalURL';
1574                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1575                                 $func = 'escapeLocalURL';
1576                         } else {
1577                                 $func = '';
1578                         }
1579
1580                         if ( $func !== '' ) {
1581                                 $title = Title::newFromText( $part1 );
1582                                 if ( !is_null( $title ) ) {
1583                                         if ( $argc > 0 ) {
1584                                                 $text = $title->$func( $args[0] );
1585                                         } else {
1586                                                 $text = $title->$func();
1587                                         }
1588                                         $found = true;
1589                                 }
1590                         }
1591                 }
1592
1593                 # Internal variables
1594                 if ( !$this->mVariables ) {
1595                         $this->initialiseVariables();
1596                 }
1597                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1598                         $text = $this->mVariables[$part1];
1599                         $found = true;
1600                         $this->mOutput->mContainsOldMagic = true;
1601                 }
1602
1603                 # GRAMMAR
1604                 if ( !$found && $argc == 1 ) {
1605                         $mwGrammar =& MagicWord::get( MAG_GRAMMAR );
1606                         if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
1607                                 $text = $wgLang->convertGrammar( $args[0], $part1 );
1608                                 $found = true;
1609                         }
1610                 }
1611
1612                 # Template table test
1613
1614                 # Did we encounter this template already? If yes, it is in the cache
1615                 # and we need to check for loops.
1616                 if ( isset( $this->mTemplates[$part1] ) ) {
1617                         # Infinite loop test
1618                         if ( isset( $this->mTemplatePath[$part1] ) ) {
1619                                 $noparse = true;
1620                                 $found = true;
1621                         }
1622                         # set $text to cached message.
1623                         $text = $this->mTemplates[$part1];
1624                         $found = true;
1625                 }
1626
1627                 # Load from database
1628                 if ( !$found ) {
1629                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1630                         if ( !is_null( $title ) && !$title->isExternal() ) {
1631                                 # Check for excessive inclusion
1632                                 $dbk = $title->getPrefixedDBkey();
1633                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1634                                         # This should never be reached.
1635                                         $article = new Article( $title );
1636                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1637                                         if ( $articleContent !== false ) {
1638                                                 $found = true;
1639                                                 $text = $articleContent;
1640                                         }
1641                                 }
1642
1643                                 # If the title is valid but undisplayable, make a link to it
1644                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1645                                         $text = '[['.$title->getPrefixedText().']]';
1646                                         $found = true;
1647                                 }
1648
1649                                 # Template cache array insertion
1650                                 $this->mTemplates[$part1] = $text;
1651                         }
1652                 }
1653
1654                 # Recursive parsing, escaping and link table handling
1655                 # Only for HTML output
1656                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1657                         $text = wfEscapeWikiText( $text );
1658                 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1659                         # Clean up argument array
1660                         $assocArgs = array();
1661                         $index = 1;
1662                         foreach( $args as $arg ) {
1663                                 $eqpos = strpos( $arg, '=' );
1664                                 if ( $eqpos === false ) {
1665                                         $assocArgs[$index++] = $arg;
1666                                 } else {
1667                                         $name = trim( substr( $arg, 0, $eqpos ) );
1668                                         $value = trim( substr( $arg, $eqpos+1 ) );
1669                                         if ( $value === false ) {
1670                                                 $value = '';
1671                                         }
1672                                         if ( $name !== false ) {
1673                                                 $assocArgs[$name] = $value;
1674                                         }
1675                                 }
1676                         }
1677
1678                         # Do not enter included links in link table
1679                         if ( !is_null( $title ) ) {
1680                                 $wgLinkCache->suspend();
1681                         }
1682
1683                         # Add a new element to the templace recursion path
1684                         $this->mTemplatePath[$part1] = 1;
1685
1686                         $text = $this->stripParse( $text, $newline, $assocArgs );
1687
1688                         # Resume the link cache and register the inclusion as a link
1689                         if ( !is_null( $title ) ) {
1690                                 $wgLinkCache->resume();
1691                                 $wgLinkCache->addLinkObj( $title );
1692                         }
1693                 }
1694
1695                 # Empties the template path
1696                 $this->mTemplatePath = array();
1697
1698                 if ( !$found ) {
1699                         return $matches[0];
1700                 } else {
1701                         return $text;
1702                 }
1703         }
1704
1705         # Triple brace replacement -- used for template arguments
1706         function argSubstitution( $matches ) {
1707                 $newline = $matches[1];
1708                 $arg = trim( $matches[2] );
1709                 $text = $matches[0];
1710                 $inputArgs = end( $this->mArgStack );
1711
1712                 if ( array_key_exists( $arg, $inputArgs ) ) {
1713                         $text = $this->stripParse( $inputArgs[$arg], $newline, array() );
1714                 }
1715
1716                 return $text;
1717         }
1718
1719         # Returns true if the function is allowed to include this entity
1720         function incrementIncludeCount( $dbk ) {
1721                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1722                         $this->mIncludeCount[$dbk] = 0;
1723                 }
1724                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1725                         return true;
1726                 } else {
1727                         return false;
1728                 }
1729         }
1730
1731
        # Cleans up HTML, removes dangerous tags and attributes
        #
        # The text is split on '<'; each piece that looks like a whitelisted tag
        # is re-emitted with its attributes passed through fixTagAttributes(),
        # every other piece has its '<' and '>' escaped to entities.
        # When $wgUserHtml is false the whitelists are empty, so all tags are escaped.
        # When $wgUseTidy is false the function additionally tracks open tags on a
        # stack, rejects badly nested tags and closes whatever is still open at the end.
        #
        # $text: text to clean
        # Returns the cleaned text.
        /* private */ function removeHTMLtags( $text ) {
                global $wgUseTidy, $wgUserHtml;
                $fname = 'Parser::removeHTMLtags';
                wfProfileIn( $fname );

                if( $wgUserHtml ) {
                        $htmlpairs = array( # Tags that must be closed
                                'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
                                'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
                                'strike', 'strong', 'tt', 'var', 'div', 'center',
                                'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
                                'ruby', 'rt' , 'rb' , 'rp', 'p'
                        );
                        $htmlsingle = array(
                                'br', 'hr', 'li', 'dt', 'dd'
                        );
                        $htmlnest = array( # Tags that can be nested--??
                                'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
                                'dl', 'font', 'big', 'small', 'sub', 'sup'
                        );
                        $tabletags = array( # Can only appear inside table
                                'td', 'th', 'tr'
                        );
                } else {
                        $htmlpairs = array();
                        $htmlsingle = array();
                        $htmlnest = array();
                        $tabletags = array();
                }

                # Table tags are handled like the single tags: they are never pushed
                # on the tag stack and need no matching close tag
                $htmlsingle = array_merge( $tabletags, $htmlsingle );
                $htmlelements = array_merge( $htmlsingle, $htmlpairs );

                # NOTE(review): $htmlattrs is not referenced again in this function
                $htmlattrs = $this->getHTMLattrs () ;

                # Remove HTML comments
                # The pattern has a single capturing group, so the '$2' replacement
                # expands to the empty string and the whole match is dropped
                $text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '$2', $text );

                # $text keeps the part before the first '<'; every element of $bits
                # is the text that followed one '<'
                $bits = explode( '<', $text );
                $text = array_shift( $bits );
                if(!$wgUseTidy) {
                        # $tagstack: currently open paired tags
                        # $tablestack: tag stacks saved when a <table> was opened
                        $tagstack = array(); $tablestack = array();
                        foreach ( $bits as $x ) {
                                # Notices and warnings are silenced here; presumably because
                                # $regs lacks the expected elements when the match fails
                                $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
                                preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
                                $x, $regs );
                                list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
                                error_reporting( $prev );

                                $badtag = 0 ;
                                if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
                                        # Check our stack
                                        if ( $slash ) {
                                                # Closing a tag...
                                                # A paired tag must match the most recently opened tag;
                                                # otherwise the popped entry is put back and the tag rejected
                                                if ( ! in_array( $t, $htmlsingle ) &&
                                                ( $ot = @array_pop( $tagstack ) ) != $t ) {
                                                        @array_push( $tagstack, $ot );
                                                        $badtag = 1;
                                                } else {
                                                        if ( $t == 'table' ) {
                                                                # Restore the stack that was active before this table
                                                                $tagstack = array_pop( $tablestack );
                                                        }
                                                        # Closing tags are emitted without attributes
                                                        $newparams = '';
                                                }
                                        } else {
                                                # Keep track for later
                                                if ( in_array( $t, $tabletags ) &&
                                                ! in_array( 'table', $tagstack ) ) {
                                                        # Table cell/row tag outside of any open table
                                                        $badtag = 1;
                                                } else if ( in_array( $t, $tagstack ) &&
                                                ! in_array ( $t , $htmlnest ) ) {
                                                        # Tag is already open and may not be nested in itself
                                                        $badtag = 1 ;
                                                } else if ( ! in_array( $t, $htmlsingle ) ) {
                                                        if ( $t == 'table' ) {
                                                                # Save the outer stack and start a fresh one for
                                                                # the table contents
                                                                array_push( $tablestack, $tagstack );
                                                                $tagstack = array();
                                                        }
                                                        array_push( $tagstack, $t );
                                                }
                                                # Strip non-approved attributes from the tag
                                                # (this runs for rejected tags as well)
                                                $newparams = $this->fixTagAttributes($params);

                                        }
                                        if ( ! $badtag ) {
                                                # Accepted: re-emit the tag, escaping any stray '>' in
                                                # the text that follows it
                                                $rest = str_replace( '>', '&gt;', $rest );
                                                $text .= "<$slash$t $newparams$brace$rest";
                                                continue;
                                        }
                                }
                                # Unknown or rejected tag: output it as escaped text
                                $text .= '&lt;' . str_replace( '>', '&gt;', $x);
                        }
                        # Close off any remaining tags
                        # The loop stops as soon as array_pop() returns a false-ish value
                        while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
                                $text .= "</$t>\n";
                                if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
                        }
                } else {
                        # this might be possible using tidy itself
                        # No nesting checks in this branch: only the tag whitelist and the
                        # attribute clean-up are applied
                        foreach ( $bits as $x ) {
                                preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
                                $x, $regs );
                                @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
                                if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
                                        $newparams = $this->fixTagAttributes($params);
                                        $rest = str_replace( '>', '&gt;', $rest );
                                        $text .= "<$slash$t $newparams$brace$rest";
                                } else {
                                        $text .= '&lt;' . str_replace( '>', '&gt;', $x);
                                }
                        }
                }
                wfProfileOut( $fname );
                return $text;
        }
1847
1848
1849         # This function accomplishes several tasks:
1850         # 1) Auto-number headings if that option is enabled
1851         # 2) Add an [edit] link to sections for logged in users who have enabled the option
1852         # 3) Add a Table of contents on the top for users who have enabled the option
1853         # 4) Auto-anchor headings
1854         #
1855         # It loops through all headlines, collects the necessary data, then splits up the
1856         # string and re-inserts the newly formatted headlines.
1857         /* private */ function formatHeadings( $text, $isMain=true ) {
1858                 global $wgInputEncoding, $wgMaxTocLevel;
1859
1860                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1861                 $doShowToc = $this->mOptions->getShowToc();
1862                 $forceTocHere = false;
1863                 if( !$this->mTitle->userCanEdit() ) {
1864                         $showEditLink = 0;
1865                         $rightClickHack = 0;
1866                 } else {
1867                         $showEditLink = $this->mOptions->getEditSection();
1868                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1869                 }
1870
1871                 # Inhibit editsection links if requested in the page
1872                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1873                 if( $esw->matchAndRemove( $text ) ) {
1874                         $showEditLink = 0;
1875                 }
1876                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1877                 # do not add TOC
1878                 $mw =& MagicWord::get( MAG_NOTOC );
1879                 if( $mw->matchAndRemove( $text ) ) {
1880                         $doShowToc = 0;
1881                 }
1882
1883                 # never add the TOC to the Main Page. This is an entry page that should not
1884                 # be more than 1-2 screens large anyway
1885                 if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
1886                         $doShowToc = 0;
1887                 }
1888
1889                 # Get all headlines for numbering them and adding funky stuff like [edit]
1890                 # links - this is for later, but we need the number of headlines right now
1891                 $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );
1892
1893                 # if there are fewer than 4 headlines in the article, do not show TOC
1894                 if( $numMatches < 4 ) {
1895                         $doShowToc = 0;
1896                 }
1897
1898                 # if the string __TOC__ (not case-sensitive) occurs in the HTML,
1899                 # override above conditions and always show TOC at that place
1900                 $mw =& MagicWord::get( MAG_TOC );
1901                 if ($mw->match( $text ) ) {
1902                         $doShowToc = 1;
1903                         $forceTocHere = true;
1904                 } else {
1905                         # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1906                         # override above conditions and always show TOC above first header
1907                         $mw =& MagicWord::get( MAG_FORCETOC );
1908                         if ($mw->matchAndRemove( $text ) ) {
1909                                 $doShowToc = 1;
1910                         }
1911                 }
1912
1913
1914
1915                 # We need this to perform operations on the HTML
1916                 $sk =& $this->mOptions->getSkin();
1917
1918                 # headline counter
1919                 $headlineCount = 0;
1920
1921                 # Ugh .. the TOC should have neat indentation levels which can be
1922                 # passed to the skin functions. These are determined here
1923                 $toclevel = 0;
1924                 $toc = '';
1925                 $full = '';
1926                 $head = array();
1927                 $sublevelCount = array();
1928                 $level = 0;
1929                 $prevlevel = 0;
1930                 foreach( $matches[3] as $headline ) {
1931                         $numbering = '';
1932                         if( $level ) {
1933                                 $prevlevel = $level;
1934                         }
1935                         $level = $matches[1][$headlineCount];
1936                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1937                                 # reset when we enter a new level
1938                                 $sublevelCount[$level] = 0;
1939                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1940                                 $toclevel += $level - $prevlevel;
1941                         }
1942                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1943                                 # reset when we step back a level
1944                                 $sublevelCount[$level+1]=0;
1945                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1946                                 $toclevel -= $prevlevel - $level;
1947                         }
1948                         # count number of headlines for each level
1949                         @$sublevelCount[$level]++;
1950                         if( $doNumberHeadings || $doShowToc ) {
1951                                 $dot = 0;
1952                                 for( $i = 1; $i <= $level; $i++ ) {
1953                                         if( !empty( $sublevelCount[$i] ) ) {
1954                                                 if( $dot ) {
1955                                                         $numbering .= '.';
1956                                                 }
1957                                                 $numbering .= $sublevelCount[$i];
1958                                                 $dot = 1;
1959                                         }
1960                                 }
1961                         }
1962
1963                         # The canonized header is a version of the header text safe to use for links
1964                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1965                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1966                         $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState );
1967
1968                         # Remove link placeholders by the link text.
1969                         #     <!--LINK namespace page_title link text with suffix-->
1970                         # turns into
1971                         #     link text with suffix
1972                         $canonized_headline = preg_replace( '/<!--LINK [0-9]* [^ ]* *(.*?)-->/','$1', $canonized_headline );
1973                         # strip out HTML
1974                         $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
1975                         $tocline = trim( $canonized_headline );
1976                         $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
1977                         $replacearray = array(
1978                                 '%3A' => ':',
1979                                 '%' => '.'
1980                         );
1981                         $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
1982                         $refer[$headlineCount] = $canonized_headline;
1983
1984                         # count how many in assoc. array so we can track dupes in anchors
1985                         @$refers[$canonized_headline]++;
1986                         $refcount[$headlineCount]=$refers[$canonized_headline];
1987
1988                         # Prepend the number to the heading text
1989
1990                         if( $doNumberHeadings || $doShowToc ) {
1991                                 $tocline = $numbering . ' ' . $tocline;
1992
1993                                 # Don't number the heading if it is the only one (looks silly)
1994                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1995                                         # the two are different if the line contains a link
1996                                         $headline=$numbering . ' ' . $headline;
1997                                 }
1998                         }
1999
2000                         # Create the anchor for linking from the TOC to the section
2001                         $anchor = $canonized_headline;
2002                         if($refcount[$headlineCount] > 1 ) {
2003                                 $anchor .= '_' . $refcount[$headlineCount];
2004                         }
2005                         if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
2006                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
2007                         }
2008                         if( $showEditLink ) {
2009                                 if ( empty( $head[$headlineCount] ) ) {
2010                                         $head[$headlineCount] = '';
2011                                 }
2012                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
2013                         }
2014
2015                         # Add the edit section span
2016                         if( $rightClickHack ) {
2017                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
2018                         }
2019
2020                         # give headline the correct <h#> tag
2021                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';
2022
2023                         $headlineCount++;
2024                 }
2025
2026                 if( $doShowToc ) {
2027                         $toclines = $headlineCount;
2028                         $toc .= $sk->tocUnindent( $toclevel );
2029                         $toc = $sk->tocTable( $toc );
2030                 }
2031
2032                 # split up and insert constructed headlines
2033
2034                 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
2035                 $i = 0;
2036
2037                 foreach( $blocks as $block ) {
2038                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
2039                                 # This is the [edit] link that appears for the top block of text when
2040                                 # section editing is enabled
2041
2042                                 # Disabled because it broke block formatting
2043                                 # For example, a bullet point in the top line
2044                                 # $full .= $sk->editSectionLink(0);
2045                         }
2046                         $full .= $block;
2047                         if( $doShowToc && !$i && $isMain && !$forceTocHere) {
2048                         # Top anchor now in skin
2049                                 $full = $full.$toc;
2050                         }
2051
2052                         if( !empty( $head[$i] ) ) {
2053                                 $full .= $head[$i];
2054                         }
2055                         $i++;
2056                 }
2057                 if($forceTocHere) {
2058                         $mw =& MagicWord::get( MAG_TOC );
2059                         return $mw->replace( $toc, $full );
2060                 } else {
2061                         return $full;
2062                 }
2063         }
2064
2065         # Return an HTML link for the "ISBN 123456" text
2066         /* private */ function magicISBN( $text ) {
2067                 global $wgLang;
2068                 $fname = 'Parser::magicISBN';
2069                 wfProfileIn( $fname );
2070
2071                 $a = split( 'ISBN ', ' '.$text );
2072                 if ( count ( $a ) < 2 ) {
2073                         wfProfileOut( $fname );
2074                         return $text;
2075                 }
2076                 $text = substr( array_shift( $a ), 1);
2077                 $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';
2078
2079                 foreach ( $a as $x ) {
2080                         $isbn = $blank = '' ;
2081                         while ( ' ' == $x{0} ) {
2082                                 $blank .= ' ';
2083                                 $x = substr( $x, 1 );
2084                         }
2085                         while ( strstr( $valid, $x{0} ) != false ) {
2086                                 $isbn .= $x{0};
2087                                 $x = substr( $x, 1 );
2088                         }
2089                         $num = str_replace( '-', '', $isbn );
2090                         $num = str_replace( ' ', '', $num );
2091
2092                         if ( '' == $num ) {
2093                                 $text .= "ISBN $blank$x";
2094                         } else {
2095                                 $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
2096                                 $text .= '<a href="' .
2097                                 $titleObj->escapeLocalUrl( 'isbn='.$num ) .
2098                                         "\" class=\"internal\">ISBN $isbn</a>";
2099                                 $text .= $x;
2100                         }
2101                 }
2102                 wfProfileOut( $fname );
2103                 return $text;
2104         }
2105
2106         # Return an HTML link for the "GEO ..." text
2107         /* private */ function magicGEO( $text ) {
2108                 global $wgLang, $wgUseGeoMode;
2109                 $fname = 'Parser::magicGEO';
2110                 wfProfileIn( $fname );
2111
2112                 # These next five lines are only for the ~35000 U.S. Census Rambot pages...
2113                 $directions = array ( 'N' => 'North' , 'S' => 'South' , 'E' => 'East' , 'W' => 'West' ) ;
2114                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['N']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2115                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['N']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
2116                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['S']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2117                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['S']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
2118
2119                 $a = split( 'GEO ', ' '.$text );
2120                 if ( count ( $a ) < 2 ) {
2121                         wfProfileOut( $fname );
2122                         return $text;
2123                 }
2124                 $text = substr( array_shift( $a ), 1);
2125                 $valid = '0123456789.+-:';
2126
2127                 foreach ( $a as $x ) {
2128                         $geo = $blank = '' ;
2129                         while ( ' ' == $x{0} ) {
2130                                 $blank .= ' ';
2131                                 $x = substr( $x, 1 );
2132                         }
2133                         while ( strstr( $valid, $x{0} ) != false ) {
2134                                 $geo .= $x{0};
2135                                 $x = substr( $x, 1 );
2136                         }
2137                         $num = str_replace( '+', '', $geo );
2138                         $num = str_replace( ' ', '', $num );
2139
2140                         if ( '' == $num || count ( explode ( ':' , $num , 3 ) ) < 2 ) {
2141                                 $text .= "GEO $blank$x";
2142                         } else {
2143                                 $titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
2144                                 $text .= '<a href="' .
2145                                 $titleObj->escapeLocalUrl( 'coordinates='.$num ) .
2146                                         "\" class=\"internal\">GEO $geo</a>";
2147                                 $text .= $x;
2148                         }
2149                 }
2150                 wfProfileOut( $fname );
2151                 return $text;
2152         }
2153
2154         # Return an HTML link for the "RFC 1234" text
2155         /* private */ function magicRFC( $text ) {
2156                 global $wgLang;
2157
2158                 $a = split( 'RFC ', ' '.$text );
2159                 if ( count ( $a ) < 2 ) return $text;
2160                 $text = substr( array_shift( $a ), 1);
2161                 $valid = '0123456789';
2162
2163                 foreach ( $a as $x ) {
2164                         $rfc = $blank = '' ;
2165                         while ( ' ' == $x{0} ) {
2166                                 $blank .= ' ';
2167                                 $x = substr( $x, 1 );
2168                         }
2169                         while ( strstr( $valid, $x{0} ) != false ) {
2170                                 $rfc .= $x{0};
2171                                 $x = substr( $x, 1 );
2172                         }
2173
2174                         if ( '' == $rfc ) {
2175                                 $text .= "RFC $blank$x";
2176                         } else {
2177                                 $url = wfmsg( 'rfcurl' );
2178                                 $url = str_replace( '$1', $rfc, $url);
2179                                 $sk =& $this->mOptions->getSkin();
2180                                 $la = $sk->getExternalLinkAttributes( $url, 'RFC '.$rfc );
2181                                 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
2182                         }
2183                 }
2184                 return $text;
2185         }
2186
2187         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
2188                 $this->mOptions = $options;
2189                 $this->mTitle =& $title;
2190                 $this->mOutputType = OT_WIKI;
2191
2192                 if ( $clearState ) {
2193                         $this->clearState();
2194                 }
2195
2196                 $stripState = false;
2197                 $pairs = array(
2198                         "\r\n" => "\n",
2199                         );
2200                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
2201                 // now with regexes
2202                 /*
2203                 $pairs = array(
2204                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
2205                         "/<br *?>/i" => "<br />",
2206                 );
2207                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
2208                 */
2209                 $text = $this->strip( $text, $stripState, false );
2210                 $text = $this->pstPass2( $text, $user );
2211                 $text = $this->unstrip( $text, $stripState );
2212                 $text = $this->unstripNoWiki( $text, $stripState );
2213                 return $text;
2214         }
2215
2216         /* private */ function pstPass2( $text, &$user ) {
2217                 global $wgLang, $wgLocaltimezone, $wgCurParser;
2218
2219                 # Variable replacement
2220                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
2221                 $text = $this->replaceVariables( $text );
2222
2223                 # Signatures
2224                 #
2225                 $n = $user->getName();
2226                 $k = $user->getOption( 'nickname' );
2227                 if ( '' == $k ) { $k = $n; }
2228                 if(isset($wgLocaltimezone)) {
2229                         $oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone);
2230                 }
2231                 /* Note: this is an ugly timezone hack for the European wikis */
2232                 $d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
2233                   ' (' . date( 'T' ) . ')';
2234                 if(isset($wgLocaltimezone)) putenv('TZ='.$oldtzs);
2235
2236                 $text = preg_replace( '/~~~~~/', $d, $text );
2237                 $text = preg_replace( '/~~~~/', '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]] $d", $text );
2238                 $text = preg_replace( '/~~~/', '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]", $text );
2239
2240                 # Context links: [[|name]] and [[name (context)|]]
2241                 #
2242                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
2243                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
2244                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
2245                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
2246
2247                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
2248                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
2249                 $p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/";                # [[namespace:page|]] and [[:namespace:page|]]
2250                 $p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]
2251                 $context = '';
2252                 $t = $this->mTitle->getText();
2253                 if ( preg_match( $conpat, $t, $m ) ) {
2254                         $context = $m[2];
2255                 }
2256                 $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
2257                 $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
2258                 $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );
2259
2260                 if ( '' == $context ) {
2261                         $text = preg_replace( $p2, '[[\\1]]', $text );
2262                 } else {
2263                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
2264                 }
2265
2266                 /*
2267                 $mw =& MagicWord::get( MAG_SUBST );
2268                 $wgCurParser = $this->fork();
2269                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
2270                 $this->merge( $wgCurParser );
2271                 */
2272
2273                 # Trim trailing whitespace
2274                 # MAG_END (__END__) tag allows for trailing
2275                 # whitespace to be deliberately included
2276                 $text = rtrim( $text );
2277                 $mw =& MagicWord::get( MAG_END );
2278                 $mw->matchAndRemove( $text );
2279
2280                 return $text;
2281         }
2282
2283         # Set up some variables which are usually set up in parse()
2284         # so that an external function can call some class members with confidence
2285         function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
2286                 $this->mTitle =& $title;
2287                 $this->mOptions = $options;
2288                 $this->mOutputType = $outputType;
2289                 if ( $clearState ) {
2290                         $this->clearState();
2291                 }
2292         }
2293
2294         function transformMsg( $text, $options ) {
2295                 global $wgTitle;
2296                 static $executing = false;
2297
2298                 # Guard against infinite recursion
2299                 if ( $executing ) {
2300                         return $text;
2301                 }
2302                 $executing = true;
2303
2304                 $this->mTitle = $wgTitle;
2305                 $this->mOptions = $options;
2306                 $this->mOutputType = OT_MSG;
2307                 $this->clearState();
2308                 $text = $this->replaceVariables( $text );
2309
2310                 $executing = false;
2311                 return $text;
2312         }
2313
2314         # Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2315         # Callback will be called with the text within
2316         # Transform and return the text within
2317         function setHook( $tag, $callback ) {
2318                 $oldVal = @$this->mTagHooks[$tag];
2319                 $this->mTagHooks[$tag] = $callback;
2320                 return $oldVal;
2321         }
2322 }
2323
# Holds the result of a parse: the output text, the language and category
# links collected on the way, and the "contains old magic" flag.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # $text:             output text
        # $languageLinks:    array of language links
        # $categoryLinks:    array of category links
        # $containsOldMagic: initial value of the "contains old magic" flag
        # The cache time starts out as the empty string.
        function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = '';
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Setters; each one hands back whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Fold the links and the old-magic flag of another ParserOutput into this
        # one. Text and cache time are not touched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links have to come from $other as well; merging in our own
                # language links would mix the two lists and lose $other's categories
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2357
# Bundle of settings: four values copied from site-wide globals and the
# rest read from a user object (skin and per-user options).
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # ---- read accessors: each returns the member of the same name ----

        function getUseTeX() {
                return $this->mUseTeX;
        }

        function getUseDynamicDates() {
                return $this->mUseDynamicDates;
        }

        function getInterwikiMagic() {
                return $this->mInterwikiMagic;
        }

        function getAllowExternalImages() {
                return $this->mAllowExternalImages;
        }

        function getSkin() {
                return $this->mSkin;
        }

        function getDateFormat() {
                return $this->mDateFormat;
        }

        function getEditSection() {
                return $this->mEditSection;
        }

        function getEditSectionOnRightClick() {
                return $this->mEditSectionOnRightClick;
        }

        function getNumberHeadings() {
                return $this->mNumberHeadings;
        }

        function getShowToc() {
                return $this->mShowToc;
        }

        # ---- write accessors: each passes the member and the new value to
        # ---- wfSetVar() and returns whatever wfSetVar() returns

        function setUseTeX( $x ) {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function setUseDynamicDates( $x ) {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function setInterwikiMagic( $x ) {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function setAllowExternalImages( $x ) {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function setDateFormat( $x ) {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function setEditSection( $x ) {
                return wfSetVar( $this->mEditSection, $x );
        }

        function setEditSectionOnRightClick( $x ) {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function setNumberHeadings( $x ) {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function setShowToc( $x ) {
                return wfSetVar( $this->mShowToc, $x );
        }

        # The skin is bound by reference, not copied, and nothing is returned.
        function setSkin( &$x ) {
                $this->mSkin =& $x;
        }

        # Build a fresh ParserOptions and initialise it from the given user.
        /* static */ function newFromUser( &$user ) {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fill every member: the four site-wide switches come from globals,
        # the skin and the remaining options come from the user object.
        # A false-ish $userInput is replaced by a new User marked as loaded.
        function initialiseFromUser( &$userInput ) {
                global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                $profileName = 'ParserOptions::initialiseFromUser';
                $skinProfileName = $profileName . '-skin';
                wfProfileIn( $profileName );

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide settings
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mUseTeX = $wgUseTeX;

                # Skin lookup gets its own profiling section
                wfProfileIn( $skinProfileName );
                $this->mSkin =& $user->getSkin();
                wfProfileOut( $skinProfileName );

                # Per-user options
                $this->mDateFormat = $user->getOption( 'date' );
                $this->mEditSection = $user->getOption( 'editsection' );
                $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
                $this->mNumberHeadings = $user->getOption( 'numberheadings' );
                $this->mShowToc = $user->getOption( 'showtoc' );

                wfProfileOut( $profileName );
        }

}
2432
2433 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches ) {
        # Hand the match array to the current parser's braceSubstitution()
        # and pass its result straight back to the caller.
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2438
function wfArgSubstitution( $matches ) {
        # Hand the match array to the current parser's argSubstitution()
        # and pass its result straight back to the caller.
        return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2443
function wfVariableSubstitution( $matches ) {
        # Hand the match array to the current parser's variableSubstitution()
        # and pass its result straight back to the caller.
        return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2448
2449 # Return the total number of articles
function wfNumberOfArticles() {
        # Give wfLoadSiteStats() the chance to fill in the statistics
        # globals, then report the article count global.
        wfLoadSiteStats();

        global $wgNumberOfArticles;
        $articleCount = $wgNumberOfArticles;
        return $articleCount;
}
2456
2457 # Get various statistics from the database
/* private */ function wfLoadSiteStats() {
        global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

        # Only query when the article count still compares equal to -1
        # (presumably the "not loaded yet" marker set elsewhere).
        if ( $wgNumberOfArticles != -1 ) {
                return;
        }

        $dbr =& wfGetDB( DB_SLAVE );
        $columns = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
        $where = array( 'ss_row_id' => 1 );
        $row = $dbr->getArray( 'site_stats', $columns, $where, 'wfLoadSiteStats' );

        # A result of false leaves all three globals as they were.
        if ( $row !== false ) {
                $wgTotalViews = $row->ss_total_views;
                $wgTotalEdits = $row->ss_total_edits;
                $wgNumberOfArticles = $row->ss_good_articles;
        }
}
2477
function wfEscapeHTMLTagsOnly( $in ) {
        # Replace double quotes and angle brackets with their HTML entities.
        # Ampersands are not touched, so existing entities in $in survive.
        $entities = array(
                '"' => '&quot;',
                '>' => '&gt;',
                '<' => '&lt;',
        );
        return str_replace( array_keys( $entities ), array_values( $entities ), $in );
}
2484
2485
2486 ?>