includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Processes wiki markup
   8 #
   9 # There are two main entry points into the Parser class:
  10 # parse()
  11 #   produces HTML output
  12 # preSaveTransform().
  13 #   produces altered wiki markup.
  14 #
  15 # Globals used:
  16 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  17 #
  18 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  19 #
  20 # settings:
  21 #   $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #   $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #   $wgLocaltimezone
  24 #
  25 #   * only within ParserOptions
  26 #
  27 #----------------------------------------
  28 #    Variable substitution O(N^2) attack
  29 #-----------------------------------------
  30 # Without countermeasures, it would be possible to attack the parser by saving
  31 # a page filled with a large number of inclusions of large pages. The size of
  32 # the generated page would be proportional to the square of the input size.
  33 # Hence, we limit the number of inclusions of any given page, thus bringing any
  34 # attack back to O(N).
  35 define( "MAX_INCLUDE_REPEAT", 100 );
  36 define( "MAX_INCLUDE_SIZE", 1000000 ); // 1 Million
  37
  38 # Allowed values for $mOutputType
  39 define( "OT_HTML", 1 );
  40 define( "OT_WIKI", 2 );
  41 define( "OT_MSG" , 3 );
  42
  43 # string parameter for extractTags which will cause it
  44 # to strip HTML comments in addition to regular
  45 # <XML>-style tags. This should not be anything we
  46 # may want to use in wikisyntax
  47 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
  48
  49 # prefix for escaping, used in two functions at least
  50 define( 'UNIQ_PREFIX', 'NaodW29');
  51
  52 # Constants needed for external link processing
  53 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
  54 define( 'HTTP_PROTOCOLS', 'http|https' );
  55 # Everything except bracket, space, or control characters
  56 define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
  57 define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
  58 # Including space
  59 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
  60 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
  61 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
  62 define( 'EXT_LINK_BRACKETED',  '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
  63 define( 'EXT_IMAGE_REGEX',
  64         '/^('.HTTP_PROTOCOLS.':)'.  # Protocol
  65         '('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
  66         '('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
  67 );
  68
  69 class Parser
  70 {
  71         # Persistent:
  72         var $mTagHooks;
  73
  74         # Cleared with clearState():
  75         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  76         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  77
  78         # Temporary:
  79         var $mOptions, $mTitle, $mOutputType,
  80             $mTemplates,        // cache of already loaded templates, avoids
  81                                 // multiple SQL queries for the same string
  82             $mTemplatePath;     // stores an unsorted hash of all the templates already loaded
  83                                 // in this path. Used for loop detection.
  84
  85         function Parser() {
  86                 $this->mTemplates = array();
  87                 $this->mTemplatePath = array();
  88                 $this->mTagHooks = array();
  89                 $this->clearState();
  90         }
  91
  92         function clearState() {
  93                 $this->mOutput = new ParserOutput;
  94                 $this->mAutonumber = 0;
  95                 $this->mLastSection = "";
  96                 $this->mDTopen = false;
  97                 $this->mVariables = false;
  98                 $this->mIncludeCount = array();
  99                 $this->mStripState = array();
 100                 $this->mArgStack = array();
 101                 $this->mInPre = false;
 102         }
 103
 104         # First pass--just handle <nowiki> sections, pass the rest off
 105         # to internalParse() which does all the real work.
 106         #
 107         # Returns a ParserOutput
 108         #
 109         function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
 110                 global $wgUseTidy;
 111                 $fname = "Parser::parse";
 112                 wfProfileIn( $fname );
 113
 114                 if ( $clearState ) {
 115                         $this->clearState();
 116                 }
 117
 118                 $this->mOptions = $options;
 119                 $this->mTitle =& $title;
 120                 $this->mOutputType = OT_HTML;
 121
 122                 $stripState = NULL;
 123                 $text = $this->strip( $text, $this->mStripState );
 124                 $text = $this->internalParse( $text, $linestart );
 125                 $text = $this->unstrip( $text, $this->mStripState );
 126                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 127                 if(!$wgUseTidy) {
 128                         $fixtags = array(
 129                                 # french spaces, last one Guillemet-left
 130                                 # only if there is something before the space
 131                                 '/ (?=\\?|:|;|!|\\302\\273)/' => '&nbsp;\\1',
 132                                 '/(\d) (?=\d{3}\D)/' => '\\1&nbsp;\\2',
 133                                 # french spaces, Guillemet-right
 134                                 "/(\\302\\253) /"=>"\\1&nbsp;",
 135                                 '/<hr *>/i' => '<hr />',
 136                                 '/<br *>/i' => '<br />',
 137                                 '/<center *>/i' => '<div class="center">',
 138                                 '/<\\/center *>/i' => '</div>',
 139                                 # Clean up spare ampersands; note that we probably ought to be
 140                                 # more careful about named entities.
 141                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 142                         );
 143                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 144                 } else {
 145                         $fixtags = array(
 146                                 # french spaces, last one Guillemet-left
 147                                 '/ (?=\\?|:|;|!|\\302\\273)/' => '&nbsp;\\1',
 148                                 '/(\d) (?=\d{3}\D)/' => '\\1&nbsp;\\2',
 149                                 # french spaces, Guillemet-right
 150                                 '/(\\302\\253) /' => '\\1&nbsp;',
 151                                 '/([^> ]+(&#x30(1|3|9);)[^< ]*)/i' => '<span class="diacrit">\\1</span>',
 152                                 '/<center *>/i' => '<div class="center">',
 153                                 '/<\\/center *>/i' => '</div>'
 154                         );
 155                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 156                 }
 157                 # only once and last
 158                 $text = $this->doBlockLevels( $text, $linestart );
 159                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 160                 if($wgUseTidy) {
 161                         $text = $this->tidy($text);
 162                 }
 163                 $this->mOutput->setText( $text );
 164                 wfProfileOut( $fname );
 165                 return $this->mOutput;
 166         }
 167
 168         /* static */ function getRandomString() {
 169                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 170         }
 171
 172         # Replaces all occurrences of <$tag>content</$tag> in the text
 173         # with a random marker and returns the new text. the output parameter
 174         # $content will be an associative array filled with data on the form
 175         # $unique_marker => content.
 176
 177         # If $content is already set, the additional entries will be appended
 178
 179         # If $tag is set to STRIP_COMMENTS, the function will extract
 180         # <!-- HTML comments -->
 181
 182         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 183                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 184                 if ( !$content ) {
 185                         $content = array( );
 186                 }
 187                 $n = 1;
 188                 $stripped = '';
 189
 190                 while ( '' != $text ) {
 191                         if($tag==STRIP_COMMENTS) {
 192                                 $p = preg_split( '/<!--/i', $text, 2 );
 193                         } else {
 194                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 195                         }
 196                         $stripped .= $p[0];
 197                         if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
 198                                 $text = '';
 199                         } else {
 200                                 if($tag==STRIP_COMMENTS) {
 201                                         $q = preg_split( '/-->/i', $p[1], 2 );
 202                                 } else {
 203                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 204                                 }
 205                                 $marker = $rnd . sprintf('%08X', $n++);
 206                                 $content[$marker] = $q[0];
 207                                 $stripped .= $marker;
 208                                 $text = $q[1];
 209                         }
 210                 }
 211                 return $stripped;
 212         }
 213
 214         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 215         # If $render is set, performs necessary rendering operations on plugins
 216         # Returns the text, and fills an array with data needed in unstrip()
 217         # If the $state is already a valid strip state, it adds to the state
 218
 219         # When $stripcomments is set, HTML comments <!-- like this -->
 220         # will be stripped in addition to other tags. This is important
 221         # for section editing, where these comments cause confusion when
 222         # counting the sections in the wikisource
 223         function strip( $text, &$state, $stripcomments = false ) {
 224                 $render = ($this->mOutputType == OT_HTML);
 225                 $html_content = array();
 226                 $nowiki_content = array();
 227                 $math_content = array();
 228                 $pre_content = array();
 229                 $comment_content = array();
 230                 $ext_content = array();
 231
 232                 # Replace any instances of the placeholders
 233                 $uniq_prefix = UNIQ_PREFIX;
 234                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 235
 236                 # html
 237                 global $wgRawHtml;
 238                 if( $wgRawHtml ) {
 239                         $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
 240                         foreach( $html_content as $marker => $content ) {
 241                                 if ($render ) {
 242                                         # Raw and unchecked for validity.
 243                                         $html_content[$marker] = $content;
 244                                 } else {
 245                                         $html_content[$marker] = "<html>$content</html>";
 246                                 }
 247                         }
 248                 }
 249
 250                 # nowiki
 251                 $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
 252                 foreach( $nowiki_content as $marker => $content ) {
 253                         if( $render ){
 254                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 255                         } else {
 256                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 257                         }
 258                 }
 259
 260                 # math
 261                 $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
 262                 foreach( $math_content as $marker => $content ){
 263                         if( $render ) {
 264                                 if( $this->mOptions->getUseTeX() ) {
 265                                         $math_content[$marker] = renderMath( $content );
 266                                 } else {
 267                                         $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
 268                                 }
 269                         } else {
 270                                 $math_content[$marker] = "<math>$content</math>";
 271                         }
 272                 }
 273
 274                 # pre
 275                 $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
 276                 foreach( $pre_content as $marker => $content ){
 277                         if( $render ){
 278                                 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
 279                         } else {
 280                                 $pre_content[$marker] = "<pre>$content</pre>";
 281                         }
 282                 }
 283
 284                 # Comments
 285                 if($stripcomments) {
 286                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 287                         foreach( $comment_content as $marker => $content ){
 288                                 $comment_content[$marker] = "<!--$content-->";
 289                         }
 290                 }
 291
 292                 # Extensions
 293                 foreach ( $this->mTagHooks as $tag => $callback ) {
 294                         $ext_contents[$tag] = array();
 295                         $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
 296                         foreach( $ext_content[$tag] as $marker => $content ) {
 297                                 if ( $render ) {
 298                                         $ext_content[$tag][$marker] = $callback( $content );
 299                                 } else {
 300                                         $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
 301                                 }
 302                         }
 303                 }
 304
 305                 # Merge state with the pre-existing state, if there is one
 306                 if ( $state ) {
 307                         $state['html'] = $state['html'] + $html_content;
 308                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 309                         $state['math'] = $state['math'] + $math_content;
 310                         $state['pre'] = $state['pre'] + $pre_content;
 311                         $state['comment'] = $state['comment'] + $comment_content;
 312
 313                         foreach( $ext_content as $tag => $array ) {
 314                                 if ( array_key_exists( $tag, $state ) ) {
 315                                         $state[$tag] = $state[$tag] + $array;
 316                                 }
 317                         }
 318                 } else {
 319                         $state = array(
 320                           'html' => $html_content,
 321                           'nowiki' => $nowiki_content,
 322                           'math' => $math_content,
 323                           'pre' => $pre_content,
 324                           'comment' => $comment_content,
 325                         ) + $ext_content;
 326                 }
 327                 return $text;
 328         }
 329
 330         # always call unstripNoWiki() after this one
 331         function unstrip( $text, &$state ) {
 332                 # Must expand in reverse order, otherwise nested tags will be corrupted
 333                 $contentDict = end( $state );
 334                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 335                         if( key($state) != 'nowiki' && key($state) != 'html') {
 336                                 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 337                                         $text = str_replace( key( $contentDict ), $content, $text );
 338                                 }
 339                         }
 340                 }
 341
 342                 return $text;
 343         }
 344         # always call this after unstrip() to preserve the order
 345         function unstripNoWiki( $text, &$state ) {
 346                 # Must expand in reverse order, otherwise nested tags will be corrupted
 347                 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
 348                         $text = str_replace( key( $state['nowiki'] ), $content, $text );
 349                 }
 350
 351                 global $wgRawHtml;
 352                 if ($wgRawHtml) {
 353                         for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
 354                                 $text = str_replace( key( $state['html'] ), $content, $text );
 355                         }
 356                 }
 357
 358                 return $text;
 359         }
 360
 361         # Add an item to the strip state
 362         # Returns the unique tag which must be inserted into the stripped text
 363         # The tag will be replaced with the original text in unstrip()
 364         function insertStripItem( $text, &$state ) {
 365                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 366                 if ( !$state ) {
 367                         $state = array(
 368                           'html' => array(),
 369                           'nowiki' => array(),
 370                           'math' => array(),
 371                           'pre' => array()
 372                         );
 373                 }
 374                 $state['item'][$rnd] = $text;
 375                 return $rnd;
 376         }
 377
 378         # categoryMagic
 379         # generate a list of subcategories and pages for a category
 380         # depending on wfMsg("usenewcategorypage") it either calls the new
 381         # or the old code. The new code will not work properly for some
 382         # languages due to sorting issues, so they might want to turn it
 383         # off.
 384         function categoryMagic() {
 385                 $msg = wfMsg('usenewcategorypage');
 386                 if ( '0' == @$msg[0] )
 387                 {
 388                         return $this->oldCategoryMagic();
 389                 } else {
 390                         return $this->newCategoryMagic();
 391                 }
 392         }
 393
 394         # This method generates the list of subcategories and pages for a category
 395         function oldCategoryMagic () {
 396                 global $wgLang ;
 397                 $fname = 'Parser::oldCategoryMagic';
 398
 399                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 400
 401                 if ( $this->mTitle->getNamespace() != NS_CATEGORY ) return "" ; # This ain't a category page
 402
 403                 $r = "<br style=\"clear:both;\"/>\n";
 404
 405
 406                 $sk =& $this->mOptions->getSkin() ;
 407
 408                 $articles = array() ;
 409                 $children = array() ;
 410                 $data = array () ;
 411                 $id = $this->mTitle->getArticleID() ;
 412
 413                 # FIXME: add limits
 414                 $dbr =& wfGetDB( DB_SLAVE );
 415                 $cur = $dbr->tableName( 'cur' );
 416                 $categorylinks = $dbr->tableName( 'categorylinks' );
 417
 418                 $t = $dbr->strencode( $this->mTitle->getDBKey() );
 419                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM $cur,$categorylinks " .
 420                         "WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 421                 $res = $dbr->query( $sql, $fname ) ;
 422                 while ( $x = $dbr->fetchObject ( $res ) ) $data[] = $x ;
 423
 424                 # For all pages that link to this category
 425                 foreach ( $data AS $x )
 426                 {
 427                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 428                         if ( $t != '' ) $t .= ':' ;
 429                         $t .= $x->cur_title ;
 430
 431                         if ( $x->cur_namespace == NS_CATEGORY ) {
 432                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 433                         } else {
 434                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 435                         }
 436                 }
 437                 $dbr->freeResult ( $res ) ;
 438
 439                 # Showing subcategories
 440                 if ( count ( $children ) > 0 ) {
 441                         $r .= '<h2>'.wfMsg('subcategories')."</h2>\n" ;
 442                         $r .= implode ( ', ' , $children ) ;
 443                 }
 444
 445                 # Showing pages in this category
 446                 if ( count ( $articles ) > 0 ) {
 447                         $ti = $this->mTitle->getText() ;
 448                         $h =  wfMsg( 'category_header', $ti );
 449                         $r .= "<h2>$h</h2>\n" ;
 450                         $r .= implode ( ', ' , $articles ) ;
 451                 }
 452
 453                 return $r ;
 454         }
 455
 456         function newCategoryMagic () {
 457                 global $wgLang;
 458                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 459
 460                 if ( $this->mTitle->getNamespace() != NS_CATEGORY ) return '' ; # This ain't a category page
 461
 462                 $r = "<br style=\"clear:both;\"/>\n";
 463
 464
 465                 $sk =& $this->mOptions->getSkin() ;
 466
 467                 $articles = array() ;
 468                 $articles_start_char = array();
 469                 $children = array() ;
 470                 $children_start_char = array();
 471                 $data = array () ;
 472                 $id = $this->mTitle->getArticleID() ;
 473
 474                 # FIXME: add limits
 475                 $dbr =& wfGetDB( DB_SLAVE );
 476                 $cur = $dbr->tableName( 'cur' );
 477                 $categorylinks = $dbr->tableName( 'categorylinks' );
 478
 479                 $t = $dbr->strencode( $this->mTitle->getDBKey() );
 480                 $sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM " .
 481                         "$cur,$categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 482                 $res = $dbr->query ( $sql ) ;
 483                 while ( $x = $dbr->fetchObject ( $res ) )
 484                 {
 485                         $t = $ns = $wgLang->getNsText ( $x->cur_namespace ) ;
 486                         if ( $t != '' ) $t .= ':' ;
 487                         $t .= $x->cur_title ;
 488
 489                         if ( $x->cur_namespace == NS_CATEGORY ) {
 490                                 $ctitle = str_replace( '_',' ',$x->cur_title );
 491                                 array_push ( $children, $sk->makeKnownLink ( $t, $ctitle ) ) ; # Subcategory
 492
 493                                 // If there's a link from Category:A to Category:B, the sortkey of the resulting
 494                                 // entry in the categorylinks table is Category:A, not A, which it SHOULD be.
 495                                 // Workaround: If sortkey == "Category:".$title, than use $title for sorting,
 496                                 // else use sortkey...
 497                                 if ( ($ns.':'.$ctitle) == $x->cl_sortkey ) {
 498                                         array_push ( $children_start_char, $wgLang->firstChar( $x->cur_title ) );
 499                                 } else {
 500                                         array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
 501                                 }
 502                         } else {
 503                                 array_push ( $articles , $sk->makeKnownLink ( $t ) ) ; # Page in this category
 504                                 array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
 505                         }
 506                 }
 507                 $dbr->freeResult ( $res ) ;
 508
 509                 $ti = $this->mTitle->getText() ;
 510
 511                 # Don't show subcategories section if there are none.
 512                 if ( count ( $children ) > 0 )
 513                 {
 514                         # Showing subcategories
 515                         $r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";
 516
 517                         $numchild = count( $children );
 518                         if($numchild == 1) {
 519                                 $r .= wfMsg( 'subcategorycount1', 1 );
 520                         } else {
 521                                 $r .= wfMsg( 'subcategorycount' , $numchild );
 522                         }
 523                         unset($numchild);
 524
 525                         if ( count ( $children ) > 6 ) {
 526
 527                                 // divide list into three equal chunks
 528                                 $chunk = (int) (count ( $children ) / 3);
 529
 530                                 // get and display header
 531                                 $r .= '<table width="100%"><tr valign="top">';
 532
 533                                 $startChunk = 0;
 534                                 $endChunk = $chunk;
 535
 536                                 // loop through the chunks
 537                                 for($startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
 538                                         $chunkIndex < 3;
 539                                         $chunkIndex++, $startChunk = $endChunk, $endChunk += $chunk + 1)
 540                                 {
 541
 542                                         $r .= '<td><ul>';
 543                                         // output all subcategories to category
 544                                         for ($index = $startChunk ;
 545                                                 $index < $endChunk && $index < count($children);
 546                                                 $index++ )
 547                                         {
 548                                                 // check for change of starting letter or begging of chunk
 549                                                 if ( ($children_start_char[$index] != $children_start_char[$index - 1])
 550                                                         || ($index == $startChunk) )
 551                                                 {
 552                                                         $r .= "</ul><h3>{$children_start_char[$index]}</h3>\n<ul>";
 553                                                 }
 554
 555                                                 $r .= "<li>{$children[$index]}</li>";
 556                                         }
 557                                         $r .= '</ul></td>';
 558
 559
 560                                 }
 561                                 $r .= '</tr></table>';
 562                         } else {
 563                                 // for short lists of subcategories to category.
 564
 565                                 $r .= "<h3>{$children_start_char[0]}</h3>\n";
 566                                 $r .= '<ul><li>'.$children[0].'</li>';
 567                                 for ($index = 1; $index < count($children); $index++ )
 568                                 {
 569                                         if ($children_start_char[$index] != $children_start_char[$index - 1])
 570                                         {
 571                                                 $r .= "</ul><h3>{$children_start_char[$index]}</h3>\n<ul>";
 572                                         }
 573
 574                                         $r .= "<li>{$children[$index]}</li>";
 575                                 }
 576                                 $r .= '</ul>';
 577                         }
 578                 } # END of if ( count($children) > 0 )
 579
 580                 $r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n";
 581
 582                 $numart = count( $articles );
 583                 if($numart == 1) {
 584                         $r .= wfMsg( 'categoryarticlecount1', 1 );
 585                 } else {
 586                         $r .= wfMsg( 'categoryarticlecount' , $numart );
 587                 }
 588                 unset($numart);
 589
 590                 # Showing articles in this category
 591                 if ( count ( $articles ) > 6) {
 592                         $ti = $this->mTitle->getText() ;
 593
 594                         // divide list into three equal chunks
 595                         $chunk = (int) (count ( $articles ) / 3);
 596
 597                         // get and display header
 598                         $r .= '<table width="100%"><tr valign="top">';
 599
 600                         // loop through the chunks
 601                         for($startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
 602                                 $chunkIndex < 3;
 603                                 $chunkIndex++, $startChunk = $endChunk, $endChunk += $chunk + 1)
 604                         {
 605
 606                                 $r .= '<td><ul>';
 607
 608                                 // output all articles in category
 609                                 for ($index = $startChunk ;
 610                                         $index < $endChunk && $index < count($articles);
 611                                         $index++ )
 612                                 {
 613                                         // check for change of starting letter or begging of chunk
 614                                         if ( ($articles_start_char[$index] != $articles_start_char[$index - 1])
 615                                                 || ($index == $startChunk) )
 616                                         {
 617                                                 $r .= "</ul><h3>{$articles_start_char[$index]}</h3>\n<ul>";
 618                                         }
 619
 620                                         $r .= "<li>{$articles[$index]}</li>";
 621                                 }
 622                                 $r .= '</ul></td>';
 623
 624
 625                         }
 626                         $r .= '</tr></table>';
 627                 } elseif ( count($articles) > 0) {
 628                         // for short lists of articles in categories.
 629                         $ti = $this->mTitle->getText() ;
 630
 631                         $r .= '<h3>'.$articles_start_char[0]."</h3>\n";
 632                         $r .= '<ul><li>'.$articles[0].'</li>';
 633                         for ($index = 1; $index < count($articles); $index++ )
 634                         {
 635                                 if ($articles_start_char[$index] != $articles_start_char[$index - 1])
 636                                 {
 637                                         $r .= "</ul><h3>{$articles_start_char[$index]}</h3>\n<ul>";
 638                                 }
 639
 640                                 $r .= "<li>{$articles[$index]}</li>";
 641                         }
 642                         $r .= '</ul>';
 643                 }
 644
 645
 646                 return $r ;
 647         }
 648
 649         # Return allowed HTML attributes
 # Builds the whitelist of attribute names that fixTagAttributes() keeps.
 # The names are grouped by the elements they belong to and concatenated
 # in the historical order, so the result is a plain 0-based list of 40
 # lower-case strings ('width' appears twice: once as a generic attribute
 # and once among the table attributes).
 function getHTMLattrs () {
     # Allowed attributes--no scripting, etc.
     $generic = array( 'title', 'align', 'lang', 'dir', 'width', 'height', 'bgcolor' );

     $perElement = array(
         'clear',    # BR
         'noshade',  # HR
         'cite'      # BLOCKQUOTE, Q
     );

     # FONT
     $font = array( 'size', 'face', 'color' );

     # For various lists, mostly deprecated but safe
     $lists = array( 'type', 'start', 'value', 'compact' );

     # Tables
     $tables = array(
         'summary', 'width', 'border', 'frame', 'rules',
         'cellspacing', 'cellpadding', 'valign', 'char',
         'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
         'headers', 'scope', 'rowspan', 'colspan'
     );

     # For CSS
     $css = array( 'id', 'class', 'name', 'style' );

     # array_merge() renumbers the integer keys, giving one flat list.
     return array_merge( $generic, $perElement, $font, $lists, $tables, $css );
 }
 665
 666         # Remove non approved attributes and javascript in css
 # Sanitises the attribute part of an HTML tag.
 #
 # $t is the raw attribute text. Every name or name=value pair whose name
 # is not listed by getHTMLattrs() (compared case-insensitively) is
 # removed; approved pairs are kept and written back as name=value, i.e.
 # without whitespace around the '='. Afterwards, if the text contains a
 # style attribute followed by something that looks like script or a
 # remote reference ("expression", a "tp://"-style URL, "url("), all
 # attributes are dropped.
 #
 # Returns the cleaned, trimmed attribute string ('' if nothing is left).
 #
 # The filtering is done with a callback rather than the /e (eval)
 # modifier of preg_replace(): with /e the attribute values, which come
 # from wiki text, were interpolated into a double-quoted PHP string and
 # evaluated, so "{${...}}" inside a value ran as code and single-quoted
 # values came back with stray backslashes. PHP 7 and later no longer
 # support /e at all.
 function fixTagAttributes ( $t ) {
     if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

     # Strip non-approved attributes from the tag
     $t = preg_replace_callback(
         '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/',
         array( $this, 'fixTagAttributesCallback' ),
         $t );

     $t = str_replace ( "<></>" , "" , $t ) ; # This should fix bug 980557

     # Strip javascript "expression" from stylesheets. Brute force approach:
     # If anything offensive is found, all attributes of the HTML tag are dropped
     if( preg_match(
         '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
         wfMungeToUtf8( $t ) ) )
     {
         $t = '';
     }

     return trim ( $t ) ;
 }

 # preg_replace_callback() handler used by fixTagAttributes().
 # $m[1] is the attribute name; $m[3] is the value (quotes included) and
 # is absent or empty when the attribute has no "=value" part.
 # Returns the text that replaces the matched attribute: '' for names that
 # are not whitelisted, otherwise "name" or "name=value".
 /* private */ function fixTagAttributesCallback ( $m ) {
     if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs(), true ) ) {
         return '';
     }
     if ( isset( $m[3] ) && $m[3] !== '' ) {
         return $m[1] . '=' . $m[3];
     }
     return $m[1];
 }
 691
 692         # interface with html tidy, used if $wgUseTidy = true
 # Pipes $text through the external HTML Tidy program.
 #
 # The fragment is wrapped in a minimal XHTML 1.0 Transitional document,
 # written to the stdin of "$wgTidyBin -config $wgTidyConf <options>", and
 # everything tidy prints on stdout is collected (stderr is discarded).
 #
 # Returns tidy's output as is. If nothing came back although $text was
 # not empty (tidy could not be started or printed nothing), the original
 # text is returned with an HTML comment appended.
 #
 # The encoding switch is appended to a local copy of $wgTidyOpts. Using
 # ".=" on the global itself made the option string grow by one switch on
 # every call within the same request.
 function tidy ( $text ) {
     global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
     global $wgInputEncoding, $wgOutputEncoding;
     $fname = 'Parser::tidy';
     wfProfileIn( $fname );

     # Pick the encoding switch; anything we cannot name is passed through raw
     $sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );
     $opts = $wgTidyOpts;
     switch( strtoupper( $wgOutputEncoding ) ) {
         case 'ISO-8859-1':
             $opts .= $sameEncoding ? ' -latin1' : ' -raw';
             break;
         case 'UTF-8':
             $opts .= $sameEncoding ? ' -utf8' : ' -raw';
             break;
         default:
             $opts .= ' -raw';
     }

     $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
         ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
         '<head><title>test</title></head><body>'.$text.'</body></html>';
     $descriptorspec = array(
         0 => array('pipe', 'r'),
         1 => array('pipe', 'w'),
         2 => array('file', '/dev/null', 'a')
     );

     $cleansource = '';
     $process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
     if (is_resource($process)) {
         # Send the whole document, close stdin so tidy sees EOF, then read the result
         fwrite($pipes[0], $wrappedtext);
         fclose($pipes[0]);
         while (!feof($pipes[1])) {
             $cleansource .= fgets($pipes[1], 1024);
         }
         fclose($pipes[1]);
         proc_close($process);
     }

     wfProfileOut( $fname );

     if( $cleansource == '' && $text != '') {
         wfDebug( "Tidy error detected!\n" );
         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
     } else {
         return $cleansource;
     }
 }
 739
 740         # parse the wiki syntax used to render tables
 # Converts wiki table markup into HTML, working line by line.
 #
 # Line starts that are recognised (after trimming the line):
 #   :*{|attrs  opens a table; every leading ':' wraps it in <dl><dd>
 #   |}         closes the innermost open table
 #   |-attrs    starts a new row (any number of dashes); attrs go on the <tr>
 #   |+         caption
 #   | or !     data / header cells; '||' (and '!!' on header lines)
 #              separates several cells on one line, and a single '|'
 #              inside a cell separates its attributes from its content
 # Any other line, and every line outside a table, is left untouched.
 # Tables still open at the end of the text are closed automatically.
 #
 # All tag attributes pass through fixTagAttributes(). Returns the text
 # with the table lines replaced by HTML.
 #
 # State is kept in parallel stacks with one entry per open table, so
 # that nested tables work. The indent level is one of those stacks: a
 # single shared value would be overwritten by a nested table and leave
 # the outer table's <dl><dd> wrappers unbalanced. The automatic closing
 # at the end uses the remembered cell tag (td, th or caption) instead of
 # always emitting </td>, and also closes the <dl><dd> wrappers.
 function doTableStuff ( $t ) {
     $fname = 'Parser::doTableStuff';
     wfProfileIn( $fname );

     $t = explode ( "\n" , $t ) ;
     $td = array () ; # Is currently a td tag open?
     $ltd = array () ; # Was it TD or TH?
     $tr = array () ; # Is currently a tr tag open?
     $ltr = array () ; # tr attributes
     $indent_level = array () ; # indent level of each open table
     foreach ( $t AS $k => $x )
     {
         $x = trim ( $x ) ;
         $fc = substr ( $x , 0 , 1 ) ;
         if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
             $indent = strlen( $matches[1] );
             $t[$k] = "\n" .
                 str_repeat( "<dl><dd>", $indent ) .
                 "<table " . $this->fixTagAttributes ( $matches[2] ) . '>' ;
             array_push ( $td , false ) ;
             array_push ( $ltd , '' ) ;
             array_push ( $tr , false ) ;
             array_push ( $ltr , '' ) ;
             array_push ( $indent_level , $indent ) ;
         }
         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
         else if ( '|}' == substr ( $x , 0 , 2 ) ) {
             $z = "</table>\n" ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
             if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z . str_repeat( "</dd></dl>", array_pop ( $indent_level ) );
         }
         else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
             $x = substr ( $x , 1 ) ;
             while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
             $z = '' ;
             $l = array_pop ( $ltd ) ;
             if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
             if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
             array_pop ( $ltr ) ;
             $t[$k] = $z ;
             # The <tr> itself is only written when the first cell of the row shows up
             array_push ( $tr , false ) ;
             array_push ( $td , false ) ;
             array_push ( $ltd , '' ) ;
             array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
         }
         else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
             if ( '|+' == substr ( $x , 0 , 2 ) ) {
                 $fc = '+' ;
                 $x = substr ( $x , 1 ) ;
             }
             $after = substr ( $x , 1 ) ;
             if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
             $after = explode ( '||' , $after ) ;
             $t[$k] = '' ;
             foreach ( $after AS $theline )
             {
                 $z = '' ;
                 if ( $fc != '+' )
                 {
                     # Cells live in a row: open one if none is open yet
                     $tra = array_pop ( $ltr ) ;
                     if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
                     array_push ( $tr , true ) ;
                     array_push ( $ltr , '' ) ;
                 }

                 # Close the previous cell, then remember which tag this one uses
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
                 if ( $fc == '|' ) $l = 'td' ;
                 else if ( $fc == '!' ) $l = 'th' ;
                 else if ( $fc == '+' ) $l = 'caption' ;
                 else $l = '' ;
                 array_push ( $ltd , $l ) ;
                 $y = explode ( '|' , $theline , 2 ) ;
                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                 $t[$k] .= $y ;
                 array_push ( $td , true ) ;
             }
         }
     }

     # Closing open td, tr && table
     while ( count ( $td ) > 0 )
     {
         $l = array_pop ( $ltd ) ;
         if ( array_pop ( $td ) ) $t[] = '</'.$l.'>' ;
         if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
         array_pop ( $ltr ) ;
         $t[] = '</table>' ;
         $indent = array_pop ( $indent_level ) ;
         if ( $indent > 0 ) $t[] = str_repeat( "</dd></dl>", $indent ) ;
     }

     $t = implode ( "\n" , $t ) ;
     #               $t = $this->removeHTMLtags( $t );
     wfProfileOut( $fname );
     return $t ;
 }
 837
 838         # Parses the text and adds the result to the strip state
 839         # Returns the strip tag
 # Renders $text (strip() followed by internalParse() as a non-main text)
 # and stores the result through insertStripItem().
 # $newline serves two purposes: cast to bool it is handed to
 # internalParse() as $linestart, and as a string it is prepended to the
 # value insertStripItem() returns, which is what this method returns.
 function stripParse( $text, $newline, $args ) {
     $stripped = $this->strip( $text, $this->mStripState );
     $atLineStart = (bool)$newline;
     $rendered = $this->internalParse( $stripped, $atLineStart, $args, false );
     $marker = $this->insertStripItem( $rendered, $this->mStripState );
     return $newline . $marker;
 }
 845
 # Runs the conversion passes over $text in a fixed order and returns
 # the result. The order of the passes matters; do not rearrange them.
 #
 # $args is handed to replaceVariables(). $isMain is handed to
 # formatHeadings() and also decides whether the categoryMagic() output
 # is appended (at most once per parser, guarded by categoryMagicDone).
 # $linestart is accepted but not used by this method.
 function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
     $profileKey = 'Parser::internalParse';
     wfProfileIn( $profileKey );

     # HTML clean-up and variable substitution
     $text = $this->removeHTMLtags( $text );
     $text = $this->replaceVariables( $text, $args );

     # A line of four or more dashes becomes a horizontal rule
     $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

     $text = $this->doHeadings( $text );

     if ( $this->mOptions->getUseDynamicDates() ) {
         global $wgDateFormatter;
         $dateFormat = $this->mOptions->getDateFormat();
         $text = $wgDateFormatter->reformat( $dateFormat, $text );
     }

     # Inline markup: quotes, then external links before internal ones
     $text = $this->doAllQuotes( $text );
     $text = $this->replaceExternalLinks( $text );
     $text = $this->doMagicLinks( $text );
     # replaceInternalLinks() is deliberately run twice in a row
     $text = $this->replaceInternalLinks ( $text );
     $text = $this->replaceInternalLinks ( $text );

     # Put the stripped sections back
     $text = $this->unstrip( $text, $this->mStripState );
     $text = $this->unstripNoWiki( $text, $this->mStripState );

     $text = $this->doTableStuff( $text );
     $text = $this->formatHeadings( $text, $isMain );
     $skin =& $this->mOptions->getSkin();
     $text = $skin->transformContent( $text );

     $appendCategories = $isMain && !isset ( $this->categoryMagicDone );
     if ( $appendCategories ) {
         $text .= $this->categoryMagic () ;
         $this->categoryMagicDone = true ;
     }

     wfProfileOut( $profileKey );
     return $text;
 }
 882
 # Applies the "magic link" passes to $text: magicISBN(), then magicGEO()
 # (only when the global $wgUseGeoMode is set and true), then magicRFC().
 # $text is taken by reference and the same variable is returned by
 # reference.
 /* private */ function &doMagicLinks( &$text ) {
     global $wgUseGeoMode;

     $text = $this->magicISBN( $text );

     # !empty() covers both "not set" and "set but false"
     if ( !empty( $wgUseGeoMode ) ) {
         $text = $this->magicGEO( $text );
     }

     $text = $this->magicRFC( $text );
     return $text;
 }
 892
 893         # Parse ^^ tokens and return html
 # Converts ^^text^^ into <small><sup>text</sup></small> and returns the
 # resulting text.
 # The match is non-greedy so that each ^^...^^ pair on a line becomes its
 # own superscript; a greedy (.*) swallowed everything between the first
 # and the last ^^ on the line into one <sup>.
 /* private */ function doExponent ( $text ) {
     $fname = 'Parser::doExponent';
     wfProfileIn( $fname );
     $text = preg_replace( '/\^\^(.*?)\^\^/', '<small><sup>\\1</sup></small>', $text );
     wfProfileOut( $fname );
     return $text;
 }
 901
 902         # Parse headers and return html
 # Turns lines of the form ==Title== into <hN>Title</hN>, where N is the
 # number of '=' signs on each side (1 to 6). The deepest level is tried
 # first so that "======" is not consumed as a lower level with extra
 # '=' characters in the title.
 /* private */ function doHeadings( $text ) {
     $fname = 'Parser::doHeadings';
     wfProfileIn( $fname );

     $level = 6;
     while ( $level >= 1 ) {
         $marks = str_repeat( '=', $level );
         $pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
         $replacement = "<h{$level}>\\1</h{$level}>\\2";
         $text = preg_replace( $pattern, $replacement, $text );
         --$level;
     }

     wfProfileOut( $fname );
     return $text;
 }
 914
 # Applies doQuotes() to every line of $text separately (quote markup
 # never spans lines) and joins the converted lines with "\n" again.
 /* private */ function doAllQuotes( $text ) {
     $fname = 'Parser::doAllQuotes';
     wfProfileIn( $fname );

     $converted = array();
     foreach ( explode( "\n", $text ) as $line ) {
         $converted[] = $this->doQuotes ( $line );
     }
     $outtext = implode( "\n", $converted );

     wfProfileOut( $fname );
     return $outtext;
 }
 927
 # Converts the apostrophe markup of a single line into HTML:
 # '' becomes <em>, ''' becomes <strong>, ''''' both.
 #
 # The line is split so that even positions hold plain text and odd
 # positions hold runs of two or more apostrophes. Three passes follow:
 #   1. normalise run lengths to 2, 3 or 5 (surplus apostrophes become
 #      text) and count italic and bold markers;
 #   2. if both counts are odd, turn one bold marker into an apostrophe
 #      plus an italic marker;
 #   3. walk the pieces with a small state machine and emit the tags.
 # Returns the converted line; a line without markup is returned as is.
 /* private */ function doQuotes( $text ) {
     $parts = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
     $total = count ($parts);
     if ($total == 1) {
         return $text;
     }

     # Pass 1: four apostrophes are one literal apostrophe plus bold; more
     # than five are literal apostrophes plus bold-italic. A run of five
     # counts once as italic and once as bold.
     $boldCount = 0;
     $italicCount = 0;
     for ($pos = 1; $pos < $total; $pos += 2) {
         $run = strlen ($parts[$pos]);
         if ($run == 4) {
             $parts[$pos - 1] .= "'";
             $parts[$pos] = "'''";
             $run = 3;
         } else if ($run > 5) {
             $parts[$pos - 1] .= str_repeat ("'", $run - 5);
             $parts[$pos] = "'''''";
             $run = 5;
         }
         switch ($run) {
             case 2:
                 $italicCount++;
                 break;
             case 3:
                 $boldCount++;
                 break;
             case 5:
                 $italicCount++;
                 $boldCount++;
                 break;
         }
     }

     # Pass 2: with an odd number of both, one of the bold markers was
     # probably meant as apostrophe + italics. Preference: the first one
     # preceded by a single-letter word, then the first one preceded by a
     # longer word, then the first one preceded by a space.
     if (($boldCount % 2 == 1) && ($italicCount % 2 == 1)) {
         $afterSingleLetter = -1;
         $afterMultiLetter = -1;
         $afterSpace = -1;
         for ($pos = 1; $pos < $total; $pos += 2) {
             if (strlen ($parts[$pos]) != 3) {
                 continue;
             }
             $lastChar = substr ($parts[$pos - 1], -1);
             $secondLastChar = substr ($parts[$pos - 1], -2, 1);
             if ($lastChar == " ") {
                 if ($afterSpace == -1) $afterSpace = $pos;
             } else if ($secondLastChar == " ") {
                 if ($afterSingleLetter == -1) $afterSingleLetter = $pos;
             } else {
                 if ($afterMultiLetter == -1) $afterMultiLetter = $pos;
             }
         }

         # All three can still be -1, e.g. when the only marker on the
         # line is a run of five apostrophes; then nothing is changed.
         $demote = -1;
         if ($afterSingleLetter > -1) {
             $demote = $afterSingleLetter;
         } else if ($afterMultiLetter > -1) {
             $demote = $afterMultiLetter;
         } else if ($afterSpace > -1) {
             $demote = $afterSpace;
         }
         if ($demote > -1) {
             $parts [ $demote ] = "''";
             $parts [ $demote - 1 ] .= "'";
         }
     }

     # Pass 3: emit HTML. $state names the open tags in opening order
     # ('', 'em', 'strong', 'emstrong', 'strongem'); 'both' means a run
     # of five was seen and the text is buffered until the next marker
     # tells which tag has to be the outer one.
     $output = '';
     $buffer = '';
     $state = '';
     foreach ($parts as $pos => $piece) {
         if (($pos % 2) == 0) {
             # plain text
             if ($state == 'both') {
                 $buffer .= $piece;
             } else {
                 $output .= $piece;
             }
             continue;
         }

         switch (strlen ($piece)) {
             case 2:
                 switch ($state) {
                     case 'em':
                         $output .= "</em>"; $state = '';
                         break;
                     case 'strongem':
                         $output .= "</em>"; $state = 'strong';
                         break;
                     case 'emstrong':
                         $output .= "</strong></em><strong>"; $state = 'strong';
                         break;
                     case 'both':
                         $output .= "<strong><em>{$buffer}</em>"; $state = 'strong';
                         break;
                     default: # $state can be 'strong' or ''
                         $output .= "<em>"; $state .= 'em';
                 }
                 break;

             case 3:
                 switch ($state) {
                     case 'strong':
                         $output .= "</strong>"; $state = '';
                         break;
                     case 'strongem':
                         $output .= "</em></strong><em>"; $state = 'em';
                         break;
                     case 'emstrong':
                         $output .= "</strong>"; $state = 'em';
                         break;
                     case 'both':
                         $output .= "<em><strong>{$buffer}</strong>"; $state = 'em';
                         break;
                     default: # $state can be 'em' or ''
                         $output .= "<strong>"; $state .= 'strong';
                 }
                 break;

             case 5:
                 switch ($state) {
                     case 'strong':
                         $output .= "</strong><em>"; $state = 'em';
                         break;
                     case 'em':
                         $output .= "</em><strong>"; $state = 'strong';
                         break;
                     case 'strongem':
                         $output .= "</em></strong>"; $state = '';
                         break;
                     case 'emstrong':
                         $output .= "</strong></em>"; $state = '';
                         break;
                     case 'both':
                         $output .= "<em><strong>{$buffer}</strong></em>"; $state = '';
                         break;
                     default: # ($state == '')
                         $buffer = ''; $state = 'both';
                 }
                 break;
         }
     }

     # Close whatever is still open, innermost tag first.
     switch ($state) {
         case 'strong':
             $output .= '</strong>';
             break;
         case 'emstrong':
             $output .= '</strong>' . '</em>';
             break;
         case 'em':
             $output .= '</em>';
             break;
         case 'strongem':
             $output .= '</em>' . '</strong>';
             break;
         case 'both':
             $output .= "<strong><em>{$buffer}</em></strong>";
             break;
     }
     return $output;
 }
1088
1089         # Note: we have to do external links before the internal ones,
1090         # and otherwise take great care in the order of things here, so
1091         # that we don't end up interpreting some URLs twice.
1092
# Replaces bracketed external links ([url] and [url text]) with <a> tags
# and returns the converted text.
#
# The text is split on EXT_LINK_BRACKETED. The piece before the first
# link comes first; after that the loop consumes four entries per link:
# the URL, its protocol, the link text and the text that follows the
# link. Everything outside the brackets is passed on to
# replaceFreeExternalLinks().
/* private */ function replaceExternalLinks( $text ) {
    $fname = 'Parser::replaceExternalLinks';
    wfProfileIn( $fname );

    $sk =& $this->mOptions->getSkin();
    $linktrail = wfMsg('linktrail');
    $bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

    $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

    $numBits = count( $bits );
    for ( $i = 0; $i < $numBits; $i += 4 ) {
        $url = $bits[$i];
        $protocol = $bits[$i + 1];
        $label = $bits[$i + 2];
        $trail = $bits[$i + 3];

        # If the link text is an image URL, replace it with an <img> tag
        # This happened by accident in the original parser, but some people used it extensively
        $img = $this->maybeMakeImageLink( $label );
        if ( $img !== false ) {
            $label = $img;
        }

        $dtrail = '';
        if ( $label == '' ) {
            # No link text, e.g. [http://domain.tld/some.link]:
            # autonumber if allowed, otherwise just use the URL
            if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
                $label = "[" . ++$this->mAutonumber . "]";
            } else {
                $label = htmlspecialchars( $url );
            }
        } else if ( preg_match( $linktrail, $trail, $m2 ) ) {
            # Have link text, e.g. [http://domain.tld/some.link text]s:
            # characters directly after the bracket are split off as link trail
            $dtrail = $m2[1];
            $trail = $m2[2];
        }

        $encUrl = htmlspecialchars( $url );

        # Bit in parentheses showing the URL for the printable version;
        # omitted when the link text already is the URL or when the skin
        # suppresses the expansion
        $labelIsUrl = ( $url == $label )
            || preg_match( "!$protocol://" . preg_quote( $label, "/" ) . "/?$!", $url );
        if ( !$labelIsUrl && !$sk->suppressUrlExpansion() ) {
            $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
        } else {
            $paren = '';
        }

        # Process the trail (i.e. everything after this link up until start of the next link),
        # replacing any non-bracketed links
        $trail = $this->replaceFreeExternalLinks( $trail );

        $la = $sk->getExternalLinkAttributes( $url, $label );

        # The URL is written into href as it appears in the text.
        # This means that users can paste URLs directly into the text
        # Funny characters like &ouml; aren't valid in URLs anyway
        # This was changed in August 2004
        $s .= "<a href=\"{$url}\" {$la}>{$label}</a>{$dtrail}{$paren}{$trail}";
    }

    wfProfileOut( $fname );
    return $s;
}
1166
1167         # Replace anything that looks like a URL with a link
# Turns bare URLs in $text into links (or images, where
# maybeMakeImageLink() accepts them) and returns the converted text.
#
# The text is split on every "protocol:" listed in URL_PROTOCOLS; the
# pieces then come in pairs of protocol and following text. If the
# following text does not start with characters from EXT_LINK_URL_CLASS,
# the pair is copied through unchanged.
function replaceFreeExternalLinks( $text ) {
    $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
    $s = array_shift( $bits );

    $sk =& $this->mOptions->getSkin();

    $numBits = count( $bits );
    for ( $i = 0; $i < $numBits; $i += 2 ) {
        $protocol = $bits[$i];
        $remainder = $bits[$i + 1];

        if ( !preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
            # Nothing URL-like after the protocol: leave the text alone
            $s .= $protocol . $remainder;
            continue;
        }

        # Found some characters after the protocol that look promising
        $url = $protocol . $m[1];
        $trail = $m[2];

        # Move trailing punctuation to $trail.
        # If there is no left bracket, then consider right brackets fair game too
        $sep = ',;\.:!?' . ( strpos( $url, '(' ) === false ? ')' : '' );
        $numSepChars = strspn( strrev( $url ), $sep );
        if ( $numSepChars ) {
            $trail = substr( $url, -$numSepChars ) . $trail;
            $url = substr( $url, 0, -$numSepChars );
        }

        # Replace &amp; from obsolete syntax with &
        $url = str_replace( '&amp;', '&', $url );

        # Is this an external image? If not, make a link
        $html = $this->maybeMakeImageLink( $url );
        if ( $html === false ) {
            $html = $sk->makeExternalLink( $url, $url );
        }
        $s .= $html . $trail;
    }
    return $s;
}
1213
1214         # make an image if it's allowed
function maybeMakeImageLink( $url ) {
    # Returns the skin's image markup for $url when external images are
    # enabled in the parser options and $url matches EXT_IMAGE_REGEX;
    # returns false otherwise.
    $sk =& $this->mOptions->getSkin();

    if ( !$this->mOptions->getAllowExternalImages() ) {
        return false;
    }
    if ( !preg_match( EXT_IMAGE_REGEX, $url ) ) {
        return false;
    }

    # Image found
    return $sk->makeImage( htmlspecialchars( $url ) );
}
1226
1227         # The wikilinks [[ ]] are processed here.
/* private */ function replaceInternalLinks( $s ) {
    # Converts [[wiki links]] in $s into HTML and returns the result.
    #
    # Depending on the target this emits a normal page link, an image, a
    # media link or a special-page link. Interlanguage links and category
    # links are recorded on $this->mOutput instead of being rendered inline.
    # A link to the page being parsed goes through makeSelfLinkObj().
    # Text after '[[' that does not form a valid link is passed through
    # unchanged.
    global $wgLang, $wgLinkCache;
    global $wgNamespacesWithSubpages, $wgLanguageCode;
    static $fname = 'Parser::replaceInternalLinks' ;
    wfProfileIn( $fname );

    wfProfileIn( $fname.'-setup' );
    static $tc = FALSE;
    # the % is needed to support urlencoded titles as well
    if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
    $sk =& $this->mOptions->getSkin();

    # NOTE(review): $redirect is never read below; the call is kept in case
    # MagicWord::get() has side effects -- confirm and remove.
    $redirect = MagicWord::get ( MAG_REDIRECT ) ;

    # Split on '[['. A space is prepended before splitting and stripped
    # again from the first chunk, which is the text before the first link.
    $a = explode( '[[', ' ' . $s );
    $s = array_shift( $a );
    $s = substr( $s, 1 );

    # Match a link having the form [[namespace:link|alternate]]trail
    static $e1 = FALSE;
    if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
    # Match the end of a line for a word that's not followed by whitespace,
    # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
    static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

    $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
    # Special and Media are pseudo-namespaces; no pages actually exist in them

    $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

    if ( $useLinkPrefixExtension ) {
        # The prefix of the very first link has to be taken from the text
        # preceding it, before the loop starts appending to $s.
        if ( preg_match( $e2, $s, $m ) ) {
            $first_prefix = $m[2];
            $s = $m[1];
        } else {
            $first_prefix = false;
        }
    } else {
        $prefix = '';
    }

    wfProfileOut( $fname.'-setup' );

    # Process each chunk; every chunk is the text that followed one '[['
    foreach ( $a as $line ) {
        wfProfileIn( $fname.'-prefixhandling' );
        if ( $useLinkPrefixExtension ) {
            # Peel a trailing word off the output so far; it becomes the
            # prefix of this link.
            if ( preg_match( $e2, $s, $m ) ) {
                $prefix = $m[2];
                $s = $m[1];
            } else {
                $prefix='';
            }
            # first link
            if($first_prefix) {
                $prefix = $first_prefix;
                $first_prefix = false;
            }
        }
        wfProfileOut( $fname.'-prefixhandling' );

        if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
            $text = $m[2];
            # fix up urlencoded title texts
            if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
            $trail = $m[3];
        } else { # Invalid form; output directly
            $s .= $prefix . '[[' . $line ;
            continue;
        }

        # Valid link forms:
        # Foobar -- normal
        # :Foobar -- override special treatment of prefix (images, language links)
        # /Foobar -- convert to CurrentPage/Foobar
        # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text

        # Look at the first character
        $c = substr($m[1],0,1);
        $noforce = ($c != ':');

        # subpage
        if( $c == '/' ) {
            # / at end means we don't want the slash to be shown
            if(substr($m[1],-1,1)=='/') {
                $m[1]=substr($m[1],1,strlen($m[1])-2);
                $noslash=$m[1];
            } else {
                $noslash=substr($m[1],1);
            }

            # Some namespaces don't allow subpages
            if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
                # subpages allowed here
                $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
                if( '' == $text ) {
                    $text= $m[1];
                } # this might be changed for ugliness reasons
            } else {
                # no subpage allowed, use standard link
                $link = $noslash;
            }

        } elseif( $noforce ) { # no subpage
            $link = $m[1];
        } else {
            # We don't want to keep the first character
            $link = substr( $m[1], 1 );
        }

        # Without alternate text the link target doubles as the label
        $wasblank = ( '' == $text );
        if( $wasblank ) $text = $link;

        $nt = Title::newFromText( $link );
        if( !$nt ) {
            # Not a usable title; emit the original markup
            $s .= $prefix . '[[' . $line;
            continue;
        }

        $ns = $nt->getNamespace();
        $iw = $nt->getInterWiki();

        # Link not escaped by : , create the various objects
        if( $noforce ) {

            # Interwikis
            if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
                array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
                # Drop prefix + trail when they consist of whitespace only
                $tmp = $prefix . $trail ;
                $s .= (trim($tmp) == '')? '': $tmp;
                continue;
            }

            if ( $ns == NS_IMAGE ) {
                $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
                $wgLinkCache->addImageLinkObj( $nt );
                continue;
            }

            if ( $ns == NS_CATEGORY ) {
                $t = $nt->getText() ;
                $nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).":".$t ) ;

                $wgLinkCache->suspend(); # Don't save in links/brokenlinks
                $pPLC=$sk->postParseLinkColour();
                $sk->postParseLinkColour( false );
                $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
                $sk->postParseLinkColour( $pPLC );
                $wgLinkCache->resume();

                # Sort under the current page title unless a sort key was given
                $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
                $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
                $this->mOutput->mCategoryLinks[] = $t ;
                $s .= $prefix . $trail ;
                continue;
            }
        }

        # strpos() returns 0 for a '#' in first position, so the check has
        # to be strict: a loose comparison would read that 0 as "no '#'".
        if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
            ( strpos( $link, '#' ) === false ) ) {
            # Self-links are handled specially; generally de-link and change to bold.
            $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
            continue;
        }

        if( $ns == NS_MEDIA ) {
            $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
            $wgLinkCache->addImageLinkObj( $nt );
            continue;
        } elseif( $ns == NS_SPECIAL ) {
            $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
            continue;
        }
        $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
    }
    wfProfileOut( $fname );
    return $s;
}
1406
1407         # Some functions here used by doBlockLevels()
1408         #
/* private */ function closeParagraph() {
    # Returns the closing tag (plus newline) for the section named in
    # $this->mLastSection, or '' when no section is open, and resets the
    # section / preformatted state.
    $closing = ( '' != $this->mLastSection )
        ? '</' . $this->mLastSection  . ">\n"
        : '';

    $this->mInPre = false;
    $this->mLastSection = '';

    return $closing;
}
1418         # getCommon() returns the length of the longest common substring
1419         # of both arguments, starting at the beginning of both.
1420         #
/* private */ function getCommon( $st1, $st2 ) {
    # Returns the number of leading bytes that $st1 and $st2 have in common
    # (0 when either string is empty).
    $shorter = min( strlen( $st1 ), strlen( $st2 ) );

    # Square-bracket offsets: the curly-brace form ($st1{$i}) is deprecated
    # since PHP 7.4 and no longer parses in PHP 8.
    for ( $i = 0; $i < $shorter; ++$i ) {
        if ( $st1[$i] !== $st2[$i] ) { break; }
    }
    return $i;
}
1431         # These next three functions open, continue, and close the list
1432         # element appropriate to the prefix character passed into them.
1433         #
/* private */ function openList( $char ) {
    # Closes any open paragraph and returns the markup that opens a new
    # list of the kind selected by $char, including its first item:
    #   '*' -> <ul><li>   '#' -> <ol><li>   ':' -> <dl><dd>   ';' -> <dl><dt>
    # For any other character only an error comment is returned.
    $html = $this->closeParagraph();

    switch ( $char ) {
        case '*':
            $html .= '<ul><li>';
            break;
        case '#':
            $html .= '<ol><li>';
            break;
        case ':':
            $html .= '<dl><dd>';
            break;
        case ';':
            $html .= '<dl><dt>';
            # Remember that a <dt> is open so it gets closed with </dt>
            $this->mDTopen = true;
            break;
        default:
            # Replaces (does not append to) the paragraph-closing markup
            $html = '<!-- ERR 1 -->';
    }

    return $html;
}
1448
/* private */ function nextItem( $char ) {
    # Returns the markup that ends the current list item and starts the
    # next one for the list kind selected by $char. For definition lists
    # the closing tag depends on whether a <dt> is currently open, and
    # $this->mDTopen is updated to reflect the item being opened.
    switch ( $char ) {
        case '*':
        case '#':
            return '</li><li>';

        case ':':
        case ';':
            $close = $this->mDTopen ? '</dt>' : '</dd>';
            $opensTerm = ( ';' == $char );
            $this->mDTopen = $opensTerm;
            return $close . ( $opensTerm ? '<dt>' : '<dd>' );
    }
    return '<!-- ERR 2 -->';
}
1464
/* private */ function closeList( $char ) {
    # Returns the markup (followed by a newline) that closes the last item
    # and the list of the kind selected by $char: '*', '#' or ':'.
    # Any other character, including ';', yields an error comment with no
    # trailing newline.
    switch ( $char ) {
        case '*':
            $closing = '</li></ul>';
            break;
        case '#':
            $closing = '</li></ol>';
            break;
        case ':':
            if ( $this->mDTopen ) {
                # The list ends while a term is still open
                $this->mDTopen = false;
                $closing = '</dt></dl>';
            } else {
                $closing = '</dd></dl>';
            }
            break;
        default:
            return '<!-- ERR 3 -->';
    }
    return $closing."\n";
}
1479
/* private */ function doBlockLevels( $text, $linestart ) {
    # Walks $text line by line and adds block-level markup: lists for lines
    # starting with * # : ;, <pre> for lines starting with a space, and
    # <p> paragraphs for everything else. Returns the resulting text.
    #
    # $linestart: when false, the first line of $text is copied to the
    # output as-is and block processing starts with the second line.
    $fname = 'Parser::doBlockLevels';
    wfProfileIn( $fname );

    $textLines = explode( "\n", $text );

    $lastPrefix = $output = '';
    $this->mDTopen = $inBlockElem = false;
    $prefixLength = 0;
    # Either false, or paragraph markup that is held back until the
    # following line shows whether it is needed.
    $paragraphStack = false;

    if ( !$linestart ) {
        $output .= array_shift( $textLines );
    }
    foreach ( $textLines as $oLine ) {
        $lastPrefixLength = strlen( $lastPrefix );
        $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
        $preOpenMatch = preg_match('/<pre/i', $oLine );
        if ( !$this->mInPre ) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $oLine, '*#:;' );
            $pref = substr( $oLine, 0, $prefixLength );

            # $pref2 is $pref with every ';' turned into ':'. It is what gets
            # compared with, and stored as, $lastPrefix.
            $pref2 = str_replace( ';', ':', $pref );
            $t = substr( $oLine, $prefixLength );
            $this->mInPre = !empty($preOpenMatch);
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $pref = $pref2 = '';
            $t = $oLine;
        }

        # List generation
        if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $pref, -1 ) );
            $paragraphStack = false;

            if ( substr( $pref, -1 ) == ';') {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                # So we check for : in the remainder text to split up the
                # title and definition, without b0rking links.
                # FIXME: This is not foolproof. Something better in Tokenizer might help.
                if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                    $term = $match[1];
                    $output .= $term . $this->nextItem( ':' );
                    $t = $match[2];
                }
            }
        } elseif( $prefixLength || $lastPrefixLength ) {
            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
            $paragraphStack = false;

            # Close the levels of the previous line that this line does not share
            while( $commonPrefixLength < $lastPrefixLength ) {
                $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                --$lastPrefixLength;
            }
            # Nothing new to open: continue the innermost shared list
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
            }
            # Open the levels this line adds
            while ( $prefixLength > $commonPrefixLength ) {
                $char = substr( $pref, $commonPrefixLength, 1 );
                $output .= $this->openList( $char );

                if ( ';' == $char ) {
                    # FIXME: This is dupe of code above
                    if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                        $term = $match[1];
                        $output .= $term . $this->nextItem( ":" );
                        $t = $match[2];
                    }
                }
                ++$commonPrefixLength;
            }
            $lastPrefix = $pref2;
        }
        if( 0 == $prefixLength ) {
            # No prefix (not in list)--go to paragraph mode
            $uniq_prefix = UNIQ_PREFIX;
            // XXX: use a stack for nestable elements like span, table and div
            $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
            $closematch = preg_match(
                '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
                '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
            if ( $openmatch or $closematch ) {
                $paragraphStack = false;
                $output .= $this->closeParagraph();
                # closeParagraph() clears mInPre; restore it for an unclosed <pre>
                if($preOpenMatch and !$preCloseMatch) {
                    $this->mInPre = true;
                }
                if ( $closematch ) {
                    $inBlockElem = false;
                } else {
                    $inBlockElem = true;
                }
            } else if ( !$inBlockElem && !$this->mInPre ) {
                # substr() instead of a direct offset: $t may be empty here,
                # and reading offset 0 of an empty string raises a notice.
                if ( ' ' === substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
                    // pre
                    if ($this->mLastSection != 'pre') {
                        $paragraphStack = false;
                        $output .= $this->closeParagraph().'<pre>';
                        $this->mLastSection = 'pre';
                    }
                } else {
                    // paragraph
                    if ( '' == trim($t) ) {
                        if ( $paragraphStack ) {
                            # Second blank line in a row
                            $output .= $paragraphStack.'<br />';
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else {
                            # First blank line: hold the markup back
                            if ($this->mLastSection != 'p' ) {
                                $output .= $this->closeParagraph();
                                $this->mLastSection = '';
                                $paragraphStack = '<p>';
                            } else {
                                $paragraphStack = '</p><p>';
                            }
                        }
                    } else {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack;
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else if ($this->mLastSection != 'p') {
                            $output .= $this->closeParagraph().'<p>';
                            $this->mLastSection = 'p';
                        }
                    }
                }
            }
        }
        # The line is only emitted when no paragraph markup is being held back
        if ($paragraphStack === false) {
            $output .= $t."\n";
        }
    }
    # Close whatever lists the last line left open
    while ( $prefixLength ) {
        $output .= $this->closeList( $pref2[$prefixLength-1] );
        --$prefixLength;
    }
    if ( '' != $this->mLastSection ) {
        $output .= '</' . $this->mLastSection . '>';
        $this->mLastSection = '';
    }

    wfProfileOut( $fname );
    return $output;
}
1636
1637         # Return value of a magic variable (like PAGENAME)
function getVariableValue( $index ) {
    # Maps a magic variable id (one of the MAG_* constants) to its current
    # value. Returns NULL for an id that is not handled here.
    global $wgLang, $wgSitename, $wgServer;

    if ( $index == MAG_CURRENTMONTH ) {
        return $wgLang->formatNum( date( 'm' ) );
    }
    if ( $index == MAG_CURRENTMONTHNAME ) {
        return $wgLang->getMonthName( date('n') );
    }
    if ( $index == MAG_CURRENTMONTHNAMEGEN ) {
        return $wgLang->getMonthNameGen( date('n') );
    }
    if ( $index == MAG_CURRENTDAY ) {
        return $wgLang->formatNum( date('j') );
    }
    if ( $index == MAG_PAGENAME ) {
        return $this->mTitle->getText();
    }
    if ( $index == MAG_PAGENAMEE ) {
        return $this->mTitle->getPartialURL();
    }
    if ( $index == MAG_NAMESPACE ) {
        # Namespace text comes from $wgLang->getNsText(), not from
        # Namespace::getCanonicalName() (patch by Dori)
        return $wgLang->getNsText($this->mTitle->getNamespace());
    }
    if ( $index == MAG_CURRENTDAYNAME ) {
        # date('w') starts at 0; getWeekdayName() is given the value + 1
        return $wgLang->getWeekdayName( date('w')+1 );
    }
    if ( $index == MAG_CURRENTYEAR ) {
        return $wgLang->formatNum( date( 'Y' ) );
    }
    if ( $index == MAG_CURRENTTIME ) {
        return $wgLang->time( wfTimestampNow(), false );
    }
    if ( $index == MAG_NUMBEROFARTICLES ) {
        return $wgLang->formatNum( wfNumberOfArticles() );
    }
    if ( $index == MAG_SITENAME ) {
        return $wgSitename;
    }
    if ( $index == MAG_SERVER ) {
        return $wgServer;
    }
    return NULL;
}
1673
1674         # initialise the magic variables (like CURRENTMONTHNAME)
function initialiseVariables() {
    # Rebuilds $this->mVariables from scratch: for every id listed in
    # $wgVariableIDs, the matching MagicWord adds its entry with the value
    # from getVariableValue().
    global $wgVariableIDs;

    $this->mVariables = array();

    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $magicWord->addToArray(
            $this->mVariables,
            $this->getVariableValue( $variableId )
        );
    }
}
1683
/* private */ function replaceVariables( $text, $args = array() ) {
    # Expands {{...}} constructs in $text and returns the result.
    #
    # For HTML output, variables ({{NAME}}) and template arguments
    # ({{{name}}}) are substituted first; template ({{...|...}})
    # substitution then runs for every output type. The work is delegated
    # to the wf*Substitution callbacks. $args is pushed on $this->mArgStack
    # for the duration of the call.
    global $wgLang, $wgScript, $wgArticlePath;

    # Prevent too big inclusions: oversized text is returned untouched
    if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
        return $text;
    }

    $fname = 'Parser::replaceVariables';
    wfProfileIn( $fname );

    $titleChars = Title::legalChars();
    $nonBraceChars = str_replace( array( '{', '}' ), '', $titleChars );

    # This function is called recursively. To keep track of arguments we need a stack:
    array_push( $this->mArgStack, $args );

    # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
    $GLOBALS['wgCurParser'] =& $this;

    if ( $this->mOutputType == OT_HTML ) {
        # Variable substitution
        $variableRegex = "/{{([$nonBraceChars]*?)}}/";
        $text = preg_replace_callback( $variableRegex, 'wfVariableSubstitution', $text );

        # Argument substitution
        $argumentRegex = "/(\\n?){{{([$titleChars]*?)}}}/";
        $text = preg_replace_callback( $argumentRegex, 'wfArgSubstitution', $text );
    }

    # Template substitution
    $templateRegex = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
    $text = preg_replace_callback( $templateRegex, 'wfBraceSubstitution', $text );

    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
1721
function variableSubstitution( $matches ) {
    # Returns the replacement for one {{NAME}} match: the variable's value
    # when $matches[1] is a key of $this->mVariables, otherwise the whole
    # match ($matches[0]) unchanged. The variable table is built on first
    # use.
    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }

    $name = $matches[1];
    if ( !array_key_exists( $name, $this->mVariables ) ) {
        # Not a known variable; leave the text as it was
        return $matches[0];
    }

    $value = $this->mVariables[$name];
    $this->mOutput->mContainsOldMagic = true;
    return $value;
}
1734
1735         # Split template arguments
function getTemplateArgs( $argsString ) {
    # Splits the argument part of a template call into a list of arguments.
    # $argsString is either '' (no arguments, returns an empty array) or
    # starts with the '|' that precedes the first argument.
    if ( $argsString === '' ) {
        return array();
    }

    $pieces = explode( '|', substr( $argsString, 1 ) );

    # While the argument being assembled has a different number of '[['
    # and ']]', the '|' after it belongs to link syntax rather than to the
    # template, so the following piece is glued back on.
    $merged = array();
    $current = $pieces[0];
    $pieceCount = count( $pieces );
    for ( $k = 1; $k < $pieceCount; $k++ ) {
        if ( substr_count( $current, "[[" ) != substr_count( $current, "]]" ) ) {
            $current .= "|" . $pieces[$k];
        } else {
            $merged[] = $current;
            $current = $pieces[$k];
        }
    }
    $merged[] = $current;

    return $merged;
}
1759
1760         function braceSubstitution( $matches ) {
1761                 global $wgLinkCache, $wgLang;
1762                 $fname = 'Parser::braceSubstitution';
1763                 $found = false;
1764                 $nowiki = false;
1765                 $noparse = false;
1766
1767                 $title = NULL;
1768
1769                 # $newline is an optional newline character before the braces
1770                 # $part1 is the bit before the first |, and must contain only title characters
1771                 # $args is a list of arguments, starting from index 0, not including $part1
1772
1773                 $newline = $matches[1];
1774                 $part1 = $matches[2];
1775                 # If the third subpattern matched anything, it will start with |
1776
1777                 $args = $this->getTemplateArgs($matches[3]);
1778                 $argc = count( $args );
1779
1780                 # {{{}}}
1781                 if ( strpos( $matches[0], '{{{' ) !== false ) {
1782                         $text = $matches[0];
1783                         $found = true;
1784                         $noparse = true;
1785                 }
1786
1787                 # SUBST
1788                 if ( !$found ) {
1789                         $mwSubst =& MagicWord::get( MAG_SUBST );
1790                         if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1791                                 if ( $this->mOutputType != OT_WIKI ) {
1792                                         # Invalid SUBST not replaced at PST time
1793                                         # Return without further processing
1794                                         $text = $matches[0];
1795                                         $found = true;
1796                                         $noparse= true;
1797                                 }
1798                         } elseif ( $this->mOutputType == OT_WIKI ) {
1799                                 # SUBST not found in PST pass, do nothing
1800                                 $text = $matches[0];
1801                                 $found = true;
1802                         }
1803                 }
1804
1805                 # MSG, MSGNW and INT
1806                 if ( !$found ) {
1807                         # Check for MSGNW:
1808                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1809                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1810                                 $nowiki = true;
1811                         } else {
1812                                 # Remove obsolete MSG:
1813                                 $mwMsg =& MagicWord::get( MAG_MSG );
1814                                 $mwMsg->matchStartAndRemove( $part1 );
1815                         }
1816
1817                         # Check if it is an internal message
1818                         $mwInt =& MagicWord::get( MAG_INT );
1819                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1820                                 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1821                                         $text = wfMsgReal( $part1, $args, true );
1822                                         $found = true;
1823                                 }
1824                         }
1825                 }
1826
1827                 # NS
1828                 if ( !$found ) {
1829                         # Check for NS: (namespace expansion)
1830                         $mwNs = MagicWord::get( MAG_NS );
1831                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1832                                 if ( intval( $part1 ) ) {
1833                                         $text = $wgLang->getNsText( intval( $part1 ) );
1834                                         $found = true;
1835                                 } else {
1836                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1837                                         if ( !is_null( $index ) ) {
1838                                                 $text = $wgLang->getNsText( $index );
1839                                                 $found = true;
1840                                         }
1841                                 }
1842                         }
1843                 }
1844
1845                 # LOCALURL and LOCALURLE
1846                 if ( !$found ) {
1847                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1848                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1849
1850                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1851                                 $func = 'getLocalURL';
1852                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1853                                 $func = 'escapeLocalURL';
1854                         } else {
1855                                 $func = '';
1856                         }
1857
1858                         if ( $func !== '' ) {
1859                                 $title = Title::newFromText( $part1 );
1860                                 if ( !is_null( $title ) ) {
1861                                         if ( $argc > 0 ) {
1862                                                 $text = $title->$func( $args[0] );
1863                                         } else {
1864                                                 $text = $title->$func();
1865                                         }
1866                                         $found = true;
1867                                 }
1868                         }
1869                 }
1870
1871                 # Internal variables
1872                 if ( !$this->mVariables ) {
1873                         $this->initialiseVariables();
1874                 }
1875                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1876                         $text = $this->mVariables[$part1];
1877                         $found = true;
1878                         $this->mOutput->mContainsOldMagic = true;
1879                 }
1880
1881                 # Template table test
1882
1883                 # Did we encounter this template already? If yes, it is in the cache
1884                 # and we need to check for loops.
1885                 if ( isset( $this->mTemplates[$part1] ) ) {
1886                         # Infinite loop test
1887                         if ( isset( $this->mTemplatePath[$part1] ) ) {
1888                                 $noparse = true;
1889                                 $found = true;
1890                         }
1891                         # set $text to cached message.
1892                         $text = $this->mTemplates[$part1];
1893                         $found = true;
1894                 }
1895
1896                 # Load from database
1897                 if ( !$found ) {
1898                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1899                         if ( !is_null( $title ) && !$title->isExternal() ) {
1900                                 # Check for excessive inclusion
1901                                 $dbk = $title->getPrefixedDBkey();
1902                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1903                                         # This should never be reached.
1904                                         $article = new Article( $title );
1905                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1906                                         if ( $articleContent !== false ) {
1907                                                 $found = true;
1908                                                 $text = $articleContent;
1909                                         }
1910                                 }
1911
1912                                 # If the title is valid but undisplayable, make a link to it
1913                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1914                                         $text = '[['.$title->getPrefixedText().']]';
1915                                         $found = true;
1916                                 }
1917
1918                                 # Template cache array insertion
1919                                 $this->mTemplates[$part1] = $text;
1920                         }
1921                 }
1922
1923                 # Recursive parsing, escaping and link table handling
1924                 # Only for HTML output
1925                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1926                         $text = wfEscapeWikiText( $text );
1927                 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1928                         # Clean up argument array
1929                         $assocArgs = array();
1930                         $index = 1;
1931                         foreach( $args as $arg ) {
1932                                 $eqpos = strpos( $arg, '=' );
1933                                 if ( $eqpos === false ) {
1934                                         $assocArgs[$index++] = $arg;
1935                                 } else {
1936                                         $name = trim( substr( $arg, 0, $eqpos ) );
1937                                         $value = trim( substr( $arg, $eqpos+1 ) );
1938                                         if ( $value === false ) {
1939                                                 $value = '';
1940                                         }
1941                                         if ( $name !== false ) {
1942                                                 $assocArgs[$name] = $value;
1943                                         }
1944                                 }
1945                         }
1946
1947                         # Do not enter included links in link table
1948                         if ( !is_null( $title ) ) {
1949                                 $wgLinkCache->suspend();
1950                         }
1951
1952                         # Add a new element to the templace recursion path
1953                         $this->mTemplatePath[$part1] = 1;
1954
1955                         $text = $this->stripParse( $text, $newline, $assocArgs );
1956
1957                         # Resume the link cache and register the inclusion as a link
1958                         if ( !is_null( $title ) ) {
1959                                 $wgLinkCache->resume();
1960                                 $wgLinkCache->addLinkObj( $title );
1961                         }
1962                 }
1963                                 # Empties the template path
1964                                 $this->mTemplatePath = array();
1965
1966                 if ( !$found ) {
1967                         return $matches[0];
1968                 } else {
1969                         return $text;
1970                 }
1971         }
1972
1973         # Triple brace replacement -- used for template arguments
1974         function argSubstitution( $matches ) {
1975                 $newline = $matches[1];
1976                 $arg = trim( $matches[2] );
1977                 $text = $matches[0];
1978                 $inputArgs = end( $this->mArgStack );
1979
1980                 if ( array_key_exists( $arg, $inputArgs ) ) {
1981                         $text = $this->stripParse( $inputArgs[$arg], $newline, array() );
1982                 }
1983
1984                 return $text;
1985         }
1986
1987         # Returns true if the function is allowed to include this entity
1988         function incrementIncludeCount( $dbk ) {
1989                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1990                         $this->mIncludeCount[$dbk] = 0;
1991                 }
1992                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1993                         return true;
1994                 } else {
1995                         return false;
1996                 }
1997         }
1998
1999
2000         # Cleans up HTML, removes dangerous tags and attributes
2001         /* private */ function removeHTMLtags( $text ) {
2002                 global $wgUseTidy, $wgUserHtml;
2003                 $fname = 'Parser::removeHTMLtags';
2004                 wfProfileIn( $fname );
2005
2006                 if( $wgUserHtml ) {
2007                         $htmlpairs = array( # Tags that must be closed
2008                                 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
2009                                 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
2010                                 'strike', 'strong', 'tt', 'var', 'div', 'center',
2011                                 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
2012                                 'ruby', 'rt' , 'rb' , 'rp', 'p'
2013                         );
2014                         $htmlsingle = array(
2015                                 'br', 'hr', 'li', 'dt', 'dd'
2016                         );
2017                         $htmlnest = array( # Tags that can be nested--??
2018                                 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
2019                                 'dl', 'font', 'big', 'small', 'sub', 'sup'
2020                         );
2021                         $tabletags = array( # Can only appear inside table
2022                                 'td', 'th', 'tr'
2023                         );
2024                 } else {
2025                         $htmlpairs = array();
2026                         $htmlsingle = array();
2027                         $htmlnest = array();
2028                         $tabletags = array();
2029                 }
2030
2031                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
2032                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
2033
2034                 $htmlattrs = $this->getHTMLattrs () ;
2035
2036                 # Remove HTML comments
2037                 $text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '$2', $text );
2038
2039                 $bits = explode( '<', $text );
2040                 $text = array_shift( $bits );
2041                 if(!$wgUseTidy) {
2042                         $tagstack = array(); $tablestack = array();
2043                         foreach ( $bits as $x ) {
2044                                 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
2045                                 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2046                                 $x, $regs );
2047                                 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2048                                 error_reporting( $prev );
2049
2050                                 $badtag = 0 ;
2051                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2052                                         # Check our stack
2053                                         if ( $slash ) {
2054                                                 # Closing a tag...
2055                                                 if ( ! in_array( $t, $htmlsingle ) &&
2056                                                 ( $ot = @array_pop( $tagstack ) ) != $t ) {
2057                                                         @array_push( $tagstack, $ot );
2058                                                         $badtag = 1;
2059                                                 } else {
2060                                                         if ( $t == 'table' ) {
2061                                                                 $tagstack = array_pop( $tablestack );
2062                                                         }
2063                                                         $newparams = '';
2064                                                 }
2065                                         } else {
2066                                                 # Keep track for later
2067                                                 if ( in_array( $t, $tabletags ) &&
2068                                                 ! in_array( 'table', $tagstack ) ) {
2069                                                         $badtag = 1;
2070                                                 } else if ( in_array( $t, $tagstack ) &&
2071                                                 ! in_array ( $t , $htmlnest ) ) {
2072                                                         $badtag = 1 ;
2073                                                 } else if ( ! in_array( $t, $htmlsingle ) ) {
2074                                                         if ( $t == 'table' ) {
2075                                                                 array_push( $tablestack, $tagstack );
2076                                                                 $tagstack = array();
2077                                                         }
2078                                                         array_push( $tagstack, $t );
2079                                                 }
2080                                                 # Strip non-approved attributes from the tag
2081                                                 $newparams = $this->fixTagAttributes($params);
2082
2083                                         }
2084                                         if ( ! $badtag ) {
2085                                                 $rest = str_replace( '>', '&gt;', $rest );
2086                                                 $text .= "<$slash$t $newparams$brace$rest";
2087                                                 continue;
2088                                         }
2089                                 }
2090                                 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
2091                         }
2092                         # Close off any remaining tags
2093                         while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
2094                                 $text .= "</$t>\n";
2095                                 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
2096                         }
2097                 } else {
2098                         # this might be possible using tidy itself
2099                         foreach ( $bits as $x ) {
2100                                 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2101                                 $x, $regs );
2102                                 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2103                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2104                                         $newparams = $this->fixTagAttributes($params);
2105                                         $rest = str_replace( '>', '&gt;', $rest );
2106                                         $text .= "<$slash$t $newparams$brace$rest";
2107                                 } else {
2108                                         $text .= '&lt;' . str_replace( '>', '&gt;', $x);
2109                                 }
2110                         }
2111                 }
2112                 wfProfileOut( $fname );
2113                 return $text;
2114         }
2115
2116
2117         # This function accomplishes several tasks:
2118         # 1) Auto-number headings if that option is enabled
2119         # 2) Add an [edit] link to sections for logged in users who have enabled the option
2120         # 3) Add a Table of contents on the top for users who have enabled the option
2121         # 4) Auto-anchor headings
2122         #
2123         # It loops through all headlines, collects the necessary data, then splits up the
2124         # string and re-inserts the newly formatted headlines.
2125         /* private */ function formatHeadings( $text, $isMain=true ) {
2126                 global $wgInputEncoding, $wgMaxTocLevel;
2127
2128                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
2129                 $doShowToc = $this->mOptions->getShowToc();
2130                 $forceTocHere = false;
2131                 if( !$this->mTitle->userCanEdit() ) {
2132                         $showEditLink = 0;
2133                         $rightClickHack = 0;
2134                 } else {
2135                         $showEditLink = $this->mOptions->getEditSection();
2136                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
2137                 }
2138
2139                 # Inhibit editsection links if requested in the page
2140                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
2141                 if( $esw->matchAndRemove( $text ) ) {
2142                         $showEditLink = 0;
2143                 }
2144                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
2145                 # do not add TOC
2146                 $mw =& MagicWord::get( MAG_NOTOC );
2147                 if( $mw->matchAndRemove( $text ) ) {
2148                         $doShowToc = 0;
2149                 }
2150
2151                 # never add the TOC to the Main Page. This is an entry page that should not
2152                 # be more than 1-2 screens large anyway
2153                 if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
2154                         $doShowToc = 0;
2155                 }
2156
2157                 # Get all headlines for numbering them and adding funky stuff like [edit]
2158                 # links - this is for later, but we need the number of headlines right now
2159                 $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );
2160
2161                 # if there are fewer than 4 headlines in the article, do not show TOC
2162                 if( $numMatches < 4 ) {
2163                         $doShowToc = 0;
2164                 }
2165
2166                 # if the string __TOC__ (not case-sensitive) occurs in the HTML,
2167                 # override above conditions and always show TOC at that place
2168                 $mw =& MagicWord::get( MAG_TOC );
2169                 if ($mw->match( $text ) ) {
2170                         $doShowToc = 1;
2171                         $forceTocHere = true;
2172                 } else {
2173                         # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
2174                         # override above conditions and always show TOC above first header
2175                         $mw =& MagicWord::get( MAG_FORCETOC );
2176                         if ($mw->matchAndRemove( $text ) ) {
2177                                 $doShowToc = 1;
2178                         }
2179                 }
2180
2181
2182
2183                 # We need this to perform operations on the HTML
2184                 $sk =& $this->mOptions->getSkin();
2185
2186                 # headline counter
2187                 $headlineCount = 0;
2188
2189                 # Ugh .. the TOC should have neat indentation levels which can be
2190                 # passed to the skin functions. These are determined here
2191                 $toclevel = 0;
2192                 $toc = '';
2193                 $full = '';
2194                 $head = array();
2195                 $sublevelCount = array();
2196                 $level = 0;
2197                 $prevlevel = 0;
2198                 foreach( $matches[3] as $headline ) {
2199                         $numbering = '';
2200                         if( $level ) {
2201                                 $prevlevel = $level;
2202                         }
2203                         $level = $matches[1][$headlineCount];
2204                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
2205                                 # reset when we enter a new level
2206                                 $sublevelCount[$level] = 0;
2207                                 $toc .= $sk->tocIndent( $level - $prevlevel );
2208                                 $toclevel += $level - $prevlevel;
2209                         }
2210                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
2211                                 # reset when we step back a level
2212                                 $sublevelCount[$level+1]=0;
2213                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
2214                                 $toclevel -= $prevlevel - $level;
2215                         }
2216                         # count number of headlines for each level
2217                         @$sublevelCount[$level]++;
2218                         if( $doNumberHeadings || $doShowToc ) {
2219                                 $dot = 0;
2220                                 for( $i = 1; $i <= $level; $i++ ) {
2221                                         if( !empty( $sublevelCount[$i] ) ) {
2222                                                 if( $dot ) {
2223                                                         $numbering .= '.';
2224                                                 }
2225                                                 $numbering .= $sublevelCount[$i];
2226                                                 $dot = 1;
2227                                         }
2228                                 }
2229                         }
2230
2231                         # The canonized header is a version of the header text safe to use for links
2232                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
2233                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
2234                         $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState );
2235
2236                         # strip out HTML
2237                         $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
2238                         $tocline = trim( $canonized_headline );
2239                         $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
2240                         $replacearray = array(
2241                                 '%3A' => ':',
2242                                 '%' => '.'
2243                         );
2244                         $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
2245                         $refer[$headlineCount] = $canonized_headline;
2246
2247                         # count how many in assoc. array so we can track dupes in anchors
2248                         @$refers[$canonized_headline]++;
2249                         $refcount[$headlineCount]=$refers[$canonized_headline];
2250
2251                         # Prepend the number to the heading text
2252
2253                         if( $doNumberHeadings || $doShowToc ) {
2254                                 $tocline = $numbering . ' ' . $tocline;
2255
2256                                 # Don't number the heading if it is the only one (looks silly)
2257                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
2258                                         # the two are different if the line contains a link
2259                                         $headline=$numbering . ' ' . $headline;
2260                                 }
2261                         }
2262
2263                         # Create the anchor for linking from the TOC to the section
2264                         $anchor = $canonized_headline;
2265                         if($refcount[$headlineCount] > 1 ) {
2266                                 $anchor .= '_' . $refcount[$headlineCount];
2267                         }
2268                         if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
2269                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
2270                         }
2271                         if( $showEditLink ) {
2272                                 if ( empty( $head[$headlineCount] ) ) {
2273                                         $head[$headlineCount] = '';
2274                                 }
2275                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
2276                         }
2277
2278                         # Add the edit section span
2279                         if( $rightClickHack ) {
2280                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
2281                         }
2282
2283                         # give headline the correct <h#> tag
2284                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
2285
2286                         $headlineCount++;
2287                 }
2288
2289                 if( $doShowToc ) {
2290                         $toclines = $headlineCount;
2291                         $toc .= $sk->tocUnindent( $toclevel );
2292                         $toc = $sk->tocTable( $toc );
2293                 }
2294
2295                 # split up and insert constructed headlines
2296
2297                 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
2298                 $i = 0;
2299
2300                 foreach( $blocks as $block ) {
2301                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
2302                                 # This is the [edit] link that appears for the top block of text when
2303                                 # section editing is enabled
2304
2305                                 # Disabled because it broke block formatting
2306                                 # For example, a bullet point in the top line
2307                                 # $full .= $sk->editSectionLink(0);
2308                         }
2309                         $full .= $block;
2310                         if( $doShowToc && !$i && $isMain && !$forceTocHere) {
2311                         # Top anchor now in skin
2312                                 $full = $full.$toc;
2313                         }
2314
2315                         if( !empty( $head[$i] ) ) {
2316                                 $full .= $head[$i];
2317                         }
2318                         $i++;
2319                 }
2320                 if($forceTocHere) {
2321                         $mw =& MagicWord::get( MAG_TOC );
2322                         return $mw->replace( $toc, $full );
2323                 } else {
2324                         return $full;
2325                 }
2326         }
2327
2328         # Return an HTML link for the "ISBN 123456" text
# Scans $text for the literal marker "ISBN " followed by an identifier made
# of digits, hyphens and upper-case letters, and wraps every such identifier
# in a link to Special:Booksources. Markers that are not followed by a usable
# number are copied through unchanged.
#
# $text   - text fragment to scan
# returns - the fragment with ISBN references replaced by <a> elements
/* private */ function magicISBN( $text ) {
    $fname = 'Parser::magicISBN';
    wfProfileIn( $fname );

    # The marker is a plain string, so explode() is used instead of the
    # regex-based split(). The padding space guarantees a non-empty first
    # chunk even when $text itself starts with the marker.
    $a = explode( 'ISBN ', " $text" );
    if ( count( $a ) < 2 ) {
        wfProfileOut( $fname );
        return $text;
    }
    $text = (string)substr( array_shift( $a ), 1 );
    $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

    foreach ( $a as $x ) {
        # strspn() measures the leading run of spaces / identifier characters
        # without ever indexing past the end of an empty chunk.
        $blankLen = strspn( $x, ' ' );
        $blank = str_repeat( ' ', $blankLen );
        $x = (string)substr( $x, $blankLen );

        $isbnLen = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $isbnLen );
        $x = (string)substr( $x, $isbnLen );

        # Hyphens are kept for display only; the query gets the bare number.
        $num = str_replace( '-', '', $isbn );

        if ( '' === $num ) {
            # Nothing usable followed the marker. Re-emit everything that was
            # consumed, including a hyphen-only run, so no input is lost.
            $text .= "ISBN $blank$isbn$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
            $text .= '<a href="' .
                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>";
            $text .= $x;
        }
    }
    wfProfileOut( $fname );
    return $text;
}
2368
2369         # Return an HTML link for the "GEO ..." text
# Works in two steps: first, coordinate pairs written as
#   DD&deg;MM'SS" North|South, DD&deg;MM'SS" East|West
# are rewritten into the "(GEO <lat>:<lon>)" notation; then every
# "GEO <lat>:<lon>" token is wrapped in a link to Special:Geo. Markers that
# are not followed by a "lat:lon" pair are copied through unchanged.
#
# $text   - text fragment to scan
# returns - the fragment with GEO references replaced by <a> elements
/* private */ function magicGEO( $text ) {
    $fname = 'Parser::magicGEO';
    wfProfileIn( $fname );

    # This conversion is only for the ~35000 U.S. Census Rambot pages...
    # Northern/eastern values get "+", southern/western values get "-".
    # The four hemisphere combinations are mutually exclusive, so the order
    # in which they are applied does not matter.
    $latSigns = array( 'North' => '+', 'South' => '-' );
    $lonSigns = array( 'East' => '+', 'West' => '-' );
    foreach ( $latSigns as $latName => $latSign ) {
        foreach ( $lonSigns as $lonName => $lonSign ) {
            $text = preg_replace(
                "/(\d+)&deg;(\d+)'(\d+)\" {$latName}, (\d+)&deg;(\d+)'(\d+)\" {$lonName}/",
                "(GEO {$latSign}\$1.\$2.\$3:{$lonSign}\$4.\$5.\$6)",
                $text
            );
        }
    }

    # The marker is a plain string, so explode() is used instead of the
    # regex-based split(). The padding space guarantees a non-empty first
    # chunk even when $text itself starts with the marker.
    $a = explode( 'GEO ', " $text" );
    if ( count( $a ) < 2 ) {
        wfProfileOut( $fname );
        return $text;
    }
    $text = (string)substr( array_shift( $a ), 1 );
    $valid = '0123456789.+-:';

    foreach ( $a as $x ) {
        # strspn() measures the leading run of spaces / coordinate characters
        # without ever indexing past the end of an empty chunk.
        $blankLen = strspn( $x, ' ' );
        $blank = str_repeat( ' ', $blankLen );
        $x = (string)substr( $x, $blankLen );

        $geoLen = strspn( $x, $valid );
        $geo = (string)substr( $x, 0, $geoLen );
        $x = (string)substr( $x, $geoLen );

        # Plus signs are kept for display only; minus signs stay in the query.
        $num = str_replace( '+', '', $geo );

        if ( '' === $num || strpos( $num, ':' ) === false ) {
            # Not a "lat:lon" pair. Re-emit everything that was consumed so
            # that plain text such as "GEO 2000 report" survives intact.
            $text .= "GEO $blank$geo$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
            $text .= '<a href="' .
                $titleObj->escapeLocalUrl( "coordinates={$num}" ) .
                "\" class=\"internal\">GEO $geo</a>";
            $text .= $x;
        }
    }
    wfProfileOut( $fname );
    return $text;
}
2416
2417         # Return an HTML link for the "RFC 1234" text
# Scans $text for the literal marker "RFC " followed by a decimal number and
# wraps each one in an external link. The link target is the 'rfcurl' message
# with $1 replaced by the number. Markers that are not followed by digits are
# copied through unchanged.
#
# $text   - text fragment to scan
# returns - the fragment with RFC references replaced by <a> elements
/* private */ function magicRFC( $text ) {
    # The marker is a plain string, so explode() is used instead of the
    # regex-based split(). The padding space guarantees a non-empty first
    # chunk even when $text itself starts with the marker.
    $a = explode( 'RFC ', ' ' . $text );
    if ( count( $a ) < 2 ) {
        return $text;
    }
    $text = (string)substr( array_shift( $a ), 1 );
    $valid = '0123456789';

    foreach ( $a as $x ) {
        # strspn() measures the leading run of spaces / digits without ever
        # indexing past the end of an empty chunk (e.g. text ending in "RFC ").
        $blankLen = strspn( $x, ' ' );
        $blank = str_repeat( ' ', $blankLen );
        $x = (string)substr( $x, $blankLen );

        $rfcLen = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $rfcLen );
        $x = (string)substr( $x, $rfcLen );

        if ( '' === $rfc ) {
            # No number after the marker: restore the original text.
            $text .= "RFC $blank$x";
        } else {
            $url = wfmsg( 'rfcurl' );
            $url = str_replace( '$1', $rfc, $url );
            $sk =& $this->mOptions->getSkin();
            $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
            $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
        }
    }
    return $text;
}
2449
# Entry point that produces altered wiki markup (output type OT_WIKI).
#
# $text       - wikitext to transform
# $title      - title object, stored by reference in mTitle
# $user       - user object, handed on to pstPass2()
# $options    - options object, stored in mOptions
# $clearState - when true, clearState() is called before processing
# returns     - the transformed wikitext
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
    $this->mOutputType = OT_WIKI;
    $this->mTitle =& $title;
    $this->mOptions = $options;

    if ( $clearState ) {
        $this->clearState();
    }

    # Normalise Windows line endings
    $text = str_replace( "\r\n", "\n", $text );

    // A regex-based normalisation pass used to follow here; it is disabled:
    /*
    $pairs = array(
            "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
            "/<br *?>/i" => "<br />",
    );
    $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
    */

    # Strip, run the second pass on the remainder, then put everything back
    $stripState = false;
    $stripped = $this->strip( $text, $stripState, false );
    $transformed = $this->pstPass2( $stripped, $user );
    $restored = $this->unstrip( $transformed, $stripState );
    return $this->unstripNoWiki( $restored, $stripState );
}
2478
# Second pass of the pre-save transform. In order it:
#   1. runs replaceVariables() on the text,
#   2. expands ~~~, ~~~~ and ~~~~~ into user link and/or timestamp,
#   3. completes the "pipe trick" link forms [[|name]] and [[name (context)|]],
#   4. trims trailing whitespace and removes the MAG_END magic word.
#
# $text   - wikitext to transform
# $user   - user object; getName() and getOption( 'nickname' ) are read
# returns - the transformed wikitext
/* private */ function pstPass2( $text, &$user ) {
    global $wgLang, $wgLocaltimezone, $wgCurParser;

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( 'nickname' );
    if ( '' == $k ) {
        $k = $n;
    }

    $overrideTz = isset( $wgLocaltimezone );
    if ( $overrideTz ) {
        $oldtz = getenv( 'TZ' );
        putenv( 'TZ=' . $wgLocaltimezone );
    }
    /* Note: this is an ugly timezone hack for the European wikis */
    $d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
        ' (' . date( 'T' ) . ')';
    if ( $overrideTz ) {
        # Restore the value saved above (the variable name used to be
        # misspelled here, so the override was never undone).
        putenv( 'TZ=' . $oldtz );
    }

    # The tilde markers are fixed strings, so str_replace() is used: unlike a
    # preg_replace() replacement it does not interpret "$1" or "\1" inside
    # the user-supplied nickname as backreferences. Longest marker first.
    $userLink = '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]";
    $text = str_replace( '~~~~~', $d, $text );
    $text = str_replace( '~~~~', "$userLink $d", $text );
    $text = str_replace( '~~~', $userLink, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
    $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
    $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                            # [[ns:page (cont)|]]

    # The context is the parenthesised suffix of the current page title, if any
    $context = '';
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }
    $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
    $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
    $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

    if ( '' == $context ) {
        $text = preg_replace( $p2, '[[\\1]]', $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    /*
    $mw =& MagicWord::get( MAG_SUBST );
    $wgCurParser = $this->fork();
    $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
    $this->merge( $wgCurParser );
    */

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
2546
2547         # Set up some variables which are usually set up in parse()
2548         # so that an external function can call some class members with confidence
# $title      - title object, stored by reference in mTitle
# $options    - options object, stored in mOptions
# $outputType - value stored in mOutputType
# $clearState - when true (the default), clearState() is called as well
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
    $this->mOutputType = $outputType;
    $this->mOptions = $options;
    $this->mTitle =& $title;

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
2557
# Runs replaceVariables() over $text with the output type set to OT_MSG and
# the title taken from the global $wgTitle.
#
# A static flag marks a call in progress; a nested call made while the flag
# is set gets $text back untouched, which guards against infinite recursion.
#
# $text    - text to transform
# $options - options object, stored in mOptions
# returns  - the transformed text, or $text unchanged for a nested call
function transformMsg( $text, $options ) {
    global $wgTitle;
    static $executing = false;

    if ( !$executing ) {
        $executing = true;

        $this->mTitle = $wgTitle;
        $this->mOptions = $options;
        $this->mOutputType = OT_MSG;
        $this->clearState();
        $text = $this->replaceVariables( $text );

        $executing = false;
    }

    return $text;
}
2577
2578         # Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2579         # Callback will be called with the text within
2580         # Transform and return the text within
# $tag      - tag name used as the key in mTagHooks
# $callback - callback stored for that tag
# returns   - the callback previously stored for $tag, or null if none
function setHook( $tag, $callback ) {
    $previous = null;
    if ( isset( $this->mTagHooks[$tag] ) ) {
        $previous = $this->mTagHooks[$tag];
    }
    $this->mTagHooks[$tag] = $callback;
    return $previous;
}
2586 }
2587
# Value holder with five fields: the output text, the language links, the
# category links, the "contains old magic" flag and a cache timestamp.
class ParserOutput
{
    var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
    var $mCacheTime; # Used in ParserCache

    # $text             - stored in mText
    # $languageLinks    - array stored in mLanguageLinks
    # $categoryLinks    - array stored in mCategoryLinks
    # $containsOldMagic - flag stored in mContainsOldMagic
    # The cache time always starts out as an empty string.
    function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
        $containsOldMagic = false )
    {
        $this->mText = $text;
        $this->mLanguageLinks = $languageLinks;
        $this->mCategoryLinks = $categoryLinks;
        $this->mContainsOldMagic = $containsOldMagic;
        $this->mCacheTime = "";
    }

    # Accessors
    function getText() { return $this->mText; }
    function getLanguageLinks() { return $this->mLanguageLinks; }
    function getCategoryLinks() { return $this->mCategoryLinks; }
    function getCacheTime() { return $this->mCacheTime; }
    function containsOldMagic() { return $this->mContainsOldMagic; }

    # Mutators; each one returns whatever wfSetVar() returns
    function setText( $text ) { return wfSetVar( $this->mText, $text ); }
    function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
    function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
    function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
    function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

    # Folds another ParserOutput into this one: language links and category
    # links are appended, the old-magic flags are OR-ed together. The text
    # and the cache time of this object are left alone.
    function merge( $other ) {
        $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
        # Category links come from $other's category links (this used to
        # append this object's own language links by mistake).
        $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
        $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
    }

}
2621
# Bundle of settings: five site-wide switches copied from globals, plus the
# skin and five per-user preferences read from a user object.
class ParserOptions
{
    # All variables are private

    # Site-wide switches
    var $mUseTeX;                    # Use texvc to expand <math> tags
    var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
    var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
    var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
    var $mAllowExternalImages;       # Allow external images inline

    # Per-user values
    var $mSkin;                      # Reference to the preferred skin
    var $mDateFormat;                # Date format index
    var $mEditSection;               # Create "edit section" links
    var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
    var $mNumberHeadings;            # Automatically number headings
    var $mShowToc;                   # Show table of contents

    # Getters
    function getUseTeX() {
        return $this->mUseTeX;
    }
    function getUseCategoryMagic() {
        return $this->mUseCategoryMagic;
    }
    function getUseDynamicDates() {
        return $this->mUseDynamicDates;
    }
    function getInterwikiMagic() {
        return $this->mInterwikiMagic;
    }
    function getAllowExternalImages() {
        return $this->mAllowExternalImages;
    }
    function getSkin() {
        return $this->mSkin;
    }
    function getDateFormat() {
        return $this->mDateFormat;
    }
    function getEditSection() {
        return $this->mEditSection;
    }
    function getEditSectionOnRightClick() {
        return $this->mEditSectionOnRightClick;
    }
    function getNumberHeadings() {
        return $this->mNumberHeadings;
    }
    function getShowToc() {
        return $this->mShowToc;
    }

    # Setters; each one returns whatever wfSetVar() returns
    function setUseTeX( $x ) {
        return wfSetVar( $this->mUseTeX, $x );
    }
    function setUseCategoryMagic( $x ) {
        return wfSetVar( $this->mUseCategoryMagic, $x );
    }
    function setUseDynamicDates( $x ) {
        return wfSetVar( $this->mUseDynamicDates, $x );
    }
    function setInterwikiMagic( $x ) {
        return wfSetVar( $this->mInterwikiMagic, $x );
    }
    function setAllowExternalImages( $x ) {
        return wfSetVar( $this->mAllowExternalImages, $x );
    }
    function setDateFormat( $x ) {
        return wfSetVar( $this->mDateFormat, $x );
    }
    function setEditSection( $x ) {
        return wfSetVar( $this->mEditSection, $x );
    }
    function setEditSectionOnRightClick( $x ) {
        return wfSetVar( $this->mEditSectionOnRightClick, $x );
    }
    function setNumberHeadings( $x ) {
        return wfSetVar( $this->mNumberHeadings, $x );
    }
    function setShowToc( $x ) {
        return wfSetVar( $this->mShowToc, $x );
    }

    # The skin is bound by reference and nothing is returned
    function setSkin( &$x ) {
        $this->mSkin =& $x;
    }

    # Get parser options: builds a new instance and initialises it from $user
    /* static */ function newFromUser( &$user ) {
        $options = new ParserOptions;
        $options->initialiseFromUser( $user );
        return $options;
    }

    # Get user options: fills every field of this object.
    # When $userInput is empty, a fresh User object marked as loaded is used
    # in its place.
    function initialiseFromUser( &$userInput ) {
        global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

        $fname = "ParserOptions::initialiseFromUser";
        wfProfileIn( $fname );

        if ( $userInput ) {
            $user =& $userInput;
        } else {
            $user = new User;
            $user->setLoaded( true );
        }

        # Site-wide switches come straight from the globals
        $this->mUseTeX = $wgUseTeX;
        $this->mUseCategoryMagic = $wgUseCategoryMagic;
        $this->mUseDynamicDates = $wgUseDynamicDates;
        $this->mInterwikiMagic = $wgInterwikiMagic;
        $this->mAllowExternalImages = $wgAllowExternalImages;

        # Fetching the skin is profiled separately
        wfProfileIn( "$fname-skin" );
        $this->mSkin =& $user->getSkin();
        wfProfileOut( "$fname-skin" );

        # Remaining values are user preferences
        $this->mDateFormat = $user->getOption( 'date' );
        $this->mEditSection = $user->getOption( 'editsection' );
        $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
        $this->mNumberHeadings = $user->getOption( 'numberheadings' );
        $this->mShowToc = $user->getOption( 'showtoc' );

        wfProfileOut( $fname );
    }

}
2700
2701 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches ) {
    # Hand the match array to the object held in the global $wgCurParser
    return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2706
function wfArgSubstitution( $matches ) {
    # Hand the match array to the object held in the global $wgCurParser
    return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2711
function wfVariableSubstitution( $matches ) {
    # Hand the match array to the object held in the global $wgCurParser
    return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2716
2717 # Return the total number of articles
function wfNumberOfArticles() {
    # wfLoadSiteStats() fills the global when necessary; it is read afterwards
    wfLoadSiteStats();
    return $GLOBALS['wgNumberOfArticles'];
}
2724
2725 # Get various statistics from the database
# Fills $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles from row 1 of
# the site_stats table. The globals are left untouched when the query
# returns false.
/* private */ function wfLoadSiteStats() {
    global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

    # The query is issued only while the article count equals -1
    # (presumably the "not loaded yet" marker; confirm where it is initialised)
    if ( -1 == $wgNumberOfArticles ) {
        $dbr =& wfGetDB( DB_SLAVE );
        $columns = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
        $conditions = array( 'ss_row_id' => 1 );
        $row = $dbr->getArray( 'site_stats', $columns, $conditions, 'wfLoadSiteStats' );

        if ( $row !== false ) {
            $wgTotalViews = $row->ss_total_views;
            $wgTotalEdits = $row->ss_total_edits;
            $wgNumberOfArticles = $row->ss_good_articles;
        }
    }
}
2745
# Replaces double quotes and angle brackets with their HTML entities.
# Ampersands are deliberately left alone.
function wfEscapeHTMLTagsOnly( $in ) {
    $entities = array(
        '"' => '&quot;',
        '>' => '&gt;',
        '<' => '&lt;',
    );
    return str_replace( array_keys( $entities ), array_values( $entities ), $in );
}
2752
2753
2754 ?>