restore section folding

[lhc/web/wiklou.git] / includes / Parser.php
diff --git a/includes/Parser.php b/includes/Parser.php

index 5594f9e..dfba14d 100644 (file)
--- a/includes/Parser.php
+++ b/includes/Parser.php
@@ -1,25 +1,28 @@
  <?php
  
-include_once('Tokenizer.php');
+require_once('Tokenizer.php');
  
  if( $GLOBALS['wgUseWikiHiero'] ){
-       include_once('wikihiero.php');
+       require_once('extensions/wikihiero/wikihiero.php');
+}
+if( $GLOBALS['wgUseTimeline'] ){
+       require_once('extensions/timeline/Timeline.php');
  }
  
-# PHP Parser 
-# 
+# PHP Parser
+#
  # Processes wiki markup
  #
-# There are two main entry points into the Parser class: parse() and preSaveTransform(). 
+# There are two main entry points into the Parser class: parse() and preSaveTransform().
  # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  #
-# Globals used: 
+# Globals used:
  #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  #
  # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  #
  #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
-#               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*, 
+#               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  #               $wgLocaltimezone
  #
  #      * only within ParserOptions
@@ -29,8 +32,8 @@ if( $GLOBALS['wgUseWikiHiero'] ){
  #    Variable substitution O(N^2) attack
  #-----------------------------------------
  # Without countermeasures, it would be possible to attack the parser by saving a page
-# filled with a large number of inclusions of large pages. The size of the generated 
-# page would be proportional to the square of the input size. Hence, we limit the number 
+# filled with a large number of inclusions of large pages. The size of the generated
+# page would be proportional to the square of the input size. Hence, we limit the number
  # of inclusions of any given page, thus bringing any attack back to O(N).
  #
  
@@ -41,14 +44,20 @@ define( "OT_HTML", 1 );
  define( "OT_WIKI", 2 );
  define( "OT_MSG", 3 );
  
+# string parameter for extractTags which will cause it
+# to strip HTML comments in addition to regular
+# <XML>-style tags. This should not be anything we
+# may want to use in wikisyntax
+define( "STRIP_COMMENTS", "HTMLCommentStrip" );
+
  # prefix for escaping, used in two functions at least
  define( "UNIQ_PREFIX", "NaodW29");
  
  class Parser
  {
         # Cleared with clearState():
-       var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array();
-       var $mVariables, $mIncludeCount, $mArgStack;
+       var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
+       var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  
         # Temporary:
         var $mOptions, $mTitle, $mOutputType;
@@ -69,7 +78,7 @@ class Parser
                 $this->mStripState = array();
                 $this->mArgStack = array();
         }
-       
+
         # First pass--just handle <nowiki> sections, pass the rest off
         # to internalParse() which does all the real work.
         #
@@ -77,35 +86,46 @@ class Parser
         #
         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
         {
+               global $wgUseTidy;
                 $fname = "Parser::parse";
                 wfProfileIn( $fname );
  
                 if ( $clearState ) {
                         $this->clearState();
                 }
-               
+
                 $this->mOptions = $options;
                 $this->mTitle =& $title;
                 $this->mOutputType = OT_HTML;
-               
+
                 $stripState = NULL;
                 $text = $this->strip( $text, $this->mStripState );
                 $text = $this->internalParse( $text, $linestart );
-               # only once and next-to-last
-               $text = $this->doBlockLevels( $text, $linestart );              
                 $text = $this->unstrip( $text, $this->mStripState );
-               # Clean up special characters, only run once and last
-               $fixtags = array(
-                       "/<hr *>/i" => '<hr/>',
-                       "/<br *>/i" => '<br/>', 
-                       "/<center *>/i"=>'<span style="text-align:center;">',
-                       "/<\\/center *>/i" => '</span>',
-                       # Clean up spare ampersands; note that we probably ought to be
-                       # more careful about named entities.
-                       '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
-               );
-               $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
-               
+               # Clean up special characters, only run once, next-to-last before doBlockLevels
+               if(!$wgUseTidy) {
+                       $fixtags = array(
+                               "/<hr *>/i" => '<hr/>',
+                               "/<br *>/i" => '<br/>',
+                               "/<center *>/i"=>'<div class="center">',
+                               "/<\\/center *>/i" => '</div>',
+                               # Clean up spare ampersands; note that we probably ought to be
+                               # more careful about named entities.
+                               '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
+                       );
+                       $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+               } else {
+                       $fixtags = array(
+                               "/<center *>/i"=>'<div class="center">',
+                               "/<\\/center *>/i" => '</div>'
+                       );
+                       $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+               }
+               # only once and last
+               $text = $this->doBlockLevels( $text, $linestart );
+               if($wgUseTidy) {
+                       $text = $this->tidy($text);
+               }
                 $this->mOutput->setText( $text );
                 wfProfileOut( $fname );
                 return $this->mOutput;
@@ -116,13 +136,16 @@ class Parser
                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
         }
  
-       # Replaces all occurences of <$tag>content</$tag> in the text
+       # Replaces all occurrences of <$tag>content</$tag> in the text
         # with a random marker and returns the new text. the output parameter
         # $content will be an associative array filled with data on the form
         # $unique_marker => content.
  
         # If $content is already set, the additional entries will be appended
  
+       # If $tag is set to STRIP_COMMENTS, the function will extract
+       # <!-- HTML comments -->
+
         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
                 if ( !$content ) {
@@ -132,12 +155,20 @@ class Parser
                 $stripped = "";
  
                 while ( "" != $text ) {
-                       $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
+                       if($tag==STRIP_COMMENTS) {
+                               $p = preg_split( "/<!--/i", $text, 2 );
+                       } else {
+                               $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
+                       }
                         $stripped .= $p[0];
-                       if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { 
-                               $text = ""; 
+                       if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
+                               $text = "";
                         } else {
-                               $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
+                               if($tag==STRIP_COMMENTS) {
+                                       $q = preg_split( "/-->/i", $p[1], 2 );
+                               } else {
+                                       $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
+                               }
                                 $marker = $rnd . sprintf("%08X", $n++);
                                 $content[$marker] = $q[0];
                                 $stripped .= $marker;
@@ -145,20 +176,25 @@ class Parser
                         }
                 }
                 return $stripped;
-       }       
+       }
  
-       # Strips <nowiki>, <pre> and <math>
+       # Strips and renders <nowiki>, <pre>, <math>, <hiero>
+       # When the output type is OT_HTML, <hiero> and <math> content is rendered here (if enabled)
         # Returns the text, and fills an array with data needed in unstrip()
         # If the $state is already a valid strip state, it adds to the state
-       #
-       function strip( $text, &$state )
+
+       # When $stripcomments is set, HTML comments <!-- like this -->
+       # will be stripped in addition to other tags. This is important
+       # for section editing, where these comments cause confusion when
+       # counting the sections in the wikisource
+       function strip( $text, &$state, $stripcomments = false )
         {
                 $render = ($this->mOutputType == OT_HTML);
-               $nowiki_content = array(); 
+               $nowiki_content = array();
                 $hiero_content = array();
                 $math_content = array();
                 $pre_content = array();
-               $item_content = array();
+               $comment_content = array();
  
                 # Replace any instances of the placeholders
                 $uniq_prefix = UNIQ_PREFIX;
@@ -173,25 +209,25 @@ class Parser
                         }
                 }
  
-               if( $GLOBALS['wgUseWikiHiero'] ){
-                       $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
-                       foreach( $hiero_content as $marker => $content ){
-                               if( $render ){
-                                       $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
-                               } else {
-                                       $hiero_content[$marker] = "<hiero>$content</hiero>";
-                               }
+               $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
+               foreach( $hiero_content as $marker => $content ){
+                       if( $render && $GLOBALS['wgUseWikiHiero']){
+                               $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
+                       } else {
+                               $hiero_content[$marker] = "<hiero>$content</hiero>";
                         }
                 }
  
-               if( $this->mOptions->getUseTeX() ){
-                       $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
-                       foreach( $math_content as $marker => $content ){
-                               if( $render ){
+               $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
+               foreach( $math_content as $marker => $content ){
+                       if( $render ) {
+                               if( $this->mOptions->getUseTeX() ) {
                                         $math_content[$marker] = renderMath( $content );
                                 } else {
-                                       $math_content[$marker] = "<math>$content</math>";
+                                       $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
                                 }
+                       } else {
+                               $math_content[$marker] = "<math>$content</math>";
                         }
                 }
  
@@ -203,20 +239,27 @@ class Parser
                                 $pre_content[$marker] = "<pre>$content</pre>";
                         }
                 }
-               
+               if($stripcomments) {
+                       $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
+                       foreach( $comment_content as $marker => $content ){
+                               $comment_content[$marker] = "<!--$content-->";
+                       }
+               }
+
                 # Merge state with the pre-existing state, if there is one
                 if ( $state ) {
                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
                         $state['hiero'] = $state['hiero'] + $hiero_content;
                         $state['math'] = $state['math'] + $math_content;
                         $state['pre'] = $state['pre'] + $pre_content;
+                       $state['comment'] = $state['comment'] + $comment_content;
                 } else {
-                       $state = array( 
+                       $state = array(
                           'nowiki' => $nowiki_content,
                           'hiero' => $hiero_content,
-                         'math' => $math_content, 
-                         'pre' => $pre_content, 
-                         'item' => $item_content
+                         'math' => $math_content,
+                         'pre' => $pre_content,
+                         'comment' => $comment_content
                         );
                 }
                 return $text;
@@ -226,15 +269,15 @@ class Parser
         {
                 # Must expand in reverse order, otherwise nested tags will be corrupted
                 $contentDict = end( $state );
-               for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) { 
+               for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
                                 $text = str_replace( key( $contentDict ), $content, $text );
                         }
                 }
-               
+
                 return $text;
         }
-       
+
         # Add an item to the strip state
         # Returns the unique tag which must be inserted into the stripped text
         # The tag will be replaced with the original text in unstrip()
@@ -243,64 +286,65 @@ class Parser
         {
                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
                 if ( !$state ) {
-                       $state = array( 
+                       $state = array(
                           'nowiki' => array(),
                           'hiero' => array(),
                           'math' => array(),
-                         'pre' => array(),
-                         'item' => array()
+                         'pre' => array()
                         );
                 }
                 $state['item'][$rnd] = $text;
                 return $rnd;
         }
-               
+
+       # This method generates the list of subcategories and pages for a category
         function categoryMagic ()
         {
                 global $wgLang , $wgUser ;
-               if ( !$this->mOptions->getUseCategoryMagic() ) return ;
-               $id = $this->mTitle->getArticleID() ;
-               $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
-               $ti = $this->mTitle->getText() ;
-               $ti = explode ( ":" , $ti , 2 ) ;
-               if ( $cat != $ti[0] ) return "" ;
-               $r = '<br style="clear:both;"/>\n';
+               if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
  
-               $articles = array() ;
-               $parents = array () ;
-               $children = array() ;
+               $cns = Namespace::getCategory() ;
+               if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
+
+               $r = "<br style=\"clear:both;\"/>\n";
  
  
-#              $sk =& $this->mGetSkin();
                 $sk =& $wgUser->getSkin() ;
  
+               $articles = array() ;
+               $children = array() ;
                 $data = array () ;
-               $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
-               $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
-
-               $res = wfQuery ( $sql1, DB_READ ) ;
-               while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
-
-               $res = wfQuery ( $sql2, DB_READ ) ;
-               while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
+               $id = $this->mTitle->getArticleID() ;
  
+               # For existing categories
+               if( $id ) {
+                       $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
+                       $res = wfQuery ( $sql, DB_READ ) ;
+                       while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
+               } else {
+                       # For non-existing categories
+                       $t = wfStrencode( $this->mTitle->getPrefixedDBKey() );
+                       $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ;
+                       $res = wfQuery ( $sql, DB_READ ) ;
+                       while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
+               }
  
+               # For all pages that link to this category
                 foreach ( $data AS $x )
                 {
                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
                         if ( $t != "" ) $t .= ":" ;
                         $t .= $x->cur_title ;
  
-                       $y = explode ( ":" , $t , 2 ) ;
-                       if ( count ( $y ) == 2 && $y[0] == $cat ) {
-                               array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
+                       if ( $x->cur_namespace == $cns ) {
+                               array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
                         } else {
-                               array_push ( $articles , $sk->makeLink ( $t ) ) ;
+                               array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
                         }
                 }
                 wfFreeResult ( $res ) ;
  
-               # Children
+               # Showing subcategories
                 if ( count ( $children ) > 0 )
                 {
                         asort ( $children ) ;
@@ -308,11 +352,12 @@ class Parser
                         $r .= implode ( ", " , $children ) ;
                 }
  
-               # Articles
+               # Showing pages in this category
                 if ( count ( $articles ) > 0 )
                 {
+                       $ti = $this->mTitle->getText() ;
                         asort ( $articles ) ;
-                       $h =  wfMsg( "category_header", $ti[1] );
+                       $h =  wfMsg( "category_header", $ti );
                         $r .= "<h2>{$h}</h2>\n" ;
                         $r .= implode ( ", " , $articles ) ;
                 }
@@ -342,7 +387,7 @@ class Parser
         {
                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
                 $htmlattrs = $this->getHTMLattrs() ;
-         
+
                 # Strip non-approved attributes from the tag
                 $t = preg_replace(
                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
@@ -351,7 +396,7 @@ class Parser
                 # Strip javascript "expression" from stylesheets. Brute force approach:
                 # If anythin offensive is found, all attributes of the HTML tag are dropped
  
-               if( preg_match( 
+               if( preg_match(
                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
                         wfMungeToUtf8( $t ) ) )
                 {
@@ -361,6 +406,47 @@ class Parser
                 return trim ( $t ) ;
         }
  
+       /* Interface with HTML Tidy; parse() calls this when $wgUseTidy is set.
+        * Wraps $text in a minimal XHTML document, pipes it to $wgTidyBin and returns
+        * what tidy wrote to stdout. If nothing came back, returns an error heading
+        * plus the escaped, wrapped input. Encoding flags go into a local copy of
+        * $wgTidyOpts: appending to the global would grow the options on every call. */
+       function tidy ( $text ) {
+               global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
+               global $wgInputEncoding, $wgOutputEncoding;
+               $cleansource = '';
+               # Name the encoding only when input and output encodings match, else use -raw
+               $encflags = array( 'ISO-8859-1' => ' -latin1', 'UTF-8' => ' -utf8' );
+               $enc = strtoupper( $wgOutputEncoding );
+               $opts = $wgTidyOpts . ' -raw';
+               if ( isset( $encflags[$enc] ) && $wgInputEncoding == $wgOutputEncoding ) {
+                       $opts = $wgTidyOpts . $encflags[$enc];
+               }
+
+               $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
+' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
+'<head><title>test</title></head><body>'.$text.'</body></html>';
+               $descriptorspec = array(
+                       0 => array("pipe", "r"),
+                       1 => array("pipe", "w"),
+                       2 => array("file", "/dev/null", "a")
+               );
+               $process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
+               if (is_resource($process)) {
+                       fwrite($pipes[0], $text);
+                       fclose($pipes[0]);
+                       while (!feof($pipes[1])) {
+                               $cleansource .= fgets($pipes[1], 1024);
+                       }
+                       fclose($pipes[1]);
+                       proc_close($process);
+               }
+               if( $cleansource == '' && $text != '') {
+                       return '<h2>'.wfMsg('seriousxhtmlerrors').'</h2><pre>'.htmlspecialchars($text).'</pre>';
+               }
+               return $cleansource;
+       }
+
         function doTableStuff ( $t )
         {
                 $t = explode ( "\n" , $t ) ;
@@ -370,7 +456,7 @@ class Parser
                         $ltr = array () ; # tr attributes
                         foreach ( $t AS $k => $x )
                         {
-                               $x = rtrim ( $x ) ;
+                               $x = trim ( $x ) ;
                                 $fc = substr ( $x , 0 , 1 ) ;
                                 if ( "{|" == substr ( $x , 0 , 2 ) )
                                 {
@@ -391,7 +477,7 @@ class Parser
                                         $t[$k] = $z ;
                                 }
                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
-                                               { 
+                                               {
                                                 $z = trim ( substr ( $x , 2 ) ) ;
                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
                                                 }*/
@@ -425,7 +511,7 @@ class Parser
                                         {
                                                 $z = "" ;
                                                 if ( $fc != "+" )
-                                               {  
+                                               {
                                                         $tra = array_pop ( $ltr ) ;
                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                                                         array_push ( $tr , true ) ;
@@ -461,7 +547,9 @@ class Parser
                 return $t ;
         }
  
-       function internalParse( $text, $linestart, $args = array() )
+       // set isMain=false if you call from a template etc. and don't want to do stuff
+       // like TOC insertion for that content
+       function internalParse( $text, $linestart, $args = array(), $isMain=true )
         {
                 $fname = "Parser::internalParse";
                 wfProfileIn( $fname );
@@ -479,12 +567,15 @@ class Parser
                 $text = $this->replaceExternalLinks( $text );
                 $text = $this->doTokenizedParser ( $text );
                 $text = $this->doTableStuff ( $text ) ;
-               $text = $this->formatHeadings( $text );
+               $text = $this->formatHeadings( $text, $isMain );
                 $sk =& $this->mOptions->getSkin();
                 $text = $sk->transformContent( $text );
  
-               $text .= $this->categoryMagic () ;
-               
+               if ( !isset ( $this->categoryMagicDone ) ) {
+                  $text .= $this->categoryMagic () ;
+                  $this->categoryMagicDone = true ;
+                  }
+
                 wfProfileOut( $fname );
                 return $text;
         }
@@ -518,18 +609,18 @@ class Parser
                 wfProfileOut( $fname );
                 return $text;
         }
-       
+
         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
         {
                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
-               
-               # this is  the list of separators that should be ignored if they 
+
+               # this is  the list of separators that should be ignored if they
                 # are the last character of an URL but that should be included
                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
                 # in this case, the last comma should not become part of the URL,
                 # but in "www.foo.com/123,2342,32.htm" it should.
-               $sep = ",;\.:";   
+               $sep = ",;\.:";
                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
                 $images = "gif|png|jpg|jpeg";
  
@@ -538,7 +629,7 @@ class Parser
                 # that the content of the string should be inserted there).
                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
                   "((?i){$images})([^{$uc}]|$)/";
-                 
+
                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
                 $sk =& $this->mOptions->getSkin();
  
@@ -568,7 +659,7 @@ class Parser
                         } else if ( preg_match( $e2, $line, $m ) ) {
                                 $link = "{$protocol}:{$m[1]}";
                                 $text = $m[2];
-                               $trail = $m[3];                 
+                               $trail = $m[3];
                         } else {
                                 $s .= "[{$protocol}:" . $line;
                                 continue;
@@ -622,7 +713,7 @@ class Parser
                 }
                 return $s;
         }
-       
+
         /* private */ function handle5Quotes( &$state, $token )
         {
                 $s = "";
@@ -651,16 +742,17 @@ class Parser
         /* private */ function doTokenizedParser( $str )
         {
                 global $wgLang; # for language specific parser hook
+               global $wgUploadDirectory, $wgUseTimeline;
  
                 $tokenizer=Tokenizer::newFromString( $str );
                 $tokenStack = array();
-               
+
                 $s="";
                 $state["em"]      = FALSE;
                 $state["strong"]  = FALSE;
                 $tagIsOpen = FALSE;
                 $threeopen = false;
-               
+
                 # The tokenizer splits the text into tokens and returns them one by one.
                 # Every call to the tokenizer returns a new token.
                 while ( $token = $tokenizer->nextToken() )
@@ -671,6 +763,13 @@ class Parser
                                         # simple text with no further markup
                                         $txt = $token["text"];
                                         break;
+                               case "blank":
+                                       # Text that contains blanks that have to be converted to
+                                       # non-breakable spaces for French.
+                                       # U+202F NARROW NO-BREAK SPACE might be a better choice, but
+                                       # browser support for Unicode spacing is poor.
+                                       $txt = str_replace( " ", "&nbsp;", $token["text"] );
+                                       break;
                                 case "[[[":
                                         # remember the tag opened with 3 [
                                         $threeopen = true;
@@ -681,13 +780,13 @@ class Parser
                                         array_push( $tokenStack, $token );
                                         $txt="";
                                         break;
-                                       
+
                                 case "]]]":
                                 case "]]":
                                         # link close tag.
                                         # get text from stack, glue it together, and call the code to handle a
                                         # link
-                                       
+
                                         if ( count( $tokenStack ) == 0 )
                                         {
                                                 # stack empty. Found a ]] without an opening [[
@@ -702,16 +801,16 @@ class Parser
                                                         }
                                                         $lastToken = array_pop( $tokenStack );
                                                 }
-                                               
+
                                                 $txt = $linkText ."]]";
-                                               
+
                                                 if( isset( $lastToken["text"] ) ) {
                                                         $prefix = $lastToken["text"];
                                                 } else {
                                                         $prefix = "";
                                                 }
                                                 $nextToken = $tokenizer->previewToken();
-                                               if ( $nextToken["type"] == "text" ) 
+                                               if ( $nextToken["type"] == "text" )
                                                 {
                                                         # Preview just looks at it. Now we have to fetch it.
                                                         $nextToken = $tokenizer->nextToken();
@@ -719,13 +818,13 @@ class Parser
                                                 }
                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
  
-                                               # did the tag start with 3 [ ?                                          
+                                               # did the tag start with 3 [ ?
                                                 if($threeopen) {
                                                         # show the first as text
                                                         $txt = "[".$txt;
                                                         $threeopen=false;
                                                 }
-                               
+
                                         }
                                         $tagIsOpen = (count( $tokenStack ) != 0);
                                         break;
@@ -760,6 +859,15 @@ class Parser
                                                 $txt = $this->doMagicISBN( $tokenizer );
                                         }
                                         break;
+                               case "<timeline>":
+                                       if ( $wgUseTimeline && 
+                                            "" != ( $timelinesrc = $tokenizer->readAllUntil("&lt;/timeline&gt;") ) )
+                                       {
+                                               $txt = renderTimeline( $timelinesrc );
+                                       } else {
+                                               $txt=$token["text"];
+                                       }
+                                       break;
                                 default:
                                         # Call language specific Hook.
                                         $txt = $wgLang->processToken( $token, $tokenStack );
@@ -791,7 +899,7 @@ class Parser
                                         $txt = $lastToken["text"] . $txt;
                                 } else {
                                         $txt = $lastToken["type"] . $txt;
-                               }       
+                               }
                         }
                         $s .= $txt;
                 }
@@ -818,7 +926,7 @@ class Parser
                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
-               
+
  
                 # Special and Media are pseudo-namespaces; no pages actually exist in them
                 static $image = FALSE;
@@ -828,21 +936,21 @@ class Parser
                 if ( !$image ) { $image = Namespace::getImage(); }
                 if ( !$special ) { $special = Namespace::getSpecial(); }
                 if ( !$media ) { $media = Namespace::getMedia(); }
-               if ( !$category ) { $category = wfMsg ( "category" ) ; }
-               
+               if ( !$category ) { $category = Namespace::getCategory(); }
+
                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
  
                 wfProfileOut( "$fname-setup" );
                 $s = "";
-               
+
                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
                         $text = $m[2];
-                       $trail = $m[3];                         
+                       $trail = $m[3];
                 } else { # Invalid form; output directly
                         $s .= $prefix . "[[" . $line ;
                         return $s;
                 }
-               
+
                 /* Valid link forms:
                 Foobar -- normal
                 :Foobar -- override special treatment of prefix (images, language links)
@@ -853,7 +961,7 @@ class Parser
                 $noforce = ($c != ":");
                 if( $c == "/" ) { # subpage
                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
-                               $m[1]=substr($m[1],1,strlen($m[1])-2); 
+                               $m[1]=substr($m[1],1,strlen($m[1])-2);
                                 $noslash=$m[1];
                         } else {
                                 $noslash=substr($m[1],1);
@@ -861,7 +969,7 @@ class Parser
                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
                                 if( "" == $text ) {
-                                       $text= $m[1]; 
+                                       $text= $m[1];
                                 } # this might be changed for ugliness reasons
                         } else {
                                 $link = $noslash; # no subpage allowed, use standard link
@@ -884,6 +992,7 @@ class Parser
                 if( $noforce ) {
                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
+                               $s .= $prefix . $trail ;
                                 return (trim($s) == '')? '': $s;
                         }
                         if( $ns == $image ) {
@@ -891,30 +1000,22 @@ class Parser
                                 $wgLinkCache->addImageLinkObj( $nt );
                                 return $s;
                         }
+                       if ( $ns == $category ) {
+                               $t = $nt->getText() ;
+                               $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
+                               $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
+                               $this->mOutput->mCategoryLinks[] = $t ;
+                               $s .= $prefix . $trail ;
+                               return $s ;
+                       }
                 }
                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
                     ( strpos( $link, "#" ) == FALSE ) ) {
-                       $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
+                       # Self-links are handled specially; generally de-link and change to bold.
+                       $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
                         return $s;
                 }
  
-               # Category feature
-               $catns = strtoupper ( $nt->getDBkey () ) ;
-               $catns = explode ( ":" , $catns ) ;
-               if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
-               else $catns = "" ;
-               if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
-                       $t = explode ( ":" , $nt->getText() ) ;
-                       array_shift ( $t ) ;
-                       $t = implode ( ":" , $t ) ;
-                       $t = $wgLang->ucFirst ( $t ) ;
-                       $nnt = Title::newFromText ( $category.":".$t ) ;
-                       $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
-                       $this->mOutput->mCategoryLinks[] = $t ;
-                       $s .= $prefix . $trail ;
-                       return $s ;
-               }
-
                 if( $ns == $media ) {
                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
                         $wgLinkCache->addImageLinkObj( $nt );
@@ -937,6 +1038,7 @@ class Parser
                 if ( '' != $this->mLastSection ) {
                         $result = "</" . $this->mLastSection  . ">\n";
                 }
+               $this->mInPre = false;
                 $this->mLastSection = "";
                 return $result;
         }
@@ -1006,79 +1108,106 @@ class Parser
                 return $text."\n";
         }
  
-       /* private */ function doBlockLevels( $text, $linestart )
-       {
+       /* private */ function doBlockLevels( $text, $linestart ) {
                 $fname = "Parser::doBlockLevels";
                 wfProfileIn( $fname );
+               
                 # Parsing through the text line by line.  The main thing
                 # happening here is handling of block-level elements p, pre,
                 # and making lists from lines starting with * # : etc.
                 #
-               $a = explode( "\n", $text );
+               $textLines = explode( "\n", $text );
  
-               $lastPref = $text = $lastLine = '';
+               $lastPrefix = $output = $lastLine = '';
                 $this->mDTopen = $inBlockElem = false;
-               $npl = 0;
-               $pstack = false;
-
-               if ( ! $linestart ) { $text .= array_shift( $a ); }
-               foreach ( $a as $t ) {
-                       $oLine = $t;
-                       $opl = strlen( $lastPref );
-                       $npl = strspn( $t, "*#:;" );
-                       $pref = substr( $t, 0, $npl );
-                       $pref2 = str_replace( ";", ":", $pref );
-                       $t = substr( $t, $npl );
-                       // list generation
-                       if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
-                               $text .= $this->nextItem( substr( $pref, -1 ) );
-                               if ( $pstack ) { $pstack = false; }
+               $prefixLength = 0;
+               $paragraphStack = false;
+
+               if ( !$linestart ) {
+                       $output .= array_shift( $textLines );
+               }
+               foreach ( $textLines as $oLine ) {
+                       $lastPrefixLength = strlen( $lastPrefix );
+                       $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
+                       $preOpenMatch = preg_match("/<pre/i", $oLine );
+                       if (!$this->mInPre) {
+                               $this->mInPre = !empty($preOpenMatch);
+                       }
+                       if ( !$this->mInPre ) {
+                               # Multiple prefixes may abut each other for nested lists.
+                               $prefixLength = strspn( $oLine, "*#:;" );
+                               $pref = substr( $oLine, 0, $prefixLength );
+                               
+                               # Map ";" to ":" so a definition term compares equal to the stored list prefix
+                               $pref2 = str_replace( ";", ":", $pref );
+                               $t = substr( $oLine, $prefixLength );
+                       } else {
+                               # Don't interpret any other prefixes in preformatted text
+                               $prefixLength = 0;
+                               $pref = $pref2 = '';
+                               $t = $oLine;
+                       }
+
+                       # List generation
+                       if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
+                               # Same as the last item, so no need to deal with nesting or opening stuff
+                               $output .= $this->nextItem( substr( $pref, -1 ) );
+                               $paragraphStack = false;
  
                                 if ( ";" == substr( $pref, -1 ) ) {
-                                       $cpos = strpos( $t, ":" );
-                                       if ( false !== $cpos ) {
-                                               $term = substr( $t, 0, $cpos );
-                                               $text .= $term . $this->nextItem( ":" );
-                                               $t = substr( $t, $cpos + 1 );
+                                       # The one nasty exception: definition lists work like this:
+                                       # ; title : definition text
+                                       # So we check for : in the remainder text to split up the
+                                       # title and definition, without b0rking links.
+                                       # FIXME: This is not foolproof. Something better in Tokenizer might help.
+                                       if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
+                                               $term = $match[1];
+                                               $output .= $term . $this->nextItem( ":" );
+                                               $t = $match[2];
                                         }
                                 }
-                       } else if (0 != $npl || 0 != $opl) {
-                               $cpl = $this->getCommon( $pref, $lastPref );
-                               if ( $pstack ) { $pstack = false; }
-
-                               while ( $cpl < $opl ) {
-                                       $text .= $this->closeList( $lastPref{$opl-1} );
-                                       --$opl;
+                       } elseif( $prefixLength || $lastPrefixLength ) {
+                               # Either open or close a level...
+                               $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
+                               $paragraphStack = false;
+
+                               while( $commonPrefixLength < $lastPrefixLength ) {
+                                       $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
+                                       --$lastPrefixLength;
                                 }
-                               if ( $npl <= $cpl && $cpl > 0 ) {
-                                       $text .= $this->nextItem( $pref{$cpl-1} );
+                               if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
+                                       $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
                                 }
-                               while ( $npl > $cpl ) {
-                                       $char = substr( $pref, $cpl, 1 );
-                                       $text .= $this->openList( $char );
+                               while ( $prefixLength > $commonPrefixLength ) {
+                                       $char = substr( $pref, $commonPrefixLength, 1 );
+                                       $output .= $this->openList( $char );
  
                                         if ( ";" == $char ) {
-                                               $cpos = strpos( $t, ":" );
-                                               if ( ! ( false === $cpos ) ) {
-                                                       $term = substr( $t, 0, $cpos );
-                                                       $text .= $term . $this->nextItem( ":" );
-                                                       $t = substr( $t, $cpos + 1 );
+                                               # FIXME: This duplicates the code above
+                                               if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
+                                                       $term = $match[1];
+                                                       $output .= $term . $this->nextItem( ":" );
+                                                       $t = $match[2];
                                                 }
                                         }
-                                       ++$cpl;
+                                       ++$commonPrefixLength;
                                 }
-                               $lastPref = $pref2;
+                               $lastPrefix = $pref2;
                         }
-                       if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
+                       if( 0 == $prefixLength ) {
+                               # No prefix (not in list)--go to paragraph mode
                                 $uniq_prefix = UNIQ_PREFIX;
                                 // XXX: use a stack for nestable elements like span, table and div
                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
-                               $closematch = preg_match( 
+                               $closematch = preg_match(
                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
                                 if ( $openmatch or $closematch ) {
-                                       if ( $pstack ) { $pstack = false; }
-                                       $text .= $this->closeParagraph();
+                                       $paragraphStack = false;
+                                       $output .= $this->closeParagraph();
+                                       if($preOpenMatch and !$preCloseMatch) {
+                                               $this->mInPre = true;   
+                                       }
                                         if ( $closematch  ) {
                                                 $inBlockElem = false;
                                         } else {
@@ -1088,54 +1217,54 @@ class Parser
                                         if ( " " == $t{0} ) {
                                                 // pre
                                                 if ($this->mLastSection != 'pre') {
-                                                       $pstack = false;
-                                                       $text .= $this->closeParagraph().'<pre>';
+                                                       $paragraphStack = false;
+                                                       $output .= $this->closeParagraph().'<pre>';
                                                         $this->mLastSection = 'pre';
                                                 }
-                                       } else { 
+                                       } else {
                                                 // paragraph
                                                 if ( '' == trim($t) ) {
-                                                       if ( $pstack ) {
-                                                               $text .= $pstack.'<br/>';
-                                                               $pstack = false;
+                                                       if ( $paragraphStack ) {
+                                                               $output .= $paragraphStack.'<br/>';
+                                                               $paragraphStack = false;
                                                                 $this->mLastSection = 'p';
                                                         } else {
                                                                 if ($this->mLastSection != 'p' ) {
-                                                                       $text .= $this->closeParagraph();
+                                                                       $output .= $this->closeParagraph();
                                                                         $this->mLastSection = '';
-                                                                       $pstack = "<p>";
+                                                                       $paragraphStack = "<p>";
                                                                 } else {
-                                                                       $pstack = '</p><p>';
+                                                                       $paragraphStack = '</p><p>';
                                                                 }
                                                         }
                                                 } else {
-                                                       if ( $pstack ) {
-                                                               $text .= $pstack;
-                                                               $pstack = false;
+                                                       if ( $paragraphStack ) {
+                                                               $output .= $paragraphStack;
+                                                               $paragraphStack = false;
                                                                 $this->mLastSection = 'p';
                                                         } else if ($this->mLastSection != 'p') {
-                                                               $text .= $this->closeParagraph().'<p>';
+                                                               $output .= $this->closeParagraph().'<p>';
                                                                 $this->mLastSection = 'p';
                                                         }
                                                 }
                                         }
-                               } 
+                               }
                         }
-                       if ($pstack === false) {
-                               $text .= $t."\n";
+                       if ($paragraphStack === false) {
+                               $output .= $t."\n";
                         }
                 }
-               while ( $npl ) {
-                       $text .= $this->closeList( $pref2{$npl-1} );
-                       --$npl;
+               while ( $prefixLength ) {
+                       $output .= $this->closeList( $pref2{$prefixLength-1} );
+                       --$prefixLength;
                 }
                 if ( "" != $this->mLastSection ) {
-                       $text .= "</" . $this->mLastSection . ">";
+                       $output .= "</" . $this->mLastSection . ">";
                         $this->mLastSection = "";
                 }
-               
+
                 wfProfileOut( $fname );
-               return $text;
+               return $output;
         }
  
         function getVariableValue( $index ) {
@@ -1150,6 +1279,11 @@ class Parser
                                 return $wgLang->getMonthNameGen( date("n") );
                         case MAG_CURRENTDAY:
                                 return date("j");
+                       case MAG_PAGENAME:
+                               return $this->mTitle->getText();
+                       case MAG_NAMESPACE:
+                               # return Namespace::getCanonicalName($this->mTitle->getNamespace());
+                               return $wgLang->getNsText($this->mTitle->getNamespace()); // Patch  by Dori
                         case MAG_CURRENTDAYNAME:
                                 return $wgLang->getWeekdayName( date("w")+1 );
                         case MAG_CURRENTYEAR:
@@ -1183,23 +1317,23 @@ class Parser
  
                 $fname = "Parser::replaceVariables";
                 wfProfileIn( $fname );
-               
+
                 $bail = false;
                 if ( !$this->mVariables ) {
                         $this->initialiseVariables();
                 }
                 $titleChars = Title::legalChars();
                 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
-               
+
                 # This function is called recursively. To keep track of arguments we need a stack:
                 array_push( $this->mArgStack, $args );
  
                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
                 $GLOBALS['wgCurParser'] =& $this;
                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
-               
+
                 array_pop( $this->mArgStack );
-               
+
                 return $text;
         }
  
@@ -1210,11 +1344,11 @@ class Parser
                 $found = false;
                 $nowiki = false;
                 $title = NULL;
-               
+
                 # $newline is an optional newline character before the braces
                 # $part1 is the bit before the first |, and must contain only title characters
                 # $args is a list of arguments, starting from index 0, not including $part1
-               
+
                 $newline = $matches[1];
                 $part1 = $matches[2];
                 # If the third subpattern matched anything, it will start with |
@@ -1239,7 +1373,7 @@ class Parser
                         $text = $matches[0];
                         $found = true;
                 }
-               
+
                 # MSG, MSGNW and INT
                 if ( !$found ) {
                         # Check for MSGNW:
@@ -1251,7 +1385,7 @@ class Parser
                                 $mwMsg =& MagicWord::get( MAG_MSG );
                                 $mwMsg->matchStartAndRemove( $part1 );
                         }
-                       
+
                         # Check if it is an internal message
                         $mwInt =& MagicWord::get( MAG_INT );
                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
@@ -1261,7 +1395,7 @@ class Parser
                                 }
                         }
                 }
-       
+
                 # NS
                 if ( !$found ) {
                         # Check for NS: (namespace expansion)
@@ -1279,7 +1413,7 @@ class Parser
                                 }
                         }
                 }
-               
+
                 # LOCALURL and LOCALURLE
                 if ( !$found ) {
                         $mwLocal = MagicWord::get( MAG_LOCALURL );
@@ -1292,7 +1426,7 @@ class Parser
                         } else {
                                 $func = '';
                         }
-                       
+
                         if ( $func !== '' ) {
                                 $title = Title::newFromText( $part1 );
                                 if ( !is_null( $title ) ) {
@@ -1305,14 +1439,14 @@ class Parser
                                 }
                         }
                 }
-               
+
                 # Internal variables
                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
                         $text = $this->mVariables[$part1];
                         $found = true;
                         $this->mOutput->mContainsOldMagic = true;
-               } 
-               
+               }
+
                 # Arguments input from the caller
                 $inputArgs = end( $this->mArgStack );
                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
@@ -1332,9 +1466,9 @@ class Parser
                                         if ( $articleContent !== false ) {
                                                 $found = true;
                                                 $text = $articleContent;
-                                               
-                                       } 
-                               } 
+
+                                       }
+                               }
  
                                 # If the title is valid but undisplayable, make a link to it
                                 if ( $this->mOutputType == OT_HTML && !$found ) {
@@ -1343,7 +1477,7 @@ class Parser
                                 }
                         }
                 }
-               
+
                 # Recursive parsing, escaping and link table handling
                 # Only for HTML output
                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
@@ -1372,15 +1506,16 @@ class Parser
                         if ( !is_null( $title ) ) {
                                 $wgLinkCache->suspend();
                         }
-                       
+
                         # Run full parser on the included text
                         $text = $this->strip( $text, $this->mStripState );
-                       $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
-                       
-                       # Add the result to the strip state for re-inclusion after 
+                       $text = $this->internalParse( $text, (bool)$newline, $assocArgs, false );
+                       if(!empty($newline)) $text = "\n".$text;
+
+                       # Add the result to the strip state for re-inclusion after
                         # the rest of the processing
                         $text = $this->insertStripItem( $text, $this->mStripState );
-                       
+
                         # Resume the link cache and register the inclusion as a link
                         if ( !is_null( $title ) ) {
                                 $wgLinkCache->resume();
@@ -1391,7 +1526,7 @@ class Parser
                 if ( !$found ) {
                         return $matches[0];
                 } else {
-                       return $newline . $text;
+                       return $text;
                 }
         }
  
@@ -1412,98 +1547,123 @@ class Parser
         # Cleans up HTML, removes dangerous tags and attributes
         /* private */ function removeHTMLtags( $text )
         {
+               global $wgUseTidy, $wgUserHtml;
                 $fname = "Parser::removeHTMLtags";
                 wfProfileIn( $fname );
-               $htmlpairs = array( # Tags that must be closed
-                       "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
-                       "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
-                       "strike", "strong", "tt", "var", "div", "center",
-                       "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
-                       "ruby", "rt" , "rb" , "rp", "p"
-               );
-               $htmlsingle = array(
-                       "br", "hr", "li", "dt", "dd"
-               );
-               $htmlnest = array( # Tags that can be nested--??
-                       "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
-                       "dl", "font", "big", "small", "sub", "sup"
-               );
-               $tabletags = array( # Can only appear inside table
-                       "td", "th", "tr"
-               );
+               # $wgUserHtml switches the tag whitelists on; when it is false all four lists are empty, so every "<..." fragment below is escaped as text.
+               if( $wgUserHtml ) {
+                       $htmlpairs = array( # Tags that must be closed
+                               "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
+                               "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
+                               "strike", "strong", "tt", "var", "div", "center",
+                               "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
+                               "ruby", "rt" , "rb" , "rp", "p"
+                       );
+                       $htmlsingle = array(
+                               "br", "hr", "li", "dt", "dd"
+                       );
+                       $htmlnest = array( # Tags that may appear again while already open on the tag stack
+                               "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
+                               "dl", "font", "big", "small", "sub", "sup"
+                       );
+                       $tabletags = array( # Can only appear inside table
+                               "td", "th", "tr"
+                       );
+               } else {
+                       $htmlpairs = array();
+                       $htmlsingle = array();
+                       $htmlnest = array();
+                       $tabletags = array();
+               }
  
                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
  
-                $htmlattrs = $this->getHTMLattrs () ;
+               $htmlattrs = $this->getHTMLattrs () ;
  
                 # Remove HTML comments
-               $text = preg_replace( "/<!--.*-->/sU", "", $text );
+               $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text ); # NOTE(review): the pattern has a single capture group, so "$2" is presumably always empty and the match is simply deleted; the first alternative also removes the newline in front of a comment that sits alone on its line - confirm
  
                 $bits = explode( "<", $text );
                 $text = array_shift( $bits );
-               $tagstack = array(); $tablestack = array();
-
-               foreach ( $bits as $x ) {
-                       $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
-                       preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
-                         $x, $regs );
-                       list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
-                       error_reporting( $prev );
-
-                       $badtag = 0 ;
-                       if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
-                               # Check our stack
-                               if ( $slash ) {
-                                       # Closing a tag...
-                                       if ( ! in_array( $t, $htmlsingle ) &&
-                                         ( $ot = array_pop( $tagstack ) ) != $t ) {
-                                               array_push( $tagstack, $ot );
-                                               $badtag = 1;
+               if(!$wgUseTidy) {
+                       $tagstack = array(); $tablestack = array();
+                       foreach ( $bits as $x ) {
+                               $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
+                               preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
+                               $x, $regs );
+                               list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
+                               error_reporting( $prev );
+                               # $regs layout: [1] "/" when this is a closing tag, [2] tag name, [3] attribute text, [4] closing bracket, [5] text up to the next "<"
+                               $badtag = 0 ;
+                               if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+                                       # Check our stack
+                                       if ( $slash ) {
+                                               # Closing a tag... NOTE(review): the parenthesised "count() && $ot = array_pop()" yields a boolean, and comparing a boolean with the non-empty tag name $t looks unable to report a mismatch while the stack is non-empty; with an empty stack $ot may still hold a value from an earlier iteration and get pushed back - confirm
+                                               if ( ! in_array( $t, $htmlsingle ) &&
+                                               ( count($tagstack) && $ot = array_pop( $tagstack ) ) != $t ) {
+                                                       if(!empty($ot)) array_push( $tagstack, $ot );
+                                                       $badtag = 1;
+                                               } else {
+                                                       if ( $t == "table" ) {
+                                                               $tagstack = array_pop( $tablestack );
+                                                       }
+                                                       $newparams = "";
+                                               }
                                         } else {
-                                               if ( $t == "table" ) {
-                                                       $tagstack = array_pop( $tablestack );
+                                               # Opening tag: reject table cells/rows outside a table and non-nestable tags that are already open; otherwise record paired tags on the stack
+                                               if ( in_array( $t, $tabletags ) &&
+                                               ! in_array( "table", $tagstack ) ) {
+                                                       $badtag = 1;
+                                               } else if ( in_array( $t, $tagstack ) &&
+                                               ! in_array ( $t , $htmlnest ) ) {
+                                                       $badtag = 1 ;
+                                               } else if ( ! in_array( $t, $htmlsingle ) ) {
+                                                       if ( $t == "table" ) {
+                                                               array_push( $tablestack, $tagstack );
+                                                               $tagstack = array();
+                                                       }
+                                                       array_push( $tagstack, $t );
                                                 }
-                                               $newparams = "";
+                                               # Strip non-approved attributes from the tag
+                                               $newparams = $this->fixTagAttributes($params);
+
                                         }
-                               } else {
-                                       # Keep track for later
-                                       if ( in_array( $t, $tabletags ) &&
-                                         ! in_array( "table", $tagstack ) ) {
-                                               $badtag = 1;
-                                       } else if ( in_array( $t, $tagstack ) &&
-                                         ! in_array ( $t , $htmlnest ) ) {
-                                               $badtag = 1 ;
-                                       } else if ( ! in_array( $t, $htmlsingle ) ) {
-                                               if ( $t == "table" ) {
-                                                       array_push( $tablestack, $tagstack );
-                                                       $tagstack = array();
-                                               }
-                                               array_push( $tagstack, $t );
+                                       if ( ! $badtag ) {
+                                               $rest = str_replace( ">", "&gt;", $rest );
+                                               $text .= "<$slash$t $newparams$brace$rest";
+                                               continue;
                                         }
-                                       # Strip non-approved attributes from the tag
-                                       $newparams = $this->fixTagAttributes($params);
-                                               
                                 }
-                               if ( ! $badtag ) {
+                               $text .= "&lt;" . str_replace( ">", "&gt;", $x);
+                       }
+                       # Close off any remaining tags; after closing a table, continue with the tag stack that was saved when that table was opened
+                       while ( $t = array_pop( $tagstack ) ) {
+                               $text .= "</$t>\n";
+                               if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
+                       }
+               } else {
+                       # With $wgUseTidy set no tag balancing is done here: tags are only checked against the whitelist and get their attributes filtered. (This might be possible using tidy itself.)
+                       foreach ( $bits as $x ) {
+                               preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
+                               $x, $regs );
+                               @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
+                               if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+                                       $newparams = $this->fixTagAttributes($params);
                                         $rest = str_replace( ">", "&gt;", $rest );
                                         $text .= "<$slash$t $newparams$brace$rest";
-                                       continue;
+                               } else {
+                                       $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                                 }
-                       }
-                       $text .= "&lt;" . str_replace( ">", "&gt;", $x);
-               }
-               # Close off any remaining tags
-               while ( $t = array_pop( $tagstack ) ) {
-                       $text .= "</$t>\n";
-                       if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
+                       }       
                 }
                 wfProfileOut( $fname );
                 return $text;
         }
  
-/* 
- * 
+
+/*
+ *
   * This function accomplishes several tasks:
   * 1) Auto-number headings if that option is enabled
   * 2) Add an [edit] link to sections for logged in users who have enabled the option
@@ -1512,11 +1672,15 @@ class Parser
   *
   * It loops through all headlines, collects the necessary data, then splits up the
   * string and re-inserts the newly formatted headlines.
- * 
+ *
   */
  
-       /* private */ function formatHeadings( $text )
+       /* private */ function formatHeadings( $text, $isMain=true )
         {
+               global $wgInputEncoding,$wgRequest,$wgOut;
+               
+               $startsection=$wgRequest->getVal('section');    
+               if($startsection) { $startsection--;}
                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
                 $doShowToc = $this->mOptions->getShowToc();
                 if( !$this->mTitle->userCanEdit() ) {
@@ -1532,7 +1696,7 @@ class Parser
                 if( $esw->matchAndRemove( $text ) ) {
                         $showEditLink = 0;
                 }
-               # if the string __NOTOC__ (not case-sensitive) occurs in the HTML, 
+               # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
                 # do not add TOC
                 $mw =& MagicWord::get( MAG_NOTOC );
                 if( $mw->matchAndRemove( $text ) ) {
@@ -1583,12 +1747,12 @@ class Parser
                                 $prevlevel = $level;
                         }
                         $level = $matches[1][$headlineCount];
-                       if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) { 
+                       if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
                                 # reset when we enter a new level
                                 $sublevelCount[$level] = 0;
                                 $toc .= $sk->tocIndent( $level - $prevlevel );
                                 $toclevel += $level - $prevlevel;
-                       } 
+                       }
                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
                                 # reset when we step back a level
                                 $sublevelCount[$level+1]=0;
@@ -1605,7 +1769,7 @@ class Parser
                                                         $numbering .= ".";
                                                 }
                                                 $numbering .= $sublevelCount[$i];
-                                               $dot = 1;                                       
+                                               $dot = 1;
                                         }
                                 }
                         }
@@ -1613,29 +1777,29 @@ class Parser
                         # The canonized header is a version of the header text safe to use for links
                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
-                       
+
                         # strip out HTML
                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
-                       $tocline = trim( $canonized_headline ); 
-                       $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
+                       $tocline = trim( $canonized_headline );
+                       $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
                         $refer[$headlineCount] = $canonized_headline;
-                       
+
                         # count how many in assoc. array so we can track dupes in anchors
                         @$refers[$canonized_headline]++;
                         $refcount[$headlineCount]=$refers[$canonized_headline];
  
                         # Prepend the number to the heading text
-                       
+
                         if( $doNumberHeadings || $doShowToc ) {
                                 $tocline = $numbering . " " . $tocline;
-                               
+
                                 # Don't number the heading if it is the only one (looks silly)
                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
                                         # the two are different if the line contains a link
                                         $headline=$numbering . " " . $headline;
                                 }
                         }
-                       
+
                         # Create the anchor for linking from the TOC to the section
                         $anchor = $canonized_headline;
                         if($refcount[$headlineCount] > 1 ) {
@@ -1648,19 +1812,19 @@ class Parser
                                 if ( empty( $head[$headlineCount] ) ) {
                                         $head[$headlineCount] = "";
                                 }
-                               $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
+                               $head[$headlineCount] .= $sk->editSectionLink($startsection+$headlineCount+1);
                         }
-                               
+
                         # Add the edit section span
                         if( $rightClickHack ) {
-                               $headline = $sk->editSectionScript($headlineCount+1,$headline); 
+                               $headline = $sk->editSectionScript($startsection+$headlineCount+1,$headline);
                         }
  
                         # give headline the correct <h#> tag
                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
-                       
+
                         $headlineCount++;
-               }               
+               }
  
                 if( $doShowToc ) {
                         $toclines = $headlineCount;
@@ -1669,13 +1833,13 @@ class Parser
                 }
  
                 # split up and insert constructed headlines
-               
+
                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
                 $i = 0;
  
                 foreach( $blocks as $block ) {
                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
-                           # This is the [edit] link that appears for the top block of text when 
+                           # This is the [edit] link that appears for the top block of text when
                                 # section editing is enabled
  
                                 # Disabled because it broke block formatting
@@ -1687,16 +1851,133 @@ class Parser
                         # Top anchor now in skin
                                 $full = $full.$toc;
                         }
+                       
+                       # If a page is viewed in collapsed mode, a TOC generated
+                       # from the wikisource is made available through $wgOut->getToc().
+                       # It is fetched here and inserted in front of the first block of the main text ($isMain) if it exists.
+                       $collapsedtoc=$wgOut->getToc();
+                       if ($collapsedtoc && !$i && $isMain) {
+                               $full = $full.$collapsedtoc;            
+                       }                       
  
                         if( !empty( $head[$i] ) ) {
                                 $full .= $head[$i];
                         }
                         $i++;
                 }
-               
+
                 return $full;
         }
  
+       /* Builds an HTML-formatted table of contents directly from wikitext. $text is the page source; the return value is the string produced by the skin's tocTable(),
+          or "" when $text contains no headings. Used for collapsing long pages. Relies on the globals $wgUser, $wgTitle, $wgOut, $wgParser and $wgInputEncoding.
+        */        
+       /* static */ function getTocFromSource( $text ) {               
+               # The skin supplies the TOC markup helpers used below (tocIndent, tocUnindent, tocLine, tocTable).
+               global $wgUser,$wgInputEncoding,$wgTitle,$wgOut,$wgParser;              
+               $sk = $wgUser->getSkin();               
+               # strip() is run with the parser temporarily switched to OT_WIKI and the previous output type is restored right after. NOTE(review): presumably this keeps headings inside stripped sections from being matched - confirm
+               $striparray=array();
+               $oldtype=$wgParser->mOutputType;
+               $wgParser->mOutputType=OT_WIKI;
+               $text=$wgParser->strip($text, $striparray, true);
+               $wgParser->mOutputType=$oldtype;        
+               # Groups 1/2: the leading "=" run and the title of a wiki heading; groups 3/4: the level digit and the title of an HTML <h1>-<h6> heading.
+               $numMatches = preg_match_all( "/^(=+)(.*?)=+|^<h([1-6]).*?>(.*?)<\/h[1-6].*?>/mi",$text,$matches);
+               
+               # no headings: text cannot be collapsed
+               if( $numMatches == 0 ) {
+                       return "";
+               }
+               
+               # Join all heading titles with a "!@@@!" separator, run them through the parser in a single call,
+               # and split the resulting HTML on the same separator to get one converted title per heading.
+               $combined=implode("!@@@!",$matches[2]);
+               $myout=$wgParser->parse($combined,$wgTitle,$wgOut->mParserOptions);                     
+               $combined_html=$myout->getText();               
+               $headlines=array();
+               $headlines=explode("!@@@!",$combined_html);
+               # NOTE(review): only $matches[2] (above) and $matches[1] (below) are read, so headings matched by the <hN> alternative, which fill groups 3/4, end up with an empty title and an empty level - confirm whether that is intended.
+               # headline counter; also the index into $matches for the current headline
+               $headlineCount = 0;             
+               $toclevel = 0;
+               $toc = "";
+               $full = ""; # NOTE(review): $full and $head are not read anywhere below in this function
+               $head = array();
+               $sublevelCount = array();
+               $level = 0;
+               $prevlevel = 0;
+               foreach( $headlines as $headline ) {                    
+                       $headline=trim($headline);
+                       $numbering = "";
+                       if( $level ) {
+                               $prevlevel = $level;
+                       }
+                       $level = $matches[1][$headlineCount];
+                       
+                       # wikisource headings need to be converted into numbers
+                       # =foo= equals <h1>foo</h1>, ==foo== equals <h2>foo</h2> etc.
+                       if(strpos($level,"=")!==false) {
+                               $level=strlen($level);                  
+                       }
+                       
+                       if(  $prevlevel && $level > $prevlevel ) {
+                               # reset when we enter a new level
+                               $sublevelCount[$level] = 0;
+                               $toc .= $sk->tocIndent( $level - $prevlevel );
+                               $toclevel += $level - $prevlevel;
+                       }
+                       if( $level < $prevlevel ) {
+                               # reset when we step back a level
+                               $sublevelCount[$level+1]=0;
+                               $toc .= $sk->tocUnindent( $prevlevel - $level );
+                               $toclevel -= $prevlevel - $level;
+                       }
+                       # count number of headlines for each level
+                       @$sublevelCount[$level]++; # @ silences the notice raised when this level has no counter yet
+                       $dot = 0; # set to 1 once the first component is written, so later components are dot-separated
+                       for( $i = 1; $i <= $level; $i++ ) {
+                               if( !empty( $sublevelCount[$i] ) ) {
+                                       if( $dot ) {
+                                               $numbering .= ".";
+                                       }
+                                       $numbering .= $sublevelCount[$i];
+                                       $dot = 1;
+                               }
+                       }
+                       # $numbering now holds the dotted section number (e.g. "2.1"); levels whose counter is empty are left out
+
+                       # The canonized header is a version of the header text safe to use for links
+                       # Avoid insertion of weird stuff like <math> by expanding the relevant sections
+                       $state=array();
+                       $canonized_headline = Parser::unstrip( $headline, $state);                      
+                       # NOTE(review): $state is an empty array at this point, so unstrip() is given nothing to expand - confirm this call has any effect
+                       # strip out HTML
+                       $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
+                       $tocline = trim( $canonized_headline );
+                       $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
+                       $refer[$headlineCount] = $canonized_headline;
+
+                       # count how many in assoc. array so we can track dupes in anchors
+                       @$refers[$canonized_headline]++;
+                       $refcount[$headlineCount]=$refers[$canonized_headline];
+                       $tocline = $numbering . " " . $tocline;
+
+                       # Create the anchor for linking from the TOC to the section
+                       $anchor = trim($canonized_headline);
+                       # A repeated heading gets its occurrence count appended (_2, _3, ...) so that anchors stay distinct
+                       if($refcount[$headlineCount] > 1 ) {
+                               $anchor .= "_" . $refcount[$headlineCount];
+                       }                       
+                       $headlineCount++; # incremented first, so tocLine() receives a 1-based number
+                       $toc .= $sk->tocLine($anchor,$tocline,$toclevel,$headlineCount);
+               }
+               $toclines = $headlineCount; # NOTE(review): not read again in this function
+               $toc .= $sk->tocUnindent( $toclevel );
+               $toc = $sk->tocTable( $toc );
+               return $toc;
+       
+       }
         /* private */ function doMagicISBN( &$tokenizer )
         {
                 global $wgLang;
@@ -1727,7 +2008,7 @@ class Parser
                         }
                         $num = str_replace( "-", "", $isbn );
                         $num = str_replace( " ", "", $num );
-               
+
                         if ( "" == $num ) {
                                 $text = "ISBN $blank$x";
                         } else {
@@ -1770,7 +2051,7 @@ class Parser
                                 $rfc .= $x{0};
                                 $x = substr( $x, 1 );
                         }
-               
+
                         if ( "" == $rfc ) {
                                 $text .= "RFC $blank$x";
                         } else {
@@ -1791,11 +2072,11 @@ class Parser
                 $this->mOptions = $options;
                 $this->mTitle =& $title;
                 $this->mOutputType = OT_WIKI;
-               
+
                 if ( $clearState ) {
                         $this->clearState();
                 }
-               
+
                 $stripState = false;
                 $pairs = array(
                         "\r\n" => "\n",
@@ -1866,16 +2147,16 @@ class Parser
                 } else {
                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
                 }
-               
+
                 /*
                 $mw =& MagicWord::get( MAG_SUBST );
                 $wgCurParser = $this->fork();
                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
                 $this->merge( $wgCurParser );
                 */
-               
+
                 # Trim trailing whitespace
-               # MAG_END (__END__) tag allows for trailing 
+               # MAG_END (__END__) tag allows for trailing
                 # whitespace to be deliberately included
                 $text = rtrim( $text );
                 $mw =& MagicWord::get( MAG_END );
@@ -1886,7 +2167,7 @@ class Parser
  
         # Set up some variables which are usually set up in parse()
         # so that an external function can call some class members with confidence
-       function startExternalParse( &$title, $options, $outputType, $clearState = true ) 
+       function startExternalParse( &$title, $options, $outputType, $clearState = true )
         {
                 $this->mTitle =& $title;
                 $this->mOptions = $options;
@@ -1899,7 +2180,7 @@ class Parser
         function transformMsg( $text, $options ) {
                 global $wgTitle;
                 static $executing = false;
-               
+
                 # Guard against infinite recursion
                 if ( $executing ) {
                         return $text;
@@ -1911,7 +2192,7 @@ class Parser
                 $this->mOutputType = OT_MSG;
                 $this->clearState();
                 $text = $this->replaceVariables( $text );
-               
+
                 $executing = false;
                 return $text;
         }
@@ -1986,17 +2267,17 @@ class ParserOptions
         function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
         function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }
  
-       /* static */ function newFromUser( &$user ) 
+       /* static */ function newFromUser( &$user ) # factory: returns a new ParserOptions initialised from $user
         {
                 $popts = new ParserOptions;
-               $popts->initialiseFromUser( &$user );
+               $popts->initialiseFromUser( $user ); # no "&" at the call site: initialiseFromUser() already declares its parameter by reference
                 return $popts;
         }
  
-       function initialiseFromUser( &$userInput ) 
+       function initialiseFromUser( &$userInput )
         {
                 global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
-               
+
                 if ( !$userInput ) {
                         $user = new User;
                         $user->setLoaded( true );