X-Git-Url: http://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/exercices/modifier.php?a=blobdiff_plain;f=includes%2FParser.php;h=c120ad16cf7303aae15560dc14425897613e2fe4;hb=de18da5eebe16c0636fc140f63aeb595df3dc49e;hp=7927aa5268e0f6e5a4d6b71f8493e6ac7c2cbc62;hpb=5733d215639e574db8f8fee1ee255b87dcb71ce7;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Parser.php b/includes/Parser.php index 7927aa5268..c120ad16cf 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -6,20 +6,20 @@ if( $GLOBALS['wgUseWikiHiero'] ){ include_once('wikihiero.php'); } -# PHP Parser -# +# PHP Parser +# # Processes wiki markup # -# There are two main entry points into the Parser class: parse() and preSaveTransform(). +# There are two main entry points into the Parser class: parse() and preSaveTransform(). # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup. # -# Globals used: +# Globals used: # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser # # NOT $wgArticle, $wgUser or $wgTitle. Keep them away! # # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*, -# $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*, +# $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*, # $wgLocaltimezone # # * only within ParserOptions @@ -29,8 +29,8 @@ if( $GLOBALS['wgUseWikiHiero'] ){ # Variable substitution O(N^2) attack #----------------------------------------- # Without countermeasures, it would be possible to attack the parser by saving a page -# filled with a large number of inclusions of large pages. The size of the generated -# page would be proportional to the square of the input size. Hence, we limit the number +# filled with a large number of inclusions of large pages. The size of the generated +# page would be proportional to the square of the input size. Hence, we limit the number # of inclusions of any given page, thus bringing any attack back to O(N). # @@ -47,8 +47,8 @@ define( "UNIQ_PREFIX", "NaodW29"); class Parser { # Cleared with clearState(): - var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array(); - var $mVariables, $mIncludeCount, $mArgStack; + var $mOutput, $mAutonumber, $mDTopen, $mStripState = array(); + var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre; # Temporary: var $mOptions, $mTitle, $mOutputType; @@ -69,7 +69,7 @@ class Parser $this->mStripState = array(); $this->mArgStack = array(); } - + # First pass--just handle sections, pass the rest off # to internalParse() which does all the real work. # @@ -83,16 +83,30 @@ class Parser if ( $clearState ) { $this->clearState(); } - + $this->mOptions = $options; $this->mTitle =& $title; $this->mOutputType = OT_HTML; - + $stripState = NULL; $text = $this->strip( $text, $this->mStripState ); $text = $this->internalParse( $text, $linestart ); $text = $this->unstrip( $text, $this->mStripState ); - + # Clean up special characters, only run once, next-to-last before doBlockLevels + $fixtags = array( + "/
/i" => '
', + "/
/i" => '
', + "/
/i"=>'
', + "/<\\/center *>/i" => '
', + # Clean up spare ampersands; note that we probably ought to be + # more careful about named entities. + '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&' + ); + $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); + + # only once and last + $text = $this->doBlockLevels( $text, $linestart ); + $this->mOutput->setText( $text ); wfProfileOut( $fname ); return $this->mOutput; @@ -103,7 +117,7 @@ class Parser return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff)); } - # Replaces all occurences of <$tag>content in the text + # Replaces all occurrences of <$tag>content in the text # with a random marker and returns the new text. the output parameter # $content will be an associative array filled with data on the form # $unique_marker => content. @@ -121,8 +135,8 @@ class Parser while ( "" != $text ) { $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 ); $stripped .= $p[0]; - if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { - $text = ""; + if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { + $text = ""; } else { $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 ); $marker = $rnd . sprintf("%08X", $n++); @@ -132,7 +146,7 @@ class Parser } } return $stripped; - } + } # Strips ,
 and 
 	# Returns the text, and fills an array with data needed in unstrip()
@@ -141,7 +155,7 @@ class Parser
 	function strip( $text, &$state )
 	{
 		$render = ($this->mOutputType == OT_HTML);
-		$nowiki_content = array(); 
+		$nowiki_content = array();
 		$hiero_content = array();
 		$math_content = array();
 		$pre_content = array();
@@ -190,7 +204,7 @@ class Parser
 				$pre_content[$marker] = "
$content
"; } } - + # Merge state with the pre-existing state, if there is one if ( $state ) { $state['nowiki'] = $state['nowiki'] + $nowiki_content; @@ -198,11 +212,11 @@ class Parser $state['math'] = $state['math'] + $math_content; $state['pre'] = $state['pre'] + $pre_content; } else { - $state = array( + $state = array( 'nowiki' => $nowiki_content, 'hiero' => $hiero_content, - 'math' => $math_content, - 'pre' => $pre_content, + 'math' => $math_content, + 'pre' => $pre_content, 'item' => $item_content ); } @@ -213,15 +227,15 @@ class Parser { # Must expand in reverse order, otherwise nested tags will be corrupted $contentDict = end( $state ); - for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) { + for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) { for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) { $text = str_replace( key( $contentDict ), $content, $text ); } } - + return $text; } - + # Add an item to the strip state # Returns the unique tag which must be inserted into the stripped text # The tag will be replaced with the original text in unstrip() @@ -230,7 +244,7 @@ class Parser { $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString(); if ( !$state ) { - $state = array( + $state = array( 'nowiki' => array(), 'hiero' => array(), 'math' => array(), @@ -241,24 +255,21 @@ class Parser $state['item'][$rnd] = $text; return $rnd; } - + function categoryMagic () { global $wgLang , $wgUser ; if ( !$this->mOptions->getUseCategoryMagic() ) return ; $id = $this->mTitle->getArticleID() ; - $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ; + $cns = Namespace::getCategory() ; + if ( $this->mTitle->getNamespace() != $cns ) return "" ; $ti = $this->mTitle->getText() ; - $ti = explode ( ":" , $ti , 2 ) ; - if ( $cat != $ti[0] ) return "" ; - $r = '
\n'; + $r = "
\n"; $articles = array() ; $parents = array () ; $children = array() ; - -# $sk =& $this->mGetSkin(); $sk =& $wgUser->getSkin() ; $data = array () ; @@ -271,16 +282,14 @@ class Parser $res = wfQuery ( $sql2, DB_READ ) ; while ( $x = wfFetchObject ( $res ) ) $data[] = $x ; - foreach ( $data AS $x ) { $t = $wgLang->getNsText ( $x->cur_namespace ) ; if ( $t != "" ) $t .= ":" ; $t .= $x->cur_title ; - $y = explode ( ":" , $t , 2 ) ; - if ( count ( $y ) == 2 && $y[0] == $cat ) { - array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ; + if ( $x->cur_namespace == $cns ) { + array_push ( $children , $sk->makeLink ( $t ) ) ; } else { array_push ( $articles , $sk->makeLink ( $t ) ) ; } @@ -299,7 +308,7 @@ class Parser if ( count ( $articles ) > 0 ) { asort ( $articles ) ; - $h = wfMsg( "category_header", $ti[1] ); + $h = wfMsg( "category_header", $ti ); $r .= "

{$h}

\n" ; $r .= implode ( ", " , $articles ) ; } @@ -329,7 +338,7 @@ class Parser { if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-) $htmlattrs = $this->getHTMLattrs() ; - + # Strip non-approved attributes from the tag $t = preg_replace( "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e", @@ -338,7 +347,7 @@ class Parser # Strip javascript "expression" from stylesheets. Brute force approach: # If anythin offensive is found, all attributes of the HTML tag are dropped - if( preg_match( + if( preg_match( "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is", wfMungeToUtf8( $t ) ) ) { @@ -357,7 +366,7 @@ class Parser $ltr = array () ; # tr attributes foreach ( $t AS $k => $x ) { - $x = rtrim ( $x ) ; + $x = trim ( $x ) ; $fc = substr ( $x , 0 , 1 ) ; if ( "{|" == substr ( $x , 0 , 2 ) ) { @@ -378,7 +387,7 @@ class Parser $t[$k] = $z ; } /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption - { + { $z = trim ( substr ( $x , 2 ) ) ; $t[$k] = "{$z}\n" ; }*/ @@ -412,7 +421,7 @@ class Parser { $z = "" ; if ( $fc != "+" ) - { + { $tra = array_pop ( $ltr ) ; if ( !array_pop ( $tr ) ) $z = "\n" ; array_push ( $tr , true ) ; @@ -469,25 +478,11 @@ class Parser $text = $this->formatHeadings( $text ); $sk =& $this->mOptions->getSkin(); $text = $sk->transformContent( $text ); - - $fixtags = array( - "/
/i" => '
', - "/
/i" => '
', - "/
/i"=>'', - "/<\\/center *>/i" => '' - ); - $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); - // another round, but without regex - $fixtags = array( - '& ' => '&', - '&<' => '&<', - ); - $text = str_replace( array_keys($fixtags), array_values($fixtags), $text ); - - $text .= $this->categoryMagic () ; - - # needs to be called last - $text = $this->doBlockLevels( $text, $linestart ); + + if ( !isset ( $this->categoryMagicDone ) ) { + $text .= $this->categoryMagic () ; + $this->categoryMagicDone = true ; + } wfProfileOut( $fname ); return $text; @@ -522,18 +517,18 @@ class Parser wfProfileOut( $fname ); return $text; } - + /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber ) { $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3"; $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF"; - - # this is the list of separators that should be ignored if they + + # this is the list of separators that should be ignored if they # are the last character of an URL but that should be included # if they occur within the URL, e.g. "go to www.foo.com, where .." # in this case, the last comma should not become part of the URL, # but in "www.foo.com/123,2342,32.htm" it should. - $sep = ",;\.:"; + $sep = ",;\.:"; $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF"; $images = "gif|png|jpg|jpeg"; @@ -542,7 +537,7 @@ class Parser # that the content of the string should be inserted there). $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." . "((?i){$images})([^{$uc}]|$)/"; - + $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/"; $sk =& $this->mOptions->getSkin(); @@ -572,7 +567,7 @@ class Parser } else if ( preg_match( $e2, $line, $m ) ) { $link = "{$protocol}:{$m[1]}"; $text = $m[2]; - $trail = $m[3]; + $trail = $m[3]; } else { $s .= "[{$protocol}:" . $line; continue; @@ -626,7 +621,7 @@ class Parser } return $s; } - + /* private */ function handle5Quotes( &$state, $token ) { $s = ""; @@ -658,13 +653,13 @@ class Parser $tokenizer=Tokenizer::newFromString( $str ); $tokenStack = array(); - + $s=""; $state["em"] = FALSE; $state["strong"] = FALSE; $tagIsOpen = FALSE; $threeopen = false; - + # The tokenizer splits the text into tokens and returns them one by one. # Every call to the tokenizer returns a new token. while ( $token = $tokenizer->nextToken() ) @@ -685,13 +680,13 @@ class Parser array_push( $tokenStack, $token ); $txt=""; break; - + case "]]]": case "]]": # link close tag. # get text from stack, glue it together, and call the code to handle a # link - + if ( count( $tokenStack ) == 0 ) { # stack empty. Found a ]] without an opening [[ @@ -706,16 +701,16 @@ class Parser } $lastToken = array_pop( $tokenStack ); } - + $txt = $linkText ."]]"; - + if( isset( $lastToken["text"] ) ) { $prefix = $lastToken["text"]; } else { $prefix = ""; } $nextToken = $tokenizer->previewToken(); - if ( $nextToken["type"] == "text" ) + if ( $nextToken["type"] == "text" ) { # Preview just looks at it. Now we have to fetch it. $nextToken = $tokenizer->nextToken(); @@ -723,13 +718,13 @@ class Parser } $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix ); - # did the tag start with 3 [ ? + # did the tag start with 3 [ ? if($threeopen) { # show the first as text $txt = "[".$txt; $threeopen=false; } - + } $tagIsOpen = (count( $tokenStack ) != 0); break; @@ -795,7 +790,7 @@ class Parser $txt = $lastToken["text"] . $txt; } else { $txt = $lastToken["type"] . $txt; - } + } } $s .= $txt; } @@ -822,7 +817,7 @@ class Parser #$e2 = "/^(.*)\\b(\\w+)\$/suD"; #$e2 = "/^(.*\\s)(\\S+)\$/suD"; static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD'; - + # Special and Media are pseudo-namespaces; no pages actually exist in them static $image = FALSE; @@ -832,21 +827,21 @@ class Parser if ( !$image ) { $image = Namespace::getImage(); } if ( !$special ) { $special = Namespace::getSpecial(); } if ( !$media ) { $media = Namespace::getMedia(); } - if ( !$category ) { $category = wfMsg ( "category" ) ; } - + if ( !$category ) { $category = Namespace::getCategory(); ; } + $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() ); wfProfileOut( "$fname-setup" ); $s = ""; - + if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; - $trail = $m[3]; + $trail = $m[3]; } else { # Invalid form; output directly $s .= $prefix . "[[" . $line ; return $s; } - + /* Valid link forms: Foobar -- normal :Foobar -- override special treatment of prefix (images, language links) @@ -857,7 +852,7 @@ class Parser $noforce = ($c != ":"); if( $c == "/" ) { # subpage if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown - $m[1]=substr($m[1],1,strlen($m[1])-2); + $m[1]=substr($m[1],1,strlen($m[1])-2); $noslash=$m[1]; } else { $noslash=substr($m[1],1); @@ -865,7 +860,7 @@ class Parser if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash); if( "" == $text ) { - $text= $m[1]; + $text= $m[1]; } # this might be changed for ugliness reasons } else { $link = $noslash; # no subpage allowed, use standard link @@ -895,6 +890,14 @@ class Parser $wgLinkCache->addImageLinkObj( $nt ); return $s; } + if ( $ns == $category ) { + $t = $nt->getText() ; + $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ; + $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix ); + $this->mOutput->mCategoryLinks[] = $t ; + $s .= $prefix . $trail ; + return $s ; + } } if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) && ( strpos( $link, "#" ) == FALSE ) ) { @@ -902,23 +905,6 @@ class Parser return $s; } - # Category feature - $catns = strtoupper ( $nt->getDBkey () ) ; - $catns = explode ( ":" , $catns ) ; - if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ; - else $catns = "" ; - if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) { - $t = explode ( ":" , $nt->getText() ) ; - array_shift ( $t ) ; - $t = implode ( ":" , $t ) ; - $t = $wgLang->ucFirst ( $t ) ; - $nnt = Title::newFromText ( $category.":".$t ) ; - $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix ); - $this->mOutput->mCategoryLinks[] = $t ; - $s .= $prefix . $trail ; - return $s ; - } - if( $ns == $media ) { $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail; $wgLinkCache->addImageLinkObj( $nt ); @@ -941,6 +927,7 @@ class Parser if ( '' != $this->mLastSection ) { $result = "mLastSection . ">\n"; } + $this->mInPre = false; $this->mLastSection = ""; return $result; } @@ -1029,10 +1016,21 @@ class Parser foreach ( $a as $t ) { $oLine = $t; $opl = strlen( $lastPref ); - $npl = strspn( $t, "*#:;" ); - $pref = substr( $t, 0, $npl ); - $pref2 = str_replace( ";", ":", $pref ); - $t = substr( $t, $npl ); + $preCloseMatch = preg_match("/<\\/pre/i", $t ); + $preOpenMatch = preg_match("/
mInPre) {
+				$this->mInPre = !empty($preOpenMatch);
+			}
+			if ( !$this->mInPre ) {
+				$npl = strspn( $t, "*#:;" );
+				$pref = substr( $t, 0, $npl );
+				$pref2 = str_replace( ";", ":", $pref );
+				$t = substr( $t, $npl );
+			} else {
+				$npl = 0;
+				$pref = $pref2 = '';
+			}
+
 			// list generation
 			if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 				$text .= $this->nextItem( substr( $pref, -1 ) );
@@ -1077,12 +1075,15 @@ class Parser
 				$uniq_prefix = UNIQ_PREFIX;
 				// XXX: use a stack for nestable elements like span, table and div
 				$openmatch = preg_match("/(closeParagraph();
+					if($preOpenMatch and !$preCloseMatch) {
+						$this->mInPre = true;	
+					}
 					if ( $closematch  ) {
 						$inBlockElem = false;
 					} else {
@@ -1096,7 +1097,7 @@ class Parser
 							$text .= $this->closeParagraph().'
';
 							$this->mLastSection = 'pre';
 						}
-					} else { 
+					} else {
 						// paragraph
 						if ( '' == trim($t) ) {
 							if ( $pstack ) {
@@ -1123,7 +1124,7 @@ class Parser
 							}
 						}
 					}
-				} 
+				}
 			}
 			if ($pstack === false) {
 				$text .= $t."\n";
@@ -1137,7 +1138,7 @@ class Parser
 			$text .= "mLastSection . ">";
 			$this->mLastSection = "";
 		}
-		
+
 		wfProfileOut( $fname );
 		return $text;
 	}
@@ -1187,23 +1188,23 @@ class Parser
 
 		$fname = "Parser::replaceVariables";
 		wfProfileIn( $fname );
-		
+
 		$bail = false;
 		if ( !$this->mVariables ) {
 			$this->initialiseVariables();
 		}
 		$titleChars = Title::legalChars();
 		$regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
-		
+
 		# This function is called recursively. To keep track of arguments we need a stack:
 		array_push( $this->mArgStack, $args );
 
 		# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
 		$GLOBALS['wgCurParser'] =& $this;
 		$text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
-		
+
 		array_pop( $this->mArgStack );
-		
+
 		return $text;
 	}
 
@@ -1214,11 +1215,11 @@ class Parser
 		$found = false;
 		$nowiki = false;
 		$title = NULL;
-		
+
 		# $newline is an optional newline character before the braces
 		# $part1 is the bit before the first |, and must contain only title characters
 		# $args is a list of arguments, starting from index 0, not including $part1
-		
+
 		$newline = $matches[1];
 		$part1 = $matches[2];
 		# If the third subpattern matched anything, it will start with |
@@ -1243,7 +1244,7 @@ class Parser
 			$text = $matches[0];
 			$found = true;
 		}
-		
+
 		# MSG, MSGNW and INT
 		if ( !$found ) {
 			# Check for MSGNW:
@@ -1255,7 +1256,7 @@ class Parser
 				$mwMsg =& MagicWord::get( MAG_MSG );
 				$mwMsg->matchStartAndRemove( $part1 );
 			}
-			
+
 			# Check if it is an internal message
 			$mwInt =& MagicWord::get( MAG_INT );
 			if ( $mwInt->matchStartAndRemove( $part1 ) ) {
@@ -1265,7 +1266,7 @@ class Parser
 				}
 			}
 		}
-	
+
 		# NS
 		if ( !$found ) {
 			# Check for NS: (namespace expansion)
@@ -1283,7 +1284,7 @@ class Parser
 				}
 			}
 		}
-		
+
 		# LOCALURL and LOCALURLE
 		if ( !$found ) {
 			$mwLocal = MagicWord::get( MAG_LOCALURL );
@@ -1296,7 +1297,7 @@ class Parser
 			} else {
 				$func = '';
 			}
-			
+
 			if ( $func !== '' ) {
 				$title = Title::newFromText( $part1 );
 				if ( !is_null( $title ) ) {
@@ -1309,14 +1310,14 @@ class Parser
 				}
 			}
 		}
-		
+
 		# Internal variables
 		if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
 			$text = $this->mVariables[$part1];
 			$found = true;
 			$this->mOutput->mContainsOldMagic = true;
-		} 
-		
+		}
+
 		# Arguments input from the caller
 		$inputArgs = end( $this->mArgStack );
 		if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
@@ -1336,9 +1337,9 @@ class Parser
 					if ( $articleContent !== false ) {
 						$found = true;
 						$text = $articleContent;
-						
-					} 
-				} 
+
+					}
+				}
 
 				# If the title is valid but undisplayable, make a link to it
 				if ( $this->mOutputType == OT_HTML && !$found ) {
@@ -1347,7 +1348,7 @@ class Parser
 				}
 			}
 		}
-		
+
 		# Recursive parsing, escaping and link table handling
 		# Only for HTML output
 		if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
@@ -1376,15 +1377,15 @@ class Parser
 			if ( !is_null( $title ) ) {
 				$wgLinkCache->suspend();
 			}
-			
+
 			# Run full parser on the included text
 			$text = $this->strip( $text, $this->mStripState );
 			$text = $this->internalParse( $text, (bool)$newline, $assocArgs );
-			
-			# Add the result to the strip state for re-inclusion after 
+
+			# Add the result to the strip state for re-inclusion after
 			# the rest of the processing
 			$text = $this->insertStripItem( $text, $this->mStripState );
-			
+
 			# Resume the link cache and register the inclusion as a link
 			if ( !is_null( $title ) ) {
 				$wgLinkCache->resume();
@@ -1419,7 +1420,7 @@ class Parser
 		$fname = "Parser::removeHTMLtags";
 		wfProfileIn( $fname );
 		$htmlpairs = array( # Tags that must be closed
-			"b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
+			"b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
 			"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
 			"strike", "strong", "tt", "var", "div", "center",
 			"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
@@ -1439,7 +1440,7 @@ class Parser
 		$htmlsingle = array_merge( $tabletags, $htmlsingle );
 		$htmlelements = array_merge( $htmlsingle, $htmlpairs );
 
-                $htmlattrs = $this->getHTMLattrs () ;
+		$htmlattrs = $this->getHTMLattrs () ;
 
 		# Remove HTML comments
 		$text = preg_replace( "//sU", "", $text );
@@ -1487,7 +1488,7 @@ class Parser
 					}
 					# Strip non-approved attributes from the tag
 					$newparams = $this->fixTagAttributes($params);
-						
+
 				}
 				if ( ! $badtag ) {
 					$rest = str_replace( ">", ">", $rest );
@@ -1506,8 +1507,8 @@ class Parser
 		return $text;
 	}
 
-/* 
- * 
+/*
+ *
  * This function accomplishes several tasks:
  * 1) Auto-number headings if that option is enabled
  * 2) Add an [edit] link to sections for logged in users who have enabled the option
@@ -1516,7 +1517,7 @@ class Parser
  *
  * It loops through all headlines, collects the necessary data, then splits up the
  * string and re-inserts the newly formatted headlines.
- * 
+ *
  */
 
 	/* private */ function formatHeadings( $text )
@@ -1536,7 +1537,7 @@ class Parser
 		if( $esw->matchAndRemove( $text ) ) {
 			$showEditLink = 0;
 		}
-		# if the string __NOTOC__ (not case-sensitive) occurs in the HTML, 
+		# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
 		# do not add TOC
 		$mw =& MagicWord::get( MAG_NOTOC );
 		if( $mw->matchAndRemove( $text ) ) {
@@ -1587,12 +1588,12 @@ class Parser
 				$prevlevel = $level;
 			}
 			$level = $matches[1][$headlineCount];
-			if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) { 
+			if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
 				# reset when we enter a new level
 				$sublevelCount[$level] = 0;
 				$toc .= $sk->tocIndent( $level - $prevlevel );
 				$toclevel += $level - $prevlevel;
-			} 
+			}
 			if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
 				# reset when we step back a level
 				$sublevelCount[$level+1]=0;
@@ -1609,7 +1610,7 @@ class Parser
 							$numbering .= ".";
 						}
 						$numbering .= $sublevelCount[$i];
-						$dot = 1;					
+						$dot = 1;
 					}
 				}
 			}
@@ -1617,29 +1618,29 @@ class Parser
 			# The canonized header is a version of the header text safe to use for links
 			# Avoid insertion of weird stuff like  by expanding the relevant sections
 			$canonized_headline = $this->unstrip( $headline, $this->mStripState );
-			
+
 			# strip out HTML
 			$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
-			$tocline = trim( $canonized_headline );	
-			$canonized_headline = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
+			$tocline = trim( $canonized_headline );
+			$canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
 			$refer[$headlineCount] = $canonized_headline;
-			
+
 			# count how many in assoc. array so we can track dupes in anchors
 			@$refers[$canonized_headline]++;
 			$refcount[$headlineCount]=$refers[$canonized_headline];
 
 			# Prepend the number to the heading text
-			
+
 			if( $doNumberHeadings || $doShowToc ) {
 				$tocline = $numbering . " " . $tocline;
-				
+
 				# Don't number the heading if it is the only one (looks silly)
 				if( $doNumberHeadings && count( $matches[3] ) > 1) {
 					# the two are different if the line contains a link
 					$headline=$numbering . " " . $headline;
 				}
 			}
-			
+
 			# Create the anchor for linking from the TOC to the section
 			$anchor = $canonized_headline;
 			if($refcount[$headlineCount] > 1 ) {
@@ -1654,17 +1655,17 @@ class Parser
 				}
 				$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
 			}
-				
+
 			# Add the edit section span
 			if( $rightClickHack ) {
-				$headline = $sk->editSectionScript($headlineCount+1,$headline);	
+				$headline = $sk->editSectionScript($headlineCount+1,$headline);
 			}
 
 			# give headline the correct  tag
 			@$head[$headlineCount] .= "";
-			
+
 			$headlineCount++;
-		}		
+		}
 
 		if( $doShowToc ) {
 			$toclines = $headlineCount;
@@ -1673,13 +1674,13 @@ class Parser
 		}
 
 		# split up and insert constructed headlines
-		
+
 		$blocks = preg_split( "/.*?<\/H[1-6]>/i", $text );
 		$i = 0;
 
 		foreach( $blocks as $block ) {
 			if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
-			    # This is the [edit] link that appears for the top block of text when 
+			    # This is the [edit] link that appears for the top block of text when
 				# section editing is enabled
 
 				# Disabled because it broke block formatting
@@ -1697,7 +1698,7 @@ class Parser
 			}
 			$i++;
 		}
-		
+
 		return $full;
 	}
 
@@ -1731,7 +1732,7 @@ class Parser
 			}
 			$num = str_replace( "-", "", $isbn );
 			$num = str_replace( " ", "", $num );
-		
+
 			if ( "" == $num ) {
 				$text = "ISBN $blank$x";
 			} else {
@@ -1774,7 +1775,7 @@ class Parser
 				$rfc .= $x{0};
 				$x = substr( $x, 1 );
 			}
-		
+
 			if ( "" == $rfc ) {
 				$text .= "RFC $blank$x";
 			} else {
@@ -1795,11 +1796,11 @@ class Parser
 		$this->mOptions = $options;
 		$this->mTitle =& $title;
 		$this->mOutputType = OT_WIKI;
-		
+
 		if ( $clearState ) {
 			$this->clearState();
 		}
-		
+
 		$stripState = false;
 		$pairs = array(
 			"\r\n" => "\n",
@@ -1870,16 +1871,16 @@ class Parser
 		} else {
 			$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
 		}
-		
+
 		/*
 		$mw =& MagicWord::get( MAG_SUBST );
 		$wgCurParser = $this->fork();
 		$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
 		$this->merge( $wgCurParser );
 		*/
-		
+
 		# Trim trailing whitespace
-		# MAG_END (__END__) tag allows for trailing 
+		# MAG_END (__END__) tag allows for trailing
 		# whitespace to be deliberately included
 		$text = rtrim( $text );
 		$mw =& MagicWord::get( MAG_END );
@@ -1890,7 +1891,7 @@ class Parser
 
 	# Set up some variables which are usually set up in parse()
 	# so that an external function can call some class members with confidence
-	function startExternalParse( &$title, $options, $outputType, $clearState = true ) 
+	function startExternalParse( &$title, $options, $outputType, $clearState = true )
 	{
 		$this->mTitle =& $title;
 		$this->mOptions = $options;
@@ -1903,7 +1904,7 @@ class Parser
 	function transformMsg( $text, $options ) {
 		global $wgTitle;
 		static $executing = false;
-		
+
 		# Guard against infinite recursion
 		if ( $executing ) {
 			return $text;
@@ -1915,7 +1916,7 @@ class Parser
 		$this->mOutputType = OT_MSG;
 		$this->clearState();
 		$text = $this->replaceVariables( $text );
-		
+
 		$executing = false;
 		return $text;
 	}
@@ -1990,17 +1991,17 @@ class ParserOptions
 	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
 	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }
 
-	/* static */ function newFromUser( &$user ) 
+	/* static */ function newFromUser( &$user )
 	{
 		$popts = new ParserOptions;
-		$popts->initialiseFromUser( &$user );
+		$popts->initialiseFromUser( $user );
 		return $popts;
 	}
 
-	function initialiseFromUser( &$userInput ) 
+	function initialiseFromUser( &$userInput )
 	{
 		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
-		
+
 		if ( !$userInput ) {
 			$user = new User;
 			$user->setLoaded( true );