X-Git-Url: http://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/exercices/modifier.php?a=blobdiff_plain;f=includes%2FParser.php;h=c120ad16cf7303aae15560dc14425897613e2fe4;hb=de18da5eebe16c0636fc140f63aeb595df3dc49e;hp=93d92bda87a7e9b55aa4740ae48c5a0e850655ea;hpb=a19696f5117e9d96dad628a92261b381cc06cc23;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Parser.php b/includes/Parser.php index 93d92bda87..c120ad16cf 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -6,20 +6,20 @@ if( $GLOBALS['wgUseWikiHiero'] ){ include_once('wikihiero.php'); } -# PHP Parser -# +# PHP Parser +# # Processes wiki markup # -# There are two main entry points into the Parser class: parse() and preSaveTransform(). +# There are two main entry points into the Parser class: parse() and preSaveTransform(). # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup. # -# Globals used: +# Globals used: # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser # # NOT $wgArticle, $wgUser or $wgTitle. Keep them away! # # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*, -# $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*, +# $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*, # $wgLocaltimezone # # * only within ParserOptions @@ -29,24 +29,26 @@ if( $GLOBALS['wgUseWikiHiero'] ){ # Variable substitution O(N^2) attack #----------------------------------------- # Without countermeasures, it would be possible to attack the parser by saving a page -# filled with a large number of inclusions of large pages. The size of the generated -# page would be proportional to the square of the input size. Hence, we limit the number +# filled with a large number of inclusions of large pages. The size of the generated +# page would be proportional to the square of the input size. Hence, we limit the number # of inclusions of any given page, thus bringing any attack back to O(N). # -define( "MAX_INCLUDE_REPEAT", 5 ); -# Recursion depth of variable/inclusion evaluation -define( "MAX_INCLUDE_PASSES", 3 ); +define( "MAX_INCLUDE_REPEAT", 5 ); # Allowed values for $mOutputType define( "OT_HTML", 1 ); define( "OT_WIKI", 2 ); +define( "OT_MSG", 3 ); + +# prefix for escaping, used in two functions at least +define( "UNIQ_PREFIX", "NaodW29"); class Parser { # Cleared with clearState(): - var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array(); - var $mVariables, $mIncludeCount; + var $mOutput, $mAutonumber, $mDTopen, $mStripState = array(); + var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre; # Temporary: var $mOptions, $mTitle, $mOutputType; @@ -65,10 +67,11 @@ class Parser $this->mVariables = false; $this->mIncludeCount = array(); $this->mStripState = array(); + $this->mArgStack = array(); } - + # First pass--just handle sections, pass the rest off - # to doWikiPass2() which does all the real work. + # to internalParse() which does all the real work. # # Returns a ParserOutput # @@ -80,16 +83,30 @@ class Parser if ( $clearState ) { $this->clearState(); } - + $this->mOptions = $options; $this->mTitle =& $title; $this->mOutputType = OT_HTML; - + $stripState = NULL; $text = $this->strip( $text, $this->mStripState ); - $text = $this->doWikiPass2( $text, $linestart ); + $text = $this->internalParse( $text, $linestart ); $text = $this->unstrip( $text, $this->mStripState ); - + # Clean up special characters, only run once, next-to-last before doBlockLevels + $fixtags = array( + "/
/i" => '
', + "/
/i" => '
', + "/
/i"=>'
', + "/<\\/center *>/i" => '
', + # Clean up spare ampersands; note that we probably ought to be + # more careful about named entities. + '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&' + ); + $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); + + # only once and last + $text = $this->doBlockLevels( $text, $linestart ); + $this->mOutput->setText( $text ); wfProfileOut( $fname ); return $this->mOutput; @@ -100,23 +117,26 @@ class Parser return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff)); } - # Replaces all occurences of <$tag>content in the text + # Replaces all occurrences of <$tag>content in the text # with a random marker and returns the new text. the output parameter # $content will be an associative array filled with data on the form # $unique_marker => content. + # If $content is already set, the additional entries will be appended + /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){ - $result = array(); - $rnd = $uniq_prefix . Parser::getRandomString(); - $content = array( ); + $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString(); + if ( !$content ) { + $content = array( ); + } $n = 1; $stripped = ""; while ( "" != $text ) { $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 ); $stripped .= $p[0]; - if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { - $text = ""; + if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { + $text = ""; } else { $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 ); $marker = $rnd . sprintf("%08X", $n++); @@ -126,22 +146,24 @@ class Parser } } return $stripped; - } + } # Strips ,
 and 
 	# Returns the text, and fills an array with data needed in unstrip()
+	# If the $state is already a valid strip state, it adds to the state
 	#
 	function strip( $text, &$state )
 	{
 		$render = ($this->mOutputType == OT_HTML);
-		$nowiki_content = array(); 
+		$nowiki_content = array();
 		$hiero_content = array();
 		$math_content = array();
 		$pre_content = array();
+		$item_content = array();
 
 		# Replace any instances of the placeholders
-		$uniq_prefix = "NaodW29";
-		$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
+		$uniq_prefix = UNIQ_PREFIX;
+		#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 
 		$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 		foreach( $nowiki_content as $marker => $content ){
@@ -182,39 +204,72 @@ class Parser
 				$pre_content[$marker] = "
$content
"; } } - - # Must expand in reverse order, otherwise nested tags will be corrupted - $state = array( $pre_content, $math_content, $hiero_content, $nowiki_content ); + + # Merge state with the pre-existing state, if there is one + if ( $state ) { + $state['nowiki'] = $state['nowiki'] + $nowiki_content; + $state['hiero'] = $state['hiero'] + $hiero_content; + $state['math'] = $state['math'] + $math_content; + $state['pre'] = $state['pre'] + $pre_content; + } else { + $state = array( + 'nowiki' => $nowiki_content, + 'hiero' => $hiero_content, + 'math' => $math_content, + 'pre' => $pre_content, + 'item' => $item_content + ); + } return $text; } function unstrip( $text, &$state ) { - foreach( $state as $content_dict ){ - foreach( $content_dict as $marker => $content ){ - $text = str_replace( $marker, $content, $text ); + # Must expand in reverse order, otherwise nested tags will be corrupted + $contentDict = end( $state ); + for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) { + for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) { + $text = str_replace( key( $contentDict ), $content, $text ); } } + return $text; } + # Add an item to the strip state + # Returns the unique tag which must be inserted into the stripped text + # The tag will be replaced with the original text in unstrip() + + function insertStripItem( $text, &$state ) + { + $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString(); + if ( !$state ) { + $state = array( + 'nowiki' => array(), + 'hiero' => array(), + 'math' => array(), + 'pre' => array(), + 'item' => array() + ); + } + $state['item'][$rnd] = $text; + return $rnd; + } + function categoryMagic () { global $wgLang , $wgUser ; if ( !$this->mOptions->getUseCategoryMagic() ) return ; $id = $this->mTitle->getArticleID() ; - $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ; + $cns = Namespace::getCategory() ; + if ( $this->mTitle->getNamespace() != $cns ) return "" ; $ti = $this->mTitle->getText() ; - $ti = explode ( ":" , $ti , 2 ) ; - if ( $cat != $ti[0] ) return "" ; - $r = "
\n" ; + $r = "
\n"; $articles = array() ; $parents = array () ; $children = array() ; - -# $sk =& $this->mGetSkin(); $sk =& $wgUser->getSkin() ; $data = array () ; @@ -227,16 +282,14 @@ class Parser $res = wfQuery ( $sql2, DB_READ ) ; while ( $x = wfFetchObject ( $res ) ) $data[] = $x ; - foreach ( $data AS $x ) { $t = $wgLang->getNsText ( $x->cur_namespace ) ; if ( $t != "" ) $t .= ":" ; $t .= $x->cur_title ; - $y = explode ( ":" , $t , 2 ) ; - if ( count ( $y ) == 2 && $y[0] == $cat ) { - array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ; + if ( $x->cur_namespace == $cns ) { + array_push ( $children , $sk->makeLink ( $t ) ) ; } else { array_push ( $articles , $sk->makeLink ( $t ) ) ; } @@ -255,7 +308,7 @@ class Parser if ( count ( $articles ) > 0 ) { asort ( $articles ) ; - $h = wfMsg( "category_header", $ti[1] ); + $h = wfMsg( "category_header", $ti ); $r .= "

{$h}

\n" ; $r .= implode ( ", " , $articles ) ; } @@ -285,7 +338,7 @@ class Parser { if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-) $htmlattrs = $this->getHTMLattrs() ; - + # Strip non-approved attributes from the tag $t = preg_replace( "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e", @@ -294,7 +347,7 @@ class Parser # Strip javascript "expression" from stylesheets. Brute force approach: # If anythin offensive is found, all attributes of the HTML tag are dropped - if( preg_match( + if( preg_match( "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is", wfMungeToUtf8( $t ) ) ) { @@ -313,7 +366,7 @@ class Parser $ltr = array () ; # tr attributes foreach ( $t AS $k => $x ) { - $x = rtrim ( $x ) ; + $x = trim ( $x ) ; $fc = substr ( $x , 0 , 1 ) ; if ( "{|" == substr ( $x , 0 , 2 ) ) { @@ -334,7 +387,7 @@ class Parser $t[$k] = $z ; } /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption - { + { $z = trim ( substr ( $x , 2 ) ) ; $t[$k] = "{$z}\n" ; }*/ @@ -368,7 +421,7 @@ class Parser { $z = "" ; if ( $fc != "+" ) - { + { $tra = array_pop ( $ltr ) ; if ( !array_pop ( $tr ) ) $z = "\n" ; array_push ( $tr , true ) ; @@ -377,9 +430,9 @@ class Parser $l = array_pop ( $ltd ) ; if ( array_pop ( $td ) ) $z = "" . $z ; - if ( $fc == "|" ) $l = "TD" ; - else if ( $fc == "!" ) $l = "TH" ; - else if ( $fc == "+" ) $l = "CAPTION" ; + if ( $fc == "|" ) $l = "td" ; + else if ( $fc == "!" ) $l = "th" ; + else if ( $fc == "+" ) $l = "caption" ; else $l = "" ; array_push ( $ltd , $l ) ; $y = explode ( "|" , $theline , 2 ) ; @@ -404,38 +457,32 @@ class Parser return $t ; } - # Well, OK, it's actually about 14 passes. But since all the - # hard lifting is done inside PHP's regex code, it probably - # wouldn't speed things up much to add a real parser. - # - function doWikiPass2( $text, $linestart ) + function internalParse( $text, $linestart, $args = array() ) { - $fname = "Parser::doWikiPass2"; + $fname = "Parser::internalParse"; wfProfileIn( $fname ); - + $text = $this->removeHTMLtags( $text ); - $text = $this->replaceVariables( $text ); + $text = $this->replaceVariables( $text, $args ); # $text = preg_replace( "/(^|\n)-----*/", "\\1
", $text ); - $text = str_replace ( "
", "
", $text ); $text = $this->doHeadings( $text ); - if($this->mOptions->getUseDynamicDates()) { global $wgDateFormatter; $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text ); } - $text = $this->replaceExternalLinks( $text ); $text = $this->doTokenizedParser ( $text ); $text = $this->doTableStuff ( $text ) ; - $text = $this->formatHeadings( $text ); - $sk =& $this->mOptions->getSkin(); $text = $sk->transformContent( $text ); - $text = $this->doBlockLevels( $text, $linestart ); - $text .= $this->categoryMagic () ; + + if ( !isset ( $this->categoryMagicDone ) ) { + $text .= $this->categoryMagic () ; + $this->categoryMagicDone = true ; + } wfProfileOut( $fname ); return $text; @@ -470,18 +517,18 @@ class Parser wfProfileOut( $fname ); return $text; } - + /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber ) { $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3"; $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF"; - - # this is the list of separators that should be ignored if they + + # this is the list of separators that should be ignored if they # are the last character of an URL but that should be included # if they occur within the URL, e.g. "go to www.foo.com, where .." # in this case, the last comma should not become part of the URL, # but in "www.foo.com/123,2342,32.htm" it should. - $sep = ",;\.:"; + $sep = ",;\.:"; $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF"; $images = "gif|png|jpg|jpeg"; @@ -490,7 +537,7 @@ class Parser # that the content of the string should be inserted there). $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." . "((?i){$images})([^{$uc}]|$)/"; - + $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/"; $sk =& $this->mOptions->getSkin(); @@ -520,7 +567,7 @@ class Parser } else if ( preg_match( $e2, $line, $m ) ) { $link = "{$protocol}:{$m[1]}"; $text = $m[2]; - $trail = $m[3]; + $trail = $m[3]; } else { $s .= "[{$protocol}:" . $line; continue; @@ -540,8 +587,8 @@ class Parser /* private */ function handle3Quotes( &$state, $token ) { - if ( $state["strong"] ) { - if ( $state["em"] && $state["em"] > $state["strong"] ) + if ( $state["strong"] !== false ) { + if ( $state["em"] !== false && $state["em"] > $state["strong"] ) { # ''' lala ''lala ''' $s = ""; @@ -551,15 +598,15 @@ class Parser $state["strong"] = FALSE; } else { $s = ""; - $state["strong"] = $token["pos"]; + $state["strong"] = isset($token["pos"]) ? $token["pos"] : true; } return $s; } /* private */ function handle2Quotes( &$state, $token ) { - if ( $state["em"] ) { - if ( $state["strong"] && $state["strong"] > $state["em"] ) + if ( $state["em"] !== false ) { + if ( $state["strong"] !== false && $state["strong"] > $state["em"] ) { # ''lala'''lala'' ....''' $s = ""; @@ -569,32 +616,33 @@ class Parser $state["em"] = FALSE; } else { $s = ""; - $state["em"] = $token["pos"]; + $state["em"] = isset($token["pos"]) ? $token["pos"] : true; + } return $s; } - + /* private */ function handle5Quotes( &$state, $token ) { $s = ""; - if ( $state["em"] && $state["strong"] ) { + if ( $state["em"] !== false && $state["strong"] !== false ) { if ( $state["em"] < $state["strong"] ) { $s .= ""; } else { $s .= ""; } $state["strong"] = $state["em"] = FALSE; - } elseif ( $state["em"] ) { + } elseif ( $state["em"] !== false ) { $s .= ""; $state["em"] = FALSE; $state["strong"] = $token["pos"]; - } elseif ( $state["strong"] ) { + } elseif ( $state["strong"] !== false ) { $s .= ""; $state["strong"] = FALSE; $state["em"] = $token["pos"]; } else { # not $em and not $strong $s .= ""; - $state["strong"] = $state["em"] = $token["pos"]; + $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true; } return $s; } @@ -605,18 +653,17 @@ class Parser $tokenizer=Tokenizer::newFromString( $str ); $tokenStack = array(); - + $s=""; $state["em"] = FALSE; $state["strong"] = FALSE; $tagIsOpen = FALSE; $threeopen = false; - + # The tokenizer splits the text into tokens and returns them one by one. # Every call to the tokenizer returns a new token. while ( $token = $tokenizer->nextToken() ) { - $threeopen = false; switch ( $token["type"] ) { case "text": @@ -633,13 +680,13 @@ class Parser array_push( $tokenStack, $token ); $txt=""; break; - + case "]]]": case "]]": # link close tag. # get text from stack, glue it together, and call the code to handle a # link - + if ( count( $tokenStack ) == 0 ) { # stack empty. Found a ]] without an opening [[ @@ -654,30 +701,30 @@ class Parser } $lastToken = array_pop( $tokenStack ); } - + $txt = $linkText ."]]"; - + if( isset( $lastToken["text"] ) ) { $prefix = $lastToken["text"]; } else { $prefix = ""; } $nextToken = $tokenizer->previewToken(); - if ( $nextToken["type"] == "text" ) + if ( $nextToken["type"] == "text" ) { # Preview just looks at it. Now we have to fetch it. $nextToken = $tokenizer->nextToken(); $txt .= $nextToken["text"]; } - $txt = $this->handleInternalLink( $txt, $prefix ); + $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix ); - # did the tag start with 3 [ ? + # did the tag start with 3 [ ? if($threeopen) { # show the first as text $txt = "[".$txt; $threeopen=false; } - + } $tagIsOpen = (count( $tokenStack ) != 0); break; @@ -770,7 +817,7 @@ class Parser #$e2 = "/^(.*)\\b(\\w+)\$/suD"; #$e2 = "/^(.*\\s)(\\S+)\$/suD"; static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD'; - + # Special and Media are pseudo-namespaces; no pages actually exist in them static $image = FALSE; @@ -780,21 +827,21 @@ class Parser if ( !$image ) { $image = Namespace::getImage(); } if ( !$special ) { $special = Namespace::getSpecial(); } if ( !$media ) { $media = Namespace::getMedia(); } - if ( !$category ) { $category = wfMsg ( "category" ) ; } - + if ( !$category ) { $category = Namespace::getCategory(); ; } + $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() ); wfProfileOut( "$fname-setup" ); $s = ""; - + if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; - $trail = $m[3]; + $trail = $m[3]; } else { # Invalid form; output directly $s .= $prefix . "[[" . $line ; return $s; } - + /* Valid link forms: Foobar -- normal :Foobar -- override special treatment of prefix (images, language links) @@ -805,7 +852,7 @@ class Parser $noforce = ($c != ":"); if( $c == "/" ) { # subpage if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown - $m[1]=substr($m[1],1,strlen($m[1])-2); + $m[1]=substr($m[1],1,strlen($m[1])-2); $noslash=$m[1]; } else { $noslash=substr($m[1],1); @@ -813,7 +860,7 @@ class Parser if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash); if( "" == $text ) { - $text= $m[1]; + $text= $m[1]; } # this might be changed for ugliness reasons } else { $link = $noslash; # no subpage allowed, use standard link @@ -836,14 +883,21 @@ class Parser if( $noforce ) { if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) { array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() ); - $s .= $prefix . $trail; - return $s; + return (trim($s) == '')? '': $s; } if( $ns == $image ) { $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail; $wgLinkCache->addImageLinkObj( $nt ); return $s; } + if ( $ns == $category ) { + $t = $nt->getText() ; + $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ; + $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix ); + $this->mOutput->mCategoryLinks[] = $t ; + $s .= $prefix . $trail ; + return $s ; + } } if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) && ( strpos( $link, "#" ) == FALSE ) ) { @@ -851,23 +905,6 @@ class Parser return $s; } - # Category feature - $catns = strtoupper ( $nt->getDBkey () ) ; - $catns = explode ( ":" , $catns ) ; - if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ; - else $catns = "" ; - if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) { - $t = explode ( ":" , $nt->getText() ) ; - array_shift ( $t ) ; - $t = implode ( ":" , $t ) ; - $t = $wgLang->ucFirst ( $t ) ; - $nnt = Title::newFromText ( $category.":".$t ) ; - $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix ); - $this->mOutput->mCategoryLinks[] = $t ; - $s .= $prefix . $trail ; - return $s ; - } - if( $ns == $media ) { $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail; $wgLinkCache->addImageLinkObj( $nt ); @@ -888,10 +925,11 @@ class Parser { $result = ""; if ( '' != $this->mLastSection ) { - $result = "mLastSection . ">"; + $result = "mLastSection . ">\n"; } + $this->mInPre = false; $this->mLastSection = ""; - return $result."\n"; + return $result; } # getCommon() returns the length of the longest common substring # of both arguments, starting at the beginning of both. @@ -968,27 +1006,39 @@ class Parser # and making lists from lines starting with * # : etc. # $a = explode( "\n", $text ); - $a[0] = "\n".$a[0]; - $lastPref = $text = ''; + + $lastPref = $text = $lastLine = ''; $this->mDTopen = $inBlockElem = false; + $npl = 0; + $pstack = false; if ( ! $linestart ) { $text .= array_shift( $a ); } foreach ( $a as $t ) { - if ( "" != $text ) { $text .= "\n"; } - $oLine = $t; $opl = strlen( $lastPref ); - $npl = strspn( $t, "*#:;" ); - $pref = substr( $t, 0, $npl ); - $pref2 = str_replace( ";", ":", $pref ); - $t = substr( $t, $npl ); + $preCloseMatch = preg_match("/<\\/pre/i", $t ); + $preOpenMatch = preg_match("/
mInPre) {
+				$this->mInPre = !empty($preOpenMatch);
+			}
+			if ( !$this->mInPre ) {
+				$npl = strspn( $t, "*#:;" );
+				$pref = substr( $t, 0, $npl );
+				$pref2 = str_replace( ";", ":", $pref );
+				$t = substr( $t, $npl );
+			} else {
+				$npl = 0;
+				$pref = $pref2 = '';
+			}
 
+			// list generation
 			if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 				$text .= $this->nextItem( substr( $pref, -1 ) );
+				if ( $pstack ) { $pstack = false; }
 
 				if ( ";" == substr( $pref, -1 ) ) {
 					$cpos = strpos( $t, ":" );
-					if ( ! ( false === $cpos ) ) {
+					if ( false !== $cpos ) {
 						$term = substr( $t, 0, $cpos );
 						$text .= $term . $this->nextItem( ":" );
 						$t = substr( $t, $cpos + 1 );
@@ -996,6 +1046,7 @@ class Parser
 				}
 			} else if (0 != $npl || 0 != $opl) {
 				$cpl = $this->getCommon( $pref, $lastPref );
+				if ( $pstack ) { $pstack = false; }
 
 				while ( $cpl < $opl ) {
 					$text .= $this->closeList( $lastPref{$opl-1} );
@@ -1020,42 +1071,64 @@ class Parser
 				}
 				$lastPref = $pref2;
 			}
-			if ( 0 == $npl ) { # No prefix--go to paragraph mode
-				if ( preg_match(
-				  "/(closeParagraph();
-					$inBlockElem = true;
-				} else if ( preg_match("/(closeParagraph();
-					$inBlockElem = false;
-				}
-				if ( ! $inBlockElem ) {
-					if ( " " == $t{0} ) {
-						$newSection = "pre";
-						# $t = wfEscapeHTML( $t );
+					if($preOpenMatch and !$preCloseMatch) {
+						$this->mInPre = true;	
+					}
+					if ( $closematch  ) {
+						$inBlockElem = false;
+					} else {
+						$inBlockElem = true;
 					}
-					else { $newSection = "p"; }
-
-					if ( '' == trim( $oLine ) ) {
-						if ( $this->mLastSection != 'p') {
-							$text .= $this->closeParagraph();
-							$text .= "<" . $newSection . ">";
-							$this->mLastSection = $newSection;
-						} else if ( $this->mLastSection == 'p') {
-							$text .= '
'; + } else if ( !$inBlockElem ) { + if ( " " == $t{0} ) { + // pre + if ($this->mLastSection != 'pre') { + $pstack = false; + $text .= $this->closeParagraph().'
';
+							$this->mLastSection = 'pre';
+						}
+					} else {
+						// paragraph
+						if ( '' == trim($t) ) {
+							if ( $pstack ) {
+								$text .= $pstack.'
'; + $pstack = false; + $this->mLastSection = 'p'; + } else { + if ($this->mLastSection != 'p' ) { + $text .= $this->closeParagraph(); + $this->mLastSection = ''; + $pstack = "

"; + } else { + $pstack = '

'; + } + } + } else { + if ( $pstack ) { + $text .= $pstack; + $pstack = false; + $this->mLastSection = 'p'; + } else if ($this->mLastSection != 'p') { + $text .= $this->closeParagraph().'

'; + $this->mLastSection = 'p'; + } } - } else if ( $this->mLastSection == $newSection and $newSection != 'p' ) { - $text .= $this->closeParagraph(); - $text .= "<" . $newSection . ">"; - $this->mLastSection = $newSection; } } - if ( $inBlockElem && - preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|<\\/p<\\/div|<\\/pre)/i", $t ) ) { - $inBlockElem = false; - } } - $text .= $t; + if ($pstack === false) { + $text .= $t."\n"; + } } while ( $npl ) { $text .= $this->closeList( $pref2{$npl-1} ); @@ -1065,12 +1138,13 @@ class Parser $text .= "mLastSection . ">"; $this->mLastSection = ""; } + wfProfileOut( $fname ); return $text; } function getVariableValue( $index ) { - global $wgLang; + global $wgLang, $wgSitename, $wgServer; switch ( $index ) { case MAG_CURRENTMONTH: @@ -1089,6 +1163,10 @@ class Parser return $wgLang->time( wfTimestampNow(), false ); case MAG_NUMBEROFARTICLES: return wfNumberOfArticles(); + case MAG_SITENAME: + return $wgSitename; + case MAG_SERVER: + return $wgServer; default: return NULL; } @@ -1104,76 +1182,58 @@ class Parser } } - /* private */ function replaceVariables( $text ) + /* private */ function replaceVariables( $text, $args = array() ) { - global $wgLang, $wgCurParser; - global $wgScript, $wgArticlePath; + global $wgLang, $wgScript, $wgArticlePath; $fname = "Parser::replaceVariables"; wfProfileIn( $fname ); - + $bail = false; if ( !$this->mVariables ) { $this->initialiseVariables(); } $titleChars = Title::legalChars(); - $regex = "/{{([$titleChars]*?)}}/s"; - - # "Recursive" variable expansion: run it through a couple of passes - for ( $i=0; $ifork(); - - $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text ); - if ( $oldText == $text ) { - $bail = true; - } - $this->merge( $wgCurParser ); - } + $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s"; - return $text; - } + # This function is called recursively. To keep track of arguments we need a stack: + array_push( $this->mArgStack, $args ); - # Returns a copy of this object except with various variables cleared - # This copy can be re-merged with the parent after operations on the copy - function fork() - { - $copy = $this; - $copy->mOutput = new ParserOutput; - return $copy; - } + # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array + $GLOBALS['wgCurParser'] =& $this; + $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text ); - # Merges a copy split off with fork() - function merge( &$copy ) - { - $this->mOutput->merge( $copy->mOutput ); - - # Merge include throttling arrays - foreach( $copy->mIncludeCount as $dbk => $count ) { - if ( array_key_exists( $dbk, $this->mIncludeCount ) ) { - $this->mIncludeCount[$dbk] += $count; - } else { - $this->mIncludeCount[$dbk] = $count; - } - } + array_pop( $this->mArgStack ); + + return $text; } function braceSubstitution( $matches ) { - global $wgLinkCache; + global $wgLinkCache, $wgLang; $fname = "Parser::braceSubstitution"; $found = false; $nowiki = false; - - $text = $matches[1]; + $title = NULL; + + # $newline is an optional newline character before the braces + # $part1 is the bit before the first |, and must contain only title characters + # $args is a list of arguments, starting from index 0, not including $part1 + + $newline = $matches[1]; + $part1 = $matches[2]; + # If the third subpattern matched anything, it will start with | + if ( $matches[3] !== "" ) { + $args = explode( "|", substr( $matches[3], 1 ) ); + } else { + $args = array(); + } + $argc = count( $args ); # SUBST $mwSubst =& MagicWord::get( MAG_SUBST ); - if ( $mwSubst->matchStartAndRemove( $text ) ) { - if ( $this->mOutputType == OT_HTML ) { + if ( $mwSubst->matchStartAndRemove( $part1 ) ) { + if ( $this->mOutputType != OT_WIKI ) { # Invalid SUBST not replaced at PST time # Return without further processing $text = $matches[0]; @@ -1184,66 +1244,102 @@ class Parser $text = $matches[0]; $found = true; } - - # Various prefixes + + # MSG, MSGNW and INT if ( !$found ) { # Check for MSGNW: $mwMsgnw =& MagicWord::get( MAG_MSGNW ); - if ( $mwMsgnw->matchStartAndRemove( $text ) ) { + if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { $nowiki = true; } else { # Remove obsolete MSG: $mwMsg =& MagicWord::get( MAG_MSG ); - $mwMsg->matchStartAndRemove( $text ); + $mwMsg->matchStartAndRemove( $part1 ); } - + # Check if it is an internal message $mwInt =& MagicWord::get( MAG_INT ); - if ( $mwInt->matchStartAndRemove( $text ) ) { - $text = wfMsg( $text ); - $found = true; + if ( $mwInt->matchStartAndRemove( $part1 ) ) { + if ( $this->incrementIncludeCount( "int:$part1" ) ) { + $text = wfMsgReal( $part1, $args, true ); + $found = true; + } + } + } + + # NS + if ( !$found ) { + # Check for NS: (namespace expansion) + $mwNs = MagicWord::get( MAG_NS ); + if ( $mwNs->matchStartAndRemove( $part1 ) ) { + if ( intval( $part1 ) ) { + $text = $wgLang->getNsText( intval( $part1 ) ); + $found = true; + } else { + $index = Namespace::getCanonicalIndex( strtolower( $part1 ) ); + if ( !is_null( $index ) ) { + $text = $wgLang->getNsText( $index ); + $found = true; + } + } + } + } + + # LOCALURL and LOCALURLE + if ( !$found ) { + $mwLocal = MagicWord::get( MAG_LOCALURL ); + $mwLocalE = MagicWord::get( MAG_LOCALURLE ); + + if ( $mwLocal->matchStartAndRemove( $part1 ) ) { + $func = 'getLocalURL'; + } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) { + $func = 'escapeLocalURL'; + } else { + $func = ''; + } + + if ( $func !== '' ) { + $title = Title::newFromText( $part1 ); + if ( !is_null( $title ) ) { + if ( $argc > 0 ) { + $text = $title->$func( $args[0] ); + } else { + $text = $title->$func(); + } + $found = true; + } } } - - # Check for a match against internal variables - if ( !$found && array_key_exists( $text, $this->mVariables ) ) { - $text = $this->mVariables[$text]; + + # Internal variables + if ( !$found && array_key_exists( $part1, $this->mVariables ) ) { + $text = $this->mVariables[$part1]; $found = true; $this->mOutput->mContainsOldMagic = true; - } - + } + + # Arguments input from the caller + $inputArgs = end( $this->mArgStack ); + if ( !$found && array_key_exists( $part1, $inputArgs ) ) { + $text = $inputArgs[$part1]; + $found = true; + } + # Load from database if ( !$found ) { - $title = Title::newFromText( $text, NS_TEMPLATE ); - if ( !is_null( $text ) && !$title->isExternal() ) { + $title = Title::newFromText( $part1, NS_TEMPLATE ); + if ( !is_null( $title ) && !$title->isExternal() ) { # Check for excessive inclusion $dbk = $title->getPrefixedDBkey(); - if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) { - $this->mIncludeCount[$dbk] = 0; - } - if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) { + if ( $this->incrementIncludeCount( $dbk ) ) { $article = new Article( $title ); $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals(); if ( $articleContent !== false ) { $found = true; $text = $articleContent; - - # Escaping and link table handling - # Not required for preSaveTransform() - if ( $this->mOutputType == OT_HTML ) { - if ( $nowiki ) { - $text = wfEscapeWikiText( $text ); - } else { - $text = $this->removeHTMLtags( $text ); - } - $wgLinkCache->suspend(); - $text = $this->doTokenizedParser( $text ); - $wgLinkCache->resume(); - $wgLinkCache->addLinkObj( $title ); - } - } - } + } + } # If the title is valid but undisplayable, make a link to it if ( $this->mOutputType == OT_HTML && !$found ) { @@ -1253,20 +1349,78 @@ class Parser } } + # Recursive parsing, escaping and link table handling + # Only for HTML output + if ( $nowiki && $found && $this->mOutputType == OT_HTML ) { + $text = wfEscapeWikiText( $text ); + } elseif ( $this->mOutputType == OT_HTML && $found ) { + # Clean up argument array + $assocArgs = array(); + $index = 1; + foreach( $args as $arg ) { + $eqpos = strpos( $arg, "=" ); + if ( $eqpos === false ) { + $assocArgs[$index++] = $arg; + } else { + $name = trim( substr( $arg, 0, $eqpos ) ); + $value = trim( substr( $arg, $eqpos+1 ) ); + if ( $value === false ) { + $value = ""; + } + if ( $name !== false ) { + $assocArgs[$name] = $value; + } + } + } + + # Do not enter included links in link table + if ( !is_null( $title ) ) { + $wgLinkCache->suspend(); + } + + # Run full parser on the included text + $text = $this->strip( $text, $this->mStripState ); + $text = $this->internalParse( $text, (bool)$newline, $assocArgs ); + + # Add the result to the strip state for re-inclusion after + # the rest of the processing + $text = $this->insertStripItem( $text, $this->mStripState ); + + # Resume the link cache and register the inclusion as a link + if ( !is_null( $title ) ) { + $wgLinkCache->resume(); + $wgLinkCache->addLinkObj( $title ); + } + } + if ( !$found ) { return $matches[0]; } else { - return $text; + return $newline . $text; + } + } + + # Returns true if the function is allowed to include this entity + function incrementIncludeCount( $dbk ) + { + if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) { + $this->mIncludeCount[$dbk] = 0; + } + if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) { + return true; + } else { + return false; } } + # Cleans up HTML, removes dangerous tags and attributes /* private */ function removeHTMLtags( $text ) { $fname = "Parser::removeHTMLtags"; wfProfileIn( $fname ); $htmlpairs = array( # Tags that must be closed - "b", "i", "u", "font", "big", "small", "sub", "sup", "h1", + "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1", "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s", "strike", "strong", "tt", "var", "div", "center", "blockquote", "ol", "ul", "dl", "table", "caption", "pre", @@ -1286,7 +1440,7 @@ class Parser $htmlsingle = array_merge( $tabletags, $htmlsingle ); $htmlelements = array_merge( $htmlsingle, $htmlpairs ); - $htmlattrs = $this->getHTMLattrs () ; + $htmlattrs = $this->getHTMLattrs () ; # Remove HTML comments $text = preg_replace( "//sU", "", $text ); @@ -1334,7 +1488,7 @@ class Parser } # Strip non-approved attributes from the tag $newparams = $this->fixTagAttributes($params); - + } if ( ! $badtag ) { $rest = str_replace( ">", ">", $rest ); @@ -1353,8 +1507,8 @@ class Parser return $text; } -/* - * +/* + * * This function accomplishes several tasks: * 1) Auto-number headings if that option is enabled * 2) Add an [edit] link to sections for logged in users who have enabled the option @@ -1363,7 +1517,7 @@ class Parser * * It loops through all headlines, collects the necessary data, then splits up the * string and re-inserts the newly formatted headlines. - * + * */ /* private */ function formatHeadings( $text ) @@ -1383,7 +1537,7 @@ class Parser if( $esw->matchAndRemove( $text ) ) { $showEditLink = 0; } - # if the string __NOTOC__ (not case-sensitive) occurs in the HTML, + # if the string __NOTOC__ (not case-sensitive) occurs in the HTML, # do not add TOC $mw =& MagicWord::get( MAG_NOTOC ); if( $mw->matchAndRemove( $text ) ) { @@ -1434,12 +1588,12 @@ class Parser $prevlevel = $level; } $level = $matches[1][$headlineCount]; - if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) { + if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) { # reset when we enter a new level $sublevelCount[$level] = 0; $toc .= $sk->tocIndent( $level - $prevlevel ); $toclevel += $level - $prevlevel; - } + } if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) { # reset when we step back a level $sublevelCount[$level+1]=0; @@ -1456,38 +1610,37 @@ class Parser $numbering .= "."; } $numbering .= $sublevelCount[$i]; - $dot = 1; + $dot = 1; } } } # The canonized header is a version of the header text safe to use for links # Avoid insertion of weird stuff like by expanding the relevant sections - $canonized_headline = Parser::unstrip( $headline, $this->mStripState ); - + $canonized_headline = $this->unstrip( $headline, $this->mStripState ); + # strip out HTML $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline ); - $tocline = trim( $canonized_headline ); - $canonized_headline = preg_replace( "/[^a-z0-9]/i", "_", trim( $canonized_headline ) ); + $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline)); $refer[$headlineCount] = $canonized_headline; - + # count how many in assoc. array so we can track dupes in anchors @$refers[$canonized_headline]++; $refcount[$headlineCount]=$refers[$canonized_headline]; # Prepend the number to the heading text - + if( $doNumberHeadings || $doShowToc ) { $tocline = $numbering . " " . $tocline; - + # Don't number the heading if it is the only one (looks silly) if( $doNumberHeadings && count( $matches[3] ) > 1) { # the two are different if the line contains a link $headline=$numbering . " " . $headline; } } - + # Create the anchor for linking from the TOC to the section $anchor = $canonized_headline; if($refcount[$headlineCount] > 1 ) { @@ -1502,17 +1655,17 @@ class Parser } $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1); } - + # Add the edit section span if( $rightClickHack ) { - $headline = $sk->editSectionScript($headlineCount+1,$headline); + $headline = $sk->editSectionScript($headlineCount+1,$headline); } # give headline the correct tag @$head[$headlineCount] .= ""; - + $headlineCount++; - } + } if( $doShowToc ) { $toclines = $headlineCount; @@ -1521,20 +1674,23 @@ class Parser } # split up and insert constructed headlines - + $blocks = preg_split( "/.*?<\/H[1-6]>/i", $text ); $i = 0; foreach( $blocks as $block ) { if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) { - # This is the [edit] link that appears for the top block of text when + # This is the [edit] link that appears for the top block of text when # section editing is enabled - $full .= $sk->editSectionLink(0); + + # Disabled because it broke block formatting + # For example, a bullet point in the top line + # $full .= $sk->editSectionLink(0); } $full .= $block; if( $doShowToc && !$i) { - # Let's add a top anchor just in case we want to link to the top of the page - $full = "".$full.$toc; + # Top anchor now in skin + $full = $full.$toc; } if( !empty( $head[$i] ) ) { @@ -1542,7 +1698,7 @@ class Parser } $i++; } - + return $full; } @@ -1576,7 +1732,7 @@ class Parser } $num = str_replace( "-", "", $isbn ); $num = str_replace( " ", "", $num ); - + if ( "" == $num ) { $text = "ISBN $blank$x"; } else { @@ -1619,7 +1775,7 @@ class Parser $rfc .= $x{0}; $x = substr( $x, 1 ); } - + if ( "" == $rfc ) { $text .= "RFC $blank$x"; } else { @@ -1640,13 +1796,22 @@ class Parser $this->mOptions = $options; $this->mTitle =& $title; $this->mOutputType = OT_WIKI; - + if ( $clearState ) { $this->clearState(); } - + $stripState = false; - $text = str_replace("\r\n", "\n", $text); + $pairs = array( + "\r\n" => "\n", + ); + $text = str_replace(array_keys($pairs), array_values($pairs), $text); + // now with regexes + $pairs = array( + "//i" => '
', + "/
/i" => "
", + ); + $text = preg_replace(array_keys($pairs), array_values($pairs), $text); $text = $this->strip( $text, $stripState, false ); $text = $this->pstPass2( $text, $user ); $text = $this->unstrip( $text, $stripState ); @@ -1706,16 +1871,16 @@ class Parser } else { $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text ); } - + /* $mw =& MagicWord::get( MAG_SUBST ); $wgCurParser = $this->fork(); $text = $mw->substituteCallback( $text, "wfBraceSubstitution" ); $this->merge( $wgCurParser ); */ - + # Trim trailing whitespace - # MAG_END (__END__) tag allows for trailing + # MAG_END (__END__) tag allows for trailing # whitespace to be deliberately included $text = rtrim( $text ); $mw =& MagicWord::get( MAG_END ); @@ -1726,7 +1891,7 @@ class Parser # Set up some variables which are usually set up in parse() # so that an external function can call some class members with confidence - function startExternalParse( &$title, $options, $outputType, $clearState = true ) + function startExternalParse( &$title, $options, $outputType, $clearState = true ) { $this->mTitle =& $title; $this->mOptions = $options; @@ -1735,6 +1900,26 @@ class Parser $this->clearState(); } } + + function transformMsg( $text, $options ) { + global $wgTitle; + static $executing = false; + + # Guard against infinite recursion + if ( $executing ) { + return $text; + } + $executing = true; + + $this->mTitle = $wgTitle; + $this->mOptions = $options; + $this->mOutputType = OT_MSG; + $this->clearState(); + $text = $this->replaceVariables( $text ); + + $executing = false; + return $text; + } } class ParserOutput @@ -1806,19 +1991,20 @@ class ParserOptions function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); } function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); } - /* static */ function newFromUser( &$user ) + /* static */ function newFromUser( &$user ) { $popts = new ParserOptions; - $popts->initialiseFromUser( &$user ); + $popts->initialiseFromUser( $user ); return $popts; } - function initialiseFromUser( &$userInput ) + function initialiseFromUser( &$userInput ) { global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages; - + if ( !$userInput ) { $user = new User; + $user->setLoaded( true ); } else { $user =& $userInput; }