- *
- * @access private
- * @static
- */
- function extractTags( $tag, $text, &$content, $uniq_prefix = '' ) {
- $dummy_tags = array();
- $dummy_params = array();
-
- return Parser::extractTagsAndParams( $tag, $text, $content,
- $dummy_tags, $dummy_params, $uniq_prefix );
- }
-
/**
* Strips and renders nowiki, pre, math, hiero
* If $render is set, performs necessary rendering operations on plugins
@@ -402,190 +411,144 @@ class Parser
* will be stripped in addition to other tags. This is important
* for section editing, where these comments cause confusion when
* counting the sections in the wikisource
+ *
+ * @param array $dontstrip tags which should not be stripped;
+ * used to prevent stripping of <gallery> when saving (fixes bug 2700)
*
- * @access private
+ * @private
*/
- function strip( $text, &$state, $stripcomments = false ) {
+ function strip( $text, &$state, $stripcomments = false , $dontstrip = array () ) {
$render = ($this->mOutputType == OT_HTML);
- $html_content = array();
- $nowiki_content = array();
- $math_content = array();
- $pre_content = array();
- $comment_content = array();
- $ext_content = array();
- $ext_tags = array();
- $ext_params = array();
- $gallery_content = array();
# Replace any instances of the placeholders
$uniq_prefix = $this->mUniqPrefix;
#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
-
- # html
+ $commentState = array();
+
+ $elements = array_merge(
+ array( 'nowiki', 'gallery' ),
+ array_keys( $this->mTagHooks ) );
global $wgRawHtml;
if( $wgRawHtml ) {
- $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
- foreach( $html_content as $marker => $content ) {
- if ($render ) {
- # Raw and unchecked for validity.
- $html_content[$marker] = $content;
- } else {
- $html_content[$marker] = ''.$content.'';
- }
- }
+ $elements[] = 'html';
}
-
- # nowiki
- $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
- foreach( $nowiki_content as $marker => $content ) {
- if( $render ){
- $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
- } else {
- $nowiki_content[$marker] = ''.$content.'';
- }
- }
-
- # math
if( $this->mOptions->getUseTeX() ) {
- $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
- foreach( $math_content as $marker => $content ){
- if( $render ) {
- $math_content[$marker] = renderMath( $content );
- } else {
- $math_content[$marker] = '';
- }
- }
- }
-
- # pre
- $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
- foreach( $pre_content as $marker => $content ){
- if( $render ){
- $pre_content[$marker] = '' . wfEscapeHTMLTagsOnly( $content ) . '
';
- } else {
- $pre_content[$marker] = ''.$content.'
';
- }
- }
-
- # gallery
- $text = Parser::extractTags('gallery', $text, $gallery_content, $uniq_prefix);
- foreach( $gallery_content as $marker => $content ) {
- require_once( 'ImageGallery.php' );
- if ( $render ) {
- $gallery_content[$marker] = $this->renderImageGallery( $content );
- } else {
- $gallery_content[$marker] = ''.$content.'';
- }
+ $elements[] = 'math';
}
-
- # Comments
- $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
- foreach( $comment_content as $marker => $content ){
- $comment_content[$marker] = '';
+
+ # Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700)
+ foreach ( $elements AS $k => $v ) {
+ if ( !in_array ( $v , $dontstrip ) ) continue;
+ unset ( $elements[$k] );
}
-
- # Extensions
- foreach ( $this->mTagHooks as $tag => $callback ) {
- $ext_content[$tag] = array();
- $text = Parser::extractTagsAndParams( $tag, $text, $ext_content[$tag],
- $ext_tags[$tag], $ext_params[$tag], $uniq_prefix );
- foreach( $ext_content[$tag] as $marker => $content ) {
- $full_tag = $ext_tags[$tag][$marker];
- $params = $ext_params[$tag][$marker];
- if ( $render )
- $ext_content[$tag][$marker] = call_user_func_array( $callback, array( $content, $params, &$this ) );
- else {
- if ( is_null( $content ) ) {
- // Empty element tag
- $ext_content[$tag][$marker] = $full_tag;
+
+ $matches = array();
+ $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
+
+ foreach( $matches as $marker => $data ) {
+ list( $element, $content, $params, $tag ) = $data;
+ if( $render ) {
+ $tagName = strtolower( $element );
+ switch( $tagName ) {
+ case '!--':
+ // Comment
+ if( substr( $tag, -3 ) == '-->' ) {
+ $output = $tag;
} else {
- $ext_content[$tag][$marker] = "$full_tag$content$tag>";
+ // Unclosed comment in input.
+ // Close it so later stripping can remove it
+ $output = "$tag-->";
+ }
+ break;
+ case 'html':
+ if( $wgRawHtml ) {
+ $output = $content;
+ break;
+ }
+ // Shouldn't happen otherwise; deliberately fall through and escape like nowiki.
+ case 'nowiki':
+ $output = wfEscapeHTMLTagsOnly( $content );
+ break;
+ case 'math':
+ $output = MathRenderer::renderMath( $content );
+ break;
+ case 'gallery':
+ $output = $this->renderImageGallery( $content, $params );
+ break;
+ default:
+ if( isset( $this->mTagHooks[$tagName] ) ) {
+ $output = call_user_func_array( $this->mTagHooks[$tagName],
+ array( $content, $params, $this ) );
+ } else {
+ throw new MWException( "Invalid call hook $element" );
}
}
+ } else {
+ // Just stripping tags; keep the source
+ $output = $tag;
+ }
+ if( !$stripcomments && $element == '!--' ) {
+ $commentState[$marker] = $output;
+ } else {
+ $state[$element][$marker] = $output;
}
}
-
+
# Unstrip comments unless explicitly told otherwise.
# (The comments are always stripped prior to this point, so as to
# not invoke any extension tags / parser hooks contained within
# a comment.)
if ( !$stripcomments ) {
- $tempstate = array( 'comment' => $comment_content );
- $text = $this->unstrip( $text, $tempstate );
- $comment_content = array();
+ // Put them all back and forget them
+ $text = strtr( $text, $commentState );
}
- # Merge state with the pre-existing state, if there is one
- if ( $state ) {
- $state['html'] = $state['html'] + $html_content;
- $state['nowiki'] = $state['nowiki'] + $nowiki_content;
- $state['math'] = $state['math'] + $math_content;
- $state['pre'] = $state['pre'] + $pre_content;
- $state['gallery'] = $state['gallery'] + $gallery_content;
- $state['comment'] = $state['comment'] + $comment_content;
-
- foreach( $ext_content as $tag => $array ) {
- if ( array_key_exists( $tag, $state ) ) {
- $state[$tag] = $state[$tag] + $array;
- }
- }
- } else {
- $state = array(
- 'html' => $html_content,
- 'nowiki' => $nowiki_content,
- 'math' => $math_content,
- 'pre' => $pre_content,
- 'gallery' => $gallery_content,
- 'comment' => $comment_content,
- ) + $ext_content;
- }
return $text;
}
/**
- * restores pre, math, and hiero removed by strip()
+ * Restores pre, math, and other extensions removed by strip()
*
* always call unstripNoWiki() after this one
- * @access private
+ * @private
*/
function unstrip( $text, &$state ) {
if ( !is_array( $state ) ) {
return $text;
}
- # Must expand in reverse order, otherwise nested tags will be corrupted
- foreach( array_reverse( $state, true ) as $tag => $contentDict ) {
+ $replacements = array();
+ foreach( $state as $tag => $contentDict ) {
if( $tag != 'nowiki' && $tag != 'html' ) {
- foreach( array_reverse( $contentDict, true ) as $uniq => $content ) {
- $text = str_replace( $uniq, $content, $text );
+ foreach( $contentDict as $uniq => $content ) {
+ $replacements[$uniq] = $content;
}
}
}
+ $text = strtr( $text, $replacements );
return $text;
}
/**
- * always call this after unstrip() to preserve the order
+ * Always call this after unstrip() to preserve the order
*
- * @access private
+ * @private
*/
function unstripNoWiki( $text, &$state ) {
if ( !is_array( $state ) ) {
return $text;
}
- # Must expand in reverse order, otherwise nested tags will be corrupted
- for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
- $text = str_replace( key( $state['nowiki'] ), $content, $text );
- }
-
- global $wgRawHtml;
- if ($wgRawHtml) {
- for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
- $text = str_replace( key( $state['html'] ), $content, $text );
+ $replacements = array();
+ foreach( $state as $tag => $contentDict ) {
+ if( $tag == 'nowiki' || $tag == 'html' ) {
+ foreach( $contentDict as $uniq => $content ) {
+ $replacements[$uniq] = $content;
+ }
}
}
+ $text = strtr( $text, $replacements );
return $text;
}
@@ -595,19 +558,12 @@ class Parser
* Returns the unique tag which must be inserted into the stripped text
* The tag will be replaced with the original text in unstrip()
*
- * @access private
+ * @private
*/
function insertStripItem( $text, &$state ) {
$rnd = $this->mUniqPrefix . '-item' . Parser::getRandomString();
if ( !$state ) {
- $state = array(
- 'html' => array(),
- 'nowiki' => array(),
- 'math' => array(),
- 'pre' => array(),
- 'comment' => array(),
- 'gallery' => array(),
- );
+ $state = array();
}
$state['item'][$rnd] = $text;
return $rnd;
@@ -624,7 +580,7 @@ class Parser
*
* @param string $text Hideous HTML input
* @return string Corrected HTML output
- * @access public
+ * @public
* @static
*/
function tidy( $text ) {
@@ -647,7 +603,7 @@ class Parser
/**
* Spawn an external HTML tidy process and get corrected markup back from it.
*
- * @access private
+ * @private
* @static
*/
function externalTidy( $text ) {
@@ -698,7 +654,7 @@ class Parser
*
* 'pear install tidy' should be able to compile the extension module.
*
- * @access private
+ * @private
* @static
*/
function internalTidy( $text ) {
@@ -724,7 +680,7 @@ class Parser
/**
* parse the wiki syntax used to render tables
*
- * @access private
+ * @private
*/
function doTableStuff ( $t ) {
$fname = 'Parser::doTableStuff';
@@ -861,7 +817,7 @@ class Parser
* Helper function for parse() that transforms wiki markup into
* HTML. Only called for $mOutputType == OT_HTML.
*
- * @access private
+ * @private
*/
function internalParse( $text ) {
$args = array();
@@ -873,12 +829,21 @@ class Parser
$text = strtr( $text, array( '' => '' , '' => '' ) );
$text = strtr( $text, array( '' => '', '' => '') );
$text = preg_replace( '/.*?<\/includeonly>/s', '', $text );
-
+
$text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ) );
+
$text = $this->replaceVariables( $text, $args );
+ // Tables need to come after variable replacement for things to work
+ // properly; putting them before other transformations should keep
+ // exciting things like link expansions from showing up in surprising
+ // places.
+ $text = $this->doTableStuff( $text );
+
$text = preg_replace( '/(^|\n)-----*/', '\\1
', $text );
+ $text = $this->stripToc( $text );
+ $this->stripNoGallery( $text );
$text = $this->doHeadings( $text );
if($this->mOptions->getUseDynamicDates()) {
$df =& DateFormatter::getInstance();
@@ -893,7 +858,6 @@ class Parser
$text = str_replace($this->mUniqPrefix."NOPARSE", "", $text);
$text = $this->doMagicLinks( $text );
- $text = $this->doTableStuff( $text );
$text = $this->formatHeadings( $text, $isMain );
wfProfileOut( $fname );
@@ -904,7 +868,7 @@ class Parser
* Replace special strings like "ISBN xxx" and "RFC xxx" with
* magic external links.
*
- * @access private
+ * @private
*/
function &doMagicLinks( &$text ) {
$text = $this->magicISBN( $text );
@@ -916,14 +880,14 @@ class Parser
/**
* Parse headers and return html
*
- * @access private
+ * @private
*/
function doHeadings( $text ) {
$fname = 'Parser::doHeadings';
wfProfileIn( $fname );
for ( $i = 6; $i >= 1; --$i ) {
$h = str_repeat( '=', $i );
- $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
+ $text = preg_replace( "/^{$h}(.+){$h}\\s*$/m",
"\\1\\2", $text );
}
wfProfileOut( $fname );
@@ -932,7 +896,7 @@ class Parser
/**
* Replace single quotes with HTML markup
- * @access private
+ * @private
* @return string the altered text
*/
function doAllQuotes( $text ) {
@@ -950,7 +914,7 @@ class Parser
/**
* Helper function for doAllQuotes()
- * @access private
+ * @private
*/
function doQuotes( $text ) {
$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
@@ -1119,7 +1083,7 @@ class Parser
* Note: this is all very hackish and the order of execution matters a lot.
* Make sure to run maintenance/parserTests.php if you change this code.
*
- * @access private
+ * @private
*/
function replaceExternalLinks( $text ) {
global $wgContLang;
@@ -1161,8 +1125,8 @@ class Parser
# No link text, e.g. [http://domain.tld/some.link]
if ( $text == '' ) {
- # Autonumber if allowed
- if ( strpos( HTTP_PROTOCOLS, str_replace('/','\/', $protocol) ) !== false ) {
+ # Autonumber if allowed. See bug #5918
+ if ( strpos( wfUrlProtocols(), substr($protocol, 0, strpos($protocol, ':')) ) !== false ) {
$text = '[' . ++$this->mAutonumber . ']';
$linktype = 'autonumber';
} else {
@@ -1178,9 +1142,12 @@ class Parser
$text = $wgContLang->markNoConversion($text);
- # Replace &amp; from obsolete syntax with &.
- # All HTML entities will be escaped by makeExternalLink()
- $url = str_replace( '&', '&', $url );
+ # Normalize any HTML entities in input. They will be
+ # re-escaped by makeExternalLink().
+ $url = Sanitizer::decodeCharReferences( $url );
+
+ # Escape any control characters introduced by the above step
+ $url = preg_replace( '/[\][<>"\\x00-\\x20\\x7F]/e', "urlencode('\\0')", $url );
# Process the trail (i.e. everything after this link up until start of the next link),
# replacing any non-bracketed links
@@ -1190,7 +1157,7 @@ class Parser
# This means that users can paste URLs directly into the text
# Funny characters like ö aren't valid in URLs anyway
# This was changed in August 2004
- $s .= $sk->makeExternalLink( $url, $text, false, $linktype ) . $dtrail . $trail;
+ $s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->mTitle->getNamespace() ) . $dtrail . $trail;
# Register link in the output object.
# Replace unnecessary URL escape codes with the referenced character
@@ -1205,7 +1172,7 @@ class Parser
/**
* Replace anything that looks like a URL with a link
- * @access private
+ * @private
*/
function replaceFreeExternalLinks( $text ) {
global $wgContLang;
@@ -1235,7 +1202,7 @@ class Parser
preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m ))
{
# add protocol, arg
- $url .= $bits[$i] . $bits[$i + 1]; # protocol, url as arg to previous link
+ $url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link
$i += 2;
$trail = $m[2];
}
@@ -1261,16 +1228,18 @@ class Parser
$url = substr( $url, 0, -$numSepChars );
}
- # Replace &amp; from obsolete syntax with &.
- # All HTML entities will be escaped by makeExternalLink()
- # or maybeMakeExternalImage()
- $url = str_replace( '&', '&', $url );
+ # Normalize any HTML entities in input. They will be
+ # re-escaped by makeExternalLink() or maybeMakeExternalImage()
+ $url = Sanitizer::decodeCharReferences( $url );
+
+ # Escape any control characters introduced by the above step
+ $url = preg_replace( '/[\][<>"\\x00-\\x20\\x7F]/e', "urlencode('\\0')", $url );
# Is this an external image?
$text = $this->maybeMakeExternalImage( $url );
if ( $text === false ) {
# Not an image, make a link
- $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free' );
+ $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() );
# Register it in the output object...
# Replace unnecessary URL escape codes with their equivalent characters
$pasteurized = Parser::replaceUnusualEscapes( $url );
@@ -1304,7 +1273,7 @@ class Parser
* Callback function used in replaceUnusualEscapes().
* Replaces unusual URL escape codes with their equivalent character
* @static
- * @access private
+ * @private
*/
function replaceUnusualEscapesCallback( $matches ) {
$char = urldecode( $matches[0] );
@@ -1322,7 +1291,7 @@ class Parser
/**
* make an image if it's allowed, either through the global
* option or through the exception
- * @access private
+ * @private
*/
function maybeMakeExternalImage( $url ) {
$sk =& $this->mOptions->getSkin();
@@ -1342,7 +1311,7 @@ class Parser
/**
* Process [[ ]] wikilinks
*
- * @access private
+ * @private
*/
function replaceInternalLinks( $s ) {
global $wgContLang;
@@ -1376,7 +1345,7 @@ class Parser
$useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
if( is_null( $this->mTitle ) ) {
- wfDebugDieBacktrace( 'nooo' );
+ throw new MWException( 'nooo' );
}
$nottalk = !$this->mTitle->isTalkPage();
@@ -1573,6 +1542,7 @@ class Parser
$sortkey = $text;
}
$sortkey = Sanitizer::decodeCharReferences( $sortkey );
+ $sortkey = str_replace( "\n", '', $sortkey );
$sortkey = $wgContLang->convertCategoryKey( $sortkey );
$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
@@ -1705,7 +1675,7 @@ class Parser
* @param string $target the source of the link
* @param string &$text the link text, modified as necessary
* @return string the full name of the link
- * @access private
+ * @private
*/
function maybeDoSubpageLink($target, &$text) {
# Valid link forms:
@@ -1771,7 +1741,7 @@ class Parser
/**#@+
* Used by doBlockLevels()
- * @access private
+ * @private
*/
/* private */ function closeParagraph() {
$result = '';
@@ -1848,7 +1818,7 @@ class Parser
/**
* Make lists from lines starting with ':', '*', '#', etc.
*
- * @access private
+ * @private
* @return string the lists rendered as HTML
*/
function doBlockLevels( $text, $linestart ) {
@@ -1937,12 +1907,13 @@ class Parser
wfProfileIn( "$fname-paragraph" );
# No prefix (not in list)--go to paragraph mode
// XXX: use a stack for nestable elements like span, table and div
- $openmatch = preg_match('/(