From 9e56a35f7cd1be77ce5b91946b06c4192ecae253 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 6 Jun 2006 22:56:38 +0000 Subject: [PATCH] * (bug 3202) Attributes now allowed on
 <pre> tags *
 Sanitizer::validateTagAttributes now available to discard illegal/unsafe  
 attribute values from an array.

---
 RELEASE-NOTES          |  3 ++
 includes/Parser.php    | 21 +++++++---
 includes/Sanitizer.php | 95 +++++++++++++++++++++++-------------------
 3 files changed, 70 insertions(+), 49 deletions(-)

diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index 44b688c3d7..838a4cbd9b 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -443,6 +443,9 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * (bug 6175) Improvement to German translation (de)
 * Redirect Special:Logs to Special:Log
 * (bug 6206) Linktrail for Swedish localization (se)
+* (bug 3202) Attributes now allowed on <pre> tags
+* Sanitizer::validateTagAttributes now available to discard illegal/unsafe
+  attribute values from an array.
 
 
 == Compatibility ==
diff --git a/includes/Parser.php b/includes/Parser.php
index bd58a3cd65..52df980a6c 100644
--- a/includes/Parser.php
+++ b/includes/Parser.php
@@ -132,6 +132,7 @@ class Parser
 		$this->mTagHooks = array();
 		$this->mFunctionHooks = array();
 		$this->clearState();
+		$this->setHook( 'pre', array( $this, 'renderPreTag' ) );
 	}
 
 	/**
@@ -422,7 +423,7 @@ class Parser
 		$commentState = array();
 		
 		$elements = array_merge(
-			array( 'nowiki', 'pre', 'gallery' ),
+			array( 'nowiki', 'gallery' ),
 			array_keys( $this->mTagHooks ) );
 		global $wgRawHtml;
 		if( $wgRawHtml ) {
@@ -463,11 +464,6 @@ class Parser
 				case 'math':
 					$output = MathRenderer::renderMath( $content );
 					break;
-				case 'pre':
-					// Backwards-compatibility hack
-					$content = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $content );
-					$output = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
- break; case 'gallery': $output = $this->renderImageGallery( $content ); break; @@ -4031,6 +4027,19 @@ class Parser return $matches[0]; } + /** + * Tag hook handler for 'pre'. + */ + function renderPreTag( $text, $attribs, $parser ) { + // Backwards-compatibility hack + $content = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $text ); + + $attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' ); + return wfOpenElement( 'pre', $attribs ) . + wfEscapeHTMLTagsOnly( $content ) . + '</pre>';
+ } + /** * Renders an image gallery from a text with one line per image. * text labels may be given by using |-style alternative text. E.g. diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index f01759217c..ca7831acd2 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -538,53 +538,26 @@ class Sanitizer { } /** - * Take a tag soup fragment listing an HTML element's attributes - * and normalize it to well-formed XML, discarding unwanted attributes. + * Take an array of attribute names and values and normalize or discard + * illegal values for the given element type. * - * - Normalizes attribute names to lowercase * - Discards attributes not on a whitelist for the given element - * - Turns broken or invalid entities into plaintext - * - Double-quotes all attribute values - * - Attributes without values are given the name as attribute - * - Double attributes are discarded * - Unsafe style attributes are discarded - * - Prepends space if there are attributes. * - * @param string $text + * @param array $attribs * @param string $element - * @return string + * @return array * * @todo Check for legal values where the DTD limits things. 
* @todo Check for unique id attribute :P */ - function fixTagAttributes( $text, $element ) { - if( trim( $text ) == '' ) { - return ''; - } - - # Unquoted attribute - # Since we quote this later, this can be anything distinguishable - # from the end of the attribute - $pairs = array(); - if( !preg_match_all( - MW_ATTRIBS_REGEX, - $text, - $pairs, - PREG_SET_ORDER ) ) { - return ''; - } - + function validateTagAttributes( $attribs, $element ) { $whitelist = array_flip( Sanitizer::attributeWhitelist( $element ) ); - $attribs = array(); - foreach( $pairs as $set ) { - $attribute = strtolower( $set[1] ); + $out = array(); + foreach( $attribs as $attribute => $value ) { if( !isset( $whitelist[$attribute] ) ) { continue; } - - $raw = Sanitizer::getTagAttributeCallback( $set ); - $value = Sanitizer::normalizeAttributeValue( $raw ); - # Strip javascript "expression" from stylesheets. # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp if( $attribute == 'style' ) { @@ -592,7 +565,7 @@ class Sanitizer { // Remove any comments; IE gets token splitting wrong $stripped = preg_replace( '!/\\*.*?\\*/!S', ' ', $stripped ); - $value = htmlspecialchars( $stripped ); + $value = $stripped; // ... and continue checks $stripped = preg_replace( '!\\\\([0-9A-Fa-f]{1,6})[ \\n\\r\\t\\f]?!e', @@ -608,9 +581,48 @@ class Sanitizer { if ( $attribute === 'id' ) $value = Sanitizer::escapeId( $value ); + // If this attribute was previously set, override it. + // Output should only have one attribute of each name. + $out[$attribute] = $value; + } + return $out; + } + + /** + * Take a tag soup fragment listing an HTML element's attributes + * and normalize it to well-formed XML, discarding unwanted attributes. + * Output is safe for further wikitext processing, with escaping of + * values that could trigger problems. 
+ * + * - Normalizes attribute names to lowercase + * - Discards attributes not on a whitelist for the given element + * - Turns broken or invalid entities into plaintext + * - Double-quotes all attribute values + * - Attributes without values are given the name as attribute + * - Double attributes are discarded + * - Unsafe style attributes are discarded + * - Prepends space if there are attributes. + * + * @param string $text + * @param string $element + * @return string + */ + function fixTagAttributes( $text, $element ) { + if( trim( $text ) == '' ) { + return ''; + } + + $stripped = Sanitizer::validateTagAttributes( + Sanitizer::decodeTagAttributes( $text ), $element ); + + $attribs = array(); + foreach( $stripped as $attribute => $value ) { + $encAttribute = htmlspecialchars( $attribute ); + + $encValue = htmlspecialchars( $value ); # Templates and links may be expanded in later parsing, # creating invalid or dangerous output. Suppress this. - $value = strtr( $value, array( + $encValue = strtr( $encValue, array( '<' => '&lt;', // This should never happen, '>' => '&gt;', // we've received invalid input '"' => '&quot;', // which should have been escaped. @@ -625,16 +637,13 @@ class Sanitizer { ) ); # Stupid hack - $value = preg_replace_callback( + $encValue = preg_replace_callback( '/(' . wfUrlProtocols() . ')/', array( 'Sanitizer', 'armorLinksCallback' ), - $value ); - - // If this attribute was previously set, override it. - // Output should only have one attribute of each name. - $attribs[$attribute] = "$attribute=\"$value\""; + $encValue ); + + $attribs[] = "$encAttribute=\"$encValue\""; } - return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : ''; } -- 2.20.1