* (bug 3202) Attributes now allowed on <pre> tags

author Brion Vibber <brion@users.mediawiki.org>

Tue, 6 Jun 2006 22:56:38 +0000 (22:56 +0000)

committer Brion Vibber <brion@users.mediawiki.org>

Tue, 6 Jun 2006 22:56:38 +0000 (22:56 +0000)
author Brion Vibber <brion@users.mediawiki.org>
Tue, 6 Jun 2006 22:56:38 +0000 (22:56 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Tue, 6 Jun 2006 22:56:38 +0000 (22:56 +0000)
diff --git a/RELEASE-NOTES b/RELEASE-NOTES

index 44b688c..838a4cb 100644 (file)
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -443,6 +443,9 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
  * (bug 6175) Improvement to German translation (de)
  * Redirect Special:Logs to Special:Log
  * (bug 6206) Linktrail for Swedish localization (se)
+* (bug 3202) Attributes now allowed on <pre> tags
+* Sanitizer::validateTagAttributes now available to discard illegal/unsafe
+  attribute values from an array.
  
  
  == Compatibility ==
diff --git a/includes/Parser.php b/includes/Parser.php

index bd58a3c..52df980 100644 (file)
--- a/includes/Parser.php
+++ b/includes/Parser.php
@@ -132,6 +132,7 @@ class Parser
                 $this->mTagHooks = array();
                 $this->mFunctionHooks = array();
                 $this->clearState();
+               $this->setHook( 'pre', array( $this, 'renderPreTag' ) );
         }
  
         /**
@@ -422,7 +423,7 @@ class Parser
                 $commentState = array();
                 
                 $elements = array_merge(
-                       array( 'nowiki', 'pre', 'gallery' ),
+                       array( 'nowiki', 'gallery' ),
                         array_keys( $this->mTagHooks ) );
                 global $wgRawHtml;
                 if( $wgRawHtml ) {
@@ -463,11 +464,6 @@ class Parser
                                 case 'math':
                                         $output = MathRenderer::renderMath( $content );
                                         break;
-                               case 'pre':
-                                       // Backwards-compatibility hack
-                                       $content = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $content );
-                                       $output = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
-                                       break;
                                 case 'gallery':
                                         $output = $this->renderImageGallery( $content );
                                         break;
@@ -4031,6 +4027,19 @@ class Parser
                 return $matches[0];
         }
  
+       /**
+        * Tag hook handler for 'pre'.
+        */
+       function renderPreTag( $text, $attribs, $parser ) {
+               // Backwards-compatibility hack
+               $content = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $text );
+               
+               $attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' );
+               return wfOpenElement( 'pre', $attribs ) .
+                       wfEscapeHTMLTagsOnly( $content ) .
+                       '</pre>';
+       }
+       
         /**
          * Renders an image gallery from a text with one line per image.
          * text labels may be given by using |-style alternative text. E.g.
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php

index f017592..ca7831a 100644 (file)
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -538,53 +538,26 @@ class Sanitizer {
         }
  
         /**
-        * Take a tag soup fragment listing an HTML element's attributes
-        * and normalize it to well-formed XML, discarding unwanted attributes.
+        * Take an array of attribute names and values and normalize or discard
+        * illegal values for the given element type.
          *
-        * - Normalizes attribute names to lowercase
          * - Discards attributes not on a whitelist for the given element
-        * - Turns broken or invalid entities into plaintext
-        * - Double-quotes all attribute values
-        * - Attributes without values are given the name as attribute
-        * - Double attributes are discarded
          * - Unsafe style attributes are discarded
-        * - Prepends space if there are attributes.
          *
-        * @param string $text
+        * @param array $attribs
          * @param string $element
-        * @return string
+        * @return array
          *
          * @todo Check for legal values where the DTD limits things.
          * @todo Check for unique id attribute :P
          */
-       function fixTagAttributes( $text, $element ) {
-               if( trim( $text ) == '' ) {
-                       return '';
-               }
-
-               # Unquoted attribute
-               # Since we quote this later, this can be anything distinguishable
-               # from the end of the attribute
-               $pairs = array();
-               if( !preg_match_all(
-                       MW_ATTRIBS_REGEX,
-                       $text,
-                       $pairs,
-                       PREG_SET_ORDER ) ) {
-                       return '';
-               }
-
+       function validateTagAttributes( $attribs, $element ) {
                 $whitelist = array_flip( Sanitizer::attributeWhitelist( $element ) );
-               $attribs = array();
-               foreach( $pairs as $set ) {
-                       $attribute = strtolower( $set[1] );
+               $out = array();
+               foreach( $attribs as $attribute => $value ) {
                         if( !isset( $whitelist[$attribute] ) ) {
                                 continue;
                         }
-
-                       $raw   = Sanitizer::getTagAttributeCallback( $set );
-                       $value = Sanitizer::normalizeAttributeValue( $raw );
-
                         # Strip javascript "expression" from stylesheets.
                         # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
                         if( $attribute == 'style' ) {
@@ -592,7 +565,7 @@ class Sanitizer {
  
                                 // Remove any comments; IE gets token splitting wrong
                                 $stripped = preg_replace( '!/\\*.*?\\*/!S', ' ', $stripped );
-                               $value = htmlspecialchars( $stripped );
+                               $value = $stripped;
  
                                 // ... and continue checks
                                 $stripped = preg_replace( '!\\\\([0-9A-Fa-f]{1,6})[ \\n\\r\\t\\f]?!e',
@@ -608,9 +581,48 @@ class Sanitizer {
                         if ( $attribute === 'id' )
                                 $value = Sanitizer::escapeId( $value );
  
+                       // If this attribute was previously set, override it.
+                       // Output should only have one attribute of each name.
+                       $out[$attribute] = $value;
+               }
+               return $out;
+       }
+
+       /**
+        * Take a tag soup fragment listing an HTML element's attributes
+        * and normalize it to well-formed XML, discarding unwanted attributes.
+        * Output is safe for further wikitext processing, with escaping of
+        * values that could trigger problems.
+        *
+        * - Normalizes attribute names to lowercase
+        * - Discards attributes not on a whitelist for the given element
+        * - Turns broken or invalid entities into plaintext
+        * - Double-quotes all attribute values
+        * - Attributes without values are given the name as value
+        * - Duplicate attributes are discarded
+        * - Unsafe style attributes are discarded
+        * - Prepends space if there are attributes.
+        *
+        * @param string $text
+        * @param string $element
+        * @return string
+        */
+       function fixTagAttributes( $text, $element ) {
+               if( trim( $text ) == '' ) {
+                       return '';
+               }
+               
+               $stripped = Sanitizer::validateTagAttributes(
+                       Sanitizer::decodeTagAttributes( $text ), $element );
+               
+               $attribs = array();
+               foreach( $stripped as $attribute => $value ) {
+                       $encAttribute = htmlspecialchars( $attribute );
+                       
+                       $encValue = htmlspecialchars( $value );
                         # Templates and links may be expanded in later parsing,
                         # creating invalid or dangerous output. Suppress this.
-                       $value = strtr( $value, array(
+                       $encValue = strtr( $encValue, array(
                                 '<'    => '&lt;',   // This should never happen,
                                 '>'    => '&gt;',   // we've received invalid input
                                 '"'    => '&quot;', // which should have been escaped.
@@ -625,16 +637,13 @@ class Sanitizer {
                         ) );
  
                         # Stupid hack
-                       $value = preg_replace_callback(
+                       $encValue = preg_replace_callback(
                                 '/(' . wfUrlProtocols() . ')/',
                                 array( 'Sanitizer', 'armorLinksCallback' ),
-                               $value );
-
-                       // If this attribute was previously set, override it.
-                       // Output should only have one attribute of each name.
-                       $attribs[$attribute] = "$attribute=\"$value\"";
+                               $encValue );
+                       
+                       $attribs[] = "$encAttribute=\"$encValue\"";
                 }
-
                 return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
         }
author	Brion Vibber <brion@users.mediawiki.org>
	Tue, 6 Jun 2006 22:56:38 +0000 (22:56 +0000)
committer	Brion Vibber <brion@users.mediawiki.org>
	Tue, 6 Jun 2006 22:56:38 +0000 (22:56 +0000)
RELEASE-NOTES		patch \| blob \| history
includes/Parser.php		patch \| blob \| history
includes/Sanitizer.php		patch \| blob \| history