Escape '<' in attribute values for well-formed XML
author Aryeh Gregor <simetrical@users.mediawiki.org>
Thu, 1 Oct 2009 01:30:58 +0000 (01:30 +0000)
committer Aryeh Gregor <simetrical@users.mediawiki.org>
Thu, 1 Oct 2009 01:30:58 +0000 (01:30 +0000)
This fixes r56407, which fixed bug 20655.  Now $wgWellFormedXml is used,
not $wgHtml5.  The previous code was outputting malformed XML if
$wgHtml5 and $wgWellFormedXml were both true.

I wish we had unit tests for this.  :(

includes/Html.php

index 1998283..9573269 100644 (file)
@@ -353,17 +353,23 @@ class Html {
                                # and we don't need <> escaped here, we may as well not call
                                # htmlspecialchars().  FIXME: verify that we actually need to
                                # escape \n\r\t here, and explain why, exactly.
-                               if ( $wgHtml5 ) {
-                                       $ret .= " $key=$quote" . strtr( $value, array(
-                                               '&' => '&amp;',
-                                               '"' => '&quot;',
-                                               "\n" => '&#10;',
-                                               "\r" => '&#13;',
-                                               "\t" => '&#9;'
-                                       ) ) . $quote;
-                               } else {
-                                       $ret .= " $key=$quote" . Sanitizer::encodeAttribute( $value ) . $quote;
+                               #
+                               # We could call Sanitizer::encodeAttribute() for this, but we
+                               # don't because we're stubborn and like our marginal savings on
+                               # byte size from not having to encode unnecessary quotes.
+                               $map = array(
+                                       '&' => '&amp;',
+                                       '"' => '&quot;',
+                                       "\n" => '&#10;',
+                                       "\r" => '&#13;',
+                                       "\t" => '&#9;'
+                               );
+                               if ( $wgWellFormedXml ) {
+                                       # '<' must be escaped in attributes for XML for some
+                                       # reason, per spec: http://www.w3.org/TR/xml/#NT-AttValue
+                                       $map['<'] = '&lt;';
                                }
+                               $ret .= " $key=$quote" . strtr( $value, $map ) . $quote;
                        }
                }
                return $ret;