/**
* This class is a collection of static functions that serve two purposes:
*
- * 1) Implement any algorithms specified by HTML 5, or other HTML
+ * 1) Implement any algorithms specified by HTML5, or other HTML
* specifications, in a convenient and self-contained way.
*
* 2) Allow HTML elements to be conveniently and safely generated, like the
* <a> elements.
*/
class Html {
- # List of void elements from HTML 5, section 9.1.2 as of 2009-08-10
+ # List of void elements from HTML5, section 9.1.2 as of 2009-08-10
private static $voidElements = array(
'area',
'base',
);
# Boolean attributes, which may have the value omitted entirely. Manually
- # collected from the HTML 5 spec as of 2009-08-10.
+ # collected from the HTML5 spec as of 2009-08-10.
private static $boolAttribs = array(
'async',
'autobuffer',
*
* @param $element string The element's name, e.g., 'a'
* @param $attribs array Associative array of attributes, e.g., array(
- * 'href' => 'http://www.mediawiki.org/' ). Values will be HTML-escaped.
- * A value of false means to omit the attribute.
+ * 'href' => 'http://www.mediawiki.org/' ). See expandAttributes() for
+ * further documentation.
* @param $contents string The raw HTML contents of the element: *not*
* escaped!
* @return string Raw HTML
*/
public static function rawElement( $element, $attribs = array(), $contents = '' ) {
+ 	global $wgWellFormedXml;
+ 	# openElement() lower-cases the tag name it emits. Normalise here too so
+ 	# that the void-element lookup and the closing tag agree with the start
+ 	# tag; otherwise rawElement( 'BR' ) would come out as "<br></BR>".
+ 	$element = strtolower( $element );
+ 	$start = self::openElement( $element, $attribs );
+ 	if ( !in_array( $element, self::$voidElements, true ) ) {
+ 		# Ordinary element: start tag, raw (unescaped) contents, end tag.
+ 		return "$start$contents</$element>";
+ 	}
+ 	# Void element: $contents is ignored and no end tag is emitted.
+ 	if ( $wgWellFormedXml ) {
+ 		# Silly XML: swap the trailing '>' of the start tag for ' />'.
+ 		return substr( $start, 0, -1 ) . ' />';
+ 	}
+ 	return $start;
+ }
+
+ /**
+  * Identical to rawElement(), except that $contents is treated as plain
+  * text and HTML-escaped before being wrapped (like Xml::element()).
+  *
+  * @param $element string The element's name, e.g., 'a'
+  * @param $attribs array Associative array of attributes, handed to
+  *   rawElement() unchanged
+  * @param $contents string Text contents of the element; '&' and '<' are
+  *   replaced by their entities
+  * @return string Raw HTML
+  */
+ public static function element( $element, $attribs = array(), $contents = '' ) {
+ 	# Only & and < are replaced: there's no point in escaping quotes, >,
+ 	# etc. in the contents of elements.
+ 	$escaped = strtr( $contents, array(
+ 		'&' => '&amp;',
+ 		'<' => '&lt;',
+ 	) );
+ 	return self::rawElement( $element, $attribs, $escaped );
+ }
+
+ /**
+ * Identical to rawElement(), but has no third parameter and omits the end
+ * tag (and the self-closing / in XML mode for empty elements).
+ */
+ public static function openElement( $element, $attribs = array() ) {
global $wgHtml5, $wgWellFormedXml;
- # This is not required in HTML 5, but let's do it anyway, for
+ $attribs = (array)$attribs;
+ # This is not required in HTML5, but let's do it anyway, for
# consistency and better compression.
$element = strtolower( $element );
- # Element-specific hacks to slim down output and ensure validity
- if ( $element == 'input' ) {
- if ( !$wgHtml5 ) {
- # With $wgHtml5 off we want to validate as XHTML 1, so we
- # strip out any fancy HTML 5-only input types for now.
- #
- # Whitelist of valid types:
+ # Remove HTML5-only attributes if we aren't doing HTML5
+ if ( !$wgHtml5 ) {
+ if ( $element == 'input' ) {
+ # Whitelist of valid XHTML1 types
$validTypes = array(
'hidden',
'text',
# Fall back to type=text, the default
unset( $attribs['type'] );
}
- # Here we're blacklisting some HTML5-only attributes...
- $html5attribs = array(
- 'autocomplete',
- 'autofocus',
- 'max',
- 'min',
- 'multiple',
- 'pattern',
- 'placeholder',
- 'required',
- 'step',
- );
- foreach ( $html5attribs as $badAttr ) {
- unset( $attribs[$badAttr] );
- }
}
- }
-
- $start = "<$element" . self::expandAttributes(
- self::dropDefaults( $element, $attribs ) );
- if ( in_array( $element, self::$voidElements ) ) {
- if ( $wgWellFormedXml ) {
- return "$start />";
+ if ( $element == 'textarea' && isset( $attribs['maxlength'] ) ) {
+ unset( $attribs['maxlength'] );
+ }
+ # Here we're blacklisting some HTML5-only attributes...
+ $html5attribs = array(
+ 'autocomplete',
+ 'autofocus',
+ 'max',
+ 'min',
+ 'multiple',
+ 'pattern',
+ 'placeholder',
+ 'required',
+ 'step',
+ 'spellcheck',
+ );
+ foreach ( $html5attribs as $badAttr ) {
+ unset( $attribs[$badAttr] );
}
- return "$start>";
- } else {
- return "$start>$contents</$element>";
}
- }
- /**
- * Identical to rawElement(), but HTML-escapes $contents (like
- * Xml::element()).
- */
- public static function element( $element, $attribs = array(), $contents = '' ) {
- return self::rawElement( $element, $attribs, strtr( $contents, array(
- # There's no point in escaping quotes, >, etc. in the contents of
- # elements.
- '&' => '&',
- '<' => '<'
- ) ) );
+ return "<$element" . self::expandAttributes(
+ self::dropDefaults( $element, $attribs ) ) . '>';
}
/**
*
* @param $element string Name of the element, e.g., 'a'
* @param $attribs array Associative array of attributes, e.g., array(
- * 'href' => 'http://www.mediawiki.org/' ).
+ * 'href' => 'http://www.mediawiki.org/' ). See expandAttributes() for
+ * further documentation.
* @return array An array of attributes functionally identical to $attribs
*/
private static function dropDefaults( $element, $attribs ) {
'link' => array( 'media' => 'all' ),
'menu' => array( 'type' => 'list' ),
# Note: the use of text/javascript here instead of other JavaScript
- # MIME types follows the HTML 5 spec.
+ # MIME types follows the HTML5 spec.
'script' => array( 'type' => 'text/javascript' ),
'style' => array(
'media' => 'all',
*
* @param $attribs array Associative array of attributes, e.g., array(
* 'href' => 'http://www.mediawiki.org/' ). Values will be HTML-escaped.
- * A value of false means to omit the attribute.
+ * A value of false means to omit the attribute. For boolean attributes,
+ * you can omit the key, e.g., array( 'checked' ) instead of
+ * array( 'checked' => 'checked' ) or such.
* @return string HTML fragment that goes between element name and '>'
* (starting with a space if at least one attribute is output)
*/
$key = $value;
}
- # Not technically required in HTML 5, but required in XHTML 1.0,
+ # Not technically required in HTML5, but required in XHTML 1.0,
# and we'd like consistency and better compression anyway.
$key = strtolower( $key );
- # See the "Attributes" section in the HTML syntax part of HTML 5,
+ # See the "Attributes" section in the HTML syntax part of HTML5,
# 9.1.2.3 as of 2009-08-10. Most attributes can have quotation
# marks omitted, but not all. (Although a literal " is not
# permitted, we don't check for that, since it will be escaped
# anyway.)
+ #
+ # See also research done on further characters that need to be
+ # escaped: http://code.google.com/p/html5lib/issues/detail?id=93
+ $badChars = "\\x00- '=<>`/\x{00a0}\x{1680}\x{180e}\x{180F}\x{2000}\x{2001}"
+ . "\x{2002}\x{2003}\x{2004}\x{2005}\x{2006}\x{2007}\x{2008}\x{2009}"
+ . "\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}";
if ( $wgWellFormedXml || $value === ''
- || preg_match( "/[ '=<>]/", $value ) ) {
+ || preg_match( "![$badChars]!u", $value ) ) {
$quote = '"';
} else {
$quote = '';
if ( in_array( $key, self::$boolAttribs ) ) {
# In XHTML 1.0 Transitional, the value needs to be equal to the
- # key. In HTML 5, we can leave the value empty instead. If we
+ # key. In HTML5, we can leave the value empty instead. If we
# don't need well-formed XML, we can omit the = entirely.
if ( !$wgWellFormedXml ) {
$ret .= " $key";
# and we don't need <> escaped here, we may as well not call
# htmlspecialchars(). FIXME: verify that we actually need to
# escape \n\r\t here, and explain why, exactly.
- if ( $wgHtml5 ) {
- $ret .= " $key=$quote" . strtr( $value, array(
- '&' => '&',
- '"' => '"',
- "\n" => ' ',
- "\r" => ' ',
- "\t" => '	'
- ) ) . $quote;
- } else {
- $ret .= " $key=$quote" . Sanitizer::encodeAttribute( $value ) . $quote;
+ #
+ # We could call Sanitizer::encodeAttribute() for this, but we
+ # don't because we're stubborn and like our marginal savings on
+ # byte size from not having to encode unnecessary quotes.
+ $map = array(
+ '&' => '&',
+ '"' => '"',
+ "\n" => ' ',
+ "\r" => ' ',
+ "\t" => '	'
+ );
+ if ( $wgWellFormedXml ) {
+ # '<' must be escaped in attributes for XML for some
+ # reason, per spec: http://www.w3.org/TR/xml/#NT-AttValue
+ $map['<'] = '<';
}
+ $ret .= " $key=$quote" . strtr( $value, $map ) . $quote;
}
}
return $ret;
/**
* Convenience function to produce an <input> element. This supports the
- * new HTML 5 input types and attributes, and will silently strip them if
+ * new HTML5 input types and attributes, and will silently strip them if
* $wgHtml5 is false.
*
* @param $name string name attribute
public static function hidden( $name, $value, $attribs = array() ) {
return self::input( $name, $value, 'hidden', $attribs );
}
+
+ /**
+  * Convenience function to produce a <textarea> element. Unlike Xml, this
+  * allows cols= and rows= to be left out: they are required by HTML4/XHTML
+  * but not by HTML5. When $wgHtml5 is false and either attribute is missing,
+  * it is silently added with an empty value (HTML4 validates present but
+  * empty cols="" and rows="" as valid).
+  *
+  * @param $name string name attribute; overrides any 'name' in $attribs
+  * @param $value string Contents of the textarea, passed to Html::element()
+  *   as the element's contents
+  * @param $attribs array Associative array of miscellaneous extra
+  *   attributes, passed to Html::element()
+  * @return string Raw HTML
+  */
+ public static function textarea( $name, $value = '', $attribs = array() ) {
+ 	global $wgHtml5;
+ 	$attribs['name'] = $name;
+ 	if ( !$wgHtml5 ) {
+ 		# Array union only fills in keys the caller did not supply, so an
+ 		# explicit cols/rows value (whatever it is) is left untouched.
+ 		$attribs += array( 'cols' => '', 'rows' => '' );
+ 	}
+ 	return self::element( 'textarea', $attribs, $value );
+ }
}