From: Daniel Friesen Date: Sun, 14 Aug 2011 16:41:53 +0000 (+0000) Subject: Add code to the sanitizer to convert presentational attributes that were removed... X-Git-Tag: 1.31.0-rc.0~28273 X-Git-Url: https://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/comptes/?a=commitdiff_plain;h=6007684f5734924c3347bbdc59a3d346f2f7729e;p=lhc%2Fweb%2Fwiklou.git Add code to the sanitizer to convert presentational attributes that were removed in html5 into inline css. This allows wiki to keep using them in short loose WikiText but still output valid modern markup. Note that there were some attributes excluded. Namely stuff on img and object, and the table cellspacing and cellpadding which aren't easily converted into inline css. --- diff --git a/RELEASE-NOTES-1.19 b/RELEASE-NOTES-1.19 index ee11da89ac..96b7fa2bb9 100644 --- a/RELEASE-NOTES-1.19 +++ b/RELEASE-NOTES-1.19 @@ -33,6 +33,9 @@ production. * Added two new GetLocalURL hooks to better serve extensions working on a limited type of titles. * Added a --no-updates flag to importDump.php that skips updating the links tables. +* Most presentational html attributes like valign are now converted to inline + css style rules. These attributes were removed from html5 and so we clean them up + when $wgHtml5 is enabled. This can be disabled using $wgCleanupPresentationalAttributes. === Bug fixes in 1.19 === * $wgUploadNavigationUrl should be used for file redlinks if diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 9d89cb32b9..240ec13755 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -2166,6 +2166,11 @@ $wgAllowRdfaAttributes = false; */ $wgAllowMicrodataAttributes = false; +/** + * Cleanup as much presentational html like valign -> css vertical-align as we can + */ +$wgCleanupPresentationalAttributes = false; + /** * Should we try to make our HTML output well-formed XML? 
If set to false, * output will be a few bytes shorter, and the HTML will arguably be more diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 118f170b0d..5080582621 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -602,6 +602,89 @@ class Sanitizer { return $text; } + /** + * Take an array of attribute names and values and fix some deprecated values + * for the given element type. + * This does not validate properties, so you should ensure that you call + * validateTagAttributes AFTER this to ensure that the resulting style rule + * this may add is safe. + * + * - Converts most presentational attributes like align into inline css + * + * @param $attribs Array + * @param $element String + * @return Array + */ + static function fixDeprecatedAttributes( $attribs, $element ) { + global $wgHtml5, $wgCleanupPresentationalAttributes; + + // presentational attributes were removed from html5, we can leave them + // in when html5 is turned off + if ( !$wgHtml5 || !$wgCleanupPresentationalAttributes ) { + return $attribs; + } + + $table = array( 'table' ); + $cells = array( 'td', 'th' ); + $colls = array( 'col', 'colgroup' ); + $tblocks = array( 'tbody', 'tfoot', 'thead' ); + $h = array( 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ); + + $presentationalAttribs = array( + 'align' => array( 'text-align', array_merge( array( 'caption', 'hr', 'div', 'p', 'tr' ), $table, $cells, $colls, $tblocks, $h ) ), + 'clear' => array( 'clear', array( 'br' ) ), + 'height' => array( 'height', $cells ), + 'nowrap' => array( 'white-space', $cells ), + 'size' => array( 'height', array( 'hr' ) ), + 'type' => array( 'list-style-type', array( 'li', 'ol', 'ul' ) ), + 'valign' => array( 'vertical-align', array_merge( $cells, $colls, $tblocks ) ), + 'width' => array( 'width', array_merge( array( 'hr', 'pre' ), $table, $cells, $colls ) ), + ); + + $style = ""; + foreach ( $presentationalAttribs as $attribute => $info ) { + list( $property, $elements ) = $info; + + // Skip if this 
attribute is not relevant to this element + if ( !in_array( $element, $elements ) ) { + continue; + } + + // Skip if the attribute is not used + if ( !array_key_exists( $attribute, $attribs ) ) { + continue; + } + + $value = $attribs[$attribute]; + + // For nowrap the value should be nowrap instead of whatever text is in the value + if ( $attribute === 'nowrap' ) { + $value = 'nowrap'; + } + + // Size based properties should have px applied to them if they have no unit + if ( in_array( $attribute, array( 'height', 'width', 'size' ) ) ) { + if ( preg_match( '/^[\d.]+$/', $value ) ) { + $value = "{$value}px"; + } + } + + $style .= " $property: $value;"; + + unset( $attribs[$attribute] ); + } + + if ( !empty($style) ) { + // Prepend our style rules so that they can be overridden by user css + if ( isset($attribs['style']) ) { + $style .= " " . $attribs['style']; + } + $attribs['style'] = trim($style); + } + + return $attribs; + } + /** * Take an array of attribute names and values and normalize or discard * illegal values for the given element type. @@ -849,8 +932,9 @@ class Sanitizer { return ''; } - $stripped = Sanitizer::validateTagAttributes( - Sanitizer::decodeTagAttributes( $text ), $element ); + $decoded = Sanitizer::decodeTagAttributes( $text ); + $decoded = Sanitizer::fixDeprecatedAttributes( $decoded, $element ); + $stripped = Sanitizer::validateTagAttributes( $decoded, $element ); $attribs = array(); foreach( $stripped as $attribute => $value ) {