From 5d9d5ba8bab437dd5b773502a6d44b11b9a58b3a Mon Sep 17 00:00:00 2001 From: Daniel Kinzler Date: Sat, 7 Nov 2009 09:43:34 +0000 Subject: [PATCH] allow <a> tags and RDFa attributes to support RDFa output from license templates etc --- includes/Sanitizer.php | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 0cb5b0b8bc..ab521e829a 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -353,7 +353,7 @@ class Sanitizer { if ( !$staticInitialised ) { $htmlpairsStatic = array( # Tags that must be closed - 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', + 'a', 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', 'strike', 'strong', 'tt', 'var', 'div', 'center', 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', @@ -605,6 +605,8 @@ class Sanitizer { */ static function validateAttributes( $attribs, $whitelist ) { $whitelist = array_flip( $whitelist ); + $hrefExp = '/^(' . wfUrlProtocols() . ')[^\s]+$/'; + $out = array(); foreach( $attribs as $attribute => $value ) { if( !isset( $whitelist[$attribute] ) ) { @@ -626,6 +628,23 @@ class Sanitizer { $wgEnforceHtmlIds ? 'noninitial' : 'xml' ); } + if ( $attribute === 'href' || $attribute === 'src' ) { + if ( !preg_match( $hrefExp, $value ) ) { + continue; //drop any href or src attributes not using an allowed protocol. + //NOTE: this also drops all relative URLs + } + } + + //RDFa properties allow URIs. check them + if ( $attribute === 'rel' || $attribute === 'rev' || + $attribute === 'about' || $attribute === 'property' || $attribute === 'resource' || + $attribute === 'datatype' || $attribute === 'typeof' ) { + //Paranoia. Allow "simple" values but suppress javascript + if ( preg_match( '/(^|\s)javascript\s*:/i', $value ) ) { + continue; + } + } + // If this attribute was previously set, override it. 
// Output should only have one attribute of each name. $out[$attribute] = $value; @@ -1154,7 +1173,11 @@ class Sanitizer { * @return Array */ static function setupAttributeWhitelist() { - $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' ); + $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style', + #RDFa attributes as specified in section 9 of http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014 + 'about', 'property', 'resource', 'datatype', 'typeof', + ); + $block = array_merge( $common, array( 'align' ) ); $tablealign = array( 'align', 'char', 'charoff', 'valign' ); $tablecell = array( 'abbr', @@ -1260,6 +1283,9 @@ class Sanitizer { 'td' => array_merge( $common, $tablecell, $tablealign ), 'th' => array_merge( $common, $tablecell, $tablealign ), + # 12.2 + 'a' => array_merge( $common, array( 'href', 'rel', 'rev' ) ), # rel/rev esp. for RDFa + # 13.2 # Not usually allowed, but may be used for extension-style hooks # such as <math> when it is rasterized -- 2.20.1