From a8b24498599face2909669709263e586aaec2972 Mon Sep 17 00:00:00 2001 From: Daniel Kinzler Date: Sun, 22 Jan 2012 05:58:23 +0000 Subject: [PATCH] Introducing optional support for <a> tags, to be used with microdata resp. RDFa, as originally suggested in r58694. This is reintroduced after a discussion with TimStarling considering the discussion about r58717. Note that a change to the parser will be needed to cause links from <a> tags to be recorded in the externallinks table. --- RELEASE-NOTES-1.19 | 3 +++ includes/DefaultSettings.php | 10 ++++++++++ includes/Sanitizer.php | 24 +++++++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/RELEASE-NOTES-1.19 b/RELEASE-NOTES-1.19 index 2b4cc101b0..b6789634cb 100644 --- a/RELEASE-NOTES-1.19 +++ b/RELEASE-NOTES-1.19 @@ -26,6 +26,9 @@ production. * (bug 32239) Removed wgEnableTooltipsAndAccesskeys. * Removed $wgVectorShowVariantName. * Removed $wgExtensionAliasesFiles. Use wgExtensionMessagesFiles. +* Introduced $wgAllowATag to allow <a> tags to be used for external links, + so rel and rev attributes can be used with microdata resp. RDFa. + Default is false. === New features in 1.19 === * (bug 19838) Possibility to get all interwiki prefixes if the interwiki diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index f1bc917951..6bc3445e2e 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -2242,11 +2242,13 @@ $wgAllowRdfaAttributes = false; /** * Enabled HTML5 microdata attributes for use in wikitext, if $wgHtml5 is also true. + * See also $wgAllowATag. */ $wgAllowMicrodataAttributes = false; /** * Cleanup as much presentational html like valign -> css vertical-align as we can + * See also $wgAllowATag. */ $wgCleanupPresentationalAttributes = true; @@ -2978,6 +2980,14 @@ $wgEnableImageWhitelist = true; */ $wgAllowImageTag = false; +/** + * Allow <a> tags for specifying external links, so it becomes possible to + * provide rev and rel attributes.
This allows for microdata/microformats/RDFa + * annotations to be embedded on wiki pages. See also $wgAllowRdfaAttributes + * and $wgAllowMicrodataAttributes. + */ +$wgAllowATag = false; + /** * $wgUseTidy: use tidy to make sure HTML output is sane. * Tidy is a free tool that fixes broken HTML. diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 196abd9f43..3f7175a195 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -407,6 +407,11 @@ class Sanitizer { $htmlsingleonly[] = 'img'; } + global $wgAllowATag; + if ( $wgAllowATag ) { + $htmlpairsStatic[] = 'a'; + } + $htmlsingleallowed = array_unique( array_merge( $htmlsingle, $tabletags ) ); $htmlelementsStatic = array_unique( array_merge( $htmlsingle, $htmlpairsStatic, $htmlnest ) ); @@ -797,6 +802,23 @@ class Sanitizer { } } + if ( $attribute === 'href' || $attribute === 'src' ) { + if ( !preg_match( $hrefExp, $value ) ) { + continue; //drop any href or src attributes not using an allowed protocol. + //NOTE: this also drops all relative URLs + } + } + + //RDFa properties allow URIs. check them + if ( $attribute === 'rel' || $attribute === 'rev' || + $attribute === 'about' || $attribute === 'property' || $attribute === 'resource' || + $attribute === 'datatype' || $attribute === 'typeof' ) { + //Paranoia. Allow "simple" values but suppress javascript + if ( preg_match( '/(^|\s)javascript\s*:/i', $value ) ) { + continue; + } + } + // If this attribute was previously set, override it. // Output should only have one attribute of each name. $out[$attribute] = $value; @@ -1572,7 +1594,7 @@ class Sanitizer { 'td' => array_merge( $common, $tablecell, $tablealign ), 'th' => array_merge( $common, $tablecell, $tablealign ), - # 12.2 # NOTE: <a> is not allowed directly, but the attrib whitelist is used from the Parser object + # 12.2 'a' => array_merge( $common, array( 'href', 'rel', 'rev' ) ), # rel/rev esp. for RDFa # 13.2 -- 2.20.1