From 160d10f11ca4ba8685b285603bdc70ff346c04ed Mon Sep 17 00:00:00 2001 From: Siebrand Mazeland Date: Tue, 7 Jan 2014 13:52:06 +0100 Subject: [PATCH] Update documentation for Sanitizer Also break a few long lines. Change-Id: I77ffc76396e2c488da81b158d2225371fe0b7b67 --- includes/Sanitizer.php | 160 +++++++++++++++++++++-------------------- 1 file changed, 82 insertions(+), 78 deletions(-) diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 3384af0a6e..a6fb6d0173 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -355,12 +355,12 @@ class Sanitizer { * Cleans up HTML, removes dangerous tags and attributes, and * removes HTML comments * @private - * @param $text String - * @param $processCallback Callback to do any variable or parameter - * replacements in HTML attribute values - * @param array $args for the processing callback - * @param array $extratags for any extra tags to include - * @param array $removetags for any tags (default or extra) to exclude + * @param string $text + * @param callable $processCallback Callback to do any variable or parameter + * replacements in HTML attribute values + * @param array $args Arguments for the processing callback + * @param array $extratags For any extra tags to include + * @param array $removetags For any tags (default or extra) to exclude * @return string */ static function removeHTMLtags( $text, $processCallback = null, @@ -601,7 +601,7 @@ class Sanitizer { * trailing spaces and one of the newlines. * * @private - * @param $text String + * @param string $text * @return string */ static function removeHTMLcomments( $text ) { @@ -649,8 +649,8 @@ class Sanitizer { * where we may want to allow a tag within content but ONLY when it has * specific attributes set. 
* - * @param $params - * @param $element + * @param string $params + * @param string $element * @return bool */ static function validateTag( $params, $element ) { @@ -682,9 +682,9 @@ class Sanitizer { * - Unsafe style attributes are discarded * - Invalid id attributes are re-encoded * - * @param $attribs Array - * @param $element String - * @return Array + * @param array $attribs + * @param string $element + * @return array * * @todo Check for legal values where the DTD limits things. * @todo Check for unique id attribute :P @@ -702,9 +702,9 @@ class Sanitizer { * - Unsafe style attributes are discarded * - Invalid id attributes are re-encoded * - * @param $attribs Array + * @param array $attribs * @param array $whitelist list of allowed attribute names - * @return Array + * @return array * * @todo Check for legal values where the DTD limits things. * @todo Check for unique id attribute :P @@ -801,8 +801,8 @@ class Sanitizer { * will be combined (if they're both strings). * * @todo implement merging for other attributes such as style - * @param $a Array - * @param $b Array + * @param array $a + * @param array $b * @return array */ static function mergeAttributes( $a, $b ) { @@ -833,8 +833,8 @@ class Sanitizer { * clever input strings. These character references must * be escaped before the return value is embedded in HTML. * - * @param $value String - * @return String + * @param string $value + * @return string */ static function checkCss( $value ) { // Decode character references like { @@ -925,15 +925,18 @@ class Sanitizer { // Reject problematic keywords and control characters if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) { return '/* invalid control char */'; - } elseif ( preg_match( '! expression | filter\s*: | accelerator\s*: | url\s*\( | image\s*\( | image-set\s*\( !ix', $value ) ) { + } elseif ( preg_match( + '! 
expression | filter\s*: | accelerator\s*: | url\s*\( | image\s*\( | image-set\s*\( !ix', + $value + ) ) { return '/* insecure input */'; } return $value; } /** - * @param $matches array - * @return String + * @param array $matches + * @return string */ static function cssDecodeCallback( $matches ) { if ( $matches[1] !== '' ) { @@ -971,9 +974,9 @@ class Sanitizer { * - Unsafe style attributes are discarded * - Prepends space if there are attributes. * - * @param $text String - * @param $element String - * @return String + * @param string $text + * @param string $element + * @return string */ static function fixTagAttributes( $text, $element ) { if ( trim( $text ) == '' ) { @@ -988,8 +991,8 @@ class Sanitizer { /** * Encode an attribute value for HTML output. - * @param $text String - * @return HTML-encoded text fragment + * @param string $text + * @return string HTML-encoded text fragment */ static function encodeAttribute( $text ) { $encValue = htmlspecialchars( $text, ENT_QUOTES ); @@ -1009,8 +1012,8 @@ class Sanitizer { /** * Encode an attribute value for HTML tags, with extra armoring * against further wiki processing. - * @param $text String - * @return HTML-encoded text fragment + * @param string $text + * @return string HTML-encoded text fragment */ static function safeEncodeAttribute( $text ) { $encValue = Sanitizer::encodeAttribute( $text ); @@ -1053,9 +1056,9 @@ class Sanitizer { * (which don't work reliably in fragments cross-browser). 
* * @see http://www.w3.org/TR/html401/types.html#type-name Valid characters - * in the id and - * name attributes - * @see http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with the id attribute + * in the id and name attributes + * @see http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with + * the id attribute * @see http://www.whatwg.org/html/elements.html#the-id-attribute * HTML5 definition of id attribute * @@ -1068,7 +1071,7 @@ class Sanitizer { * 'legacy': Behave the way the old HTML 4-based ID escaping worked even * if $wgExperimentalHtmlIds is used, so we can generate extra * anchors and links won't break. - * @return String + * @return string */ static function escapeId( $id, $options = array() ) { global $wgExperimentalHtmlIds; @@ -1111,8 +1114,8 @@ class Sanitizer { * * @see http://www.w3.org/TR/CSS21/syndata.html Valid characters/format * - * @param $class String - * @return String + * @param string $class + * @return string */ static function escapeClass( $class ) { // Convert ugly stuff to underscores and kill underscores in ugly places @@ -1126,8 +1129,8 @@ class Sanitizer { * Given HTML input, escape with htmlspecialchars but un-escape entities. * This allows (generally harmless) entities like &#160; to survive. * - * @param string $html to escape - * @return String: escaped input + * @param string $html HTML to escape + * @return string Escaped input */ static function escapeHtmlAllowEntities( $html ) { $html = Sanitizer::decodeCharReferences( $html ); @@ -1139,7 +1142,7 @@ class Sanitizer { /** * Regex replace callback for armoring links against further processing. - * @param $matches Array + * @param array $matches * @return string */ private static function armorLinksCallback( $matches ) { @@ -1151,8 +1154,8 @@ class Sanitizer { * a partial tag string. Attribute names are forces to lowercase, * character references are decoded to UTF-8 text. 
* - * @param $text String - * @return Array + * @param string $text + * @return array */ public static function decodeTagAttributes( $text ) { if ( trim( $text ) == '' ) { @@ -1187,8 +1190,8 @@ class Sanitizer { * Build a partial tag string from an associative array of attribute * names and values as returned by decodeTagAttributes. * - * @param $assoc_array Array - * @return String + * @param array $assoc_array + * @return string */ public static function safeEncodeTagAttributes( $assoc_array ) { $attribs = array(); @@ -1205,9 +1208,9 @@ class Sanitizer { * Pick the appropriate attribute value from a match set from the * attribs regex matches. * - * @param $set Array - * @throws MWException - * @return String + * @param array $set + * @throws MWException when tag conditions are not met. + * @return string */ private static function getTagAttributeCallback( $set ) { if ( isset( $set[6] ) ) { @@ -1239,8 +1242,9 @@ class Sanitizer { * but note that we're not returning the value, but are returning * XML source fragments that will be slapped into output. * - * @param $text String - * @return String + * @param string $text + * @return string + * @todo Remove, unused? */ private static function normalizeAttributeValue( $text ) { return str_replace( '"', '"', @@ -1249,8 +1253,8 @@ class Sanitizer { } /** - * @param $text string - * @return mixed + * @param string $text + * @return string */ private static function normalizeWhitespace( $text ) { return preg_replace( @@ -1264,8 +1268,8 @@ class Sanitizer { * by Parser::stripSectionName(), for use in the id's that are used for * section links. * - * @param $section String - * @return String + * @param string $section + * @return string */ static function normalizeSectionNameWhitespace( $section ) { return trim( preg_replace( '/[ _]+/', ' ', $section ) ); @@ -1282,8 +1286,8 @@ class Sanitizer { * c. use lower cased "&#x", not "&#X" * d. 
fix or reject non-valid attributes * - * @param $text String - * @return String + * @param string $text + * @return string * @private */ static function normalizeCharReferences( $text ) { @@ -1293,8 +1297,8 @@ class Sanitizer { $text ); } /** - * @param $matches String - * @return String + * @param string $matches + * @return string */ static function normalizeCharReferencesCallback( $matches ) { $ret = null; @@ -1319,8 +1323,8 @@ class Sanitizer { * the HTML equivalent. Otherwise, returns HTML-escaped text of * pseudo-entity source (eg &foo;) * - * @param $name String - * @return String + * @param string $name + * @return string */ static function normalizeEntity( $name ) { if ( isset( self::$htmlEntityAliases[$name] ) ) { @@ -1336,7 +1340,7 @@ class Sanitizer { } /** - * @param $codepoint + * @param int $codepoint * @return null|string */ static function decCharReference( $codepoint ) { @@ -1349,7 +1353,7 @@ class Sanitizer { } /** - * @param $codepoint + * @param int $codepoint * @return null|string */ static function hexCharReference( $codepoint ) { @@ -1363,8 +1367,8 @@ class Sanitizer { /** * Returns true if a given Unicode codepoint is a valid character in XML. - * @param $codepoint Integer - * @return Boolean + * @param int $codepoint + * @return bool */ private static function validateCodepoint( $codepoint ) { return $codepoint == 0x09 @@ -1379,8 +1383,8 @@ class Sanitizer { * Decode any character references, numeric or named entities, * in the text and return a UTF-8 string. * - * @param $text String - * @return String + * @param string $text + * @return string */ public static function decodeCharReferences( $text ) { return preg_replace_callback( @@ -1396,8 +1400,8 @@ class Sanitizer { * This is useful for page titles, not for text to be displayed, * MediaWiki allows HTML entities to escape normalization as a feature. 
* * @param string $text (already normalized, containing entities) - * @return String (still normalized, without entities) + * @param string $text Already normalized, containing entities + * @return string Still normalized, without entities */ public static function decodeCharReferencesAndNormalize( $text ) { global $wgContLang; @@ -1414,8 +1418,8 @@ class Sanitizer { } /** - * @param $matches String - * @return String + * @param array $matches + * @return string */ static function decodeCharReferencesCallback( $matches ) { if ( $matches[1] != '' ) { @@ -1432,8 +1436,8 @@ class Sanitizer { /** * Return UTF-8 string for a codepoint if that is a valid * character reference, otherwise U+FFFD REPLACEMENT CHARACTER. - * @param $codepoint Integer - * @return String + * @param int $codepoint + * @return string * @private */ static function decodeChar( $codepoint ) { @@ -1449,8 +1453,8 @@ class Sanitizer { * return the UTF-8 encoding of that character. Otherwise, returns * pseudo-entity source (eg "&foo;") * - * @param $name String - * @return String + * @param string $name + * @return string */ static function decodeEntity( $name ) { if ( isset( self::$htmlEntityAliases[$name] ) ) { @@ -1466,8 +1470,8 @@ class Sanitizer { /** * Fetch the whitelist of acceptable attributes for a given element name. * - * @param $element String - * @return Array + * @param string $element + * @return array */ static function attributeWhitelist( $element ) { $list = Sanitizer::setupAttributeWhitelist(); @@ -1479,7 +1483,7 @@ class Sanitizer { /** * Foreach array key (an allowed HTML element), return an array * of allowed attributes - * @return Array + * @return array */ static function setupAttributeWhitelist() { global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes; @@ -1702,7 +1706,7 @@ class Sanitizer { * inclusion in HTML output as of 1.10! 
* * @param string $text HTML fragment - * @return String + * @return string */ static function stripAllTags( $text ) { # Actual @@ -1722,7 +1726,7 @@ class Sanitizer { * * Use for passing XHTML fragments to PHP's XML parsing functions * - * @return String + * @return string */ static function hackDocType() { $out = "