|&\#[xX]([0-9A-Fa-f]+);
|(&)/x';
+ /**
+ * Blacklist for evil uris like javascript:
+ * WARNING: DO NOT use this in any place that actually requires blacklisting
+ * for security reasons. There are NUMEROUS[1] ways to bypass blacklisting, the
+ * only way to be secure from javascript: uri based xss vectors is to whitelist
+ * things that you know are safe and deny everything else.
+ * [1]: http://ha.ckers.org/xss.html
+ */
const EVIL_URI_PATTERN = '!(^|\s|\*/\s*)(javascript|vbscript)([^\w]|$)!i';
const XMLNS_ATTRIBUTE_PATTERN = "/^xmlns:[:A-Z_a-z-.0-9]+$/";
'amp' => 38,
'and' => 8743,
'ang' => 8736,
- 'apos' => 39,
+ 'apos' => 39, // New in XHTML & HTML 5; avoid in output for compatibility with IE.
'Aring' => 197,
'aring' => 229,
'asymp' => 8776,
return $value;
}
+ /**
+ * @param $matches array
+ * @return String
+ */
static function cssDecodeCallback( $matches ) {
if ( $matches[1] !== '' ) {
// Line continuation
Sanitizer::normalizeCharReferences( $text ) ) );
}
+ /**
+ * @param $text string
+ * @return mixed
+ */
private static function normalizeWhitespace( $text ) {
return preg_replace(
'/\r\n|[\x20\x0d\x0a\x09]/',
}
}
+ /**
+ * @param $codepoint
+ * @return null|string
+ */
static function decCharReference( $codepoint ) {
$point = intval( $codepoint );
if( Sanitizer::validateCodepoint( $point ) ) {
}
}
+ /**
+ * @param $codepoint
+ * @return null|string
+ */
static function hexCharReference( $codepoint ) {
$point = hexdec( $codepoint );
if( Sanitizer::validateCodepoint( $point ) ) {
* return the UTF-8 encoding of that character. Otherwise, returns
* pseudo-entity source (eg &foo;)
*
- * @param $name Strings
+ * @param $name String
* @return String
*/
static function decodeEntity( $name ) {
return $out;
}
+ /**
+ * @param $url string
+ * @return mixed|string
+ */
static function cleanUrl( $url ) {
# Normalize any HTML entities in input. They will be
# re-escaped by makeExternalLink().
$host = preg_replace( $strip, '', $host );
- // @todo Fixme: validate hostnames here
+ // @todo FIXME: Validate hostnames here
return $protocol . $host . $rest;
} else {
}
}
+ /**
+ * @param $matches array
+ * @return string
+ */
static function cleanUrlCallback( $matches ) {
return urlencode( $matches[0] );
}
+
+ /**
+ * Does a string look like an e-mail address?
+ *
+ * This validates an email address using an HTML5 specification found at:
+ * http://www.whatwg.org/specs/web-apps/current-work/multipage/states-of-the-type-attribute.html#valid-e-mail-address
+ * Which as of 2011-01-24 says:
+ *
+ * A valid e-mail address is a string that matches the ABNF production
+ * 1*( atext / "." ) "@" ldh-str *( "." ldh-str ) where atext is defined
+ * in RFC 5322 section 3.2.3, and ldh-str is defined in RFC 1034 section
+ * 3.5.
+ *
+ * This function is an implementation of the specification as requested in
+ * bug 22449.
+ *
+ * Client-side forms will use the same standard validation rules via JS or
+ * HTML 5 validation; additional restrictions can be enforced server-side
+ * by extensions via the 'isValidEmailAddr' hook.
+ *
+ * Note that this validation doesn't 100% match RFC 2822, but is believed
+ * to be liberal enough for wide use. Some invalid addresses will still
+ * pass validation here.
+ *
+ * @since 1.18
+ *
+ * @param $addr String E-mail address
+ * @return Bool
+ */
+ public static function validateEmail( $addr ) {
+ $result = null;
+ if( !wfRunHooks( 'isValidEmailAddr', array( $addr, &$result ) ) ) {
+ return $result;
+ }
+
+ // Please note strings below are enclosed in brackets [], this make the
+ // hyphen "-" a range indicator. Hence it is double backslashed below.
+ // See bug 26948
+ $rfc5322_atext = "a-z0-9!#$%&'*+\\-\/=?^_`{|}~" ;
+ $rfc1034_ldh_str = "a-z0-9\\-" ;
+
+ $HTML5_email_regexp = "/
+ ^ # start of string
+ [$rfc5322_atext\\.]+ # user part which is liberal :p
+ @ # 'apostrophe'
+ [$rfc1034_ldh_str]+ # First domain part
+ (\\.[$rfc1034_ldh_str]+)* # Following part prefixed with a dot
+ $ # End of string
+ /ix" ; // case Insensitive, eXtended
+
+ return (bool) preg_match( $HTML5_email_regexp, $addr );
+ }
}