3 if (!defined("_ECRIRE_INC_VERSION")) return;
7 * @author Dominic Sayers <dominic_sayers@hotmail.com>
8 * @copyright 2009 Dominic Sayers
9 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
10 * @link http://www.dominicsayers.com/isemail
11 * @version 1.16 - Added optional diagnosis codes (amended all lines with a return statement)
15 Copyright (c) 2008-2010, Dominic Sayers
18 Redistribution and use in source and binary forms, with or without modification,
19 are permitted provided that the following conditions are met:
21 * Redistributions of source code must retain the above copyright notice, this
22 list of conditions and the following disclaimer.
23 * Redistributions in binary form must reproduce the above copyright notice,
24 this list of conditions and the following disclaimer in the documentation
25 and/or other materials provided with the distribution.
26 * Neither the name of Dominic Sayers nor the names of its contributors may be
27 used to endorse or promote products derived from this software without
28 specific prior written permission.
30 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
33 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
34 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
35 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
36 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
37 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
39 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 require_module 'standard';
44 require_module 'pcre';
46 /*.mixed.*/ function is_email (/*.string.*/ $email, $checkDNS = false, $diagnose = false) {
47 // Check that $email is a valid address. Read the following RFCs to understand the constraints:
48 // (http://tools.ietf.org/html/rfc5322)
49 // (http://tools.ietf.org/html/rfc3696)
50 // (http://tools.ietf.org/html/rfc5321)
51 // (http://tools.ietf.org/html/rfc4291#section-2.2)
52 // (http://tools.ietf.org/html/rfc1123#section-2.1)
54 if (!defined('ISEMAIL_VALID')) {
55 define('ISEMAIL_VALID' , 0);
56 define('ISEMAIL_TOOLONG' , 1);
57 define('ISEMAIL_NOAT' , 2);
58 define('ISEMAIL_NOLOCALPART' , 3);
59 define('ISEMAIL_NODOMAIN' , 4);
60 define('ISEMAIL_ZEROLENGTHELEMENT' , 5);
61 define('ISEMAIL_BADCOMMENT_START' , 6);
62 define('ISEMAIL_BADCOMMENT_END' , 7);
63 define('ISEMAIL_UNESCAPEDDELIM' , 8);
64 define('ISEMAIL_EMPTYELEMENT' , 9);
65 define('ISEMAIL_UNESCAPEDSPECIAL' , 10);
66 define('ISEMAIL_LOCALTOOLONG' , 11);
67 define('ISEMAIL_IPV4BADPREFIX' , 12);
68 define('ISEMAIL_IPV6BADPREFIXMIXED' , 13);
69 define('ISEMAIL_IPV6BADPREFIX' , 14);
70 define('ISEMAIL_IPV6GROUPCOUNT' , 15);
71 define('ISEMAIL_IPV6DOUBLEDOUBLECOLON' , 16);
72 define('ISEMAIL_IPV6BADCHAR' , 17);
73 define('ISEMAIL_IPV6TOOMANYGROUPS' , 18);
74 define('ISEMAIL_TLD' , 19);
75 define('ISEMAIL_DOMAINEMPTYELEMENT' , 20);
76 define('ISEMAIL_DOMAINELEMENTTOOLONG' , 21);
77 define('ISEMAIL_DOMAINBADCHAR' , 22);
78 define('ISEMAIL_DOMAINTOOLONG' , 23);
79 define('ISEMAIL_TLDNUMERIC' , 24);
80 define('ISEMAIL_DOMAINNOTFOUND' , 25);
81 define('ISEMAIL_NOTDEFINED' , 99);
84 // the upper limit on address lengths should normally be considered to be 256
85 // (http://www.rfc-editor.org/errata_search.php?rfc=3696)
86 // NB I think John Klensin is misreading RFC 5321 and the limit should actually be 254
87 // However, I will stick to the published number until it is changed.
89 // The maximum total length of a reverse-path or forward-path is 256
90 // characters (including the punctuation and element separators)
91 // (http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3)
92 $emailLength = strlen($email);
93 if ($emailLength > 256) return $diagnose ? ISEMAIL_TOOLONG
: false; // Too long
95 // Contemporary email addresses consist of a "local part" separated from
96 // a "domain part" (a fully-qualified domain name) by an at-sign ("@").
97 // (http://tools.ietf.org/html/rfc3696#section-3)
98 $atIndex = strrpos($email,'@');
100 if ($atIndex === false) return $diagnose ? ISEMAIL_NOAT
: false; // No at-sign
101 if ($atIndex === 0) return $diagnose ? ISEMAIL_NOLOCALPART
: false; // No local part
102 if ($atIndex === $emailLength - 1) return $diagnose ? ISEMAIL_NODOMAIN
: false; // No domain part
103 // revision 1.14: Length test bug suggested by Andrew Campbell of Gloucester, MA
106 // - remove nested comments, quotes and dots in comments
107 // - remove parentheses and dots from quoted strings
110 $escapeThisChar = false;
112 for ($i = 0; $i < $emailLength; ++
$i) {
114 $replaceChar = false;
116 if ($char === '\\') {
117 $escapeThisChar = !$escapeThisChar; // Escape the next character?
121 if ($escapeThisChar) {
127 if ($braceDepth++
> 0) $replaceChar = true; // Increment brace depth
133 if ($escapeThisChar) {
139 if (--$braceDepth > 0) $replaceChar = true; // Decrement brace depth
140 if ($braceDepth < 0) $braceDepth = 0;
146 if ($escapeThisChar) {
149 if ($braceDepth === 0) {
150 $inQuote = !$inQuote; // Are we inside a quoted string?
157 case '.': // Dots don't help us either
158 if ($escapeThisChar) {
161 if ($braceDepth > 0) $replaceChar = true;
168 $escapeThisChar = false;
169 // if ($replaceChar) $email[$i] = 'x'; // Replace the offending character with something harmless
170 // revision 1.12: Line above replaced because PHPLint doesn't like that syntax
171 if ($replaceChar) $email = (string) substr_replace($email, 'x', $i, 1); // Replace the offending character with something harmless
175 $localPart = substr($email, 0, $atIndex);
176 $domain = substr($email, $atIndex +
1);
177 $FWS = "(?:(?:(?:[ \\t]*(?:\\r\\n))?[ \\t]+)|(?:[ \\t]+(?:(?:\\r\\n)[ \\t]+)*))"; // Folding white space
178 // Let's check the local part for RFC compliance...
180 // local-part = dot-atom / quoted-string / obs-local-part
181 // obs-local-part = word *("." word)
182 // (http://tools.ietf.org/html/rfc5322#section-3.4.1)
184 // Problem: need to distinguish between "first.last" and "first"."last"
185 // (i.e. one element or two). And I suck at regexes.
186 $dotArray = /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $localPart);
189 foreach ($dotArray as $element) {
190 // Remove any leading or trailing FWS
191 $element = preg_replace("/^$FWS|$FWS\$/", '', $element);
192 $elementLength = strlen($element);
194 if ($elementLength === 0) return $diagnose ? ISEMAIL_ZEROLENGTHELEMENT
: false; // Can't have empty element (consecutive dots or dots at the start or end)
195 // revision 1.15: Speed up the test and get rid of "uninitialized string offset" notices from PHP
197 // We need to remove any valid comments (i.e. those at the start or end of the element)
198 if ($element[0] === '(') {
199 $indexBrace = strpos($element, ')');
200 if ($indexBrace !== false) {
201 if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
202 return $diagnose ? ISEMAIL_BADCOMMENT_START
: false; // Illegal characters in comment
204 $element = substr($element, $indexBrace +
1, $elementLength - $indexBrace - 1);
205 $elementLength = strlen($element);
209 if ($element[$elementLength - 1] === ')') {
210 $indexBrace = strrpos($element, '(');
211 if ($indexBrace !== false) {
212 if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace +
1, $elementLength - $indexBrace - 2)) > 0) {
213 return $diagnose ? ISEMAIL_BADCOMMENT_END
: false; // Illegal characters in comment
215 $element = substr($element, 0, $indexBrace);
216 $elementLength = strlen($element);
220 // Remove any leading or trailing FWS around the element (inside any comments)
221 $element = preg_replace("/^$FWS|$FWS\$/", '', $element);
223 // What's left counts towards the maximum length for this part
224 if ($partLength > 0) $partLength++
; // for the dot
225 $partLength +
= strlen($element);
227 // Each dot-delimited component can be an atom or a quoted string
228 // (because of the obs-local-part provision)
229 if (preg_match('/^"(?:.)*"$/s', $element) > 0) {
230 // Quoted-string tests:
233 $element = preg_replace("/(?<!\\\\)$FWS/", '', $element);
234 // My regex skillz aren't up to distinguishing between \" \\" \\\" \\\\" etc.
235 // So remove all \\ from the string first...
236 $element = preg_replace('/\\\\\\\\/', ' ', $element);
237 if (preg_match('/(?<!\\\\|^)["\\r\\n\\x00](?!$)|\\\\"$|""/', $element) > 0) return $diagnose ? ISEMAIL_UNESCAPEDDELIM
: false; // ", CR, LF and NUL must be escaped, "" is too short
239 // Unquoted string tests:
241 // Period (".") may...appear, but may not be used to start or end the
242 // local part, nor may two or more consecutive periods appear.
243 // (http://tools.ietf.org/html/rfc3696#section-3)
245 // A zero-length element implies a period at the beginning or end of the
246 // local part, or two periods together. Either way it's not allowed.
247 if ($element === '') return $diagnose ? ISEMAIL_EMPTYELEMENT
: false; // Dots in wrong place
249 // Any ASCII graphic (printing) character other than the
250 // at-sign ("@"), backslash, double quote, comma, or square brackets may
251 // appear without quoting. If any of that list of excluded characters
252 // are to appear, they must be quoted
253 // (http://tools.ietf.org/html/rfc3696#section-3)
255 // Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
256 if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]/', $element) > 0) return $diagnose ? ISEMAIL_UNESCAPEDSPECIAL
: false; // These characters must be in a quoted string
260 if ($partLength > 64) return $diagnose ? ISEMAIL_LOCALTOOLONG
: false; // Local part must be 64 characters or less
262 // Now let's check the domain part...
264 // The domain name can also be replaced by an IP address in square brackets
265 // (http://tools.ietf.org/html/rfc3696#section-3)
266 // (http://tools.ietf.org/html/rfc5321#section-4.1.3)
267 // (http://tools.ietf.org/html/rfc4291#section-2.2)
268 if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
269 // It's an address-literal
270 $addressLiteral = substr($domain, 1, strlen($domain) - 2);
271 $matchesIP = array();
273 // Extract IPv4 part from the end of the address-literal (if there is one)
274 if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
275 $index = strrpos($addressLiteral, $matchesIP[0]);
278 // Nothing there except a valid IPv4 address, so...
279 return $diagnose ? ISEMAIL_VALID
: true;
281 // Assume it's an attempt at a mixed address (IPv6 + IPv4)
282 if ($addressLiteral[$index - 1] !== ':') return $diagnose ? ISEMAIL_IPV4BADPREFIX
: false; // Character preceding IPv4 address must be ':'
283 if (substr($addressLiteral, 0, 5) !== 'IPv6:') return $diagnose ? ISEMAIL_IPV6BADPREFIXMIXED
: false; // RFC5321 section 4.1.3
285 $IPv6 = substr($addressLiteral, 5, ($index ===7) ?
2 : $index - 6);
289 // It must be an attempt at pure IPv6
290 if (substr($addressLiteral, 0, 5) !== 'IPv6:') return $diagnose ? ISEMAIL_IPV6BADPREFIX
: false; // RFC5321 section 4.1.3
291 $IPv6 = substr($addressLiteral, 5);
295 $groupCount = preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
296 $index = strpos($IPv6,'::');
298 if ($index === false) {
299 // We need exactly the right number of groups
300 if ($groupCount !== $groupMax) return $diagnose ? ISEMAIL_IPV6GROUPCOUNT
: false; // RFC5321 section 4.1.3
302 if ($index !== strrpos($IPv6,'::')) return $diagnose ? ISEMAIL_IPV6DOUBLEDOUBLECOLON
: false; // More than one '::'
303 $groupMax = ($index === 0 ||
$index === (strlen($IPv6) - 2)) ?
$groupMax : $groupMax - 1;
304 if ($groupCount > $groupMax) return $diagnose ? ISEMAIL_IPV6TOOMANYGROUPS
: false; // Too many IPv6 groups in address
307 // Check for unmatched characters
308 array_multisort($matchesIP[1], SORT_DESC
);
309 if ($matchesIP[1][0] !== '') return $diagnose ? ISEMAIL_IPV6BADCHAR
: false; // Illegal characters in address
311 // It's a valid IPv6 address, so...
312 return $diagnose ? ISEMAIL_VALID
: true;
314 // It's a domain name...
316 // The syntax of a legal Internet host name was specified in RFC-952
317 // One aspect of host name syntax is hereby changed: the
318 // restriction on the first character is relaxed to allow either a
319 // letter or a digit.
320 // (http://tools.ietf.org/html/rfc1123#section-2.1)
322 // NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
324 // Most common applications, including email and the Web, will generally not
325 // permit...escaped strings
326 // (http://tools.ietf.org/html/rfc3696#section-2)
328 // the better strategy has now become to make the "at least one period" test,
329 // to verify LDH conformance (including verification that the apparent TLD name
330 // is not all-numeric)
331 // (http://tools.ietf.org/html/rfc3696#section-2)
333 // Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
334 // labels for SMTP clients or servers
335 // (http://tools.ietf.org/html/rfc5321#section-4.1.2)
337 // RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
338 // (http://tools.ietf.org/html/rfc5321#section-4.1.2)
339 $dotArray = /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $domain);
341 $element = ''; // Since we use $element after the foreach loop let's make sure it has a value
342 // revision 1.13: Line above added because PHPLint now checks for Definitely Assigned Variables
344 if (count($dotArray) === 1) return $diagnose ? ISEMAIL_TLD
: false; // Mail host can't be a TLD (cite? What about localhost?)
346 foreach ($dotArray as $element) {
347 // Remove any leading or trailing FWS
348 $element = preg_replace("/^$FWS|$FWS\$/", '', $element);
349 $elementLength = strlen($element);
351 // Each dot-delimited component must be of type atext
352 // A zero-length element implies a period at the beginning or end of the
353 // domain part, or two periods together. Either way it's not allowed.
354 if ($elementLength === 0) return $diagnose ? ISEMAIL_DOMAINEMPTYELEMENT
: false; // Dots in wrong place
355 // revision 1.15: Speed up the test and get rid of "uninitialized string offset" notices from PHP
357 // Then we need to remove all valid comments (i.e. those at the start or end of the element)
358 if ($element[0] === '(') {
359 $indexBrace = strpos($element, ')');
360 if ($indexBrace !== false) {
361 if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
362 return $diagnose ? ISEMAIL_BADCOMMENTSTART
: false; // Illegal characters in comment
364 $element = substr($element, $indexBrace +
1, $elementLength - $indexBrace - 1);
365 $elementLength = strlen($element);
369 if ($element[$elementLength - 1] === ')') {
370 $indexBrace = strrpos($element, '(');
371 if ($indexBrace !== false) {
372 if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace +
1, $elementLength - $indexBrace - 2)) > 0)
373 return $diagnose ? ISEMAIL_BADCOMMENTEND
: false; // Illegal characters in comment
375 $element = substr($element, 0, $indexBrace);
376 $elementLength = strlen($element);
380 // Remove any leading or trailing FWS around the element (inside any comments)
381 $element = preg_replace("/^$FWS|$FWS\$/", '', $element);
383 // What's left counts towards the maximum length for this part
384 if ($partLength > 0) $partLength++
; // for the dot
385 $partLength +
= strlen($element);
387 // The DNS defines domain name syntax very generally -- a
388 // string of labels each containing up to 63 8-bit octets,
389 // separated by dots, and with a maximum total of 255
391 // (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
392 if ($elementLength > 63) return $diagnose ? ISEMAIL_DOMAINELEMENTTOOLONG
: false; // Label must be 63 characters or less
394 // Any ASCII graphic (printing) character other than the
395 // at-sign ("@"), backslash, double quote, comma, or square brackets may
396 // appear without quoting. If any of that list of excluded characters
397 // are to appear, they must be quoted
398 // (http://tools.ietf.org/html/rfc3696#section-3)
400 // If the hyphen is used, it is not permitted to appear at
401 // either the beginning or end of a label.
402 // (http://tools.ietf.org/html/rfc3696#section-2)
404 // Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
405 if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]|^-|-$/', $element) > 0) {
406 return $diagnose ? ISEMAIL_DOMAINBADCHAR
: false;
410 if ($partLength > 255) return $diagnose ? ISEMAIL_DOMAINTOOLONG
: false; // Domain part must be 255 characters or less (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
412 if (preg_match('/^[0-9]+$/', $element) > 0) return $diagnose ? ISEMAIL_TLDNUMERIC
: false; // TLD can't be all-numeric (http://www.apps.ietf.org/rfc/rfc3696.html#sec-2)
415 if ($checkDNS && function_exists('checkdnsrr')) {
416 if (!(checkdnsrr($domain, 'A') ||
checkdnsrr($domain, 'MX'))) {
417 return $diagnose ? ISEMAIL_DOMAINNOTFOUND
: false; // Domain doesn't actually exist
422 // Eliminate all other factors, and the one which remains must be the truth.
423 // (Sherlock Holmes, The Sign of Four)
424 return $diagnose ? ISEMAIL_VALID
: true;