[SPIP] +2.1.12
[velocampus/web/www.git] / www / plugins / auto / verifier / inc / is_email.php
1 <?php
2
// Stop processing this include when _ECRIRE_INC_VERSION has not been defined
// (presumably set by the host application's bootstrap -- TODO confirm).
if (!defined("_ECRIRE_INC_VERSION")) {
	return;
}
4
5 /**
6 * @package isemail
7 * @author Dominic Sayers <dominic_sayers@hotmail.com>
8 * @copyright 2009 Dominic Sayers
9 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
10 * @link http://www.dominicsayers.com/isemail
11 * @version 1.16 - Added optional diagnosis codes (amended all lines with a return statement)
12 */
13
14 /*
15 Copyright (c) 2008-2010, Dominic Sayers
16 All rights reserved.
17
18 Redistribution and use in source and binary forms, with or without modification,
19 are permitted provided that the following conditions are met:
20
21 * Redistributions of source code must retain the above copyright notice, this
22 list of conditions and the following disclaimer.
23 * Redistributions in binary form must reproduce the above copyright notice,
24 this list of conditions and the following disclaimer in the documentation
25 and/or other materials provided with the distribution.
26 * Neither the name of Dominic Sayers nor the names of its contributors may be
27 used to endorse or promote products derived from this software without
28 specific prior written permission.
29
30 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
33 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
34 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
35 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
36 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
37 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
39 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 */
41
42 /*.
43 require_module 'standard';
44 require_module 'pcre';
45 .*/
/**
 * Check that $email is a syntactically valid email address.
 *
 * Read the following RFCs to understand the constraints:
 *   (http://tools.ietf.org/html/rfc5322)
 *   (http://tools.ietf.org/html/rfc3696)
 *   (http://tools.ietf.org/html/rfc5321)
 *   (http://tools.ietf.org/html/rfc4291#section-2.2)
 *   (http://tools.ietf.org/html/rfc1123#section-2.1)
 *
 * The ISEMAIL_* diagnosis constants are defined on first call if they do not
 * already exist.
 *
 * @param string $email    The address to check.
 * @param bool   $checkDNS When true, and checkdnsrr() is available, a domain
 *                         name (not an address literal) must also have an A
 *                         or MX record.
 * @param bool   $diagnose When true, return an integer ISEMAIL_* code instead
 *                         of a boolean.
 * @return mixed With $diagnose false: true if valid, false otherwise.
 *               With $diagnose true: ISEMAIL_VALID (0) if valid, otherwise the
 *               ISEMAIL_* code of the first problem found. Because
 *               ISEMAIL_VALID is 0, callers must compare with ===.
 */
/*.mixed.*/ function is_email (/*.string.*/ $email, $checkDNS = false, $diagnose = false) {
	if (!defined('ISEMAIL_VALID')) {
		define('ISEMAIL_VALID'                 , 0);
		define('ISEMAIL_TOOLONG'               , 1);
		define('ISEMAIL_NOAT'                  , 2);
		define('ISEMAIL_NOLOCALPART'           , 3);
		define('ISEMAIL_NODOMAIN'              , 4);
		define('ISEMAIL_ZEROLENGTHELEMENT'     , 5);
		define('ISEMAIL_BADCOMMENT_START'      , 6);
		define('ISEMAIL_BADCOMMENT_END'        , 7);
		define('ISEMAIL_UNESCAPEDDELIM'        , 8);
		define('ISEMAIL_EMPTYELEMENT'          , 9);
		define('ISEMAIL_UNESCAPEDSPECIAL'      , 10);
		define('ISEMAIL_LOCALTOOLONG'          , 11);
		define('ISEMAIL_IPV4BADPREFIX'         , 12);
		define('ISEMAIL_IPV6BADPREFIXMIXED'    , 13);
		define('ISEMAIL_IPV6BADPREFIX'         , 14);
		define('ISEMAIL_IPV6GROUPCOUNT'        , 15);
		define('ISEMAIL_IPV6DOUBLEDOUBLECOLON' , 16);
		define('ISEMAIL_IPV6BADCHAR'           , 17);
		define('ISEMAIL_IPV6TOOMANYGROUPS'     , 18);
		define('ISEMAIL_TLD'                   , 19);
		define('ISEMAIL_DOMAINEMPTYELEMENT'    , 20);
		define('ISEMAIL_DOMAINELEMENTTOOLONG'  , 21);
		define('ISEMAIL_DOMAINBADCHAR'         , 22);
		define('ISEMAIL_DOMAINTOOLONG'         , 23);
		define('ISEMAIL_TLDNUMERIC'            , 24);
		define('ISEMAIL_DOMAINNOTFOUND'        , 25);
		define('ISEMAIL_NOTDEFINED'            , 99);
	}

	// The upper limit on address lengths should normally be considered to be 256
	//	(http://www.rfc-editor.org/errata_search.php?rfc=3696)
	//	NB I think John Klensin is misreading RFC 5321 and the limit should actually be 254
	//	However, I will stick to the published number until it is changed.
	//
	// The maximum total length of a reverse-path or forward-path is 256
	// characters (including the punctuation and element separators)
	//	(http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3)
	$emailLength = strlen($email);
	if ($emailLength > 256) return $diagnose ? ISEMAIL_TOOLONG : false;	// Too long

	// Contemporary email addresses consist of a "local part" separated from
	// a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	//	(http://tools.ietf.org/html/rfc3696#section-3)
	// The LAST at-sign is the separator; earlier ones belong to the local part.
	$atIndex = strrpos($email, '@');

	if ($atIndex === false) return $diagnose ? ISEMAIL_NOAT : false;	// No at-sign
	if ($atIndex === 0) return $diagnose ? ISEMAIL_NOLOCALPART : false;	// No local part
	if ($atIndex === $emailLength - 1) return $diagnose ? ISEMAIL_NODOMAIN : false;	// No domain part
	// revision 1.14: Length test bug suggested by Andrew Campbell of Gloucester, MA

	// Sanitize comments
	// - remove nested comments, quotes and dots in comments
	// - remove parentheses and dots from quoted strings
	// Offending characters are overwritten with 'x' (never removed), so the
	// string length and therefore $atIndex stay valid.
	$braceDepth = 0;
	$inQuote = false;
	$escapeThisChar = false;

	for ($i = 0; $i < $emailLength; ++$i) {
		$char = $email[$i];
		$replaceChar = false;

		if ($char === '\\') {
			$escapeThisChar = !$escapeThisChar;	// Escape the next character?
		} else {
			switch ($char) {
			case '(':
				if ($escapeThisChar || $inQuote) {
					$replaceChar = true;
				} elseif ($braceDepth++ > 0) {	// Increment brace depth
					$replaceChar = true;	// Nested comment opener
				}

				break;
			case ')':
				if ($escapeThisChar || $inQuote) {
					$replaceChar = true;
				} else {
					if (--$braceDepth > 0) $replaceChar = true;	// Decrement brace depth
					if ($braceDepth < 0) $braceDepth = 0;	// Unbalanced ')' is left in place for later checks
				}

				break;
			case '"':
				if ($escapeThisChar) {
					$replaceChar = true;
				} elseif ($braceDepth === 0) {
					$inQuote = !$inQuote;	// Are we inside a quoted string?
				} else {
					$replaceChar = true;	// Quote inside a comment
				}

				break;
			case '.':	// Dots don't help us either
				if ($escapeThisChar || $braceDepth > 0) $replaceChar = true;

				break;
			default:
			}

			$escapeThisChar = false;
			// revision 1.12: substr_replace used because PHPLint doesn't like $email[$i] = 'x'
			if ($replaceChar) $email = (string) substr_replace($email, 'x', $i, 1);	// Replace the offending character with something harmless
		}
	}

	$localPart = substr($email, 0, $atIndex);
	$domain = substr($email, $atIndex + 1);
	$FWS = "(?:(?:(?:[ \\t]*(?:\\r\\n))?[ \\t]+)|(?:[ \\t]+(?:(?:\\r\\n)[ \\t]+)*))";	// Folding white space
	$trimFWS = "/^$FWS|$FWS\$/";	// Leading or trailing folding white space

	// Let's check the local part for RFC compliance...
	//
	// local-part      =       dot-atom / quoted-string / obs-local-part
	// obs-local-part  =       word *("." word)
	//	(http://tools.ietf.org/html/rfc5322#section-3.4.1)
	//
	// Problem: need to distinguish between "first.last" and "first"."last"
	// (i.e. one element or two), so only split on dots outside quoted strings.
	$dotArray = /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $localPart);
	$partLength = 0;

	foreach ($dotArray as $element) {
		// Remove any leading or trailing FWS
		$element = preg_replace($trimFWS, '', $element);
		$elementLength = strlen($element);

		if ($elementLength === 0) return $diagnose ? ISEMAIL_ZEROLENGTHELEMENT : false;	// Can't have empty element (consecutive dots or dots at the start or end)
		// revision 1.15: Speed up the test and get rid of "uninitialized string offset" notices from PHP

		// We need to remove any valid comments (i.e. those at the start or end of the element)
		if ($element[0] === '(') {
			$indexBrace = strpos($element, ')');
			if ($indexBrace !== false) {
				if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
					return $diagnose ? ISEMAIL_BADCOMMENT_START : false;	// Illegal characters in comment
				}
				// Cast: older PHP returns false from substr() when nothing is left
				$element = (string) substr($element, $indexBrace + 1);
				$elementLength = strlen($element);
			}
		}

		// The element may now be empty (it was only a comment), so guard the
		// offset read; the empty element is rejected further down.
		if ($elementLength > 0 && $element[$elementLength - 1] === ')') {
			$indexBrace = strrpos($element, '(');
			if ($indexBrace !== false) {
				if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0) {
					return $diagnose ? ISEMAIL_BADCOMMENT_END : false;	// Illegal characters in comment
				}
				$element = (string) substr($element, 0, $indexBrace);
				$elementLength = strlen($element);
			}
		}

		// Remove any leading or trailing FWS around the element (inside any comments)
		$element = preg_replace($trimFWS, '', $element);

		// What's left counts towards the maximum length for this part
		if ($partLength > 0) $partLength++;	// for the dot
		$partLength += strlen($element);

		// Each dot-delimited component can be an atom or a quoted string
		// (because of the obs-local-part provision)
		if (preg_match('/^"(?:.)*"$/s', $element) > 0) {
			// Quoted-string tests:
			//
			// Remove any FWS
			$element = preg_replace("/(?<!\\\\)$FWS/", '', $element);
			// Telling \" \\" \\\" \\\\" etc. apart in one regex is awkward,
			// so remove all \\ from the string first...
			$element = preg_replace('/\\\\\\\\/', ' ', $element);
			if (preg_match('/(?<!\\\\|^)["\\r\\n\\x00](?!$)|\\\\"$|""/', $element) > 0) return $diagnose ? ISEMAIL_UNESCAPEDDELIM : false;	// ", CR, LF and NUL must be escaped, "" is too short
		} else {
			// Unquoted string tests:
			//
			// Period (".") may...appear, but may not be used to start or end the
			// local part, nor may two or more consecutive periods appear.
			//	(http://tools.ietf.org/html/rfc3696#section-3)
			//
			// A zero-length element implies a period at the beginning or end of the
			// local part, or two periods together (or an element that was nothing
			// but a comment). Either way it's not allowed.
			if ($element === '') return $diagnose ? ISEMAIL_EMPTYELEMENT : false;	// Dots in wrong place

			// Any ASCII graphic (printing) character other than the
			// at-sign ("@"), backslash, double quote, comma, or square brackets may
			// appear without quoting.  If any of that list of excluded characters
			// are to appear, they must be quoted
			//	(http://tools.ietf.org/html/rfc3696#section-3)
			//
			// Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
			if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]/', $element) > 0) return $diagnose ? ISEMAIL_UNESCAPEDSPECIAL : false;	// These characters must be in a quoted string
		}
	}

	if ($partLength > 64) return $diagnose ? ISEMAIL_LOCALTOOLONG : false;	// Local part must be 64 characters or less

	// Now let's check the domain part...

	// The domain name can also be replaced by an IP address in square brackets
	//	(http://tools.ietf.org/html/rfc3696#section-3)
	//	(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	//	(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
		// It's an address-literal
		$addressLiteral = substr($domain, 1, strlen($domain) - 2);
		$matchesIP = array();

		// Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
			$index = strrpos($addressLiteral, $matchesIP[0]);

			if ($index === 0) {
				// Nothing there except a valid IPv4 address, so...
				return $diagnose ? ISEMAIL_VALID : true;
			} else {
				// Assume it's an attempt at a mixed address (IPv6 + IPv4)
				if ($addressLiteral[$index - 1] !== ':') return $diagnose ? ISEMAIL_IPV4BADPREFIX : false;	// Character preceding IPv4 address must be ':'
				if (substr($addressLiteral, 0, 5) !== 'IPv6:') return $diagnose ? ISEMAIL_IPV6BADPREFIXMIXED : false;	// RFC5321 section 4.1.3

				// Keep the IPv6 groups that precede the IPv4 tail; when the
				// literal is "IPv6:::a.b.c.d" ($index 7) keep the bare "::"
				$IPv6 = substr($addressLiteral, 5, ($index === 7) ? 2 : $index - 6);
				$groupMax = 6;	// The IPv4 tail stands in for the last two groups
			}
		} else {
			// It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:') return $diagnose ? ISEMAIL_IPV6BADPREFIX : false;	// RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
			$groupMax = 8;
		}

		// Each match is one group; anything that is not part of a group is
		// captured by (.) into $matchesIP[1] so it can be detected below
		$groupCount = preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
		$index = strpos($IPv6, '::');

		if ($index === false) {
			// We need exactly the right number of groups
			if ($groupCount !== $groupMax) return $diagnose ? ISEMAIL_IPV6GROUPCOUNT : false;	// RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6, '::')) return $diagnose ? ISEMAIL_IPV6DOUBLEDOUBLECOLON : false;	// More than one '::'
			$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
			if ($groupCount > $groupMax) return $diagnose ? ISEMAIL_IPV6TOOMANYGROUPS : false;	// Too many IPv6 groups in address
		}

		// Check for unmatched characters: after a descending sort any
		// non-empty capture ends up in slot 0
		array_multisort($matchesIP[1], SORT_DESC);
		if ($matchesIP[1][0] !== '') return $diagnose ? ISEMAIL_IPV6BADCHAR : false;	// Illegal characters in address

		// It's a valid IPv6 address, so...
		return $diagnose ? ISEMAIL_VALID : true;
	} else {
		// It's a domain name...

		// The syntax of a legal Internet host name was specified in RFC-952
		// One aspect of host name syntax is hereby changed: the
		// restriction on the first character is relaxed to allow either a
		// letter or a digit.
		//	(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		// NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		// Most common applications, including email and the Web, will generally not
		// permit...escaped strings
		//	(http://tools.ietf.org/html/rfc3696#section-2)
		//
		// the better strategy has now become to make the "at least one period" test,
		// to verify LDH conformance (including verification that the apparent TLD name
		// is not all-numeric)
		//	(http://tools.ietf.org/html/rfc3696#section-2)
		//
		// Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		// labels for SMTP clients or servers
		//	(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		// RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		//	(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		$dotArray = /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $domain);
		$partLength = 0;
		$element = '';	// Since we use $element after the foreach loop let's make sure it has a value
		// revision 1.13: Line above added because PHPLint now checks for Definitely Assigned Variables

		if (count($dotArray) === 1) return $diagnose ? ISEMAIL_TLD : false;	// Mail host can't be a TLD (cite? What about localhost?)

		foreach ($dotArray as $element) {
			// Remove any leading or trailing FWS
			$element = preg_replace($trimFWS, '', $element);
			$elementLength = strlen($element);

			// Each dot-delimited component must be of type atext
			// A zero-length element implies a period at the beginning or end of the
			// domain, or two periods together. Either way it's not allowed.
			if ($elementLength === 0) return $diagnose ? ISEMAIL_DOMAINEMPTYELEMENT : false;	// Dots in wrong place
			// revision 1.15: Speed up the test and get rid of "uninitialized string offset" notices from PHP

			// Then we need to remove all valid comments (i.e. those at the start or end of the element)
			if ($element[0] === '(') {
				$indexBrace = strpos($element, ')');
				if ($indexBrace !== false) {
					if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
						return $diagnose ? ISEMAIL_BADCOMMENT_START : false;	// Illegal characters in comment
					}
					// Cast: older PHP returns false from substr() when nothing is left
					$element = (string) substr($element, $indexBrace + 1);
					$elementLength = strlen($element);
				}
			}

			// Guard the offset read: the element is empty when it was only a comment
			if ($elementLength > 0 && $element[$elementLength - 1] === ')') {
				$indexBrace = strrpos($element, '(');
				if ($indexBrace !== false) {
					if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0) {
						return $diagnose ? ISEMAIL_BADCOMMENT_END : false;	// Illegal characters in comment
					}

					$element = (string) substr($element, 0, $indexBrace);
				}
			}

			// Remove any leading or trailing FWS around the element (inside any comments)
			$element = preg_replace($trimFWS, '', $element);
			// Measure the label itself, after comments and FWS have been stripped
			$elementLength = strlen($element);

			// A label that consisted only of comments and/or FWS is empty, which
			// is no more allowed here than it is in the local part
			if ($elementLength === 0) return $diagnose ? ISEMAIL_DOMAINEMPTYELEMENT : false;

			// What's left counts towards the maximum length for this part
			if ($partLength > 0) $partLength++;	// for the dot
			$partLength += $elementLength;

			// The DNS defines domain name syntax very generally -- a
			// string of labels each containing up to 63 8-bit octets,
			// separated by dots, and with a maximum total of 255
			// octets.
			//	(http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
			if ($elementLength > 63) return $diagnose ? ISEMAIL_DOMAINELEMENTTOOLONG : false;	// Label must be 63 characters or less

			// Any ASCII graphic (printing) character other than the
			// at-sign ("@"), backslash, double quote, comma, or square brackets may
			// appear without quoting.  If any of that list of excluded characters
			// are to appear, they must be quoted
			//	(http://tools.ietf.org/html/rfc3696#section-3)
			//
			// If the hyphen is used, it is not permitted to appear at
			// either the beginning or end of a label.
			//	(http://tools.ietf.org/html/rfc3696#section-2)
			//
			// Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
			if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]|^-|-$/', $element) > 0) {
				return $diagnose ? ISEMAIL_DOMAINBADCHAR : false;
			}
		}

		if ($partLength > 255) return $diagnose ? ISEMAIL_DOMAINTOOLONG : false;	// Domain part must be 255 characters or less (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)

		// $element still holds the last label, i.e. the apparent TLD
		if (preg_match('/^[0-9]+$/', $element) > 0) return $diagnose ? ISEMAIL_TLDNUMERIC : false;	// TLD can't be all-numeric (http://www.apps.ietf.org/rfc/rfc3696.html#sec-2)

		// Check DNS? (silently skipped when checkdnsrr() is not available)
		if ($checkDNS && function_exists('checkdnsrr')) {
			if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
				return $diagnose ? ISEMAIL_DOMAINNOTFOUND : false;	// Domain doesn't actually exist
			}
		}
	}

	// Eliminate all other factors, and the one which remains must be the truth.
	//	(Sherlock Holmes, The Sign of Four)
	return $diagnose ? ISEMAIL_VALID : true;
}
426 ?>