[SPIP][PLUGINS] v3.0-->v3.2
[lhc/web/www.git] / www / plugins / verifier / inc / is_email.php
1 <?php
2 /**
3 * To validate an email address according to RFCs 5321, 5322 and others
4 *
5 * Copyright © 2008-2011, Dominic Sayers
6 * Test schema documentation Copyright © 2011, Daniel Marschall
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without modification,
10 * are permitted provided that the following conditions are met:
11 *
12 * - Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 * - Neither the name of Dominic Sayers nor the names of its contributors may be
18 * used to endorse or promote products derived from this software without
19 * specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * @package is_email
33 * @author Dominic Sayers <dominic@sayers.cc>
34 * @copyright 2008-2011 Dominic Sayers
35 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
36 * @link http://www.dominicsayers.com/isemail
37 * @version 3.04.1 - Changed my link to http://isemail.info throughout
38 */
39
40 // The quality of this code has been improved greatly by using PHPLint
41 // Copyright (c) 2010 Umberto Salsi
42 // This is free software; see the license for copying conditions.
43 // More info: http://www.icosaedro.it/phplint/
44 /*.
45 require_module 'standard';
46 require_module 'pcre';
47 .*/
48
49 if (!defined('ISEMAIL_VALID')) { // Define the whole constant set only once; ISEMAIL_VALID doubles as the "already defined" sentinel
50 /*:diagnostic constants start:*/
51 // This part of the code is generated using data from test/meta.xml. Beware of making manual alterations
52 // Categories: each value is above every diagnosis code of its own category and below the codes of the next category
53 define('ISEMAIL_VALID_CATEGORY', 1);
54 define('ISEMAIL_DNSWARN', 7);
55 define('ISEMAIL_RFC5321', 15);
56 define('ISEMAIL_CFWS', 31);
57 define('ISEMAIL_DEPREC', 63);
58 define('ISEMAIL_RFC5322', 127);
59 define('ISEMAIL_ERR', 255);
60
61 // Diagnoses: the individual status codes that is_email() appends to $return_status
62 // Address is valid
63 define('ISEMAIL_VALID', 0);
64 // Address is valid but a DNS check was not successful
65 define('ISEMAIL_DNSWARN_NO_MX_RECORD', 5);
66 define('ISEMAIL_DNSWARN_NO_RECORD', 6);
67 // Address is valid for SMTP but has unusual elements
68 define('ISEMAIL_RFC5321_TLD', 9);
69 define('ISEMAIL_RFC5321_TLDNUMERIC', 10);
70 define('ISEMAIL_RFC5321_QUOTEDSTRING', 11);
71 define('ISEMAIL_RFC5321_ADDRESSLITERAL', 12);
72 define('ISEMAIL_RFC5321_IPV6DEPRECATED', 13);
73 // Address is valid within the message but cannot be used unmodified for the envelope
74 define('ISEMAIL_CFWS_COMMENT', 17);
75 define('ISEMAIL_CFWS_FWS', 18);
76 // Address contains deprecated elements but may still be valid in restricted contexts
77 define('ISEMAIL_DEPREC_LOCALPART', 33);
78 define('ISEMAIL_DEPREC_FWS', 34);
79 define('ISEMAIL_DEPREC_QTEXT', 35);
80 define('ISEMAIL_DEPREC_QP', 36);
81 define('ISEMAIL_DEPREC_COMMENT', 37);
82 define('ISEMAIL_DEPREC_CTEXT', 38);
83 define('ISEMAIL_DEPREC_CFWS_NEAR_AT', 49);
84 // The address is only valid according to the broad definition of RFC 5322. It is otherwise invalid.
85 define('ISEMAIL_RFC5322_DOMAIN', 65);
86 define('ISEMAIL_RFC5322_TOOLONG', 66);
87 define('ISEMAIL_RFC5322_LOCAL_TOOLONG', 67);
88 define('ISEMAIL_RFC5322_DOMAIN_TOOLONG', 68);
89 define('ISEMAIL_RFC5322_LABEL_TOOLONG', 69);
90 define('ISEMAIL_RFC5322_DOMAINLITERAL', 70);
91 define('ISEMAIL_RFC5322_DOMLIT_OBSDTEXT', 71);
92 define('ISEMAIL_RFC5322_IPV6_GRPCOUNT', 72);
93 define('ISEMAIL_RFC5322_IPV6_2X2XCOLON', 73);
94 define('ISEMAIL_RFC5322_IPV6_BADCHAR', 74);
95 define('ISEMAIL_RFC5322_IPV6_MAXGRPS', 75);
96 define('ISEMAIL_RFC5322_IPV6_COLONSTRT', 76);
97 define('ISEMAIL_RFC5322_IPV6_COLONEND', 77);
98 // Address is invalid for any purpose
99 define('ISEMAIL_ERR_EXPECTING_DTEXT', 129);
100 define('ISEMAIL_ERR_NOLOCALPART', 130);
101 define('ISEMAIL_ERR_NODOMAIN', 131);
102 define('ISEMAIL_ERR_CONSECUTIVEDOTS', 132);
103 define('ISEMAIL_ERR_ATEXT_AFTER_CFWS', 133);
104 define('ISEMAIL_ERR_ATEXT_AFTER_QS', 134);
105 define('ISEMAIL_ERR_ATEXT_AFTER_DOMLIT', 135);
106 define('ISEMAIL_ERR_EXPECTING_QPAIR', 136);
107 define('ISEMAIL_ERR_EXPECTING_ATEXT', 137);
108 define('ISEMAIL_ERR_EXPECTING_QTEXT', 138);
109 define('ISEMAIL_ERR_EXPECTING_CTEXT', 139);
110 define('ISEMAIL_ERR_BACKSLASHEND', 140);
111 define('ISEMAIL_ERR_DOT_START', 141);
112 define('ISEMAIL_ERR_DOT_END', 142);
113 define('ISEMAIL_ERR_DOMAINHYPHENSTART', 143);
114 define('ISEMAIL_ERR_DOMAINHYPHENEND', 144);
115 define('ISEMAIL_ERR_UNCLOSEDQUOTEDSTR', 145);
116 define('ISEMAIL_ERR_UNCLOSEDCOMMENT', 146);
117 define('ISEMAIL_ERR_UNCLOSEDDOMLIT', 147);
118 define('ISEMAIL_ERR_FWS_CRLF_X2', 148);
119 define('ISEMAIL_ERR_FWS_CRLF_END', 149);
120 define('ISEMAIL_ERR_CR_NO_LF', 150);
121 // End of generated code
122 /*:diagnostic constants end:*/
123
124 // function control: the threshold is_email() applies when $errorlevel is E_WARNING (kept for backward compatibility)
125 define('ISEMAIL_THRESHOLD', 16);
126
127 // Email parts: identifiers is_email() uses as parser states ($context); the COMPONENT_* values also key $parsedata
128 define('ISEMAIL_COMPONENT_LOCALPART', 0);
129 define('ISEMAIL_COMPONENT_DOMAIN', 1);
130 define('ISEMAIL_COMPONENT_LITERAL', 2);
131 define('ISEMAIL_CONTEXT_COMMENT', 3);
132 define('ISEMAIL_CONTEXT_FWS', 4);
133 define('ISEMAIL_CONTEXT_QUOTEDSTRING', 5);
134 define('ISEMAIL_CONTEXT_QUOTEDPAIR', 6);
135
136 // Miscellaneous string constants: the characters and tags is_email() matches against while parsing
137 define('ISEMAIL_STRING_AT', '@');
138 define('ISEMAIL_STRING_BACKSLASH', '\\');
139 define('ISEMAIL_STRING_DOT', '.');
140 define('ISEMAIL_STRING_DQUOTE', '"');
141 define('ISEMAIL_STRING_OPENPARENTHESIS', '(');
142 define('ISEMAIL_STRING_CLOSEPARENTHESIS', ')');
143 define('ISEMAIL_STRING_OPENSQBRACKET', '[');
144 define('ISEMAIL_STRING_CLOSESQBRACKET', ']');
145 define('ISEMAIL_STRING_HYPHEN', '-');
146 define('ISEMAIL_STRING_COLON', ':');
147 define('ISEMAIL_STRING_DOUBLECOLON', '::');
148 define('ISEMAIL_STRING_SP', ' ');
149 define('ISEMAIL_STRING_HTAB', "\t");
150 define('ISEMAIL_STRING_CR', "\r");
151 define('ISEMAIL_STRING_LF', "\n");
152 define('ISEMAIL_STRING_IPV6TAG', 'IPv6:');
153 // US-ASCII visible characters not valid for atext (http://tools.ietf.org/html/rfc5322#section-3.2.3)
154 define('ISEMAIL_STRING_SPECIALS', '()<>[]:;@\\,."');
155 }
156
157 /**
158 * Check that an email address conforms to RFCs 5321, 5322 and others
159 *
160 * As of Version 3.0, we are now distinguishing clearly between a Mailbox
161 * as defined by RFC 5321 and an addr-spec as defined by RFC 5322. Depending
162 * on the context, either can be regarded as a valid email address. The
163 * RFC 5321 Mailbox specification is more restrictive (comments, white space
164 * and obsolete forms are not allowed)
165 *
166 * @param string $email The email address to check
167 * @param boolean $checkDNS If true then a DNS check for MX records will be made
168 * @param mixed $errorlevel Determines the boundary between valid and invalid addresses.
169 * Status codes above this number will be returned as-is,
170 * status codes below will be returned as ISEMAIL_VALID. Thus the
171 * calling program can simply look for ISEMAIL_VALID if it is
172 * only interested in whether an address is valid or not. The
173 * errorlevel will determine how "picky" is_email() is about
174 * the address.
175 *
176 * If omitted or passed as false then is_email() will return
177 * true or false rather than an integer error or warning.
178 *
179 * NB Note the difference between $errorlevel = false and
180 * $errorlevel = 0
181 * @param array $parsedata If passed, returns the parsed address components
182 * @return bool|int
183 */
184 /*.mixed.*/ function is_email($email, $checkDNS = false, $errorlevel = false, &$parsedata = array()) {
185 // Check that $email is a valid address. Read the following RFCs to understand the constraints:
186 // (http://tools.ietf.org/html/rfc5321)
187 // (http://tools.ietf.org/html/rfc5322)
188 // (http://tools.ietf.org/html/rfc4291#section-2.2)
189 // (http://tools.ietf.org/html/rfc1123#section-2.1)
190 // (http://tools.ietf.org/html/rfc3696) (guidance only)
191 // version 2.0: Enhance $diagnose parameter to $errorlevel
192 // version 3.0: Introduced status categories
193 // revision 3.1: BUG: $parsedata was passed by value instead of by reference
194
195 if (is_bool($errorlevel)) {
196 $threshold = ISEMAIL_VALID;
197 $diagnose = (bool) $errorlevel;
198 } else {
199 $diagnose = true;
200
201 switch ((int) $errorlevel) {
202 case E_WARNING:
203 $threshold = ISEMAIL_THRESHOLD;
204 break; // For backward compatibility
205 case E_ERROR:
206 $threshold = ISEMAIL_VALID;
207 break; // For backward compatibility
208 default:
209 $threshold = (int) $errorlevel;
210 }
211 }
212
213 $return_status = array(ISEMAIL_VALID);
214
215 // Parse the address into components, character by character
216 $raw_length = strlen($email);
217 $context = ISEMAIL_COMPONENT_LOCALPART; // Where we are
218 $context_stack = array($context); // Where we have been
219 $context_prior = ISEMAIL_COMPONENT_LOCALPART; // Where we just came from
220 $token = ''; // The current character
221 $token_prior = ''; // The previous character
222 $parsedata = array(
223 ISEMAIL_COMPONENT_LOCALPART => '',
224 ISEMAIL_COMPONENT_DOMAIN => ''
225 ); // For the components of the address
226
227 $atomlist = array(
228 ISEMAIL_COMPONENT_LOCALPART => array(''),
229 ISEMAIL_COMPONENT_DOMAIN => array('')
230 ); // For the dot-atom elements of the address
231 $element_count = 0;
232 $element_len = 0;
233 $hyphen_flag = false; // Hyphen cannot occur at the end of a subdomain
234 $end_or_die = false; // CFWS can only appear at the end of the element
235
236 //-echo "<table style=\"clear:left;\">"; // debug
237 for ($i = 0; $i < $raw_length; $i++) {
238 $token = $email[$i];
239 //-echo "<tr><td><strong>$context|",(($end_or_die) ? 'true' : 'false'),
240 //"|$token|" . max($return_status) . "</strong></td>"; // debug
241
242 switch ($context) {
243 //-------------------------------------------------------------
244 // local-part
245 //-------------------------------------------------------------
246 case ISEMAIL_COMPONENT_LOCALPART:
247 // http://tools.ietf.org/html/rfc5322#section-3.4.1
248 // local-part = dot-atom / quoted-string / obs-local-part
249 //
250 // dot-atom = [CFWS] dot-atom-text [CFWS]
251 //
252 // dot-atom-text = 1*atext *("." 1*atext)
253 //
254 // quoted-string = [CFWS]
255 // DQUOTE *([FWS] qcontent) [FWS] DQUOTE
256 // [CFWS]
257 //
258 // obs-local-part = word *("." word)
259 //
260 // word = atom / quoted-string
261 //
262 // atom = [CFWS] 1*atext [CFWS]
263 switch ($token) {
264 // Comment
265 case ISEMAIL_STRING_OPENPARENTHESIS:
266 if ($element_len === 0) {
267 // Comments are OK at the beginning of an element
268 $return_status[] = ($element_count === 0) ?
269 ISEMAIL_CFWS_COMMENT : ISEMAIL_DEPREC_COMMENT;
270 } else {
271 $return_status[] = ISEMAIL_CFWS_COMMENT;
272 $end_or_die = true;
273 // We can't start a comment in the middle of an element, so this better be the end
274 }
275 $context_stack[] = $context;
276 $context = ISEMAIL_CONTEXT_COMMENT;
277 break;
278 // Next dot-atom element
279 case ISEMAIL_STRING_DOT:
280 if ($element_len === 0) {
281 // Another dot, already?
282 // Fatal error
283 $return_status[] = ($element_count === 0)
284 ? ISEMAIL_ERR_DOT_START : ISEMAIL_ERR_CONSECUTIVEDOTS;
285 } else {
286 // The entire local-part can be a quoted string for RFC 5321
287 // If it's just one atom that is quoted then it's an RFC 5322 obsolete form
288 if ($end_or_die) {
289 $return_status[] = ISEMAIL_DEPREC_LOCALPART;
290 }
291 }
292 // CFWS & quoted strings are OK again now we're at the beginning of an element
293 // (although they are obsolete forms)
294 $end_or_die = false;
295 $element_len = 0;
296 $element_count++;
297 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
298 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] = '';
299 break;
300 // Quoted string
301 case ISEMAIL_STRING_DQUOTE:
302 if ($element_len === 0) {
303 // The entire local-part can be a quoted string for RFC 5321
304 // If it's just one atom that is quoted then it's an RFC 5322 obsolete form
305 $return_status[] = ($element_count === 0)
306 ? ISEMAIL_RFC5321_QUOTEDSTRING : ISEMAIL_DEPREC_LOCALPART;
307
308 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
309 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
310 $element_len++;
311 $end_or_die = true; // Quoted string must be the entire element
312 $context_stack[] = $context;
313 $context = ISEMAIL_CONTEXT_QUOTEDSTRING;
314 } else {
315 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
316 }
317
318 break;
319 // Folding White Space
320 case ISEMAIL_STRING_CR:
321 case ISEMAIL_STRING_SP:
322 case ISEMAIL_STRING_HTAB:
323 if (($token === ISEMAIL_STRING_CR)
324 && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {
325 $return_status[] = ISEMAIL_ERR_CR_NO_LF;
326 break;
327 } // Fatal error
328
329 if ($element_len === 0) {
330 $return_status[] = ($element_count === 0) ? ISEMAIL_CFWS_FWS : ISEMAIL_DEPREC_FWS;
331 } else {
332 // We can't start FWS in the middle of an element, so this better be the end
333 $end_or_die = true;
334 }
335
336 $context_stack[] = $context;
337 $context = ISEMAIL_CONTEXT_FWS;
338 $token_prior = $token;
339
340 break;
341 // @
342 case ISEMAIL_STRING_AT:
343 // At this point we should have a valid local-part
344 if (count($context_stack) !== 1) {
345 die('Unexpected item on context stack');
346 }
347
348 if ($parsedata[ISEMAIL_COMPONENT_LOCALPART] === '') {
349 $return_status[] = ISEMAIL_ERR_NOLOCALPART; // Fatal error
350 } elseif ($element_len === 0) {
351 $return_status[] = ISEMAIL_ERR_DOT_END; // Fatal error
352 } elseif (strlen($parsedata[ISEMAIL_COMPONENT_LOCALPART]) > 64) {
353 // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.1
354 // The maximum total length of a user name or other local-part is 64
355 // octets.
356 $return_status[] = ISEMAIL_RFC5322_LOCAL_TOOLONG;
357 } elseif (($context_prior === ISEMAIL_CONTEXT_COMMENT) || ($context_prior === ISEMAIL_CONTEXT_FWS)) {
358 // http://tools.ietf.org/html/rfc5322#section-3.4.1
359 // Comments and folding white space
360 // SHOULD NOT be used around the "@" in the addr-spec.
361 //
362 // http://tools.ietf.org/html/rfc2119
363 // 4. SHOULD NOT This phrase, or the phrase "NOT RECOMMENDED" mean that
364 // there may exist valid reasons in particular circumstances when the
365 // particular behavior is acceptable or even useful, but the full
366 // implications should be understood and the case carefully weighed
367 // before implementing any behavior described with this label.
368 $return_status[] = ISEMAIL_DEPREC_CFWS_NEAR_AT;
369 }
370 // Clear everything down for the domain parsing
371 $context = ISEMAIL_COMPONENT_DOMAIN; // Where we are
372 $context_stack = array($context); // Where we have been
373 $element_count = 0;
374 $element_len = 0;
375 $end_or_die = false; // CFWS can only appear at the end of the element
376
377 break;
378 // atext
379 default:
380 // http://tools.ietf.org/html/rfc5322#section-3.2.3
381 // atext = ALPHA / DIGIT / ; Printable US-ASCII
382 // "!" / "#" / ; characters not including
383 // "$" / "%" / ; specials. Used for atoms.
384 // "&" / "'" /
385 // "*" / "+" /
386 // "-" / "/" /
387 // "=" / "?" /
388 // "^" / "_" /
389 // "`" / "{" /
390 // "|" / "}" /
391 // "~"
392 if ($end_or_die) {
393 // We have encountered atext where it is no longer valid
394 switch ($context_prior) {
395 case ISEMAIL_CONTEXT_COMMENT:
396 case ISEMAIL_CONTEXT_FWS:
397 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_CFWS;
398 break;
399 case ISEMAIL_CONTEXT_QUOTEDSTRING:
400 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_QS;
401 break;
402 default:
403 die("More atext found where none is allowed, but unrecognised prior context: $context_prior");
404 }
405 } else {
406 $context_prior = $context;
407 $ord = ord($token);
408 if (($ord < 33) || ($ord > 126) || ($ord === 10)
409 || (!is_bool(strpos(ISEMAIL_STRING_SPECIALS, $token)))) {
410 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
411 }
412
413 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
414 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
415 $element_len++;
416 }
417 }
418 break;
419 //-------------------------------------------------------------
420 // Domain
421 //-------------------------------------------------------------
422 case ISEMAIL_COMPONENT_DOMAIN:
423 // http://tools.ietf.org/html/rfc5322#section-3.4.1
424 // domain = dot-atom / domain-literal / obs-domain
425 //
426 // dot-atom = [CFWS] dot-atom-text [CFWS]
427 //
428 // dot-atom-text = 1*atext *("." 1*atext)
429 //
430 // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
431 //
432 // dtext = %d33-90 / ; Printable US-ASCII
433 // %d94-126 / ; characters not including
434 // obs-dtext ; "[", "]", or "\"
435 //
436 // obs-domain = atom *("." atom)
437 //
438 // atom = [CFWS] 1*atext [CFWS]
439
440
441 // http://tools.ietf.org/html/rfc5321#section-4.1.2
442 // Mailbox = Local-part "@" ( Domain / address-literal )
443 //
444 // Domain = sub-domain *("." sub-domain)
445 //
446 // address-literal = "[" ( IPv4-address-literal /
447 // IPv6-address-literal /
448 // General-address-literal ) "]"
449 // ; See Section 4.1.3
450
451 // http://tools.ietf.org/html/rfc5322#section-3.4.1
452 // Note: A liberal syntax for the domain portion of addr-spec is
453 // given here. However, the domain portion contains addressing
454 // information specified by and used in other protocols (e.g.,
455 // [RFC1034], [RFC1035], [RFC1123], [RFC5321]). It is therefore
456 // incumbent upon implementations to conform to the syntax of
457 // addresses for the context in which they are used.
458 // is_email() author's note: it's not clear how to interpret this in
459 // the context of a general email address validator. The conclusion I
460 // have reached is this: "addressing information" must comply with
461 // RFC 5321 (and in turn RFC 1035), anything that is "semantically
462 // invisible" must comply only with RFC 5322.
463 switch ($token) {
464 // Comment
465 case ISEMAIL_STRING_OPENPARENTHESIS:
466 if ($element_len === 0) {
467 // Comments at the start of the domain are deprecated in the text
468 // Comments at the start of a subdomain are obs-domain
469 // (http://tools.ietf.org/html/rfc5322#section-3.4.1)
470 $return_status[] = ($element_count === 0) ?
471 ISEMAIL_DEPREC_CFWS_NEAR_AT : ISEMAIL_DEPREC_COMMENT;
472 } else {
473 $return_status[] = ISEMAIL_CFWS_COMMENT;
474 // We can't start a comment in the middle of an element, so this better be the end
475 $end_or_die = true;
476 }
477
478 $context_stack[] = $context;
479 $context = ISEMAIL_CONTEXT_COMMENT;
480 break;
481 // Next dot-atom element
482 case ISEMAIL_STRING_DOT:
483 if ($element_len === 0) {
484 // Another dot, already?
485 $return_status[] = ($element_count === 0) ?
486 ISEMAIL_ERR_DOT_START : ISEMAIL_ERR_CONSECUTIVEDOTS; // Fatal error
487 } elseif ($hyphen_flag) {
488 // Previous subdomain ended in a hyphen
489 $return_status[] = ISEMAIL_ERR_DOMAINHYPHENEND; // Fatal error
490 } else {
491 // Nowhere in RFC 5321 does it say explicitly that the
492 // domain part of a Mailbox must be a valid domain according
493 // to the DNS standards set out in RFC 1035, but this *is*
494 // implied in several places. For instance, wherever the idea
495 // of host routing is discussed the RFC says that the domain
496 // must be looked up in the DNS. This would be nonsense unless
497 // the domain was designed to be a valid DNS domain. Hence we
498 // must conclude that the RFC 1035 restriction on label length
499 // also applies to RFC 5321 domains.
500 //
501 // http://tools.ietf.org/html/rfc1035#section-2.3.4
502 // labels 63 octets or less
503 if ($element_len > 63) {
504 $return_status[] = ISEMAIL_RFC5322_LABEL_TOOLONG;
505 }
506 }
507 // CFWS is OK again now we're at the beginning of an element
508 // (although it may be obsolete CFWS)
509 $end_or_die = false;
510 $element_len = 0;
511 $element_count++;
512 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] = '';
513 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
514
515 break;
516 // Domain literal
517 case ISEMAIL_STRING_OPENSQBRACKET:
518 if ($parsedata[ISEMAIL_COMPONENT_DOMAIN] === '') {
519 $end_or_die = true; // Domain literal must be the only component
520 $element_len++;
521 $context_stack[] = $context;
522 $context = ISEMAIL_COMPONENT_LITERAL;
523 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
524 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
525 $parsedata[ISEMAIL_COMPONENT_LITERAL] = '';
526 } else {
527 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
528 }
529
530 break;
531 // Folding White Space
532 case ISEMAIL_STRING_CR:
533 case ISEMAIL_STRING_SP:
534 case ISEMAIL_STRING_HTAB:
535 if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length)
536 || ($email[$i] !== ISEMAIL_STRING_LF))) {
537 $return_status[] = ISEMAIL_ERR_CR_NO_LF;
538 break;
539 } // Fatal error
540
541 if ($element_len === 0) {
542 $return_status[] = ($element_count === 0) ?
543 ISEMAIL_DEPREC_CFWS_NEAR_AT : ISEMAIL_DEPREC_FWS;
544 } else {
545 $return_status[] = ISEMAIL_CFWS_FWS;
546 // We can't start FWS in the middle of an element, so this better be the end
547 $end_or_die = true;
548 }
549
550 $context_stack[] = $context;
551 $context = ISEMAIL_CONTEXT_FWS;
552 $token_prior = $token;
553 break;
554 // atext
555 default:
556 // RFC 5322 allows any atext...
557 // http://tools.ietf.org/html/rfc5322#section-3.2.3
558 // atext = ALPHA / DIGIT / ; Printable US-ASCII
559 // "!" / "#" / ; characters not including
560 // "$" / "%" / ; specials. Used for atoms.
561 // "&" / "'" /
562 // "*" / "+" /
563 // "-" / "/" /
564 // "=" / "?" /
565 // "^" / "_" /
566 // "`" / "{" /
567 // "|" / "}" /
568 // "~"
569
570 // But RFC 5321 only allows letter-digit-hyphen to comply with DNS rules (RFCs 1034 & 1123)
571 // http://tools.ietf.org/html/rfc5321#section-4.1.2
572 // sub-domain = Let-dig [Ldh-str]
573 //
574 // Let-dig = ALPHA / DIGIT
575 //
576 // Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
577 //
578 if ($end_or_die) {
579 // We have encountered atext where it is no longer valid
580 switch ($context_prior) {
581 case ISEMAIL_CONTEXT_COMMENT:
582 case ISEMAIL_CONTEXT_FWS:
583 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_CFWS;
584 break;
585 case ISEMAIL_COMPONENT_LITERAL:
586 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_DOMLIT;
587 break;
588 default:
589 die("More atext found where none is allowed, but unrecognised prior context: $context_prior");
590 }
591 }
592
593 $ord = ord($token);
594 $hyphen_flag = false; // Assume this token isn't a hyphen unless we discover it is
595
596 if (($ord < 33) || ($ord > 126) || (!is_bool(strpos(ISEMAIL_STRING_SPECIALS, $token)))) {
597 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
598 } elseif ($token === ISEMAIL_STRING_HYPHEN) {
599 if ($element_len === 0) {
600 // Hyphens can't be at the beginning of a subdomain
601 $return_status[] = ISEMAIL_ERR_DOMAINHYPHENSTART; // Fatal error
602 }
603
604 $hyphen_flag = true;
605 } elseif (!(($ord > 47 && $ord < 58)
606 || ($ord > 64 && $ord < 91)
607 || ($ord > 96 && $ord < 123))) {
608 // Not an RFC 5321 subdomain, but still OK by RFC 5322
609 $return_status[] = ISEMAIL_RFC5322_DOMAIN;
610 }
611
612 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
613 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
614 $element_len++;
615 }
616 break;
617 //-------------------------------------------------------------
618 // Domain literal
619 //-------------------------------------------------------------
620 case ISEMAIL_COMPONENT_LITERAL:
621 // http://tools.ietf.org/html/rfc5322#section-3.4.1
622 // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
623 //
624 // dtext = %d33-90 / ; Printable US-ASCII
625 // %d94-126 / ; characters not including
626 // obs-dtext ; "[", "]", or "\"
627 //
628 // obs-dtext = obs-NO-WS-CTL / quoted-pair
629 switch ($token) {
630 // End of domain literal
631 case ISEMAIL_STRING_CLOSESQBRACKET:
632 if ((int) max($return_status) < ISEMAIL_DEPREC) {
633 // Could be a valid RFC 5321 address literal, so let's check
634
635 // http://tools.ietf.org/html/rfc5321#section-4.1.2
636 // address-literal = "[" ( IPv4-address-literal /
637 // IPv6-address-literal /
638 // General-address-literal ) "]"
639 // ; See Section 4.1.3
640 //
641 // http://tools.ietf.org/html/rfc5321#section-4.1.3
642 // IPv4-address-literal = Snum 3("." Snum)
643 //
644 // IPv6-address-literal = "IPv6:" IPv6-addr
645 //
646 // General-address-literal = Standardized-tag ":" 1*dcontent
647 //
648 // Standardized-tag = Ldh-str
649 // ; Standardized-tag MUST be specified in a
650 // ; Standards-Track RFC and registered with IANA
651 //
652 // dcontent = %d33-90 / ; Printable US-ASCII
653 // %d94-126 ; excl. "[", "\", "]"
654 //
655 // Snum = 1*3DIGIT
656 // ; representing a decimal integer
657 // ; value in the range 0 through 255
658 //
659 // IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
660 //
661 // IPv6-hex = 1*4HEXDIG
662 //
663 // IPv6-full = IPv6-hex 7(":" IPv6-hex)
664 //
665 // IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::"
666 // [IPv6-hex *5(":" IPv6-hex)]
667 // ; The "::" represents at least 2 16-bit groups of
668 // ; zeros. No more than 6 groups in addition to the
669 // ; "::" may be present.
670 //
671 // IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
672 //
673 // IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"
674 // [IPv6-hex *3(":" IPv6-hex) ":"]
675 // IPv4-address-literal
676 // ; The "::" represents at least 2 16-bit groups of
677 // ; zeros. No more than 4 groups in addition to the
678 // ; "::" and IPv4-address-literal may be present.
679 //
680 // is_email() author's note: We can't use ip2long() to validate
681 // IPv4 addresses because it accepts abbreviated addresses
682 // (xxx.xxx.xxx), expanding the last group to complete the address.
683 // filter_var() validates IPv6 address inconsistently (up to PHP 5.3.3
684 // at least) -- see http://bugs.php.net/bug.php?id=53236 for example
685 $max_groups = 8;
686 $matchesIP = array();
687 /*.mixed.*/ $index = false;
688 $addressliteral = $parsedata[ISEMAIL_COMPONENT_LITERAL];
689
690 // Extract IPv4 part from the end of the address-literal (if there is one)
691 if (preg_match(
692 '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
693 $addressliteral,
694 $matchesIP
695 ) > 0) {
696 $index = strrpos($addressliteral, $matchesIP[0]);
697 if ($index !== 0) {
698 // Convert IPv4 part to IPv6 format for further testing
699 $addressliteral = substr($addressliteral, 0, $index) . '0:0';
700 }
701 }
702
703 if ($index === 0) {
704 // Nothing there except a valid IPv4 address, so...
705 $return_status[] = ISEMAIL_RFC5321_ADDRESSLITERAL;
706 } elseif (strncasecmp($addressliteral, ISEMAIL_STRING_IPV6TAG, 5) !== 0) {
707 $return_status[] = ISEMAIL_RFC5322_DOMAINLITERAL;
708 } else {
709 $IPv6 = substr($addressliteral, 5);
710 // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
711 $matchesIP = explode(ISEMAIL_STRING_COLON, $IPv6);
712 $groupCount = count($matchesIP);
713 $index = strpos($IPv6, ISEMAIL_STRING_DOUBLECOLON);
714
715 if ($index === false) {
716 // We need exactly the right number of groups
717 if ($groupCount !== $max_groups) {
718 $return_status[] = ISEMAIL_RFC5322_IPV6_GRPCOUNT;
719 }
720 } else {
721 if ($index !== strrpos($IPv6, ISEMAIL_STRING_DOUBLECOLON)) {
722 $return_status[] = ISEMAIL_RFC5322_IPV6_2X2XCOLON;
723 } else {
724 if ($index === 0 || $index === (strlen($IPv6) - 2)) {
725 $max_groups++;
726 // RFC 4291 allows :: at the start or end of an address with 7 other groups in addition
727 }
728
729 if ($groupCount > $max_groups) {
730 $return_status[] = ISEMAIL_RFC5322_IPV6_MAXGRPS;
731 } elseif ($groupCount === $max_groups) {
732 $return_status[] = ISEMAIL_RFC5321_IPV6DEPRECATED; // Eliding a single "::"
733 }
734 }
735 }
736
737 // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
738 if ((substr($IPv6, 0, 1) === ISEMAIL_STRING_COLON) && (substr($IPv6, 1, 1) !== ISEMAIL_STRING_COLON)) {
739 $return_status[] = ISEMAIL_RFC5322_IPV6_COLONSTRT; // Address starts with a single colon
740 } elseif ((substr($IPv6, -1) === ISEMAIL_STRING_COLON) && (substr($IPv6, -2, 1) !== ISEMAIL_STRING_COLON)) {
741 $return_status[] = ISEMAIL_RFC5322_IPV6_COLONEND; // Address ends with a single colon
742 } elseif (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0) {
743 $return_status[] = ISEMAIL_RFC5322_IPV6_BADCHAR; // Check for unmatched characters
744 } else {
745 $return_status[] = ISEMAIL_RFC5321_ADDRESSLITERAL;
746 }
747 }
748 } else {
749 $return_status[] = ISEMAIL_RFC5322_DOMAINLITERAL;
750 }
751
752 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
753 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
754 $element_len++;
755 $context_prior = $context;
756 $context = (int) array_pop($context_stack);
757 break;
758 case ISEMAIL_STRING_BACKSLASH:
759 $return_status[] = ISEMAIL_RFC5322_DOMLIT_OBSDTEXT;
760 $context_stack[] = $context;
761 $context = ISEMAIL_CONTEXT_QUOTEDPAIR;
762 break;
763 // Folding White Space
764 case ISEMAIL_STRING_CR:
765 case ISEMAIL_STRING_SP:
766 case ISEMAIL_STRING_HTAB:
767 if (($token === ISEMAIL_STRING_CR)
768 && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {
769 $return_status[] = ISEMAIL_ERR_CR_NO_LF;
770 break;
771 } // Fatal error
772
773 $return_status[] = ISEMAIL_CFWS_FWS;
774
775 $context_stack[] = $context;
776 $context = ISEMAIL_CONTEXT_FWS;
777 $token_prior = $token;
778 break;
779 // dtext
780 default:
781 // http://tools.ietf.org/html/rfc5322#section-3.4.1
782 // dtext = %d33-90 / ; Printable US-ASCII
783 // %d94-126 / ; characters not including
784 // obs-dtext ; "[", "]", or "\"
785 //
786 // obs-dtext = obs-NO-WS-CTL / quoted-pair
787 //
788 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
789 // %d11 / ; characters that do not
790 // %d12 / ; include the carriage
791 // %d14-31 / ; return, line feed, and
792 // %d127 ; white space characters
793 $ord = ord($token);
794
795 // CR, LF, SP & HTAB have already been parsed above
796 if (($ord > 127) || ($ord === 0) || ($token === ISEMAIL_STRING_OPENSQBRACKET)) {
797 $return_status[] = ISEMAIL_ERR_EXPECTING_DTEXT; // Fatal error
798 break;
799 } elseif (($ord < 33) || ($ord === 127)) {
800 $return_status[] = ISEMAIL_RFC5322_DOMLIT_OBSDTEXT;
801 }
802
803 $parsedata[ISEMAIL_COMPONENT_LITERAL] .= $token;
804 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
805 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
806 $element_len++;
807 }
808 break;
809 //-------------------------------------------------------------
810 // Quoted string
811 //-------------------------------------------------------------
812 case ISEMAIL_CONTEXT_QUOTEDSTRING:
813 // http://tools.ietf.org/html/rfc5322#section-3.2.4
814 // quoted-string = [CFWS]
815 // DQUOTE *([FWS] qcontent) [FWS] DQUOTE
816 // [CFWS]
817 //
818 // qcontent = qtext / quoted-pair
819 switch ($token) {
820 // Quoted pair
821 case ISEMAIL_STRING_BACKSLASH:
822 $context_stack[] = $context;
823 $context = ISEMAIL_CONTEXT_QUOTEDPAIR;
824 break;
825 // Folding White Space
826 // Inside a quoted string, spaces are allowed as regular characters.
827 // It's only FWS if we include HTAB or CRLF
828 case ISEMAIL_STRING_CR:
829 case ISEMAIL_STRING_HTAB:
830 if (($token === ISEMAIL_STRING_CR)
831 && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {
832 $return_status[] = ISEMAIL_ERR_CR_NO_LF;
833 break;
834 }// Fatal error
835
836 // http://tools.ietf.org/html/rfc5322#section-3.2.2
837 // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
838 // structured header field are semantically interpreted as a single
839 // space character.
840
841 // http://tools.ietf.org/html/rfc5322#section-3.2.4
842 // the CRLF in any FWS/CFWS that appears within the quoted-string [is]
843 // semantically "invisible" and therefore not part of the quoted-string
844 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= ISEMAIL_STRING_SP;
845 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= ISEMAIL_STRING_SP;
846 $element_len++;
847
848 $return_status[] = ISEMAIL_CFWS_FWS;
849 $context_stack[] = $context;
850 $context = ISEMAIL_CONTEXT_FWS;
851 $token_prior = $token;
852 break;
853 // End of quoted string
854 case ISEMAIL_STRING_DQUOTE:
855 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
856 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
857 $element_len++;
858 $context_prior = $context;
859 $context = (int) array_pop($context_stack);
860 break;
861 // qtext
862 default:
863 // http://tools.ietf.org/html/rfc5322#section-3.2.4
864 // qtext = %d33 / ; Printable US-ASCII
865 // %d35-91 / ; characters not including
866 // %d93-126 / ; "\" or the quote character
867 // obs-qtext
868 //
869 // obs-qtext = obs-NO-WS-CTL
870 //
871 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
872 // %d11 / ; characters that do not
873 // %d12 / ; include the carriage
874 // %d14-31 / ; return, line feed, and
875 // %d127 ; white space characters
876 $ord = ord($token);
877
878 if (($ord > 127) || ($ord === 0) || ($ord === 10)) {
879 $return_status[] = ISEMAIL_ERR_EXPECTING_QTEXT; // Fatal error
880 } elseif (($ord < 32) || ($ord === 127)) {
881 $return_status[] = ISEMAIL_DEPREC_QTEXT;
882 }
883
884 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
885 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
886 $element_len++;
887 }
888
889 // http://tools.ietf.org/html/rfc5322#section-3.4.1
890 // If the
891 // string can be represented as a dot-atom (that is, it contains no
892 // characters other than atext characters or "." surrounded by atext
893 // characters), then the dot-atom form SHOULD be used and the quoted-
894 // string form SHOULD NOT be used.
895 // To do
896 break;
897 //-------------------------------------------------------------
898 // Quoted pair
899 //-------------------------------------------------------------
900 case ISEMAIL_CONTEXT_QUOTEDPAIR:
901 // http://tools.ietf.org/html/rfc5322#section-3.2.1
902 // quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
903 //
904 // VCHAR = %d33-126 ; visible (printing) characters
905 // WSP = SP / HTAB ; white space
906 //
907 // obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
908 //
909 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
910 // %d11 / ; characters that do not
911 // %d12 / ; include the carriage
912 // %d14-31 / ; return, line feed, and
913 // %d127 ; white space characters
914 //
915 // i.e. obs-qp = "\" (%d0-8, %d10-31 / %d127)
916 $ord = ord($token);
917
918 if ($ord > 127) {
919 $return_status[] = ISEMAIL_ERR_EXPECTING_QPAIR; // Fatal error
920 } elseif ((($ord < 31) && ($ord !== 9)) || ($ord === 127)) {
921 // SP & HTAB are allowed
922 $return_status[] = ISEMAIL_DEPREC_QP;
923 }
924
925 // At this point we know where this qpair occurred so
926 // we could check to see if the character actually
927 // needed to be quoted at all.
928 // http://tools.ietf.org/html/rfc5321#section-4.1.2
929 // the sending system SHOULD transmit the
930 // form that uses the minimum quoting possible.
931 // To do: check whether the character needs to be quoted (escaped) in this context
932 $context_prior = $context;
933 $context = (int) array_pop($context_stack); // End of qpair
934 $token = ISEMAIL_STRING_BACKSLASH . $token;
935
936 switch ($context) {
937 case ISEMAIL_CONTEXT_COMMENT:
938 break;
939 case ISEMAIL_CONTEXT_QUOTEDSTRING:
940 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
941 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
942 $element_len += 2;
943 // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
944 break;
945 case ISEMAIL_COMPONENT_LITERAL:
946 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
947 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
948 $element_len += 2;
949 // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
950 break;
951 default:
952 die("Quoted pair logic invoked in an invalid context: $context");
953 }
954
955 break;
956 //-------------------------------------------------------------
957 // Comment
958 //-------------------------------------------------------------
959 case ISEMAIL_CONTEXT_COMMENT:
960 // http://tools.ietf.org/html/rfc5322#section-3.2.2
961 // comment = "(" *([FWS] ccontent) [FWS] ")"
962 //
963 // ccontent = ctext / quoted-pair / comment
964 switch ($token) {
965 // Nested comment
966 case ISEMAIL_STRING_OPENPARENTHESIS:
967 // Nested comments are OK
968 $context_stack[] = $context;
969 $context = ISEMAIL_CONTEXT_COMMENT;
970 break;
971 // End of comment
972 case ISEMAIL_STRING_CLOSEPARENTHESIS:
973 $context_prior = $context;
974 $context = (int) array_pop($context_stack);
975
976 // http://tools.ietf.org/html/rfc5322#section-3.2.2
977 // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
978 // structured header field are semantically interpreted as a single
979 // space character.
980 //
981 // is_email() author's note: This *cannot* mean that we must add a
982 // space to the address wherever CFWS appears. This would result in
983 // any addr-spec that had CFWS outside a quoted string being invalid
984 // for RFC 5321.
985 // if (($context === ISEMAIL_COMPONENT_LOCALPART) || ($context === ISEMAIL_COMPONENT_DOMAIN)) {
986 // $parsedata[$context] .= ISEMAIL_STRING_SP;
987 // $atomlist[$context][$element_count] .= ISEMAIL_STRING_SP;
988 // $element_len++;
989 // }
990
991 break;
992 // Quoted pair
993 case ISEMAIL_STRING_BACKSLASH:
994 $context_stack[] = $context;
995 $context = ISEMAIL_CONTEXT_QUOTEDPAIR;
996 break;
997 // Folding White Space
998 case ISEMAIL_STRING_CR:
999 case ISEMAIL_STRING_SP:
1000 case ISEMAIL_STRING_HTAB:
1001 if (($token === ISEMAIL_STRING_CR)
1002 && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {
1003 $return_status[] = ISEMAIL_ERR_CR_NO_LF;
1004 break;
1005 } // Fatal error
1006
1007 $return_status[] = ISEMAIL_CFWS_FWS;
1008
1009 $context_stack[] = $context;
1010 $context = ISEMAIL_CONTEXT_FWS;
1011 $token_prior = $token;
1012 break;
1013 // ctext
1014 default:
1015 // http://tools.ietf.org/html/rfc5322#section-3.2.3
1016 // ctext = %d33-39 / ; Printable US-ASCII
1017 // %d42-91 / ; characters not including
1018 // %d93-126 / ; "(", ")", or "\"
1019 // obs-ctext
1020 //
1021 // obs-ctext = obs-NO-WS-CTL
1022 //
1023 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
1024 // %d11 / ; characters that do not
1025 // %d12 / ; include the carriage
1026 // %d14-31 / ; return, line feed, and
1027 // %d127 ; white space characters
1028 $ord = ord($token);
1029
1030 if (($ord > 127) || ($ord === 0) || ($ord === 10)) {
1031 $return_status[] = ISEMAIL_ERR_EXPECTING_CTEXT; // Fatal error
1032 break;
1033 } elseif (($ord < 32) || ($ord === 127)) {
1034 $return_status[] = ISEMAIL_DEPREC_CTEXT;
1035 }
1036 }
1037 break;
1038 //-------------------------------------------------------------
1039 // Folding White Space
1040 //-------------------------------------------------------------
1041 case ISEMAIL_CONTEXT_FWS:
1042 // http://tools.ietf.org/html/rfc5322#section-3.2.2
1043 // FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
1044 // ; Folding white space
1045
1046 // But note the erratum:
1047 // http://www.rfc-editor.org/errata_search.php?rfc=5322&eid=1908:
1048 // In the obsolete syntax, any amount of folding white space MAY be
1049 // inserted where the obs-FWS rule is allowed. This creates the
1050 // possibility of having two consecutive "folds" in a line, and
1051 // therefore the possibility that a line which makes up a folded header
1052 // field could be composed entirely of white space.
1053 //
1054 // obs-FWS = 1*([CRLF] WSP)
1055 if ($token_prior === ISEMAIL_STRING_CR) {
1056 if ($token === ISEMAIL_STRING_CR) {
1057 $return_status[] = ISEMAIL_ERR_FWS_CRLF_X2; // Fatal error
1058 break;
1059 }
1060 if (isset($crlf_count)) {
1061 if (++$crlf_count > 1) {
1062 $return_status[] = ISEMAIL_DEPREC_FWS; // Multiple folds = obsolete FWS
1063 }
1064 } else {
1065 $crlf_count = 1;
1066 }
1067 }
1068
1069 switch ($token) {
1070 case ISEMAIL_STRING_CR:
1071 if ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF)) {
1072 $return_status[] = ISEMAIL_ERR_CR_NO_LF; // Fatal error
1073 }
1074 break;
1075 case ISEMAIL_STRING_SP:
1076 case ISEMAIL_STRING_HTAB:
1077 break;
1078 default:
1079 if ($token_prior === ISEMAIL_STRING_CR) {
1080 $return_status[] = ISEMAIL_ERR_FWS_CRLF_END; // Fatal error
1081 break;
1082 }
1083
1084 if (isset($crlf_count)) {
1085 unset($crlf_count);
1086 }
1087
1088 $context_prior = $context;
1089 $context = (int) array_pop($context_stack); // End of FWS
1090
1091 // http://tools.ietf.org/html/rfc5322#section-3.2.2
1092 // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
1093 // structured header field are semantically interpreted as a single
1094 // space character.
1095 //
1096 // is_email() author's note: This *cannot* mean that we must add a
1097 // space to the address wherever CFWS appears. This would result in
1098 // any addr-spec that had CFWS outside a quoted string being invalid
1099 // for RFC 5321.
1100 // if (($context === ISEMAIL_COMPONENT_LOCALPART) || ($context === ISEMAIL_COMPONENT_DOMAIN)) {
1101 // $parsedata[$context] .= ISEMAIL_STRING_SP;
1102 // $atomlist[$context][$element_count] .= ISEMAIL_STRING_SP;
1103 // $element_len++;
1104 // }
1105
1106 $i--; // Look at this token again in the parent context
1107 }
1108
1109 $token_prior = $token;
1110 break;
1111 //-------------------------------------------------------------
1112 // A context we aren't expecting
1113 //-------------------------------------------------------------
1114 default:
1115 die("Unknown context: $context");
1116 }
1117
1118 //-echo "<td>$context|",(($end_or_die) ? 'true' : 'false'),"|$token|" . max($return_status) . "</td></tr>"; // debug
1119 if ((int) max($return_status) > ISEMAIL_RFC5322) {
1120 break; // No point going on if we've got a fatal error
1121 }
1122 }
1123
1124 // Some simple final tests
1125 if ((int) max($return_status) < ISEMAIL_RFC5322) {
1126 if ($context === ISEMAIL_CONTEXT_QUOTEDSTRING) {
1127 $return_status[] = ISEMAIL_ERR_UNCLOSEDQUOTEDSTR; // Fatal error
1128 } elseif ($context === ISEMAIL_CONTEXT_QUOTEDPAIR) {
1129 $return_status[] = ISEMAIL_ERR_BACKSLASHEND; // Fatal error
1130 } elseif ($context === ISEMAIL_CONTEXT_COMMENT) {
1131 $return_status[] = ISEMAIL_ERR_UNCLOSEDCOMMENT; // Fatal error
1132 } elseif ($context === ISEMAIL_COMPONENT_LITERAL) {
1133 $return_status[] = ISEMAIL_ERR_UNCLOSEDDOMLIT; // Fatal error
1134 } elseif ($token === ISEMAIL_STRING_CR) {
1135 $return_status[] = ISEMAIL_ERR_FWS_CRLF_END; // Fatal error
1136 } elseif ($parsedata[ISEMAIL_COMPONENT_DOMAIN] === '') {
1137 $return_status[] = ISEMAIL_ERR_NODOMAIN; // Fatal error
1138 } elseif ($element_len === 0) {
1139 $return_status[] = ISEMAIL_ERR_DOT_END; // Fatal error
1140 } elseif ($hyphen_flag) {
1141 $return_status[] = ISEMAIL_ERR_DOMAINHYPHENEND; // Fatal error
1142 } elseif (strlen($parsedata[ISEMAIL_COMPONENT_DOMAIN]) > 255) {
1143 // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.2
1144 // The maximum total length of a domain name or number is 255 octets.
1145 $return_status[] = ISEMAIL_RFC5322_DOMAIN_TOOLONG;
1146 } elseif (strlen(
1147 $parsedata[ISEMAIL_COMPONENT_LOCALPART] . ISEMAIL_STRING_AT . $parsedata[ISEMAIL_COMPONENT_DOMAIN]
1148 ) > 254) {
1149 // http://tools.ietf.org/html/rfc5321#section-4.1.2
1150 // Forward-path = Path
1151 //
1152 // Path = "<" [ A-d-l ":" ] Mailbox ">"
1153 //
1154 // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3
1155 // The maximum total length of a reverse-path or forward-path is 256
1156 // octets (including the punctuation and element separators).
1157 //
1158 // Thus, even without (obsolete) routing information, the Mailbox can
1159 // only be 254 characters long. This is confirmed by this verified
1160 // erratum to RFC 3696:
1161 //
1162 // http://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690
1163 // However, there is a restriction in RFC 2821 on the length of an
1164 // address in MAIL and RCPT commands of 254 characters. Since addresses
1165 // that do not fit in those fields are not normally useful, the upper
1166 // limit on address lengths should normally be considered to be 254.
1167 $return_status[] = ISEMAIL_RFC5322_TOOLONG;
1168 } elseif ($element_len > 63) {
1169 // http://tools.ietf.org/html/rfc1035#section-2.3.4
1170 // labels 63 octets or less
1171 $return_status[] = ISEMAIL_RFC5322_LABEL_TOOLONG;
1172 }
1173 }
1174
1175 // Check DNS?
1176 $dns_checked = false;
1177
1178 if ($checkDNS && ((int) max($return_status) < ISEMAIL_DNSWARN) && function_exists('dns_get_record')) {
1179 // http://tools.ietf.org/html/rfc5321#section-2.3.5
1180 // Names that can
1181 // be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
1182 // in Section 5) are permitted, as are CNAME RRs whose targets can be
1183 // resolved, in turn, to MX or address RRs.
1184 //
1185 // http://tools.ietf.org/html/rfc5321#section-5.1
1186 // The lookup first attempts to locate an MX record associated with the
1187 // name. If a CNAME record is found, the resulting name is processed as
1188 // if it were the initial name. ... If an empty list of MXs is returned,
1189 // the address is treated as if it was associated with an implicit MX
1190 // RR, with a preference of 0, pointing to that host.
1191 //
1192 // is_email() author's note: We will regard the existence of a CNAME to be
1193 // sufficient evidence of the domain's existence. For performance reasons
1194 // we will not repeat the DNS lookup for the CNAME's target, but we will
1195 // raise a warning because we didn't immediately find an MX record.
1196 if ($element_count === 0) {
1197 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= '.';
1198 // Checking TLD DNS seems to work only if you explicitly check from the root
1199 }
1200 // Not using checkdnsrr because of a suspected bug in PHP 5.3 (http://bugs.php.net/bug.php?id=51844)
1201 $result = @dns_get_record($parsedata[ISEMAIL_COMPONENT_DOMAIN], DNS_MX);
1202
1203 if ((is_bool($result) && !(bool) $result)) {
1204 $return_status[] = ISEMAIL_DNSWARN_NO_RECORD;
1205 // Domain can't be found in DNS
1206 } else {
1207 if (count($result) === 0) {
1208 $return_status[] = ISEMAIL_DNSWARN_NO_MX_RECORD; // MX-record for domain can't be found
1209 $result = @dns_get_record($parsedata[ISEMAIL_COMPONENT_DOMAIN], DNS_A + DNS_CNAME);
1210
1211 if (count($result) === 0) {
1212 $return_status[] = ISEMAIL_DNSWARN_NO_RECORD;
1213 // No usable records for the domain can be found
1214 }
1215 } else {
1216 $dns_checked = true;
1217 }
1218 }
1219 }
1220
1221 // Check for TLD addresses
1222 // -----------------------
1223 // TLD addresses are specifically allowed in RFC 5321 but they are
1224 // unusual to say the least. We will allocate a separate
1225 // status to these addresses on the basis that they are more likely
1226 // to be typos than genuine addresses (unless we've already
1227 // established that the domain does have an MX record)
1228 //
1229 // http://tools.ietf.org/html/rfc5321#section-2.3.5
1230 // In the case
1231 // of a top-level domain used by itself in an email address, a single
1232 // string is used without any dots. This makes the requirement,
1233 // described in more detail below, that only fully-qualified domain
1234 // names appear in SMTP transactions on the public Internet,
1235 // particularly important where top-level domains are involved.
1236 //
1237 // TLD format
1238 // ----------
1239 // The format of TLDs has changed a number of times. The standards
1240 // used by IANA have been largely ignored by ICANN, leading to
1241 // confusion over the standards being followed. These are not defined
1242 // anywhere, except as a general component of a DNS host name (a label).
1243 // However, this could potentially lead to 123.123.123.123 being a
1244 // valid DNS name (rather than an IP address) and thereby creating
1245 // an ambiguity. The most authoritative statement on TLD formats that
1246 // the author can find is in a (rejected!) erratum to RFC 1123
1247 // submitted by John Klensin, the author of RFC 5321:
1248 //
1249 // http://www.rfc-editor.org/errata_search.php?rfc=1123&eid=1353
1250 // However, a valid host name can never have the dotted-decimal
1251 // form #.#.#.#, since this change does not permit the highest-level
1252 // component label to start with a digit even if it is not all-numeric.
1253 if (!$dns_checked && ((int) max($return_status) < ISEMAIL_DNSWARN)) {
1254 if ($element_count === 0) {
1255 $return_status[] = ISEMAIL_RFC5321_TLD;
1256 }
1257
1258 if (is_numeric($atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count][0])) {
1259 $return_status[] = ISEMAIL_RFC5321_TLDNUMERIC;
1260 }
1261 }
1262
1263 $return_status = array_unique($return_status);
1264 $final_status = (int) max($return_status);
1265
1266 if (count($return_status) !== 1) {
1267 array_shift($return_status); // remove redundant ISEMAIL_VALID
1268 }
1269
1270 $parsedata['status'] = $return_status;
1271
1272 if ($final_status < $threshold) {
1273 $final_status = ISEMAIL_VALID;
1274 }
1275
1276 return ($diagnose) ? $final_status : ($final_status < ISEMAIL_THRESHOLD);
1277 }