[PLUGINS] +set de base
[lhc/web/www.git] / www / plugins / verifier / inc / is_email.php
1 <?php
2 /**
3 * To validate an email address according to RFCs 5321, 5322 and others
4 *
5 * Copyright © 2008-2011, Dominic Sayers <br>
6 * Test schema documentation Copyright © 2011, Daniel Marschall <br>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without modification,
10 * are permitted provided that the following conditions are met:
11 *
12 * - Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 * - Neither the name of Dominic Sayers nor the names of its contributors may be
18 * used to endorse or promote products derived from this software without
19 * specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * @package is_email
33 * @author Dominic Sayers <dominic@sayers.cc>
34 * @copyright 2008-2011 Dominic Sayers
35 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
36 * @link http://www.dominicsayers.com/isemail
37 * @version 3.01.1 - Fixed examples and readme.txt
38 */
39
40 // The quality of this code has been improved greatly by using PHPLint
41 // Copyright (c) 2010 Umberto Salsi
42 // This is free software; see the license for copying conditions.
43 // More info: http://www.icosaedro.it/phplint/
44 /*.
45 require_module 'standard';
46 require_module 'pcre';
47 .*/
48
49 if (!defined('ISEMAIL_VALID')) { // Define the whole constant set only once, even if this file is included repeatedly
50 /*:diagnostic constants start:*/
51 // This part of the code is generated using data from test/meta.xml. Beware of making manual alterations
52 // Categories. Each value is an exclusive upper bound for the diagnosis codes that share its name prefix (e.g. every ISEMAIL_DNSWARN_* code is below 7 and above the previous category's value)
53 define('ISEMAIL_VALID_CATEGORY', 1);
54 define('ISEMAIL_DNSWARN', 7);
55 define('ISEMAIL_RFC5321', 15);
56 define('ISEMAIL_CFWS', 31);
57 define('ISEMAIL_DEPREC', 63);
58 define('ISEMAIL_RFC5322', 127);
59 define('ISEMAIL_ERR', 255);
60
61 // Diagnoses. is_email() collects these codes in $return_status and compares the maximum against a category value (e.g. max($return_status) < ISEMAIL_DEPREC)
62 // Address is valid
63 define('ISEMAIL_VALID', 0);
64 // Address is valid but a DNS check was not successful
65 define('ISEMAIL_DNSWARN_NO_MX_RECORD', 5);
66 define('ISEMAIL_DNSWARN_NO_RECORD', 6);
67 // Address is valid for SMTP but has unusual elements
68 define('ISEMAIL_RFC5321_TLD', 9);
69 define('ISEMAIL_RFC5321_TLDNUMERIC', 10);
70 define('ISEMAIL_RFC5321_QUOTEDSTRING', 11);
71 define('ISEMAIL_RFC5321_ADDRESSLITERAL', 12);
72 define('ISEMAIL_RFC5321_IPV6DEPRECATED', 13);
73 // Address is valid within the message but cannot be used unmodified for the envelope
74 define('ISEMAIL_CFWS_COMMENT', 17);
75 define('ISEMAIL_CFWS_FWS', 18);
76 // Address contains deprecated elements but may still be valid in restricted contexts
77 define('ISEMAIL_DEPREC_LOCALPART', 33);
78 define('ISEMAIL_DEPREC_FWS', 34);
79 define('ISEMAIL_DEPREC_QTEXT', 35);
80 define('ISEMAIL_DEPREC_QP', 36);
81 define('ISEMAIL_DEPREC_COMMENT', 37);
82 define('ISEMAIL_DEPREC_CTEXT', 38);
83 define('ISEMAIL_DEPREC_CFWS_NEAR_AT', 49);
84 // The address is only valid according to the broad definition of RFC 5322. It is otherwise invalid.
85 define('ISEMAIL_RFC5322_DOMAIN', 65);
86 define('ISEMAIL_RFC5322_TOOLONG', 66);
87 define('ISEMAIL_RFC5322_LOCAL_TOOLONG', 67);
88 define('ISEMAIL_RFC5322_DOMAIN_TOOLONG', 68);
89 define('ISEMAIL_RFC5322_LABEL_TOOLONG', 69);
90 define('ISEMAIL_RFC5322_DOMAINLITERAL', 70);
91 define('ISEMAIL_RFC5322_DOMLIT_OBSDTEXT', 71);
92 define('ISEMAIL_RFC5322_IPV6_GRPCOUNT', 72);
93 define('ISEMAIL_RFC5322_IPV6_2X2XCOLON', 73);
94 define('ISEMAIL_RFC5322_IPV6_BADCHAR', 74);
95 define('ISEMAIL_RFC5322_IPV6_MAXGRPS', 75);
96 define('ISEMAIL_RFC5322_IPV6_COLONSTRT', 76);
97 define('ISEMAIL_RFC5322_IPV6_COLONEND', 77);
98 // Address is invalid for any purpose
99 define('ISEMAIL_ERR_EXPECTING_DTEXT', 129);
100 define('ISEMAIL_ERR_NOLOCALPART', 130);
101 define('ISEMAIL_ERR_NODOMAIN', 131);
102 define('ISEMAIL_ERR_CONSECUTIVEDOTS', 132);
103 define('ISEMAIL_ERR_ATEXT_AFTER_CFWS', 133);
104 define('ISEMAIL_ERR_ATEXT_AFTER_QS', 134);
105 define('ISEMAIL_ERR_ATEXT_AFTER_DOMLIT', 135);
106 define('ISEMAIL_ERR_EXPECTING_QPAIR', 136);
107 define('ISEMAIL_ERR_EXPECTING_ATEXT', 137);
108 define('ISEMAIL_ERR_EXPECTING_QTEXT', 138);
109 define('ISEMAIL_ERR_EXPECTING_CTEXT', 139);
110 define('ISEMAIL_ERR_BACKSLASHEND', 140);
111 define('ISEMAIL_ERR_DOT_START', 141);
112 define('ISEMAIL_ERR_DOT_END', 142);
113 define('ISEMAIL_ERR_DOMAINHYPHENSTART', 143);
114 define('ISEMAIL_ERR_DOMAINHYPHENEND', 144);
115 define('ISEMAIL_ERR_UNCLOSEDQUOTEDSTR', 145);
116 define('ISEMAIL_ERR_UNCLOSEDCOMMENT', 146);
117 define('ISEMAIL_ERR_UNCLOSEDDOMLIT', 147);
118 define('ISEMAIL_ERR_FWS_CRLF_X2', 148);
119 define('ISEMAIL_ERR_FWS_CRLF_END', 149);
120 define('ISEMAIL_ERR_CR_NO_LF', 150);
121 // End of generated code
122 /*:diagnostic constants end:*/
123
124 // function control: the threshold is_email() applies when the legacy value E_WARNING is passed as $errorlevel
125 define('ISEMAIL_THRESHOLD' , 16);
126
127 // Email parts and parser contexts: used as values of $context and as keys of $parsedata / $atomlist in is_email()
128 define('ISEMAIL_COMPONENT_LOCALPART' , 0);
129 define('ISEMAIL_COMPONENT_DOMAIN' , 1);
130 define('ISEMAIL_COMPONENT_LITERAL' , 2);
131 define('ISEMAIL_CONTEXT_COMMENT' , 3);
132 define('ISEMAIL_CONTEXT_FWS' , 4);
133 define('ISEMAIL_CONTEXT_QUOTEDSTRING' , 5);
134 define('ISEMAIL_CONTEXT_QUOTEDPAIR' , 6);
135
136 // Miscellaneous string constants: the characters and short strings the parser matches against the input
137 define('ISEMAIL_STRING_AT' , '@');
138 define('ISEMAIL_STRING_BACKSLASH' , '\\'); // a single backslash (escaped inside the PHP string literal)
139 define('ISEMAIL_STRING_DOT' , '.');
140 define('ISEMAIL_STRING_DQUOTE' , '"');
141 define('ISEMAIL_STRING_OPENPARENTHESIS' , '(');
142 define('ISEMAIL_STRING_CLOSEPARENTHESIS', ')');
143 define('ISEMAIL_STRING_OPENSQBRACKET' , '[');
144 define('ISEMAIL_STRING_CLOSESQBRACKET' , ']');
145 define('ISEMAIL_STRING_HYPHEN' , '-');
146 define('ISEMAIL_STRING_COLON' , ':');
147 define('ISEMAIL_STRING_DOUBLECOLON' , '::'); // searched for in the IPv6 part of an address literal
148 define('ISEMAIL_STRING_SP' , ' ');
149 define('ISEMAIL_STRING_HTAB' , "\t");
150 define('ISEMAIL_STRING_CR' , "\r");
151 define('ISEMAIL_STRING_LF' , "\n");
152 define('ISEMAIL_STRING_IPV6TAG' , 'IPv6:'); // address-literal prefix; is_email() compares it case-insensitively over its 5 characters
153 // US-ASCII visible characters not valid for atext (http://tools.ietf.org/html/rfc5322#section-3.2.3)
154 define('ISEMAIL_STRING_SPECIALS' , '()<>[]:;@\\,."');
155 }
156
157 /**
158 * Check that an email address conforms to RFCs 5321, 5322 and others
159 *
160 * As of Version 3.0, we are now distinguishing clearly between a Mailbox
161 * as defined by RFC 5321 and an addr-spec as defined by RFC 5322. Depending
162 * on the context, either can be regarded as a valid email address. The
163 * RFC 5321 Mailbox specification is more restrictive (comments, white space
164 * and obsolete forms are not allowed)
165 *
166 * @param string $email The email address to check
167 * @param boolean $checkDNS If true then a DNS check for MX records will be made
168 * @param mixed $errorlevel Determines the boundary between valid and invalid addresses.
169 * Status codes above this number will be returned as-is,
170 * status codes below will be returned as ISEMAIL_VALID. Thus the
171 * calling program can simply look for ISEMAIL_VALID if it is
172 * only interested in whether an address is valid or not. The
173 * errorlevel will determine how "picky" is_email() is about
174 * the address.
175 *
176 * If omitted or passed as false then is_email() will return
177 * true or false rather than an integer error or warning.
178 *
179 * NB: $errorlevel = false and $errorlevel = 0 are not equivalent. false selects
180 * the boolean (true/false) result; 0 selects an integer diagnosis with a threshold of 0
181 * @param array $parsedata If passed, returns the parsed address components
182 */
183 /*.mixed.*/ function is_email($email, $checkDNS = false, $errorlevel = false, &$parsedata = array()) {
184 // Check that $email is a valid address. Read the following RFCs to understand the constraints:
185 // (http://tools.ietf.org/html/rfc5321)
186 // (http://tools.ietf.org/html/rfc5322)
187 // (http://tools.ietf.org/html/rfc4291#section-2.2)
188 // (http://tools.ietf.org/html/rfc1123#section-2.1)
189 // (http://tools.ietf.org/html/rfc3696) (guidance only)
190 // version 2.0: Enhance $diagnose parameter to $errorlevel
191 // version 3.0: Introduced status categories
192 // revision 3.1: BUG: $parsedata was passed by value instead of by reference
193
194 if (is_bool($errorlevel)) {
195 $threshold = ISEMAIL_VALID;
196 $diagnose = (bool) $errorlevel;
197 } else {
198 $diagnose = true;
199
200 switch ((int) $errorlevel) {
201 case E_WARNING: $threshold = ISEMAIL_THRESHOLD; break; // For backward compatibility
202 case E_ERROR: $threshold = ISEMAIL_VALID; break; // For backward compatibility
203 default: $threshold = (int) $errorlevel;
204 }
205 }
206
207 $return_status = array(ISEMAIL_VALID);
208
209 // Parse the address into components, character by character
210 $raw_length = strlen($email);
211 $context = ISEMAIL_COMPONENT_LOCALPART; // Where we are
212 $context_stack = array($context); // Where we have been
213 $context_prior = ISEMAIL_COMPONENT_LOCALPART; // Where we just came from
214 $token = ''; // The current character
215 $token_prior = ''; // The previous character
216 $parsedata = array(
217 ISEMAIL_COMPONENT_LOCALPART => '',
218 ISEMAIL_COMPONENT_DOMAIN => ''
219 ); // For the components of the address
220
221 $atomlist = array(
222 ISEMAIL_COMPONENT_LOCALPART => array(''),
223 ISEMAIL_COMPONENT_DOMAIN => array('')
224 ); // For the dot-atom elements of the address
225 $element_count = 0;
226 $element_len = 0;
227 $hyphen_flag = false; // Hyphen cannot occur at the end of a subdomain
228 $end_or_die = false; // CFWS can only appear at the end of the element
229
230 //-echo "<table style=\"clear:left;\">"; // debug
231 for ($i = 0; $i < $raw_length; $i++) {
232 $token = $email[$i];
233 //-echo "<tr><td><strong>$context|",(($end_or_die) ? 'true' : 'false'),"|$token|" . max($return_status) . "</strong></td>"; // debug
234
235 switch ($context) {
236 //-------------------------------------------------------------
237 // local-part
238 //-------------------------------------------------------------
239 case ISEMAIL_COMPONENT_LOCALPART:
240 // http://tools.ietf.org/html/rfc5322#section-3.4.1
241 // local-part = dot-atom / quoted-string / obs-local-part
242 //
243 // dot-atom = [CFWS] dot-atom-text [CFWS]
244 //
245 // dot-atom-text = 1*atext *("." 1*atext)
246 //
247 // quoted-string = [CFWS]
248 // DQUOTE *([FWS] qcontent) [FWS] DQUOTE
249 // [CFWS]
250 //
251 // obs-local-part = word *("." word)
252 //
253 // word = atom / quoted-string
254 //
255 // atom = [CFWS] 1*atext [CFWS]
256 switch ($token) {
257 // Comment
258 case ISEMAIL_STRING_OPENPARENTHESIS:
259 if ($element_len === 0)
260 // Comments are OK at the beginning of an element
261 $return_status[] = ($element_count === 0) ? ISEMAIL_CFWS_COMMENT : ISEMAIL_DEPREC_COMMENT;
262 else {
263 $return_status[] = ISEMAIL_CFWS_COMMENT;
264 $end_or_die = true; // We can't start a comment in the middle of an element, so this better be the end
265 }
266
267 $context_stack[] = $context;
268 $context = ISEMAIL_CONTEXT_COMMENT;
269 break;
270 // Next dot-atom element
271 case ISEMAIL_STRING_DOT:
272 if ($element_len === 0)
273 // Another dot, already?
274 $return_status[] = ($element_count === 0) ? ISEMAIL_ERR_DOT_START : ISEMAIL_ERR_CONSECUTIVEDOTS; // Fatal error
275 else
276 // The entire local-part can be a quoted string for RFC 5321
277 // If it's just one atom that is quoted then it's an RFC 5322 obsolete form
278 if ($end_or_die) $return_status[] = ISEMAIL_DEPREC_LOCALPART;
279
280 $end_or_die = false; // CFWS & quoted strings are OK again now we're at the beginning of an element (although they are obsolete forms)
281 $element_len = 0;
282 $element_count++;
283 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
284 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] = '';
285
286 break;
287 // Quoted string
288 case ISEMAIL_STRING_DQUOTE:
289 if ($element_len === 0) {
290 // The entire local-part can be a quoted string for RFC 5321
291 // If it's just one atom that is quoted then it's an RFC 5322 obsolete form
292 $return_status[] = ($element_count === 0) ? ISEMAIL_RFC5321_QUOTEDSTRING : ISEMAIL_DEPREC_LOCALPART;
293
294 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
295 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
296 $element_len++;
297 $end_or_die = true; // Quoted string must be the entire element
298 $context_stack[] = $context;
299 $context = ISEMAIL_CONTEXT_QUOTEDSTRING;
300 } else {
301 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
302 }
303
304 break;
305 // Folding White Space
306 case ISEMAIL_STRING_CR:
307 case ISEMAIL_STRING_SP:
308 case ISEMAIL_STRING_HTAB:
309 if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error
310
311 if ($element_len === 0)
312 $return_status[] = ($element_count === 0) ? ISEMAIL_CFWS_FWS : ISEMAIL_DEPREC_FWS;
313 else
314 $end_or_die = true; // We can't start FWS in the middle of an element, so this better be the end
315
316 $context_stack[] = $context;
317 $context = ISEMAIL_CONTEXT_FWS;
318 $token_prior = $token;
319
320 break;
321 // @
322 case ISEMAIL_STRING_AT:
323 // At this point we should have a valid local-part
324 if (count($context_stack) !== 1) die('Unexpected item on context stack');
325
326 if ($parsedata[ISEMAIL_COMPONENT_LOCALPART] === '')
327 $return_status[] = ISEMAIL_ERR_NOLOCALPART; // Fatal error
328 elseif ($element_len === 0) $return_status[] = ISEMAIL_ERR_DOT_END; // Fatal error
329 // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.1
330 // The maximum total length of a user name or other local-part is 64
331 // octets.
332 elseif (strlen($parsedata[ISEMAIL_COMPONENT_LOCALPART]) > 64)
333 $return_status[] = ISEMAIL_RFC5322_LOCAL_TOOLONG;
334 // http://tools.ietf.org/html/rfc5322#section-3.4.1
335 // Comments and folding white space
336 // SHOULD NOT be used around the "@" in the addr-spec.
337 //
338 // http://tools.ietf.org/html/rfc2119
339 // 4. SHOULD NOT This phrase, or the phrase "NOT RECOMMENDED" mean that
340 // there may exist valid reasons in particular circumstances when the
341 // particular behavior is acceptable or even useful, but the full
342 // implications should be understood and the case carefully weighed
343 // before implementing any behavior described with this label.
344 elseif (($context_prior === ISEMAIL_CONTEXT_COMMENT) || ($context_prior === ISEMAIL_CONTEXT_FWS))
345 $return_status[] = ISEMAIL_DEPREC_CFWS_NEAR_AT;
346
347 // Clear everything down for the domain parsing
348 $context = ISEMAIL_COMPONENT_DOMAIN; // Where we are
349 $context_stack = array($context); // Where we have been
350 $element_count = 0;
351 $element_len = 0;
352 $end_or_die = false; // CFWS can only appear at the end of the element
353
354 break;
355 // atext
356 default:
357 // http://tools.ietf.org/html/rfc5322#section-3.2.3
358 // atext = ALPHA / DIGIT / ; Printable US-ASCII
359 // "!" / "#" / ; characters not including
360 // "$" / "%" / ; specials. Used for atoms.
361 // "&" / "'" /
362 // "*" / "+" /
363 // "-" / "/" /
364 // "=" / "?" /
365 // "^" / "_" /
366 // "`" / "{" /
367 // "|" / "}" /
368 // "~"
369 if ($end_or_die) {
370 // We have encountered atext where it is no longer valid
371 switch ($context_prior) {
372 case ISEMAIL_CONTEXT_COMMENT:
373 case ISEMAIL_CONTEXT_FWS:
374 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_CFWS;
375 break;
376 case ISEMAIL_CONTEXT_QUOTEDSTRING:
377 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_QS;
378 break;
379 default:
380 die ("More atext found where none is allowed, but unrecognised prior context: $context_prior");
381 }
382 } else {
383 $context_prior = $context;
384 $ord = ord($token);
385
386 if (($ord < 33) || ($ord > 126) || ($ord === 10) || (!is_bool(strpos(ISEMAIL_STRING_SPECIALS, $token))))
387 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
388
389 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
390 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
391 $element_len++;
392 }
393 }
394
395 break;
396 //-------------------------------------------------------------
397 // Domain
398 //-------------------------------------------------------------
399 case ISEMAIL_COMPONENT_DOMAIN:
400 // http://tools.ietf.org/html/rfc5322#section-3.4.1
401 // domain = dot-atom / domain-literal / obs-domain
402 //
403 // dot-atom = [CFWS] dot-atom-text [CFWS]
404 //
405 // dot-atom-text = 1*atext *("." 1*atext)
406 //
407 // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
408 //
409 // dtext = %d33-90 / ; Printable US-ASCII
410 // %d94-126 / ; characters not including
411 // obs-dtext ; "[", "]", or "\"
412 //
413 // obs-domain = atom *("." atom)
414 //
415 // atom = [CFWS] 1*atext [CFWS]
416
417
418 // http://tools.ietf.org/html/rfc5321#section-4.1.2
419 // Mailbox = Local-part "@" ( Domain / address-literal )
420 //
421 // Domain = sub-domain *("." sub-domain)
422 //
423 // address-literal = "[" ( IPv4-address-literal /
424 // IPv6-address-literal /
425 // General-address-literal ) "]"
426 // ; See Section 4.1.3
427
428 // http://tools.ietf.org/html/rfc5322#section-3.4.1
429 // Note: A liberal syntax for the domain portion of addr-spec is
430 // given here. However, the domain portion contains addressing
431 // information specified by and used in other protocols (e.g.,
432 // [RFC1034], [RFC1035], [RFC1123], [RFC5321]). It is therefore
433 // incumbent upon implementations to conform to the syntax of
434 // addresses for the context in which they are used.
435 // is_email() author's note: it's not clear how to interpret this in
436 // the context of a general email address validator. The conclusion I
437 // have reached is this: "addressing information" must comply with
438 // RFC 5321 (and in turn RFC 1035), anything that is "semantically
439 // invisible" must comply only with RFC 5322.
440 switch ($token) {
441 // Comment
442 case ISEMAIL_STRING_OPENPARENTHESIS:
443 if ($element_len === 0)
444 // Comments at the start of the domain are deprecated in the text
445 // Comments at the start of a subdomain are obs-domain
446 // (http://tools.ietf.org/html/rfc5322#section-3.4.1)
447 $return_status[] = ($element_count === 0) ? ISEMAIL_DEPREC_CFWS_NEAR_AT : ISEMAIL_DEPREC_COMMENT;
448 else {
449 $return_status[] = ISEMAIL_CFWS_COMMENT;
450 $end_or_die = true; // We can't start a comment in the middle of an element, so this better be the end
451 }
452
453 $context_stack[] = $context;
454 $context = ISEMAIL_CONTEXT_COMMENT;
455 break;
456 // Next dot-atom element
457 case ISEMAIL_STRING_DOT:
458 if ($element_len === 0)
459 // Another dot, already?
460 $return_status[] = ($element_count === 0) ? ISEMAIL_ERR_DOT_START : ISEMAIL_ERR_CONSECUTIVEDOTS; // Fatal error
461 elseif ($hyphen_flag)
462 // Previous subdomain ended in a hyphen
463 $return_status[] = ISEMAIL_ERR_DOMAINHYPHENEND; // Fatal error
464 else
465 // Nowhere in RFC 5321 does it say explicitly that the
466 // domain part of a Mailbox must be a valid domain according
467 // to the DNS standards set out in RFC 1035, but this *is*
468 // implied in several places. For instance, wherever the idea
469 // of host routing is discussed the RFC says that the domain
470 // must be looked up in the DNS. This would be nonsense unless
471 // the domain was designed to be a valid DNS domain. Hence we
472 // must conclude that the RFC 1035 restriction on label length
473 // also applies to RFC 5321 domains.
474 //
475 // http://tools.ietf.org/html/rfc1035#section-2.3.4
476 // labels 63 octets or less
477 if ($element_len > 63) $return_status[] = ISEMAIL_RFC5322_LABEL_TOOLONG;
478
479 $end_or_die = false; // CFWS is OK again now we're at the beginning of an element (although it may be obsolete CFWS)
480 $element_len = 0;
481 $element_count++;
482 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] = '';
483 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
484
485 break;
486 // Domain literal
487 case ISEMAIL_STRING_OPENSQBRACKET:
488 if ($parsedata[ISEMAIL_COMPONENT_DOMAIN] === '') {
489 $end_or_die = true; // Domain literal must be the only component
490 $element_len++;
491 $context_stack[] = $context;
492 $context = ISEMAIL_COMPONENT_LITERAL;
493 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
494 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
495 $parsedata[ISEMAIL_COMPONENT_LITERAL] = '';
496 } else {
497 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
498 }
499
500 break;
501 // Folding White Space
502 case ISEMAIL_STRING_CR:
503 case ISEMAIL_STRING_SP:
504 case ISEMAIL_STRING_HTAB:
505 if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error
506
507 if ($element_len === 0)
508 $return_status[] = ($element_count === 0) ? ISEMAIL_DEPREC_CFWS_NEAR_AT : ISEMAIL_DEPREC_FWS;
509 else {
510 $return_status[] = ISEMAIL_CFWS_FWS;
511 $end_or_die = true; // We can't start FWS in the middle of an element, so this better be the end
512 }
513
514 $context_stack[] = $context;
515 $context = ISEMAIL_CONTEXT_FWS;
516 $token_prior = $token;
517 break;
518 // atext
519 default:
520 // RFC 5322 allows any atext...
521 // http://tools.ietf.org/html/rfc5322#section-3.2.3
522 // atext = ALPHA / DIGIT / ; Printable US-ASCII
523 // "!" / "#" / ; characters not including
524 // "$" / "%" / ; specials. Used for atoms.
525 // "&" / "'" /
526 // "*" / "+" /
527 // "-" / "/" /
528 // "=" / "?" /
529 // "^" / "_" /
530 // "`" / "{" /
531 // "|" / "}" /
532 // "~"
533
534 // But RFC 5321 only allows letter-digit-hyphen to comply with DNS rules (RFCs 1034 & 1123)
535 // http://tools.ietf.org/html/rfc5321#section-4.1.2
536 // sub-domain = Let-dig [Ldh-str]
537 //
538 // Let-dig = ALPHA / DIGIT
539 //
540 // Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
541 //
542 if ($end_or_die) {
543 // We have encountered atext where it is no longer valid
544 switch ($context_prior) {
545 case ISEMAIL_CONTEXT_COMMENT:
546 case ISEMAIL_CONTEXT_FWS:
547 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_CFWS;
548 break;
549 case ISEMAIL_COMPONENT_LITERAL:
550 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_DOMLIT;
551 break;
552 default:
553 die ("More atext found where none is allowed, but unrecognised prior context: $context_prior");
554 }
555 }
556
557 $ord = ord($token);
558 $hyphen_flag = false; // Assume this token isn't a hyphen unless we discover it is
559
560 if (($ord < 33) || ($ord > 126) || (!is_bool(strpos(ISEMAIL_STRING_SPECIALS, $token)))) {
561 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error
562 } elseif ($token === ISEMAIL_STRING_HYPHEN) {
563 if ($element_len === 0) {
564 // Hyphens can't be at the beginning of a subdomain
565 $return_status[] = ISEMAIL_ERR_DOMAINHYPHENSTART; // Fatal error
566 }
567
568 $hyphen_flag = true;
569 } elseif (!(($ord > 47 && $ord < 58) || ($ord > 64 && $ord < 91) || ($ord > 96 && $ord < 123))) {
570 // Not an RFC 5321 subdomain, but still OK by RFC 5322
571 $return_status[] = ISEMAIL_RFC5322_DOMAIN;
572 }
573
574 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
575 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
576 $element_len++;
577 }
578
579 break;
580 //-------------------------------------------------------------
581 // Domain literal
582 //-------------------------------------------------------------
583 case ISEMAIL_COMPONENT_LITERAL:
584 // http://tools.ietf.org/html/rfc5322#section-3.4.1
585 // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
586 //
587 // dtext = %d33-90 / ; Printable US-ASCII
588 // %d94-126 / ; characters not including
589 // obs-dtext ; "[", "]", or "\"
590 //
591 // obs-dtext = obs-NO-WS-CTL / quoted-pair
592 switch ($token) {
593 // End of domain literal
594 case ISEMAIL_STRING_CLOSESQBRACKET:
595 if ((int) max($return_status) < ISEMAIL_DEPREC) {
596 // Could be a valid RFC 5321 address literal, so let's check
597
598 // http://tools.ietf.org/html/rfc5321#section-4.1.2
599 // address-literal = "[" ( IPv4-address-literal /
600 // IPv6-address-literal /
601 // General-address-literal ) "]"
602 // ; See Section 4.1.3
603 //
604 // http://tools.ietf.org/html/rfc5321#section-4.1.3
605 // IPv4-address-literal = Snum 3("." Snum)
606 //
607 // IPv6-address-literal = "IPv6:" IPv6-addr
608 //
609 // General-address-literal = Standardized-tag ":" 1*dcontent
610 //
611 // Standardized-tag = Ldh-str
612 // ; Standardized-tag MUST be specified in a
613 // ; Standards-Track RFC and registered with IANA
614 //
615 // dcontent = %d33-90 / ; Printable US-ASCII
616 // %d94-126 ; excl. "[", "\", "]"
617 //
618 // Snum = 1*3DIGIT
619 // ; representing a decimal integer
620 // ; value in the range 0 through 255
621 //
622 // IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
623 //
624 // IPv6-hex = 1*4HEXDIG
625 //
626 // IPv6-full = IPv6-hex 7(":" IPv6-hex)
627 //
628 // IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::"
629 // [IPv6-hex *5(":" IPv6-hex)]
630 // ; The "::" represents at least 2 16-bit groups of
631 // ; zeros. No more than 6 groups in addition to the
632 // ; "::" may be present.
633 //
634 // IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
635 //
636 // IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"
637 // [IPv6-hex *3(":" IPv6-hex) ":"]
638 // IPv4-address-literal
639 // ; The "::" represents at least 2 16-bit groups of
640 // ; zeros. No more than 4 groups in addition to the
641 // ; "::" and IPv4-address-literal may be present.
642 //
643 // is_email() author's note: We can't use ip2long() to validate
644 // IPv4 addresses because it accepts abbreviated addresses
645 // (xxx.xxx.xxx), expanding the last group to complete the address.
646 // filter_var() validates IPv6 address inconsistently (up to PHP 5.3.3
647 // at least) -- see http://bugs.php.net/bug.php?id=53236 for example
648 $max_groups = 8;
649 $matchesIP = array();
650 /*.mixed.*/ $index = false;
651 $addressliteral = $parsedata[ISEMAIL_COMPONENT_LITERAL];
652
653 // Extract IPv4 part from the end of the address-literal (if there is one)
654 if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressliteral, $matchesIP) > 0) {
655 $index = strrpos($addressliteral, $matchesIP[0]);
656 if ($index !== 0) $addressliteral = substr($addressliteral, 0, $index) . '0:0'; // Convert IPv4 part to IPv6 format for further testing
657 }
658
659 if ($index === 0) {
660 // Nothing there except a valid IPv4 address, so...
661 $return_status[] = ISEMAIL_RFC5321_ADDRESSLITERAL;
662 } elseif (strncasecmp($addressliteral, ISEMAIL_STRING_IPV6TAG, 5) !== 0) {
663 $return_status[] = ISEMAIL_RFC5322_DOMAINLITERAL;
664 } else {
665 $IPv6 = substr($addressliteral, 5);
666 $matchesIP = explode(ISEMAIL_STRING_COLON, $IPv6); // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
667 $groupCount = count($matchesIP);
668 $index = strpos($IPv6,ISEMAIL_STRING_DOUBLECOLON);
669
670 if ($index === false) {
671 // We need exactly the right number of groups
672 if ($groupCount !== $max_groups)
673 $return_status[] = ISEMAIL_RFC5322_IPV6_GRPCOUNT;
674 } else {
675 if ($index !== strrpos($IPv6,ISEMAIL_STRING_DOUBLECOLON))
676 $return_status[] = ISEMAIL_RFC5322_IPV6_2X2XCOLON;
677 else {
678 if ($index === 0 || $index === (strlen($IPv6) - 2)) $max_groups++; // RFC 4291 allows :: at the start or end of an address with 7 other groups in addition
679
680 if ($groupCount > $max_groups)
681 $return_status[] = ISEMAIL_RFC5322_IPV6_MAXGRPS;
682 elseif ($groupCount === $max_groups)
683 $return_status[] = ISEMAIL_RFC5321_IPV6DEPRECATED; // Eliding a single "::"
684 }
685 }
686
687 // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
688 if ((substr($IPv6, 0, 1) === ISEMAIL_STRING_COLON) && (substr($IPv6, 1, 1) !== ISEMAIL_STRING_COLON))
689 $return_status[] = ISEMAIL_RFC5322_IPV6_COLONSTRT; // Address starts with a single colon
690 elseif ((substr($IPv6, -1) === ISEMAIL_STRING_COLON) && (substr($IPv6, -2, 1) !== ISEMAIL_STRING_COLON))
691 $return_status[] = ISEMAIL_RFC5322_IPV6_COLONEND; // Address ends with a single colon
692 elseif (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0)
693 $return_status[] = ISEMAIL_RFC5322_IPV6_BADCHAR; // Check for unmatched characters
694 else
695 $return_status[] = ISEMAIL_RFC5321_ADDRESSLITERAL;
696 }
697 } else
698 $return_status[] = ISEMAIL_RFC5322_DOMAINLITERAL;
699
700
701 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
702 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
703 $element_len++;
704 $context_prior = $context;
705 $context = (int) array_pop($context_stack);
706 break;
707 case ISEMAIL_STRING_BACKSLASH:
708 $return_status[] = ISEMAIL_RFC5322_DOMLIT_OBSDTEXT;
709 $context_stack[] = $context;
710 $context = ISEMAIL_CONTEXT_QUOTEDPAIR;
711 break;
712 // Folding White Space
713 case ISEMAIL_STRING_CR:
714 case ISEMAIL_STRING_SP:
715 case ISEMAIL_STRING_HTAB:
716 if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error
717
718 $return_status[] = ISEMAIL_CFWS_FWS;
719
720 $context_stack[] = $context;
721 $context = ISEMAIL_CONTEXT_FWS;
722 $token_prior = $token;
723 break;
724 // dtext
725 default:
726 // http://tools.ietf.org/html/rfc5322#section-3.4.1
727 // dtext = %d33-90 / ; Printable US-ASCII
728 // %d94-126 / ; characters not including
729 // obs-dtext ; "[", "]", or "\"
730 //
731 // obs-dtext = obs-NO-WS-CTL / quoted-pair
732 //
733 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
734 // %d11 / ; characters that do not
735 // %d12 / ; include the carriage
736 // %d14-31 / ; return, line feed, and
737 // %d127 ; white space characters
738 $ord = ord($token);
739
740 // CR, LF, SP & HTAB have already been parsed above
741 if (($ord > 127) || ($ord === 0) || ($token === ISEMAIL_STRING_OPENSQBRACKET)) {
742 $return_status[] = ISEMAIL_ERR_EXPECTING_DTEXT; // Fatal error
743 break;
744 } elseif (($ord < 33) || ($ord === 127)) {
745 $return_status[] = ISEMAIL_RFC5322_DOMLIT_OBSDTEXT;
746 }
747
748 $parsedata[ISEMAIL_COMPONENT_LITERAL] .= $token;
749 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
750 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
751 $element_len++;
752 }
753
754 break;
755 //-------------------------------------------------------------
756 // Quoted string
757 //-------------------------------------------------------------
758 case ISEMAIL_CONTEXT_QUOTEDSTRING:
759 // http://tools.ietf.org/html/rfc5322#section-3.2.4
760 // quoted-string = [CFWS]
761 // DQUOTE *([FWS] qcontent) [FWS] DQUOTE
762 // [CFWS]
763 //
764 // qcontent = qtext / quoted-pair
765 switch ($token) {
766 // Quoted pair
767 case ISEMAIL_STRING_BACKSLASH:
768 $context_stack[] = $context;
769 $context = ISEMAIL_CONTEXT_QUOTEDPAIR;
770 break;
771 // Folding White Space
772 // Inside a quoted string, spaces are allowed as regular characters.
773 // It's only FWS if we include HTAB or CRLF
774 case ISEMAIL_STRING_CR:
775 case ISEMAIL_STRING_HTAB:
776 if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error
777
778 // http://tools.ietf.org/html/rfc5322#section-3.2.2
779 // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
780 // structured header field are semantically interpreted as a single
781 // space character.
782
783 // http://tools.ietf.org/html/rfc5322#section-3.2.4
784 // the CRLF in any FWS/CFWS that appears within the quoted-string [is]
785 // semantically "invisible" and therefore not part of the quoted-string
786 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= ISEMAIL_STRING_SP;
787 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= ISEMAIL_STRING_SP;
788 $element_len++;
789
790 $return_status[] = ISEMAIL_CFWS_FWS;
791 $context_stack[] = $context;
792 $context = ISEMAIL_CONTEXT_FWS;
793 $token_prior = $token;
794 break;
795 // End of quoted string
796 case ISEMAIL_STRING_DQUOTE:
797 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
798 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
799 $element_len++;
800 $context_prior = $context;
801 $context = (int) array_pop($context_stack);
802 break;
803 // qtext
804 default:
805 // http://tools.ietf.org/html/rfc5322#section-3.2.4
806 // qtext = %d33 / ; Printable US-ASCII
807 // %d35-91 / ; characters not including
808 // %d93-126 / ; "\" or the quote character
809 // obs-qtext
810 //
811 // obs-qtext = obs-NO-WS-CTL
812 //
813 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
814 // %d11 / ; characters that do not
815 // %d12 / ; include the carriage
816 // %d14-31 / ; return, line feed, and
817 // %d127 ; white space characters
818 $ord = ord($token);
819
820 if (($ord > 127) || ($ord === 0) || ($ord === 10)) {
821 $return_status[] = ISEMAIL_ERR_EXPECTING_QTEXT; // Fatal error
822 } elseif (($ord < 32) || ($ord === 127))
823 $return_status[] = ISEMAIL_DEPREC_QTEXT;
824
825 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
826 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
827 $element_len++;
828 }
829
830 // http://tools.ietf.org/html/rfc5322#section-3.4.1
831 // If the
832 // string can be represented as a dot-atom (that is, it contains no
833 // characters other than atext characters or "." surrounded by atext
834 // characters), then the dot-atom form SHOULD be used and the quoted-
835 // string form SHOULD NOT be used.
836 // To do: report a status when the quoted string could have been written as a dot-atom
837 break;
838 //-------------------------------------------------------------
839 // Quoted pair
840 //-------------------------------------------------------------
841 case ISEMAIL_CONTEXT_QUOTEDPAIR:
842 // http://tools.ietf.org/html/rfc5322#section-3.2.1
843 // quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
844 //
845 // VCHAR = %d33-126 ; visible (printing) characters
846 // WSP = SP / HTAB ; white space
847 //
848 // obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
849 //
850 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
851 // %d11 / ; characters that do not
852 // %d12 / ; include the carriage
853 // %d14-31 / ; return, line feed, and
854 // %d127 ; white space characters
855 //
856 // i.e. obs-qp = "\" (%d0-8 / %d10-31 / %d127)
857 $ord = ord($token);
858
859 if ($ord > 127)
860 $return_status[] = ISEMAIL_ERR_EXPECTING_QPAIR; // Fatal error
861 elseif ((($ord < 31) && ($ord !== 9)) || ($ord === 127)) // SP & HTAB are allowed
862 $return_status[] = ISEMAIL_DEPREC_QP;
863
864 // At this point we know where this qpair occurred so
865 // we could check to see if the character actually
866 // needed to be quoted at all.
867 // http://tools.ietf.org/html/rfc5321#section-4.1.2
868 // the sending system SHOULD transmit the
869 // form that uses the minimum quoting possible.
870 // To do: check whether the character needs to be quoted (escaped) in this context
871 $context_prior = $context;
872 $context = (int) array_pop($context_stack); // End of qpair
873 $token = ISEMAIL_STRING_BACKSLASH . $token;
874
875 switch ($context) {
876 case ISEMAIL_CONTEXT_COMMENT:
877 break;
878 case ISEMAIL_CONTEXT_QUOTEDSTRING:
879 $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;
880 $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;
881 $element_len += 2; // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
882 break;
883 case ISEMAIL_COMPONENT_LITERAL:
884 $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;
885 $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;
886 $element_len += 2; // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
887 break;
888 default:
889 die("Quoted pair logic invoked in an invalid context: $context");
890 }
891
892 break;
893 //-------------------------------------------------------------
894 // Comment
895 //-------------------------------------------------------------
896 case ISEMAIL_CONTEXT_COMMENT:
897 // http://tools.ietf.org/html/rfc5322#section-3.2.2
898 // comment = "(" *([FWS] ccontent) [FWS] ")"
899 //
900 // ccontent = ctext / quoted-pair / comment
901 switch ($token) {
902 // Nested comment
903 case ISEMAIL_STRING_OPENPARENTHESIS:
904 // Nested comments are OK
905 $context_stack[] = $context;
906 $context = ISEMAIL_CONTEXT_COMMENT;
907 break;
908 // End of comment
909 case ISEMAIL_STRING_CLOSEPARENTHESIS:
910 $context_prior = $context;
911 $context = (int) array_pop($context_stack);
912
913 // http://tools.ietf.org/html/rfc5322#section-3.2.2
914 // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
915 // structured header field are semantically interpreted as a single
916 // space character.
917 //
918 // is_email() author's note: This *cannot* mean that we must add a
919 // space to the address wherever CFWS appears. This would result in
920 // any addr-spec that had CFWS outside a quoted string being invalid
921 // for RFC 5321.
922 // if (($context === ISEMAIL_COMPONENT_LOCALPART) || ($context === ISEMAIL_COMPONENT_DOMAIN)) {
923 // $parsedata[$context] .= ISEMAIL_STRING_SP;
924 // $atomlist[$context][$element_count] .= ISEMAIL_STRING_SP;
925 // $element_len++;
926 // }
927
928 break;
929 // Quoted pair
930 case ISEMAIL_STRING_BACKSLASH:
931 $context_stack[] = $context;
932 $context = ISEMAIL_CONTEXT_QUOTEDPAIR;
933 break;
934 // Folding White Space
935 case ISEMAIL_STRING_CR:
936 case ISEMAIL_STRING_SP:
937 case ISEMAIL_STRING_HTAB:
938 if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error
939
940 $return_status[] = ISEMAIL_CFWS_FWS;
941
942 $context_stack[] = $context;
943 $context = ISEMAIL_CONTEXT_FWS;
944 $token_prior = $token;
945 break;
946 // ctext
947 default:
948 // http://tools.ietf.org/html/rfc5322#section-3.2.3
949 // ctext = %d33-39 / ; Printable US-ASCII
950 // %d42-91 / ; characters not including
951 // %d93-126 / ; "(", ")", or "\"
952 // obs-ctext
953 //
954 // obs-ctext = obs-NO-WS-CTL
955 //
956 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
957 // %d11 / ; characters that do not
958 // %d12 / ; include the carriage
959 // %d14-31 / ; return, line feed, and
960 // %d127 ; white space characters
961 $ord = ord($token);
962
963 if (($ord > 127) || ($ord === 0) || ($ord === 10)) {
964 $return_status[] = ISEMAIL_ERR_EXPECTING_CTEXT; // Fatal error
965 break;
966 } elseif (($ord < 32) || ($ord === 127)) {
967 $return_status[] = ISEMAIL_DEPREC_CTEXT;
968 }
969 }
970
971 break;
972 //-------------------------------------------------------------
973 // Folding White Space
974 //-------------------------------------------------------------
975 case ISEMAIL_CONTEXT_FWS:
976 // http://tools.ietf.org/html/rfc5322#section-3.2.2
977 // FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
978 // ; Folding white space
979
980 // But note the erratum:
981 // http://www.rfc-editor.org/errata_search.php?rfc=5322&eid=1908:
982 // In the obsolete syntax, any amount of folding white space MAY be
983 // inserted where the obs-FWS rule is allowed. This creates the
984 // possibility of having two consecutive "folds" in a line, and
985 // therefore the possibility that a line which makes up a folded header
986 // field could be composed entirely of white space.
987 //
988 // obs-FWS = 1*([CRLF] WSP)
989 if ($token_prior === ISEMAIL_STRING_CR) {
990 if ($token === ISEMAIL_STRING_CR) {
991 $return_status[] = ISEMAIL_ERR_FWS_CRLF_X2; // Fatal error
992 break;
993 }
994
995 if (isset($crlf_count)) {
996 if (++$crlf_count > 1)
997 $return_status[] = ISEMAIL_DEPREC_FWS; // Multiple folds = obsolete FWS
998 } else $crlf_count = 1;
999 }
1000
1001 switch ($token) {
1002 case ISEMAIL_STRING_CR:
1003 if ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))
1004 $return_status[] = ISEMAIL_ERR_CR_NO_LF; // Fatal error
1005
1006 break;
1007 case ISEMAIL_STRING_SP:
1008 case ISEMAIL_STRING_HTAB:
1009 break;
1010 default:
1011 if ($token_prior === ISEMAIL_STRING_CR) {
1012 $return_status[] = ISEMAIL_ERR_FWS_CRLF_END; // Fatal error
1013 break;
1014 }
1015
1016 if (isset($crlf_count)) unset($crlf_count);
1017
1018 $context_prior = $context;
1019 $context = (int) array_pop($context_stack); // End of FWS
1020
1021 // http://tools.ietf.org/html/rfc5322#section-3.2.2
1022 // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
1023 // structured header field are semantically interpreted as a single
1024 // space character.
1025 //
1026 // is_email() author's note: This *cannot* mean that we must add a
1027 // space to the address wherever CFWS appears. This would result in
1028 // any addr-spec that had CFWS outside a quoted string being invalid
1029 // for RFC 5321.
1030 // if (($context === ISEMAIL_COMPONENT_LOCALPART) || ($context === ISEMAIL_COMPONENT_DOMAIN)) {
1031 // $parsedata[$context] .= ISEMAIL_STRING_SP;
1032 // $atomlist[$context][$element_count] .= ISEMAIL_STRING_SP;
1033 // $element_len++;
1034 // }
1035
1036 $i--; // Look at this token again in the parent context
1037 }
1038
1039 $token_prior = $token;
1040 break;
1041 //-------------------------------------------------------------
1042 // A context we aren't expecting
1043 //-------------------------------------------------------------
1044 default:
1045 die("Unknown context: $context");
1046 }
1047
1048 //-echo "<td>$context|",(($end_or_die) ? 'true' : 'false'),"|$token|" . max($return_status) . "</td></tr>"; // debug
1049 if ((int) max($return_status) > ISEMAIL_RFC5322) break; // No point going on if we've got a fatal error
1050 }
1051
1052 // Some simple final tests
1053 if ((int) max($return_status) < ISEMAIL_RFC5322) {
1054 if ($context === ISEMAIL_CONTEXT_QUOTEDSTRING) $return_status[] = ISEMAIL_ERR_UNCLOSEDQUOTEDSTR; // Fatal error
1055 elseif ($context === ISEMAIL_CONTEXT_QUOTEDPAIR) $return_status[] = ISEMAIL_ERR_BACKSLASHEND; // Fatal error
1056 elseif ($context === ISEMAIL_CONTEXT_COMMENT) $return_status[] = ISEMAIL_ERR_UNCLOSEDCOMMENT; // Fatal error
1057 elseif ($context === ISEMAIL_COMPONENT_LITERAL) $return_status[] = ISEMAIL_ERR_UNCLOSEDDOMLIT; // Fatal error
1058 elseif ($token === ISEMAIL_STRING_CR) $return_status[] = ISEMAIL_ERR_FWS_CRLF_END; // Fatal error
1059 elseif ($parsedata[ISEMAIL_COMPONENT_DOMAIN] === '') $return_status[] = ISEMAIL_ERR_NODOMAIN; // Fatal error
1060 elseif ($element_len === 0) $return_status[] = ISEMAIL_ERR_DOT_END; // Fatal error
1061 elseif ($hyphen_flag) $return_status[] = ISEMAIL_ERR_DOMAINHYPHENEND; // Fatal error
1062 // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.2
1063 // The maximum total length of a domain name or number is 255 octets.
1064 elseif (strlen($parsedata[ISEMAIL_COMPONENT_DOMAIN]) > 255)
1065 $return_status[] = ISEMAIL_RFC5322_DOMAIN_TOOLONG;
1066 // http://tools.ietf.org/html/rfc5321#section-4.1.2
1067 // Forward-path = Path
1068 //
1069 // Path = "<" [ A-d-l ":" ] Mailbox ">"
1070 //
1071 // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3
1072 // The maximum total length of a reverse-path or forward-path is 256
1073 // octets (including the punctuation and element separators).
1074 //
1075 // Thus, even without (obsolete) routing information, the Mailbox can
1076 // only be 254 characters long. This is confirmed by this verified
1077 // erratum to RFC 3696:
1078 //
1079 // http://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690
1080 // However, there is a restriction in RFC 2821 on the length of an
1081 // address in MAIL and RCPT commands of 254 characters. Since addresses
1082 // that do not fit in those fields are not normally useful, the upper
1083 // limit on address lengths should normally be considered to be 254.
1084 elseif (strlen($parsedata[ISEMAIL_COMPONENT_LOCALPART] . ISEMAIL_STRING_AT . $parsedata[ISEMAIL_COMPONENT_DOMAIN]) > 254)
1085 $return_status[] = ISEMAIL_RFC5322_TOOLONG;
1086 // http://tools.ietf.org/html/rfc1035#section-2.3.4
1087 // labels 63 octets or less
1088 elseif ($element_len > 63) $return_status[] = ISEMAIL_RFC5322_LABEL_TOOLONG;
1089 }
1090
1091 // Check DNS?
1092 $dns_checked = false;
1093
1094 if ($checkDNS && ((int) max($return_status) < ISEMAIL_DNSWARN) && function_exists('dns_get_record')) {
1095 // http://tools.ietf.org/html/rfc5321#section-2.3.5
1096 // Names that can
1097 // be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
1098 // in Section 5) are permitted, as are CNAME RRs whose targets can be
1099 // resolved, in turn, to MX or address RRs.
1100 //
1101 // http://tools.ietf.org/html/rfc5321#section-5.1
1102 // The lookup first attempts to locate an MX record associated with the
1103 // name. If a CNAME record is found, the resulting name is processed as
1104 // if it were the initial name. ... If an empty list of MXs is returned,
1105 // the address is treated as if it was associated with an implicit MX
1106 // RR, with a preference of 0, pointing to that host.
1107 //
1108 // is_email() author's note: We will regard the existence of a CNAME to be
1109 // sufficient evidence of the domain's existence. For performance reasons
1110 // we will not repeat the DNS lookup for the CNAME's target, but we will
1111 // raise a warning because we didn't immediately find an MX record.
1112 if ($element_count === 0) $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= '.'; // Checking TLD DNS seems to work only if you explicitly check from the root
1113
1114 $result = @dns_get_record($parsedata[ISEMAIL_COMPONENT_DOMAIN], DNS_MX); // Not using checkdnsrr because of a suspected bug in PHP 5.3 (http://bugs.php.net/bug.php?id=51844)
1115
1116 if ((is_bool($result) && !(bool) $result))
1117 $return_status[] = ISEMAIL_DNSWARN_NO_RECORD; // Domain can't be found in DNS
1118 else {
1119 if (count($result) === 0) {
1120 $return_status[] = ISEMAIL_DNSWARN_NO_MX_RECORD; // MX-record for domain can't be found
1121 $result = @dns_get_record($parsedata[ISEMAIL_COMPONENT_DOMAIN], DNS_A + DNS_CNAME);
1122
1123 if (count($result) === 0)
1124 $return_status[] = ISEMAIL_DNSWARN_NO_RECORD; // No usable records for the domain can be found
1125 } else $dns_checked = true;
1126 }
1127 }
1128
1129 // Check for TLD addresses
1130 // -----------------------
1131 // TLD addresses are specifically allowed in RFC 5321 but they are
1132 // unusual to say the least. We will allocate a separate
1133 // status to these addresses on the basis that they are more likely
1134 // to be typos than genuine addresses (unless we've already
1135 // established that the domain does have an MX record)
1136 //
1137 // http://tools.ietf.org/html/rfc5321#section-2.3.5
1138 // In the case
1139 // of a top-level domain used by itself in an email address, a single
1140 // string is used without any dots. This makes the requirement,
1141 // described in more detail below, that only fully-qualified domain
1142 // names appear in SMTP transactions on the public Internet,
1143 // particularly important where top-level domains are involved.
1144 //
1145 // TLD format
1146 // ----------
1147 // The format of TLDs has changed a number of times. The standards
1148 // used by IANA have been largely ignored by ICANN, leading to
1149 // confusion over the standards being followed. These are not defined
1150 // anywhere, except as a general component of a DNS host name (a label).
1151 // However, this could potentially lead to 123.123.123.123 being a
1152 // valid DNS name (rather than an IP address) and thereby creating
1153 // an ambiguity. The most authoritative statement on TLD formats that
1154 // the author can find is in a (rejected!) erratum to RFC 1123
1155 // submitted by John Klensin, the author of RFC 5321:
1156 //
1157 // http://www.rfc-editor.org/errata_search.php?rfc=1123&eid=1353
1158 // However, a valid host name can never have the dotted-decimal
1159 // form #.#.#.#, since this change does not permit the highest-level
1160 // component label to start with a digit even if it is not all-numeric.
1161 if (!$dns_checked && ((int) max($return_status) < ISEMAIL_DNSWARN)) {
1162 if ($element_count === 0) $return_status[] = ISEMAIL_RFC5321_TLD;
1163
1164 if (is_numeric($atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count][0]))
1165 $return_status[] = ISEMAIL_RFC5321_TLDNUMERIC;
1166 }
1167
1168 $return_status = array_unique($return_status);
1169 $final_status = (int) max($return_status);
1170
1171 if (count($return_status) !== 1) array_shift($return_status); // remove redundant ISEMAIL_VALID
1172
1173 $parsedata['status'] = $return_status;
1174
1175 if ($final_status < $threshold) $final_status = ISEMAIL_VALID;
1176
1177 return ($diagnose) ? $final_status : ($final_status < ISEMAIL_THRESHOLD);
1178 }
1179 ?>