--- /dev/null
+<?php\r
+/**\r
+ * To validate an email address according to RFCs 5321, 5322 and others\r
+ * \r
+ * Copyright © 2008-2011, Dominic Sayers <br>\r
+ * Test schema documentation Copyright © 2011, Daniel Marschall <br>\r
+ * All rights reserved.\r
+ * \r
+ * Redistribution and use in source and binary forms, with or without modification,\r
+ * are permitted provided that the following conditions are met:\r
+ * \r
+ * - Redistributions of source code must retain the above copyright notice,\r
+ * this list of conditions and the following disclaimer.\r
+ * - Redistributions in binary form must reproduce the above copyright notice,\r
+ * this list of conditions and the following disclaimer in the documentation\r
+ * and/or other materials provided with the distribution.\r
+ * - Neither the name of Dominic Sayers nor the names of its contributors may be\r
+ * used to endorse or promote products derived from this software without\r
+ * specific prior written permission.\r
+ * \r
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND\r
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\r
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR\r
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\r
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\r
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\r
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+ * \r
+ * @package is_email\r
+ * @author Dominic Sayers <dominic@sayers.cc>\r
+ * @copyright 2008-2011 Dominic Sayers\r
+ * @license http://www.opensource.org/licenses/bsd-license.php BSD License\r
+ * @link http://www.dominicsayers.com/isemail\r
+ * @version 3.01.1 - Fixed examples and readme.txt\r
+ */\r
+\r
+// The quality of this code has been improved greatly by using PHPLint\r
+// Copyright (c) 2010 Umberto Salsi\r
+// This is free software; see the license for copying conditions.\r
+// More info: http://www.icosaedro.it/phplint/\r
+/*.\r
+ require_module 'standard';\r
+ require_module 'pcre';\r
+.*/\r
+\r
+if (!defined('ISEMAIL_VALID')) {\r
+/*:diagnostic constants start:*/\r
+// This part of the code is generated using data from test/meta.xml. Beware of making manual alterations. The enclosing guard skips every define() in this block when ISEMAIL_VALID already exists.\r
+ // Categories: each value is an upper bound for the diagnosis codes grouped under it below (e.g. ISEMAIL_DNSWARN = 7 sits above diagnoses 5-6), so a status can be classified by numeric comparison such as max($return_status) < ISEMAIL_DEPREC\r
+ define('ISEMAIL_VALID_CATEGORY', 1);\r
+ define('ISEMAIL_DNSWARN', 7);\r
+ define('ISEMAIL_RFC5321', 15);\r
+ define('ISEMAIL_CFWS', 31);\r
+ define('ISEMAIL_DEPREC', 63);\r
+ define('ISEMAIL_RFC5322', 127);\r
+ define('ISEMAIL_ERR', 255);\r
+\r
+ // Diagnoses: the individual status codes that is_email() appends to $return_status; their numeric order follows the category order above\r
+ // Address is valid\r
+ define('ISEMAIL_VALID', 0);\r
+ // Address is valid but a DNS check was not successful\r
+ define('ISEMAIL_DNSWARN_NO_MX_RECORD', 5);\r
+ define('ISEMAIL_DNSWARN_NO_RECORD', 6);\r
+ // Address is valid for SMTP but has unusual elements\r
+ define('ISEMAIL_RFC5321_TLD', 9);\r
+ define('ISEMAIL_RFC5321_TLDNUMERIC', 10);\r
+ define('ISEMAIL_RFC5321_QUOTEDSTRING', 11);\r
+ define('ISEMAIL_RFC5321_ADDRESSLITERAL', 12);\r
+ define('ISEMAIL_RFC5321_IPV6DEPRECATED', 13);\r
+ // Address is valid within the message but cannot be used unmodified for the envelope\r
+ define('ISEMAIL_CFWS_COMMENT', 17);\r
+ define('ISEMAIL_CFWS_FWS', 18);\r
+ // Address contains deprecated elements but may still be valid in restricted contexts\r
+ define('ISEMAIL_DEPREC_LOCALPART', 33);\r
+ define('ISEMAIL_DEPREC_FWS', 34);\r
+ define('ISEMAIL_DEPREC_QTEXT', 35);\r
+ define('ISEMAIL_DEPREC_QP', 36);\r
+ define('ISEMAIL_DEPREC_COMMENT', 37);\r
+ define('ISEMAIL_DEPREC_CTEXT', 38);\r
+ define('ISEMAIL_DEPREC_CFWS_NEAR_AT', 49);\r
+ // The address is only valid according to the broad definition of RFC 5322. It is otherwise invalid.\r
+ define('ISEMAIL_RFC5322_DOMAIN', 65);\r
+ define('ISEMAIL_RFC5322_TOOLONG', 66);\r
+ define('ISEMAIL_RFC5322_LOCAL_TOOLONG', 67);\r
+ define('ISEMAIL_RFC5322_DOMAIN_TOOLONG', 68);\r
+ define('ISEMAIL_RFC5322_LABEL_TOOLONG', 69);\r
+ define('ISEMAIL_RFC5322_DOMAINLITERAL', 70);\r
+ define('ISEMAIL_RFC5322_DOMLIT_OBSDTEXT', 71);\r
+ define('ISEMAIL_RFC5322_IPV6_GRPCOUNT', 72);\r
+ define('ISEMAIL_RFC5322_IPV6_2X2XCOLON', 73);\r
+ define('ISEMAIL_RFC5322_IPV6_BADCHAR', 74);\r
+ define('ISEMAIL_RFC5322_IPV6_MAXGRPS', 75);\r
+ define('ISEMAIL_RFC5322_IPV6_COLONSTRT', 76);\r
+ define('ISEMAIL_RFC5322_IPV6_COLONEND', 77);\r
+ // Address is invalid for any purpose\r
+ define('ISEMAIL_ERR_EXPECTING_DTEXT', 129);\r
+ define('ISEMAIL_ERR_NOLOCALPART', 130);\r
+ define('ISEMAIL_ERR_NODOMAIN', 131);\r
+ define('ISEMAIL_ERR_CONSECUTIVEDOTS', 132);\r
+ define('ISEMAIL_ERR_ATEXT_AFTER_CFWS', 133);\r
+ define('ISEMAIL_ERR_ATEXT_AFTER_QS', 134);\r
+ define('ISEMAIL_ERR_ATEXT_AFTER_DOMLIT', 135);\r
+ define('ISEMAIL_ERR_EXPECTING_QPAIR', 136);\r
+ define('ISEMAIL_ERR_EXPECTING_ATEXT', 137);\r
+ define('ISEMAIL_ERR_EXPECTING_QTEXT', 138);\r
+ define('ISEMAIL_ERR_EXPECTING_CTEXT', 139);\r
+ define('ISEMAIL_ERR_BACKSLASHEND', 140);\r
+ define('ISEMAIL_ERR_DOT_START', 141);\r
+ define('ISEMAIL_ERR_DOT_END', 142);\r
+ define('ISEMAIL_ERR_DOMAINHYPHENSTART', 143);\r
+ define('ISEMAIL_ERR_DOMAINHYPHENEND', 144);\r
+ define('ISEMAIL_ERR_UNCLOSEDQUOTEDSTR', 145);\r
+ define('ISEMAIL_ERR_UNCLOSEDCOMMENT', 146);\r
+ define('ISEMAIL_ERR_UNCLOSEDDOMLIT', 147);\r
+ define('ISEMAIL_ERR_FWS_CRLF_X2', 148);\r
+ define('ISEMAIL_ERR_FWS_CRLF_END', 149);\r
+ define('ISEMAIL_ERR_CR_NO_LF', 150);\r
+// End of generated code\r
+/*:diagnostic constants end:*/\r
+\r
+ // function control: ISEMAIL_THRESHOLD is the threshold is_email() selects when $errorlevel is passed as E_WARNING (backward compatibility); 16 lies above every RFC5321 diagnosis (<= 15) and below the first CFWS diagnosis (17)\r
+ define('ISEMAIL_THRESHOLD' , 16);\r
+\r
+ // Email parts: values of the parser's $context state machine; LOCALPART, DOMAIN and LITERAL are also used as keys into $parsedata (LOCALPART and DOMAIN into $atomlist as well)\r
+ define('ISEMAIL_COMPONENT_LOCALPART' , 0);\r
+ define('ISEMAIL_COMPONENT_DOMAIN' , 1);\r
+ define('ISEMAIL_COMPONENT_LITERAL' , 2);\r
+ define('ISEMAIL_CONTEXT_COMMENT' , 3);\r
+ define('ISEMAIL_CONTEXT_FWS' , 4);\r
+ define('ISEMAIL_CONTEXT_QUOTEDSTRING' , 5);\r
+ define('ISEMAIL_CONTEXT_QUOTEDPAIR' , 6);\r
+\r
+ // Miscellaneous string constants: the tokens and markers that is_email() compares the input against while parsing\r
+ define('ISEMAIL_STRING_AT' , '@');\r
+ define('ISEMAIL_STRING_BACKSLASH' , '\\');\r
+ define('ISEMAIL_STRING_DOT' , '.');\r
+ define('ISEMAIL_STRING_DQUOTE' , '"');\r
+ define('ISEMAIL_STRING_OPENPARENTHESIS' , '(');\r
+ define('ISEMAIL_STRING_CLOSEPARENTHESIS', ')');\r
+ define('ISEMAIL_STRING_OPENSQBRACKET' , '[');\r
+ define('ISEMAIL_STRING_CLOSESQBRACKET' , ']');\r
+ define('ISEMAIL_STRING_HYPHEN' , '-');\r
+ define('ISEMAIL_STRING_COLON' , ':');\r
+ define('ISEMAIL_STRING_DOUBLECOLON' , '::');\r
+ define('ISEMAIL_STRING_SP' , ' ');\r
+ define('ISEMAIL_STRING_HTAB' , "\t");\r
+ define('ISEMAIL_STRING_CR' , "\r");\r
+ define('ISEMAIL_STRING_LF' , "\n");\r
+ define('ISEMAIL_STRING_IPV6TAG' , 'IPv6:');\r
+ // US-ASCII visible characters not valid for atext (http://tools.ietf.org/html/rfc5322#section-3.2.3); is_email() looks each atom character up in this string with strpos() and reports ISEMAIL_ERR_EXPECTING_ATEXT on a hit\r
+ define('ISEMAIL_STRING_SPECIALS' , '()<>[]:;@\\,."');\r
+}\r
+\r
+/**\r
+ * Check that an email address conforms to RFCs 5321, 5322 and others\r
+ *\r
+ * As of Version 3.0, we are now distinguishing clearly between a Mailbox\r
+ * as defined by RFC 5321 and an addr-spec as defined by RFC 5322. Depending\r
+ * on the context, either can be regarded as a valid email address. The\r
+ * RFC 5321 Mailbox specification is more restrictive (comments, white space\r
+ * and obsolete forms are not allowed)\r
+ *\r
+ * @param string $email The email address to check\r
+ * @param boolean $checkDNS If true then a DNS check for MX records will be made\r
+ * @param mixed $errorlevel Determines the boundary between valid and invalid addresses.\r
+ * Status codes above this number will be returned as-is,\r
+ * status codes below will be returned as ISEMAIL_VALID. Thus the\r
+ * calling program can simply look for ISEMAIL_VALID if it is\r
+ * only interested in whether an address is valid or not. The\r
+ * errorlevel will determine how "picky" is_email() is about\r
+ * the address.\r
+ *\r
+ * If omitted or passed as false then is_email() will return\r
+ * true or false rather than an integer error or warning.\r
+ *\r
+ * NB Note the difference between $errorlevel = false and\r
+ * $errorlevel = 0\r
+ * @param array $parsedata If passed, returns the parsed address components\r
+ */\r
+/*.mixed.*/ function is_email($email, $checkDNS = false, $errorlevel = false, &$parsedata = array()) {\r
+ // Check that $email is a valid address. Read the following RFCs to understand the constraints:\r
+ // (http://tools.ietf.org/html/rfc5321)\r
+ // (http://tools.ietf.org/html/rfc5322)\r
+ // (http://tools.ietf.org/html/rfc4291#section-2.2)\r
+ // (http://tools.ietf.org/html/rfc1123#section-2.1)\r
+ // (http://tools.ietf.org/html/rfc3696) (guidance only)\r
+// version 2.0: Enhance $diagnose parameter to $errorlevel\r
+// version 3.0: Introduced status categories\r
+// revision 3.1: BUG: $parsedata was passed by value instead of by reference\r
+\r
+ if (is_bool($errorlevel)) {\r
+ $threshold = ISEMAIL_VALID;\r
+ $diagnose = (bool) $errorlevel;\r
+ } else {\r
+ $diagnose = true;\r
+\r
+ switch ((int) $errorlevel) {\r
+ case E_WARNING: $threshold = ISEMAIL_THRESHOLD; break; // For backward compatibility\r
+ case E_ERROR: $threshold = ISEMAIL_VALID; break; // For backward compatibility\r
+ default: $threshold = (int) $errorlevel;\r
+ }\r
+ }\r
+\r
+ $return_status = array(ISEMAIL_VALID);\r
+\r
+ // Parse the address into components, character by character\r
+ $raw_length = strlen($email);\r
+ $context = ISEMAIL_COMPONENT_LOCALPART; // Where we are\r
+ $context_stack = array($context); // Where we have been\r
+ $context_prior = ISEMAIL_COMPONENT_LOCALPART; // Where we just came from\r
+ $token = ''; // The current character\r
+ $token_prior = ''; // The previous character\r
+ $parsedata = array(\r
+ ISEMAIL_COMPONENT_LOCALPART => '',\r
+ ISEMAIL_COMPONENT_DOMAIN => ''\r
+ ); // For the components of the address\r
+\r
+ $atomlist = array(\r
+ ISEMAIL_COMPONENT_LOCALPART => array(''),\r
+ ISEMAIL_COMPONENT_DOMAIN => array('')\r
+ ); // For the dot-atom elements of the address\r
+ $element_count = 0;\r
+ $element_len = 0;\r
+ $hyphen_flag = false; // Hyphen cannot occur at the end of a subdomain\r
+ $end_or_die = false; // CFWS can only appear at the end of the element\r
+\r
+//-echo "<table style=\"clear:left;\">"; // debug\r
+ for ($i = 0; $i < $raw_length; $i++) {\r
+ $token = $email[$i];\r
+//-echo "<tr><td><strong>$context|",(($end_or_die) ? 'true' : 'false'),"|$token|" . max($return_status) . "</strong></td>"; // debug\r
+\r
+ switch ($context) {\r
+ //-------------------------------------------------------------\r
+ // local-part\r
+ //-------------------------------------------------------------\r
+ case ISEMAIL_COMPONENT_LOCALPART:\r
+ // http://tools.ietf.org/html/rfc5322#section-3.4.1\r
+ // local-part = dot-atom / quoted-string / obs-local-part\r
+ //\r
+ // dot-atom = [CFWS] dot-atom-text [CFWS]\r
+ //\r
+ // dot-atom-text = 1*atext *("." 1*atext)\r
+ //\r
+ // quoted-string = [CFWS]\r
+ // DQUOTE *([FWS] qcontent) [FWS] DQUOTE\r
+ // [CFWS]\r
+ //\r
+ // obs-local-part = word *("." word)\r
+ //\r
+ // word = atom / quoted-string\r
+ //\r
+ // atom = [CFWS] 1*atext [CFWS]\r
+ switch ($token) {\r
+ // Comment\r
+ case ISEMAIL_STRING_OPENPARENTHESIS:\r
+ if ($element_len === 0)\r
+ // Comments are OK at the beginning of an element\r
+ $return_status[] = ($element_count === 0) ? ISEMAIL_CFWS_COMMENT : ISEMAIL_DEPREC_COMMENT;\r
+ else {\r
+ $return_status[] = ISEMAIL_CFWS_COMMENT;\r
+ $end_or_die = true; // We can't start a comment in the middle of an element, so this better be the end\r
+ }\r
+\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_COMMENT;\r
+ break;\r
+ // Next dot-atom element\r
+ case ISEMAIL_STRING_DOT:\r
+ if ($element_len === 0)\r
+ // Another dot, already?\r
+ $return_status[] = ($element_count === 0) ? ISEMAIL_ERR_DOT_START : ISEMAIL_ERR_CONSECUTIVEDOTS; // Fatal error\r
+ else\r
+ // The entire local-part can be a quoted string for RFC 5321\r
+ // If it's just one atom that is quoted then it's an RFC 5322 obsolete form\r
+ if ($end_or_die) $return_status[] = ISEMAIL_DEPREC_LOCALPART;\r
+\r
+ $end_or_die = false; // CFWS & quoted strings are OK again now we're at the beginning of an element (although they are obsolete forms)\r
+ $element_len = 0;\r
+ $element_count++;\r
+ $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;\r
+ $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] = '';\r
+\r
+ break;\r
+ // Quoted string\r
+ case ISEMAIL_STRING_DQUOTE:\r
+ if ($element_len === 0) {\r
+ // The entire local-part can be a quoted string for RFC 5321\r
+ // If it's just one atom that is quoted then it's an RFC 5322 obsolete form\r
+ $return_status[] = ($element_count === 0) ? ISEMAIL_RFC5321_QUOTEDSTRING : ISEMAIL_DEPREC_LOCALPART;\r
+\r
+ $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;\r
+ $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;\r
+ $element_len++;\r
+ $end_or_die = true; // Quoted string must be the entire element\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_QUOTEDSTRING;\r
+ } else {\r
+ $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error\r
+ }\r
+\r
+ break;\r
+ // Folding White Space\r
+ case ISEMAIL_STRING_CR:\r
+ case ISEMAIL_STRING_SP:\r
+ case ISEMAIL_STRING_HTAB:\r
+ if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error\r
+\r
+ if ($element_len === 0)\r
+ $return_status[] = ($element_count === 0) ? ISEMAIL_CFWS_FWS : ISEMAIL_DEPREC_FWS;\r
+ else\r
+ $end_or_die = true; // We can't start FWS in the middle of an element, so this better be the end\r
+\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_FWS;\r
+ $token_prior = $token;\r
+\r
+ break;\r
+ // @\r
+ case ISEMAIL_STRING_AT:\r
+ // At this point we should have a valid local-part\r
+ if (count($context_stack) !== 1) die('Unexpected item on context stack');\r
+\r
+ if ($parsedata[ISEMAIL_COMPONENT_LOCALPART] === '')\r
+ $return_status[] = ISEMAIL_ERR_NOLOCALPART; // Fatal error\r
+ elseif ($element_len === 0) $return_status[] = ISEMAIL_ERR_DOT_END; // Fatal error\r
+ // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.1\r
+ // The maximum total length of a user name or other local-part is 64\r
+ // octets.\r
+ elseif (strlen($parsedata[ISEMAIL_COMPONENT_LOCALPART]) > 64)\r
+ $return_status[] = ISEMAIL_RFC5322_LOCAL_TOOLONG;\r
+ // http://tools.ietf.org/html/rfc5322#section-3.4.1\r
+ // Comments and folding white space\r
+ // SHOULD NOT be used around the "@" in the addr-spec.\r
+ //\r
+ // http://tools.ietf.org/html/rfc2119\r
+ // 4. SHOULD NOT This phrase, or the phrase "NOT RECOMMENDED" mean that\r
+ // there may exist valid reasons in particular circumstances when the\r
+ // particular behavior is acceptable or even useful, but the full\r
+ // implications should be understood and the case carefully weighed\r
+ // before implementing any behavior described with this label.\r
+ elseif (($context_prior === ISEMAIL_CONTEXT_COMMENT) || ($context_prior === ISEMAIL_CONTEXT_FWS))\r
+ $return_status[] = ISEMAIL_DEPREC_CFWS_NEAR_AT;\r
+\r
+ // Clear everything down for the domain parsing\r
+ $context = ISEMAIL_COMPONENT_DOMAIN; // Where we are\r
+ $context_stack = array($context); // Where we have been\r
+ $element_count = 0;\r
+ $element_len = 0;\r
+ $end_or_die = false; // CFWS can only appear at the end of the element\r
+\r
+ break;\r
+ // atext\r
+ default:\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.3\r
+ // atext = ALPHA / DIGIT / ; Printable US-ASCII\r
+ // "!" / "#" / ; characters not including\r
+ // "$" / "%" / ; specials. Used for atoms.\r
+ // "&" / "'" /\r
+ // "*" / "+" /\r
+ // "-" / "/" /\r
+ // "=" / "?" /\r
+ // "^" / "_" /\r
+ // "`" / "{" /\r
+ // "|" / "}" /\r
+ // "~"\r
+ if ($end_or_die) {\r
+ // We have encountered atext where it is no longer valid\r
+ switch ($context_prior) {\r
+ case ISEMAIL_CONTEXT_COMMENT:\r
+ case ISEMAIL_CONTEXT_FWS:\r
+ $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_CFWS;\r
+ break;\r
+ case ISEMAIL_CONTEXT_QUOTEDSTRING:\r
+ $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_QS;\r
+ break;\r
+ default:\r
+ die ("More atext found where none is allowed, but unrecognised prior context: $context_prior");\r
+ }\r
+ } else {\r
+ $context_prior = $context;\r
+ $ord = ord($token);\r
+\r
+ if (($ord < 33) || ($ord > 126) || ($ord === 10) || (!is_bool(strpos(ISEMAIL_STRING_SPECIALS, $token))))\r
+ $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error\r
+\r
+ $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;\r
+ $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;\r
+ $element_len++;\r
+ }\r
+ }\r
+\r
+ break;\r
+ //-------------------------------------------------------------\r
+ // Domain\r
+ //-------------------------------------------------------------\r
+ case ISEMAIL_COMPONENT_DOMAIN:\r
+ // http://tools.ietf.org/html/rfc5322#section-3.4.1\r
+ // domain = dot-atom / domain-literal / obs-domain\r
+ //\r
+ // dot-atom = [CFWS] dot-atom-text [CFWS]\r
+ //\r
+ // dot-atom-text = 1*atext *("." 1*atext)\r
+ //\r
+ // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]\r
+ //\r
+ // dtext = %d33-90 / ; Printable US-ASCII\r
+ // %d94-126 / ; characters not including\r
+ // obs-dtext ; "[", "]", or "\"\r
+ //\r
+ // obs-domain = atom *("." atom)\r
+ //\r
+ // atom = [CFWS] 1*atext [CFWS]\r
+\r
+\r
+ // http://tools.ietf.org/html/rfc5321#section-4.1.2\r
+ // Mailbox = Local-part "@" ( Domain / address-literal )\r
+ //\r
+ // Domain = sub-domain *("." sub-domain)\r
+ //\r
+ // address-literal = "[" ( IPv4-address-literal /\r
+ // IPv6-address-literal /\r
+ // General-address-literal ) "]"\r
+ // ; See Section 4.1.3\r
+\r
+ // http://tools.ietf.org/html/rfc5322#section-3.4.1\r
+ // Note: A liberal syntax for the domain portion of addr-spec is\r
+ // given here. However, the domain portion contains addressing\r
+ // information specified by and used in other protocols (e.g.,\r
+ // [RFC1034], [RFC1035], [RFC1123], [RFC5321]). It is therefore\r
+ // incumbent upon implementations to conform to the syntax of\r
+ // addresses for the context in which they are used.\r
+ // is_email() author's note: it's not clear how to interpret this in\r
+ // the context of a general email address validator. The conclusion I\r
+ // have reached is this: "addressing information" must comply with\r
+ // RFC 5321 (and in turn RFC 1035), anything that is "semantically\r
+ // invisible" must comply only with RFC 5322.\r
+ switch ($token) {\r
+ // Comment\r
+ case ISEMAIL_STRING_OPENPARENTHESIS:\r
+ if ($element_len === 0)\r
+ // Comments at the start of the domain are deprecated in the text\r
+ // Comments at the start of a subdomain are obs-domain\r
+ // (http://tools.ietf.org/html/rfc5322#section-3.4.1)\r
+ $return_status[] = ($element_count === 0) ? ISEMAIL_DEPREC_CFWS_NEAR_AT : ISEMAIL_DEPREC_COMMENT;\r
+ else {\r
+ $return_status[] = ISEMAIL_CFWS_COMMENT;\r
+ $end_or_die = true; // We can't start a comment in the middle of an element, so this better be the end\r
+ }\r
+\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_COMMENT;\r
+ break;\r
+ // Next dot-atom element\r
+ case ISEMAIL_STRING_DOT:\r
+ if ($element_len === 0)\r
+ // Another dot, already?\r
+ $return_status[] = ($element_count === 0) ? ISEMAIL_ERR_DOT_START : ISEMAIL_ERR_CONSECUTIVEDOTS; // Fatal error\r
+ elseif ($hyphen_flag)\r
+ // Previous subdomain ended in a hyphen\r
+ $return_status[] = ISEMAIL_ERR_DOMAINHYPHENEND; // Fatal error\r
+ else\r
+ // Nowhere in RFC 5321 does it say explicitly that the\r
+ // domain part of a Mailbox must be a valid domain according\r
+ // to the DNS standards set out in RFC 1035, but this *is*\r
+ // implied in several places. For instance, wherever the idea\r
+ // of host routing is discussed the RFC says that the domain\r
+ // must be looked up in the DNS. This would be nonsense unless\r
+ // the domain was designed to be a valid DNS domain. Hence we\r
+ // must conclude that the RFC 1035 restriction on label length\r
+ // also applies to RFC 5321 domains.\r
+ //\r
+ // http://tools.ietf.org/html/rfc1035#section-2.3.4\r
+ // labels 63 octets or less\r
+ if ($element_len > 63) $return_status[] = ISEMAIL_RFC5322_LABEL_TOOLONG;\r
+\r
+ $end_or_die = false; // CFWS is OK again now we're at the beginning of an element (although it may be obsolete CFWS)\r
+ $element_len = 0;\r
+ $element_count++;\r
+ $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] = '';\r
+ $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;\r
+\r
+ break;\r
+ // Domain literal\r
+ case ISEMAIL_STRING_OPENSQBRACKET:\r
+ if ($parsedata[ISEMAIL_COMPONENT_DOMAIN] === '') {\r
+ $end_or_die = true; // Domain literal must be the only component\r
+ $element_len++;\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_COMPONENT_LITERAL;\r
+ $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;\r
+ $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;\r
+ $parsedata[ISEMAIL_COMPONENT_LITERAL] = '';\r
+ } else {\r
+ $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error\r
+ }\r
+\r
+ break;\r
+ // Folding White Space\r
+ case ISEMAIL_STRING_CR:\r
+ case ISEMAIL_STRING_SP:\r
+ case ISEMAIL_STRING_HTAB:\r
+ if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error\r
+\r
+ if ($element_len === 0)\r
+ $return_status[] = ($element_count === 0) ? ISEMAIL_DEPREC_CFWS_NEAR_AT : ISEMAIL_DEPREC_FWS;\r
+ else {\r
+ $return_status[] = ISEMAIL_CFWS_FWS;\r
+ $end_or_die = true; // We can't start FWS in the middle of an element, so this better be the end\r
+ }\r
+\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_FWS;\r
+ $token_prior = $token;\r
+ break;\r
+ // atext\r
+ default:\r
+ // RFC 5322 allows any atext...\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.3\r
+ // atext = ALPHA / DIGIT / ; Printable US-ASCII\r
+ // "!" / "#" / ; characters not including\r
+ // "$" / "%" / ; specials. Used for atoms.\r
+ // "&" / "'" /\r
+ // "*" / "+" /\r
+ // "-" / "/" /\r
+ // "=" / "?" /\r
+ // "^" / "_" /\r
+ // "`" / "{" /\r
+ // "|" / "}" /\r
+ // "~"\r
+\r
+ // But RFC 5321 only allows letter-digit-hyphen to comply with DNS rules (RFCs 1034 & 1123)\r
+ // http://tools.ietf.org/html/rfc5321#section-4.1.2\r
+ // sub-domain = Let-dig [Ldh-str]\r
+ //\r
+ // Let-dig = ALPHA / DIGIT\r
+ //\r
+ // Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig\r
+ //\r
+ if ($end_or_die) {\r
+ // We have encountered atext where it is no longer valid\r
+ switch ($context_prior) {\r
+ case ISEMAIL_CONTEXT_COMMENT:\r
+ case ISEMAIL_CONTEXT_FWS:\r
+ $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_CFWS;\r
+ break;\r
+ case ISEMAIL_COMPONENT_LITERAL:\r
+ $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_DOMLIT;\r
+ break;\r
+ default:\r
+ die ("More atext found where none is allowed, but unrecognised prior context: $context_prior");\r
+ }\r
+ }\r
+\r
+ $ord = ord($token);\r
+ $hyphen_flag = false; // Assume this token isn't a hyphen unless we discover it is\r
+\r
+ if (($ord < 33) || ($ord > 126) || (!is_bool(strpos(ISEMAIL_STRING_SPECIALS, $token)))) {\r
+ $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT; // Fatal error\r
+ } elseif ($token === ISEMAIL_STRING_HYPHEN) {\r
+ if ($element_len === 0) {\r
+ // Hyphens can't be at the beginning of a subdomain\r
+ $return_status[] = ISEMAIL_ERR_DOMAINHYPHENSTART; // Fatal error\r
+ }\r
+\r
+ $hyphen_flag = true;\r
+ } elseif (!(($ord > 47 && $ord < 58) || ($ord > 64 && $ord < 91) || ($ord > 96 && $ord < 123))) {\r
+ // Not an RFC 5321 subdomain, but still OK by RFC 5322\r
+ $return_status[] = ISEMAIL_RFC5322_DOMAIN;\r
+ }\r
+\r
+ $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;\r
+ $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;\r
+ $element_len++;\r
+ }\r
+\r
+ break;\r
+ //-------------------------------------------------------------\r
+ // Domain literal\r
+ //-------------------------------------------------------------\r
+ case ISEMAIL_COMPONENT_LITERAL:\r
+ // http://tools.ietf.org/html/rfc5322#section-3.4.1\r
+ // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]\r
+ //\r
+ // dtext = %d33-90 / ; Printable US-ASCII\r
+ // %d94-126 / ; characters not including\r
+ // obs-dtext ; "[", "]", or "\"\r
+ //\r
+ // obs-dtext = obs-NO-WS-CTL / quoted-pair\r
+ switch ($token) {\r
+ // End of domain literal\r
+ case ISEMAIL_STRING_CLOSESQBRACKET:\r
+ if ((int) max($return_status) < ISEMAIL_DEPREC) {\r
+ // Could be a valid RFC 5321 address literal, so let's check\r
+\r
+ // http://tools.ietf.org/html/rfc5321#section-4.1.2\r
+ // address-literal = "[" ( IPv4-address-literal /\r
+ // IPv6-address-literal /\r
+ // General-address-literal ) "]"\r
+ // ; See Section 4.1.3\r
+ //\r
+ // http://tools.ietf.org/html/rfc5321#section-4.1.3\r
+ // IPv4-address-literal = Snum 3("." Snum)\r
+ //\r
+ // IPv6-address-literal = "IPv6:" IPv6-addr\r
+ //\r
+ // General-address-literal = Standardized-tag ":" 1*dcontent\r
+ //\r
+ // Standardized-tag = Ldh-str\r
+ // ; Standardized-tag MUST be specified in a\r
+ // ; Standards-Track RFC and registered with IANA\r
+ //\r
+ // dcontent = %d33-90 / ; Printable US-ASCII\r
+ // %d94-126 ; excl. "[", "\", "]"\r
+ //\r
+ // Snum = 1*3DIGIT\r
+ // ; representing a decimal integer\r
+ // ; value in the range 0 through 255\r
+ //\r
+ // IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp\r
+ //\r
+ // IPv6-hex = 1*4HEXDIG\r
+ //\r
+ // IPv6-full = IPv6-hex 7(":" IPv6-hex)\r
+ //\r
+ // IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::"\r
+ // [IPv6-hex *5(":" IPv6-hex)]\r
+ // ; The "::" represents at least 2 16-bit groups of\r
+ // ; zeros. No more than 6 groups in addition to the\r
+ // ; "::" may be present.\r
+ //\r
+ // IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal\r
+ //\r
+ // IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"\r
+ // [IPv6-hex *3(":" IPv6-hex) ":"]\r
+ // IPv4-address-literal\r
+ // ; The "::" represents at least 2 16-bit groups of\r
+ // ; zeros. No more than 4 groups in addition to the\r
+ // ; "::" and IPv4-address-literal may be present.\r
+ //\r
+ // is_email() author's note: We can't use ip2long() to validate\r
+ // IPv4 addresses because it accepts abbreviated addresses\r
+ // (xxx.xxx.xxx), expanding the last group to complete the address.\r
+ // filter_var() validates IPv6 address inconsistently (up to PHP 5.3.3\r
+ // at least) -- see http://bugs.php.net/bug.php?id=53236 for example\r
+ $max_groups = 8;\r
+ $matchesIP = array();\r
+ /*.mixed.*/ $index = false;\r
+ $addressliteral = $parsedata[ISEMAIL_COMPONENT_LITERAL];\r
+\r
+ // Extract IPv4 part from the end of the address-literal (if there is one)\r
+ if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressliteral, $matchesIP) > 0) {\r
+ $index = strrpos($addressliteral, $matchesIP[0]);\r
+ if ($index !== 0) $addressliteral = substr($addressliteral, 0, $index) . '0:0'; // Convert IPv4 part to IPv6 format for further testing\r
+ }\r
+\r
+ if ($index === 0) {\r
+ // Nothing there except a valid IPv4 address, so...\r
+ $return_status[] = ISEMAIL_RFC5321_ADDRESSLITERAL;\r
+ } elseif (strncasecmp($addressliteral, ISEMAIL_STRING_IPV6TAG, 5) !== 0) {\r
+ $return_status[] = ISEMAIL_RFC5322_DOMAINLITERAL;\r
+ } else {\r
+ $IPv6 = substr($addressliteral, 5);\r
+ $matchesIP = explode(ISEMAIL_STRING_COLON, $IPv6); // Revision 2.7: Daniel Marschall's new IPv6 testing strategy\r
+ $groupCount = count($matchesIP);\r
+ $index = strpos($IPv6,ISEMAIL_STRING_DOUBLECOLON);\r
+\r
+ if ($index === false) {\r
+ // We need exactly the right number of groups\r
+ if ($groupCount !== $max_groups)\r
+ $return_status[] = ISEMAIL_RFC5322_IPV6_GRPCOUNT;\r
+ } else {\r
+ if ($index !== strrpos($IPv6,ISEMAIL_STRING_DOUBLECOLON))\r
+ $return_status[] = ISEMAIL_RFC5322_IPV6_2X2XCOLON;\r
+ else {\r
+ if ($index === 0 || $index === (strlen($IPv6) - 2)) $max_groups++; // RFC 4291 allows :: at the start or end of an address with 7 other groups in addition\r
+\r
+ if ($groupCount > $max_groups)\r
+ $return_status[] = ISEMAIL_RFC5322_IPV6_MAXGRPS;\r
+ elseif ($groupCount === $max_groups)\r
+ $return_status[] = ISEMAIL_RFC5321_IPV6DEPRECATED; // Eliding a single "::"\r
+ }\r
+ }\r
+\r
+ // Revision 2.7: Daniel Marschall's new IPv6 testing strategy\r
+ if ((substr($IPv6, 0, 1) === ISEMAIL_STRING_COLON) && (substr($IPv6, 1, 1) !== ISEMAIL_STRING_COLON))\r
+ $return_status[] = ISEMAIL_RFC5322_IPV6_COLONSTRT; // Address starts with a single colon\r
+ elseif ((substr($IPv6, -1) === ISEMAIL_STRING_COLON) && (substr($IPv6, -2, 1) !== ISEMAIL_STRING_COLON))\r
+ $return_status[] = ISEMAIL_RFC5322_IPV6_COLONEND; // Address ends with a single colon\r
+ elseif (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0)\r
+ $return_status[] = ISEMAIL_RFC5322_IPV6_BADCHAR; // Check for unmatched characters\r
+ else\r
+ $return_status[] = ISEMAIL_RFC5321_ADDRESSLITERAL;\r
+ }\r
+ } else\r
+ $return_status[] = ISEMAIL_RFC5322_DOMAINLITERAL;\r
+\r
+\r
+ $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;\r
+ $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;\r
+ $element_len++;\r
+ $context_prior = $context;\r
+ $context = (int) array_pop($context_stack);\r
+ break;\r
+ case ISEMAIL_STRING_BACKSLASH:\r
+ $return_status[] = ISEMAIL_RFC5322_DOMLIT_OBSDTEXT;\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_QUOTEDPAIR;\r
+ break;\r
+ // Folding White Space\r
+ case ISEMAIL_STRING_CR:\r
+ case ISEMAIL_STRING_SP:\r
+ case ISEMAIL_STRING_HTAB:\r
+ if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error\r
+\r
+ $return_status[] = ISEMAIL_CFWS_FWS;\r
+\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_FWS;\r
+ $token_prior = $token;\r
+ break;\r
+ // dtext\r
+ default:\r
+ // http://tools.ietf.org/html/rfc5322#section-3.4.1\r
+ // dtext = %d33-90 / ; Printable US-ASCII\r
+ // %d94-126 / ; characters not including\r
+ // obs-dtext ; "[", "]", or "\"\r
+ //\r
+ // obs-dtext = obs-NO-WS-CTL / quoted-pair\r
+ //\r
+ // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control\r
+ // %d11 / ; characters that do not\r
+ // %d12 / ; include the carriage\r
+ // %d14-31 / ; return, line feed, and\r
+ // %d127 ; white space characters\r
+ $ord = ord($token);\r
+\r
+ // CR, LF, SP & HTAB have already been parsed above\r
+ if (($ord > 127) || ($ord === 0) || ($token === ISEMAIL_STRING_OPENSQBRACKET)) {\r
+ $return_status[] = ISEMAIL_ERR_EXPECTING_DTEXT; // Fatal error\r
+ break;\r
+ } elseif (($ord < 33) || ($ord === 127)) {\r
+ $return_status[] = ISEMAIL_RFC5322_DOMLIT_OBSDTEXT;\r
+ }\r
+\r
+ $parsedata[ISEMAIL_COMPONENT_LITERAL] .= $token;\r
+ $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;\r
+ $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;\r
+ $element_len++;\r
+ }\r
+\r
+ break;\r
+ //-------------------------------------------------------------\r
+ // Quoted string\r
+ //-------------------------------------------------------------\r
+ case ISEMAIL_CONTEXT_QUOTEDSTRING:\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.4\r
+ // quoted-string = [CFWS]\r
+ // DQUOTE *([FWS] qcontent) [FWS] DQUOTE\r
+ // [CFWS]\r
+ //\r
+ // qcontent = qtext / quoted-pair\r
+ switch ($token) {\r
+ // Quoted pair\r
+ case ISEMAIL_STRING_BACKSLASH:\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_QUOTEDPAIR;\r
+ break;\r
+ // Folding White Space\r
+ // Inside a quoted string, spaces are allowed as regular characters.\r
+ // It's only FWS if we include HTAB or CRLF\r
+ case ISEMAIL_STRING_CR:\r
+ case ISEMAIL_STRING_HTAB:\r
+ if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error\r
+\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.2\r
+ // Runs of FWS, comment, or CFWS that occur between lexical tokens in a\r
+ // structured header field are semantically interpreted as a single\r
+ // space character.\r
+\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.4\r
+ // the CRLF in any FWS/CFWS that appears within the quoted-string [is]\r
+ // semantically "invisible" and therefore not part of the quoted-string\r
+ $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= ISEMAIL_STRING_SP;\r
+ $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= ISEMAIL_STRING_SP;\r
+ $element_len++;\r
+\r
+ $return_status[] = ISEMAIL_CFWS_FWS;\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_FWS;\r
+ $token_prior = $token;\r
+ break;\r
+ // End of quoted string\r
+ case ISEMAIL_STRING_DQUOTE:\r
+ $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;\r
+ $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;\r
+ $element_len++;\r
+ $context_prior = $context;\r
+ $context = (int) array_pop($context_stack);\r
+ break;\r
+ // qtext\r
+ default:\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.4\r
+ // qtext = %d33 / ; Printable US-ASCII\r
+ // %d35-91 / ; characters not including\r
+ // %d93-126 / ; "\" or the quote character\r
+ // obs-qtext\r
+ //\r
+ // obs-qtext = obs-NO-WS-CTL\r
+ //\r
+ // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control\r
+ // %d11 / ; characters that do not\r
+ // %d12 / ; include the carriage\r
+ // %d14-31 / ; return, line feed, and\r
+ // %d127 ; white space characters\r
+ $ord = ord($token);\r
+\r
+ if (($ord > 127) || ($ord === 0) || ($ord === 10)) {\r
+ $return_status[] = ISEMAIL_ERR_EXPECTING_QTEXT; // Fatal error\r
+ } elseif (($ord < 32) || ($ord === 127))\r
+ $return_status[] = ISEMAIL_DEPREC_QTEXT;\r
+\r
+ $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;\r
+ $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;\r
+ $element_len++;\r
+ }\r
+\r
+ // http://tools.ietf.org/html/rfc5322#section-3.4.1\r
+ // If the\r
+ // string can be represented as a dot-atom (that is, it contains no\r
+ // characters other than atext characters or "." surrounded by atext\r
+ // characters), then the dot-atom form SHOULD be used and the quoted-\r
+ // string form SHOULD NOT be used.\r
+// To do\r
+ break;\r
+ //-------------------------------------------------------------\r
+ // Quoted pair\r
+ //-------------------------------------------------------------\r
+ case ISEMAIL_CONTEXT_QUOTEDPAIR:\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.1\r
+ // quoted-pair = ("\" (VCHAR / WSP)) / obs-qp\r
+ //\r
+ // VCHAR = %d33-126 ; visible (printing) characters\r
+ // WSP = SP / HTAB ; white space\r
+ //\r
+ // obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)\r
+ //\r
+ // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control\r
+ // %d11 / ; characters that do not\r
+ // %d12 / ; include the carriage\r
+ // %d14-31 / ; return, line feed, and\r
+ // %d127 ; white space characters\r
+ //\r
+ // i.e. obs-qp = "\" (%d0-8 / %d10-31 / %d127)\r
+ $ord = ord($token);\r
+\r
+ if ($ord > 127)\r
+ $return_status[] = ISEMAIL_ERR_EXPECTING_QPAIR; // Fatal error\r
+ elseif ((($ord < 31) && ($ord !== 9)) || ($ord === 127)) // SP & HTAB are allowed\r
+ $return_status[] = ISEMAIL_DEPREC_QP;\r
+\r
+ // At this point we know where this qpair occurred so\r
+ // we could check to see if the character actually\r
+ // needed to be quoted at all.\r
+ // http://tools.ietf.org/html/rfc5321#section-4.1.2\r
+ // the sending system SHOULD transmit the\r
+ // form that uses the minimum quoting possible.\r
+// To do: check whether the character needs to be quoted (escaped) in this context\r
+ $context_prior = $context;\r
+ $context = (int) array_pop($context_stack); // End of qpair\r
+ $token = ISEMAIL_STRING_BACKSLASH . $token;\r
+\r
+ switch ($context) {\r
+ case ISEMAIL_CONTEXT_COMMENT:\r
+ break;\r
+ case ISEMAIL_CONTEXT_QUOTEDSTRING:\r
+ $parsedata[ISEMAIL_COMPONENT_LOCALPART] .= $token;\r
+ $atomlist[ISEMAIL_COMPONENT_LOCALPART][$element_count] .= $token;\r
+ $element_len += 2; // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash\r
+ break;\r
+ case ISEMAIL_COMPONENT_LITERAL:\r
+ $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= $token;\r
+ $atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count] .= $token;\r
+ $element_len += 2; // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash\r
+ break;\r
+ default:\r
+ die("Quoted pair logic invoked in an invalid context: $context");\r
+ }\r
+\r
+ break;\r
+ //-------------------------------------------------------------\r
+ // Comment\r
+ //-------------------------------------------------------------\r
+ case ISEMAIL_CONTEXT_COMMENT:\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.2\r
+ // comment = "(" *([FWS] ccontent) [FWS] ")"\r
+ //\r
+ // ccontent = ctext / quoted-pair / comment\r
+ switch ($token) {\r
+ // Nested comment\r
+ case ISEMAIL_STRING_OPENPARENTHESIS:\r
+ // Nested comments are OK\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_COMMENT;\r
+ break;\r
+ // End of comment\r
+ case ISEMAIL_STRING_CLOSEPARENTHESIS:\r
+ $context_prior = $context;\r
+ $context = (int) array_pop($context_stack);\r
+\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.2\r
+ // Runs of FWS, comment, or CFWS that occur between lexical tokens in a\r
+ // structured header field are semantically interpreted as a single\r
+ // space character.\r
+ //\r
+ // is_email() author's note: This *cannot* mean that we must add a\r
+ // space to the address wherever CFWS appears. This would result in\r
+ // any addr-spec that had CFWS outside a quoted string being invalid\r
+ // for RFC 5321.\r
+// if (($context === ISEMAIL_COMPONENT_LOCALPART) || ($context === ISEMAIL_COMPONENT_DOMAIN)) {\r
+// $parsedata[$context] .= ISEMAIL_STRING_SP;\r
+// $atomlist[$context][$element_count] .= ISEMAIL_STRING_SP;\r
+// $element_len++;\r
+// }\r
+\r
+ break;\r
+ // Quoted pair\r
+ case ISEMAIL_STRING_BACKSLASH:\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_QUOTEDPAIR;\r
+ break;\r
+ // Folding White Space\r
+ case ISEMAIL_STRING_CR:\r
+ case ISEMAIL_STRING_SP:\r
+ case ISEMAIL_STRING_HTAB:\r
+ if (($token === ISEMAIL_STRING_CR) && ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))) {$return_status[] = ISEMAIL_ERR_CR_NO_LF; break;} // Fatal error\r
+\r
+ $return_status[] = ISEMAIL_CFWS_FWS;\r
+\r
+ $context_stack[] = $context;\r
+ $context = ISEMAIL_CONTEXT_FWS;\r
+ $token_prior = $token;\r
+ break;\r
+ // ctext\r
+ default:\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.3\r
+ // ctext = %d33-39 / ; Printable US-ASCII\r
+ // %d42-91 / ; characters not including\r
+ // %d93-126 / ; "(", ")", or "\"\r
+ // obs-ctext\r
+ //\r
+ // obs-ctext = obs-NO-WS-CTL\r
+ //\r
+ // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control\r
+ // %d11 / ; characters that do not\r
+ // %d12 / ; include the carriage\r
+ // %d14-31 / ; return, line feed, and\r
+ // %d127 ; white space characters\r
+ $ord = ord($token);\r
+\r
+ if (($ord > 127) || ($ord === 0) || ($ord === 10)) {\r
+ $return_status[] = ISEMAIL_ERR_EXPECTING_CTEXT; // Fatal error\r
+ break;\r
+ } elseif (($ord < 32) || ($ord === 127)) {\r
+ $return_status[] = ISEMAIL_DEPREC_CTEXT;\r
+ }\r
+ }\r
+\r
+ break;\r
+ //-------------------------------------------------------------\r
+ // Folding White Space\r
+ //-------------------------------------------------------------\r
+ case ISEMAIL_CONTEXT_FWS:\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.2\r
+ // FWS = ([*WSP CRLF] 1*WSP) / obs-FWS\r
+ // ; Folding white space\r
+\r
+ // But note the erratum:\r
+ // http://www.rfc-editor.org/errata_search.php?rfc=5322&eid=1908:\r
+ // In the obsolete syntax, any amount of folding white space MAY be\r
+ // inserted where the obs-FWS rule is allowed. This creates the\r
+ // possibility of having two consecutive "folds" in a line, and\r
+ // therefore the possibility that a line which makes up a folded header\r
+ // field could be composed entirely of white space.\r
+ //\r
+ // obs-FWS = 1*([CRLF] WSP)\r
+ if ($token_prior === ISEMAIL_STRING_CR) {\r
+ if ($token === ISEMAIL_STRING_CR) {\r
+ $return_status[] = ISEMAIL_ERR_FWS_CRLF_X2; // Fatal error\r
+ break;\r
+ }\r
+\r
+ if (isset($crlf_count)) {\r
+ if (++$crlf_count > 1)\r
+ $return_status[] = ISEMAIL_DEPREC_FWS; // Multiple folds = obsolete FWS\r
+ } else $crlf_count = 1;\r
+ }\r
+\r
+ switch ($token) {\r
+ case ISEMAIL_STRING_CR:\r
+ if ((++$i === $raw_length) || ($email[$i] !== ISEMAIL_STRING_LF))\r
+ $return_status[] = ISEMAIL_ERR_CR_NO_LF; // Fatal error\r
+\r
+ break;\r
+ case ISEMAIL_STRING_SP:\r
+ case ISEMAIL_STRING_HTAB:\r
+ break;\r
+ default:\r
+ if ($token_prior === ISEMAIL_STRING_CR) {\r
+ $return_status[] = ISEMAIL_ERR_FWS_CRLF_END; // Fatal error\r
+ break;\r
+ }\r
+\r
+ if (isset($crlf_count)) unset($crlf_count);\r
+\r
+ $context_prior = $context;\r
+ $context = (int) array_pop($context_stack); // End of FWS\r
+\r
+ // http://tools.ietf.org/html/rfc5322#section-3.2.2\r
+ // Runs of FWS, comment, or CFWS that occur between lexical tokens in a\r
+ // structured header field are semantically interpreted as a single\r
+ // space character.\r
+ //\r
+ // is_email() author's note: This *cannot* mean that we must add a\r
+ // space to the address wherever CFWS appears. This would result in\r
+ // any addr-spec that had CFWS outside a quoted string being invalid\r
+ // for RFC 5321.\r
+// if (($context === ISEMAIL_COMPONENT_LOCALPART) || ($context === ISEMAIL_COMPONENT_DOMAIN)) {\r
+// $parsedata[$context] .= ISEMAIL_STRING_SP;\r
+// $atomlist[$context][$element_count] .= ISEMAIL_STRING_SP;\r
+// $element_len++;\r
+// }\r
+\r
+ $i--; // Look at this token again in the parent context\r
+ }\r
+\r
+ $token_prior = $token;\r
+ break;\r
+ //-------------------------------------------------------------\r
+ // A context we aren't expecting\r
+ //-------------------------------------------------------------\r
+ default:\r
+ die("Unknown context: $context");\r
+ }\r
+\r
+//-echo "<td>$context|",(($end_or_die) ? 'true' : 'false'),"|$token|" . max($return_status) . "</td></tr>"; // debug\r
+ if ((int) max($return_status) > ISEMAIL_RFC5322) break; // No point going on if we've got a fatal error\r
+ }\r
+\r
+ // Some simple final tests\r
+ if ((int) max($return_status) < ISEMAIL_RFC5322) {\r
+ if ($context === ISEMAIL_CONTEXT_QUOTEDSTRING) $return_status[] = ISEMAIL_ERR_UNCLOSEDQUOTEDSTR; // Fatal error\r
+ elseif ($context === ISEMAIL_CONTEXT_QUOTEDPAIR) $return_status[] = ISEMAIL_ERR_BACKSLASHEND; // Fatal error\r
+ elseif ($context === ISEMAIL_CONTEXT_COMMENT) $return_status[] = ISEMAIL_ERR_UNCLOSEDCOMMENT; // Fatal error\r
+ elseif ($context === ISEMAIL_COMPONENT_LITERAL) $return_status[] = ISEMAIL_ERR_UNCLOSEDDOMLIT; // Fatal error\r
+ elseif ($token === ISEMAIL_STRING_CR) $return_status[] = ISEMAIL_ERR_FWS_CRLF_END; // Fatal error\r
+ elseif ($parsedata[ISEMAIL_COMPONENT_DOMAIN] === '') $return_status[] = ISEMAIL_ERR_NODOMAIN; // Fatal error\r
+ elseif ($element_len === 0) $return_status[] = ISEMAIL_ERR_DOT_END; // Fatal error\r
+ elseif ($hyphen_flag) $return_status[] = ISEMAIL_ERR_DOMAINHYPHENEND; // Fatal error\r
+ // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.2\r
+ // The maximum total length of a domain name or number is 255 octets.\r
+ elseif (strlen($parsedata[ISEMAIL_COMPONENT_DOMAIN]) > 255)\r
+ $return_status[] = ISEMAIL_RFC5322_DOMAIN_TOOLONG;\r
+ // http://tools.ietf.org/html/rfc5321#section-4.1.2\r
+ // Forward-path = Path\r
+ //\r
+ // Path = "<" [ A-d-l ":" ] Mailbox ">"\r
+ //\r
+ // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3\r
+ // The maximum total length of a reverse-path or forward-path is 256\r
+ // octets (including the punctuation and element separators).\r
+ //\r
+ // Thus, even without (obsolete) routing information, the Mailbox can\r
+ // only be 254 characters long. This is confirmed by this verified\r
+ // erratum to RFC 3696:\r
+ //\r
+ // http://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690\r
+ // However, there is a restriction in RFC 2821 on the length of an\r
+ // address in MAIL and RCPT commands of 254 characters. Since addresses\r
+ // that do not fit in those fields are not normally useful, the upper\r
+ // limit on address lengths should normally be considered to be 254.\r
+ elseif (strlen($parsedata[ISEMAIL_COMPONENT_LOCALPART] . ISEMAIL_STRING_AT . $parsedata[ISEMAIL_COMPONENT_DOMAIN]) > 254)\r
+ $return_status[] = ISEMAIL_RFC5322_TOOLONG;\r
+ // http://tools.ietf.org/html/rfc1035#section-2.3.4\r
+ // labels 63 octets or less\r
+ elseif ($element_len > 63) $return_status[] = ISEMAIL_RFC5322_LABEL_TOOLONG;\r
+ }\r
+\r
+ // Check DNS?\r
+ $dns_checked = false;\r
+\r
+ if ($checkDNS && ((int) max($return_status) < ISEMAIL_DNSWARN) && function_exists('dns_get_record')) {\r
+ // http://tools.ietf.org/html/rfc5321#section-2.3.5\r
+ // Names that can\r
+ // be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed\r
+ // in Section 5) are permitted, as are CNAME RRs whose targets can be\r
+ // resolved, in turn, to MX or address RRs.\r
+ //\r
+ // http://tools.ietf.org/html/rfc5321#section-5.1\r
+ // The lookup first attempts to locate an MX record associated with the\r
+ // name. If a CNAME record is found, the resulting name is processed as\r
+ // if it were the initial name. ... If an empty list of MXs is returned,\r
+ // the address is treated as if it was associated with an implicit MX\r
+ // RR, with a preference of 0, pointing to that host.\r
+ //\r
+ // is_email() author's note: We will regard the existence of a CNAME to be\r
+ // sufficient evidence of the domain's existence. For performance reasons\r
+ // we will not repeat the DNS lookup for the CNAME's target, but we will\r
+ // raise a warning because we didn't immediately find an MX record.\r
+ if ($element_count === 0) $parsedata[ISEMAIL_COMPONENT_DOMAIN] .= '.'; // Checking TLD DNS seems to work only if you explicitly check from the root\r
+\r
+ $result = @dns_get_record($parsedata[ISEMAIL_COMPONENT_DOMAIN], DNS_MX); // Not using checkdnsrr because of a suspected bug in PHP 5.3 (http://bugs.php.net/bug.php?id=51844)\r
+\r
+ if ((is_bool($result) && !(bool) $result))\r
+ $return_status[] = ISEMAIL_DNSWARN_NO_RECORD; // Domain can't be found in DNS\r
+ else {\r
+ if (count($result) === 0) {\r
+ $return_status[] = ISEMAIL_DNSWARN_NO_MX_RECORD; // MX-record for domain can't be found\r
+ $result = @dns_get_record($parsedata[ISEMAIL_COMPONENT_DOMAIN], DNS_A + DNS_CNAME);\r
+\r
+ if (count($result) === 0)\r
+ $return_status[] = ISEMAIL_DNSWARN_NO_RECORD; // No usable records for the domain can be found\r
+ } else $dns_checked = true;\r
+ }\r
+ }\r
+\r
+ // Check for TLD addresses\r
+ // -----------------------\r
+ // TLD addresses are specifically allowed in RFC 5321 but they are\r
+ // unusual to say the least. We will allocate a separate\r
+ // status to these addresses on the basis that they are more likely\r
+ // to be typos than genuine addresses (unless we've already\r
+ // established that the domain does have an MX record)\r
+ //\r
+ // http://tools.ietf.org/html/rfc5321#section-2.3.5\r
+ // In the case\r
+ // of a top-level domain used by itself in an email address, a single\r
+ // string is used without any dots. This makes the requirement,\r
+ // described in more detail below, that only fully-qualified domain\r
+ // names appear in SMTP transactions on the public Internet,\r
+ // particularly important where top-level domains are involved.\r
+ //\r
+ // TLD format\r
+ // ----------\r
+ // The format of TLDs has changed a number of times. The standards\r
+ // used by IANA have been largely ignored by ICANN, leading to\r
+ // confusion over the standards being followed. These are not defined\r
+ // anywhere, except as a general component of a DNS host name (a label).\r
+ // However, this could potentially lead to 123.123.123.123 being a\r
+ // valid DNS name (rather than an IP address) and thereby creating\r
+ // an ambiguity. The most authoritative statement on TLD formats that\r
+ // the author can find is in a (rejected!) erratum to RFC 1123\r
+ // submitted by John Klensin, the author of RFC 5321:\r
+ //\r
+ // http://www.rfc-editor.org/errata_search.php?rfc=1123&eid=1353\r
+ // However, a valid host name can never have the dotted-decimal\r
+ // form #.#.#.#, since this change does not permit the highest-level\r
+ // component label to start with a digit even if it is not all-numeric.\r
+ if (!$dns_checked && ((int) max($return_status) < ISEMAIL_DNSWARN)) {\r
+ if ($element_count === 0) $return_status[] = ISEMAIL_RFC5321_TLD;\r
+\r
+ if (is_numeric($atomlist[ISEMAIL_COMPONENT_DOMAIN][$element_count][0]))\r
+ $return_status[] = ISEMAIL_RFC5321_TLDNUMERIC;\r
+ }\r
+\r
+ $return_status = array_unique($return_status);\r
+ $final_status = (int) max($return_status);\r
+\r
+ if (count($return_status) !== 1) array_shift($return_status); // remove redundant ISEMAIL_VALID\r
+\r
+ $parsedata['status'] = $return_status;\r
+\r
+ if ($final_status < $threshold) $final_status = ISEMAIL_VALID;\r
+\r
+ return ($diagnose) ? $final_status : ($final_status < ISEMAIL_THRESHOLD);\r
+}\r
+?>\r