Return to Snippet

Revision: 11232
at October 5, 2010 21:13 by dominicsayers


Updated Code
<?php
/**
 * To validate an email address according to RFCs 5321, 5322 and others
 * 
 * Copyright (c) 2008-2010, Dominic Sayers							<br>
 * Test schema documentation Copyright (c) 2010, Daniel Marschall				<br>
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 * 
 *     - Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *     - Redistributions in binary form must reproduce the above copyright notice,
 *       this list of conditions and the following disclaimer in the documentation
 *       and/or other materials provided with the distribution.
 *     - Neither the name of Dominic Sayers nor the names of its contributors may be
 *       used to endorse or promote products derived from this software without
 *       specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 * @package	is_email
 * @author	Dominic Sayers <[email protected]>
 * @copyright	2008-2010 Dominic Sayers
 * @license	http://www.opensource.org/licenses/bsd-license.php BSD License
 * @link	http://www.dominicsayers.com/isemail
 * @version	2.8.3 - Clarified text for ISEMAIL_IPV6BADCHAR and new test #276 added (too many IPv6 groups with an elision)
 */

// The quality of this code has been improved greatly by using PHPLint
// Copyright (c) 2010 Umberto Salsi
// This is free software; see the license for copying conditions.
// More info: http://www.icosaedro.it/phplint/
/*.
	require_module 'standard';
	require_module 'pcre';
.*/
/**
 * Check that an email address conforms to RFCs 5321, 5322 and others
 *
 * The ISEMAIL_* result constants are defined on the first call if they do not
 * already exist. Warning codes lie between ISEMAIL_WARNING (64) and
 * ISEMAIL_ERROR (128); error codes are greater than ISEMAIL_ERROR.
 *
 * @param string	$email		The email address to check
 * @param boolean	$checkDNS	If true then a DNS check for A and MX records will be made
 *					(only performed in diagnostic mode, for domain names, when no
 *					other warning has been raised and checkdnsrr() is available)
 * @param mixed		$errorlevel	false (default): return true or false only.
 *					true or E_ERROR: return ISEMAIL_VALID or an ISEMAIL_* error code.
 *					E_WARNING: as E_ERROR, but also return ISEMAIL_* warning codes
 *					for valid-but-unlikely addresses.
 *					Any other value behaves like false.
 * @return mixed			boolean when $errorlevel is false (or unrecognised), otherwise
 *					an integer ISEMAIL_* status code
 */
/*.mixed.*/ function is_email ($email, $checkDNS = false, $errorlevel = false) {
	// Check that $email is a valid address. Read the following RFCs to understand the constraints:
	// 	(http://tools.ietf.org/html/rfc5321)
	// 	(http://tools.ietf.org/html/rfc5322)
	// 	(http://tools.ietf.org/html/rfc4291#section-2.2)
	// 	(http://tools.ietf.org/html/rfc1123#section-2.1)
	// 	(http://tools.ietf.org/html/rfc3696) (guidance only)

	//	$errorlevel	Behaviour
	//	---------------	---------------------------------------------------------------------------
	//	E_ERROR		Return validation failures only. For technically valid addresses return
	//			ISEMAIL_VALID
	//	E_WARNING	Return warnings for unlikely but technically valid addresses. This includes
	//			addresses at TLDs (e.g. johndoe@com), addresses with FWS and comments,
	//			addresses that are quoted and addresses that contain no alphabetic or
	//			numeric characters.
	//	true		Same as E_ERROR
	//	false		Return true for valid addresses, false for invalid ones. No warnings.
	//
	//	Errors can be distinguished from warnings if ($return_value > ISEMAIL_ERROR)
// version 2.0: Enhance $diagnose parameter to $errorlevel
// revision 2.5: some syntax changes to make it more PHPLint-friendly. Should be functionally identical.

	if (!defined('ISEMAIL_VALID')) {
		// No errors
		define('ISEMAIL_VALID'			, 0);
		// Warnings (valid address but unlikely in the real world)
		define('ISEMAIL_WARNING'		, 64);
		define('ISEMAIL_TLD'			, 65);
		define('ISEMAIL_TLDNUMERIC'		, 66);
		define('ISEMAIL_QUOTEDSTRING'		, 67);
		define('ISEMAIL_COMMENTS'		, 68);
		define('ISEMAIL_FWS'			, 69);
		define('ISEMAIL_ADDRESSLITERAL'		, 70);
		define('ISEMAIL_UNLIKELYINITIAL'	, 71);
		define('ISEMAIL_SINGLEGROUPELISION'	, 72);
		define('ISEMAIL_DOMAINNOTFOUND'		, 73);
		define('ISEMAIL_MXNOTFOUND'		, 74);
		// Errors (invalid address)
		define('ISEMAIL_ERROR'			, 128);
		define('ISEMAIL_TOOLONG'		, 129);
		define('ISEMAIL_NOAT'			, 130);
		define('ISEMAIL_NOLOCALPART'		, 131);
		define('ISEMAIL_NODOMAIN'		, 132);
		define('ISEMAIL_ZEROLENGTHELEMENT'	, 133);
		define('ISEMAIL_BADCOMMENT_START'	, 134);
		define('ISEMAIL_BADCOMMENT_END'		, 135);
		define('ISEMAIL_UNESCAPEDDELIM'		, 136);
		define('ISEMAIL_EMPTYELEMENT'		, 137);
		define('ISEMAIL_UNESCAPEDSPECIAL'	, 138);
		define('ISEMAIL_LOCALTOOLONG'		, 139);
		// 140 is retired (formerly ISEMAIL_IPV4BADPREFIX)
		define('ISEMAIL_IPV6BADPREFIXMIXED'	, 141);
		define('ISEMAIL_IPV6BADPREFIX'		, 142);
		define('ISEMAIL_IPV6GROUPCOUNT'		, 143);
		define('ISEMAIL_IPV6DOUBLEDOUBLECOLON'	, 144);
		define('ISEMAIL_IPV6BADCHAR'		, 145);
		define('ISEMAIL_IPV6TOOMANYGROUPS'	, 146);
		define('ISEMAIL_DOMAINEMPTYELEMENT'	, 147);
		define('ISEMAIL_DOMAINELEMENTTOOLONG'	, 148);
		define('ISEMAIL_DOMAINBADCHAR'		, 149);
		define('ISEMAIL_DOMAINTOOLONG'		, 150);
		define('ISEMAIL_IPV6SINGLECOLONSTART'	, 151);
		define('ISEMAIL_IPV6SINGLECOLONEND'	, 152);
		// 190 and 191 are retired (formerly ISEMAIL_BADPARAMETER and ISEMAIL_NOTDEFINED)
// revision 2.1: Redefined unexpected error constants so they don't clash with the ISEMAIL_WARNING bit
// revision 2.5: Undefined unused constants
	}

	// Translate $errorlevel into two flags:
	//	$diagnose	return integer status codes instead of booleans
	//	$warn		also report warnings (implies $diagnose)
	if (is_bool($errorlevel)) {
		$diagnose	= (bool) $errorlevel;
		$warn		= false;
	} else {
		switch ((int) $errorlevel) {
		case E_WARNING:
			$diagnose	= true;
			$warn		= true;
			break;
		case E_ERROR:
			$diagnose	= true;
			$warn		= false;
			break;
		default:
			$diagnose	= false;
			$warn		= false;
		}
	}

	if ($diagnose) /*.mixed.*/ $return_status = ISEMAIL_VALID; else $return_status = true;
// version 2.0: Enhance $diagnose parameter to $errorlevel

	// the upper limit on address lengths should normally be considered to be 254
	// 	(http://www.rfc-editor.org/errata_search.php?rfc=3696)
	// 	NB My erratum has now been verified by the IETF so the correct answer is 254
	//
	// The maximum total length of a reverse-path or forward-path is 256
	// characters (including the punctuation and element separators)
	// 	(http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3)
	//	NB There is a mandatory 2-character wrapper round the actual address
	$emailLength = strlen($email);
// revision 1.17: Max length reduced to 254 (see above)
	if ($emailLength > 254)			return $diagnose ? ISEMAIL_TOOLONG : false;	// Too long

	// Contemporary email addresses consist of a "local part" separated from
	// a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	// 	(http://tools.ietf.org/html/rfc3696#section-3)
	$atIndex = strrpos($email,'@');

	if ($atIndex === false)			return $diagnose ? ISEMAIL_NOAT : false;	// No at-sign
	if ($atIndex === 0)			return $diagnose ? ISEMAIL_NOLOCALPART : false;	// No local part
	if ($atIndex === $emailLength - 1)	return $diagnose ? ISEMAIL_NODOMAIN : false;	// No domain part
// revision 1.14: Length test bug suggested by Andrew Campbell of Gloucester, MA

	// Sanitize comments
	// - remove nested comments, quotes and dots in comments
	// - remove parentheses and dots from quoted strings
	// Each offending character is overwritten with 'x', so the string length
	// (and therefore $atIndex) is unaffected.
	$braceDepth	= 0;
	$inQuote	= false;
	$escapeThisChar	= false;

	for ($i = 0; $i < $emailLength; ++$i) {
		$char = $email[$i];
		$replaceChar = false;

		if ($char === '\\') 	$escapeThisChar = !$escapeThisChar;			// Escape the next character?
		else {
			switch ($char) {
			case '(':
				if	($escapeThisChar)	$replaceChar	= true;
				else if	($inQuote)		$replaceChar	= true;
				else if	($braceDepth++ > 0)	$replaceChar	= true;		// Increment brace depth

				break;
			case ')':
				if	($escapeThisChar)	$replaceChar	= true;
				else if	($inQuote)		$replaceChar	= true;
				else {
					if (--$braceDepth > 0)	$replaceChar	= true;		// Decrement brace depth
					if ($braceDepth < 0)	$braceDepth	= 0;
				}

				break;
			case '"':
				if	($escapeThisChar)	$replaceChar	= true;
				else if ($braceDepth === 0)	$inQuote	= !$inQuote;	// Are we inside a quoted string?
				else				$replaceChar	= true;

				break;
			case '.':
				if	($escapeThisChar)	$replaceChar	= true;		// Dots don't help us either
				else if	($braceDepth > 0)	$replaceChar	= true;

				break;
			default:
			}

			$escapeThisChar = false;
// revision 1.12: substr_replace is used instead of $email[$i] = 'x' because PHPLint doesn't like that syntax
			if ($replaceChar) $email = (string) substr_replace($email, 'x', $i, 1);	// Replace the offending character with something harmless
		}
	}

	$localPart	= substr($email, 0, $atIndex);
	$domain		= substr($email, $atIndex + 1);
	$FWS		= "(?:(?:(?:[ \\t]*(?:\\r\\n))?[ \\t]+)|(?:[ \\t]+(?:(?:\\r\\n)[ \\t]+)*))";	// Folding white space
	$dotArray	= /*. (array[]) .*/ array();

	// Let's check the local part for RFC compliance...
	//
	// local-part      =       dot-atom / quoted-string / obs-local-part
	// obs-local-part  =       word *("." word)
	// 	(http://tools.ietf.org/html/rfc5322#section-3.4.1)
	//
	// Problem: need to distinguish between "first.last" and "first"."last"
	// (i.e. one element or two). The split below only breaks on dots that are
	// followed by an even number of double quotes, i.e. dots outside a quoted string.
	$dotArray	= preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $localPart);
	$partLength	= 0;

	foreach ($dotArray as $arrayMember) {
		$element = (string) $arrayMember;
		// Remove any leading or trailing FWS
		$new_element = preg_replace("/^$FWS|$FWS\$/", '', $element);
		if ($warn && ($element !== $new_element)) $return_status = ISEMAIL_FWS;	// FWS is unlikely in the real world
		$element = $new_element;
// version 2.3: Warning condition added
		$elementLength	= strlen($element);

		if ($elementLength === 0)	return $diagnose ? ISEMAIL_ZEROLENGTHELEMENT : false;	// Can't have empty element (consecutive dots or dots at the start or end)
// revision 1.15: Speed up the test and get rid of "unitialized string offset" notices from PHP

		// We need to remove any valid comments (i.e. those at the start or end of the element)
		if ($element[0] === '(') {
			if ($warn) $return_status = ISEMAIL_COMMENTS;	// Comments are unlikely in the real world
// version 2.0: Warning condition added
			$indexBrace = strpos($element, ')');
			if ($indexBrace !== false) {
				if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0)
					return $diagnose ? ISEMAIL_BADCOMMENT_START : false;	// Illegal characters in comment
				$element	= substr($element, $indexBrace + 1, $elementLength - $indexBrace - 1);
				$elementLength	= strlen($element);
			}
		}

		// The element may now be empty (it consisted solely of a comment), so check
		// the length before reading its last character to avoid an invalid string offset
		if ($elementLength > 0 && $element[$elementLength - 1] === ')') {
			if ($warn) $return_status = ISEMAIL_COMMENTS;	// Comments are unlikely in the real world
// version 2.0: Warning condition added
			$indexBrace = strrpos($element, '(');
			if ($indexBrace !== false) {
				if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0)
					return $diagnose ? ISEMAIL_BADCOMMENT_END : false;	// Illegal characters in comment
				$element	= substr($element, 0, $indexBrace);
				$elementLength	= strlen($element);
			}
		}

		// Remove any remaining leading or trailing FWS around the element (having removed any comments)
		$new_element = preg_replace("/^$FWS|$FWS\$/", '', $element);
		if ($warn && ($element !== $new_element)) $return_status = ISEMAIL_FWS;	// FWS is unlikely in the real world
		$element = $new_element;
// version 2.0: Warning condition added

		// What's left counts towards the maximum length for this part
		if ($partLength > 0) $partLength++;	// for the dot
		$partLength += strlen($element);

		// Each dot-delimited component can be an atom or a quoted string
		// (because of the obs-local-part provision)
		if (preg_match('/^"(?:.)*"$/s', $element) > 0) {
			// Quoted-string tests:
			if ($warn) $return_status = ISEMAIL_QUOTEDSTRING;	// Quoted string is unlikely in the real world
// version 2.0: Warning condition added
			// Remove any FWS
			$element = preg_replace("/(?<!\\\\)$FWS/", '', $element);	// A warning condition, but we've already raised ISEMAIL_QUOTEDSTRING
			// Telling \" \\" \\\" \\\\" etc. apart in one regex is awkward,
			// so replace every escaped backslash (\\) with a space first...
			$element = preg_replace('/\\\\\\\\/', ' ', $element);
			if (preg_match('/(?<!\\\\|^)["\\r\\n\\x00](?!$)|\\\\"$|""/', $element) > 0)	return $diagnose ? ISEMAIL_UNESCAPEDDELIM : false;	// ", CR, LF and NUL must be escaped
// version 2.0: allow ""@example.com because it's technically valid
		} else {
			// Unquoted string tests:
			//
			// Period (".") may...appear, but may not be used to start or end the
			// local part, nor may two or more consecutive periods appear.
			// 	(http://tools.ietf.org/html/rfc3696#section-3)
			//
			// A zero-length element implies a period at the beginning or end of the
			// local part, or two periods together. Either way it's not allowed.
			if ($element === '')	return $diagnose ? ISEMAIL_EMPTYELEMENT : false;	// Dots in wrong place

			// Any ASCII graphic (printing) character other than the
			// at-sign ("@"), backslash, double quote, comma, or square brackets may
			// appear without quoting.  If any of that list of excluded characters
			// are to appear, they must be quoted
			// 	(http://tools.ietf.org/html/rfc3696#section-3)
			//
			// Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
			if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]/', $element) > 0)	return $diagnose ? ISEMAIL_UNESCAPEDSPECIAL : false;	// These characters must be in a quoted string
			if ($warn && (preg_match('/^\\w+/', $element) === 0)) $return_status = ISEMAIL_UNLIKELYINITIAL;	// First character is an odd one
		}
	}

	if ($partLength > 64)	return $diagnose ? ISEMAIL_LOCALTOOLONG : false;	// Local part must be 64 characters or less

	// Now let's check the domain part...

	// The domain name can also be replaced by an IP address in square brackets
	// 	(http://tools.ietf.org/html/rfc3696#section-3)
	// 	(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	// 	(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
		// It's an address-literal
		if ($warn) $return_status = ISEMAIL_ADDRESSLITERAL;	// Address literal is unlikely in the real world
// version 2.0: Warning condition added
		$addressLiteral = substr($domain, 1, strlen($domain) - 2);
		$groupMax	= 8;
// revision 2.1: new IPv6 testing strategy
		$matchesIP	= array();
		$colon		= ':';	// Revision 2.7: Daniel Marschall's new IPv6 testing strategy
		$double_colon	= '::';

		// Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
			$index = strrpos($addressLiteral, $matchesIP[0]);

			if ($index === 0) {
				// Nothing there except a valid IPv4 address, so...
				return $diagnose ? $return_status : true;
// version 2.0: return warning if one is set
			} else {
				// Assume it's an attempt at a mixed address (IPv6 + IPv4)
// revision 2.1: new IPv6 testing strategy
				if (substr($addressLiteral, 0, 5) !== 'IPv6:')	return $diagnose ? ISEMAIL_IPV6BADPREFIXMIXED : false;	// RFC5321 section 4.1.3
				// The IPv4 part occupies the space of two IPv6 groups, so substitute
				// two placeholder groups and validate the result as pure IPv6
				$IPv6		= substr($addressLiteral, 5, $index - 5) . '0000:0000'; // Convert IPv4 part to IPv6 format
			}
		} else {
			// It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:')	return $diagnose ? ISEMAIL_IPV6BADPREFIX : false;	// RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
// revision 2.1: new IPv6 testing strategy
		}

		$matchesIP	= explode($colon, $IPv6);	// Revision 2.7: Daniel Marschall's new IPv6 testing strategy
		$groupCount	= count($matchesIP);
		$index		= strpos($IPv6,$double_colon);

		if ($index === false) {
			// We need exactly the right number of groups
			if ($groupCount !== $groupMax)	return $diagnose ? ISEMAIL_IPV6GROUPCOUNT : false;	// RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6,$double_colon))	return $diagnose ? ISEMAIL_IPV6DOUBLEDOUBLECOLON : false;	// More than one '::'
			if ($index === 0 || $index === (strlen($IPv6) - 2)) $groupMax++;	// RFC 4291 allows :: at the start or end of an address with 7 other groups in addition
			if ($groupCount > $groupMax)	return $diagnose ? ISEMAIL_IPV6TOOMANYGROUPS : false;	// Too many IPv6 groups in address
			// This is a warning, so like every other warning it is only reported when the
			// caller asked for warnings; in E_ERROR mode the address is simply valid
			if ($warn && ($groupCount === $groupMax)) $return_status = ISEMAIL_SINGLEGROUPELISION;	// Eliding a single group with :: is deprecated by RFCs 5321 & 5952
		}

		// Check for single : at start and end of address
		// Revision 2.7: Daniel Marschall's new IPv6 testing strategy
		if ((substr($IPv6, 0,  1)	=== $colon) && (substr($IPv6, 1,  1) !== $colon))	return $diagnose ? ISEMAIL_IPV6SINGLECOLONSTART : false;	// Address starts with a single colon
		if ((substr($IPv6, -1)		=== $colon) && (substr($IPv6, -2, 1) !== $colon))	return $diagnose ? ISEMAIL_IPV6SINGLECOLONEND : false;	// Address ends with a single colon

		// Check for unmatched characters: every group must be 0-4 hex digits
		if (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0)	return $diagnose ? ISEMAIL_IPV6BADCHAR : false;	// Illegal characters in address
		// It's a valid IPv6 address, so...
		return $diagnose ? $return_status : true;
// revision 2.1: bug fix: now correctly return warning status
	} else {
		// It's a domain name...

		// The syntax of a legal Internet host name was specified in RFC-952
		// One aspect of host name syntax is hereby changed: the
		// restriction on the first character is relaxed to allow either a
		// letter or a digit.
		// 	(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		// NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		// Most common applications, including email and the Web, will generally not
		// permit...escaped strings
		// 	(http://tools.ietf.org/html/rfc3696#section-2)
		//
		// the better strategy has now become to make the "at least one period" test,
		// to verify LDH conformance (including verification that the apparent TLD name
		// is not all-numeric)
		// 	(http://tools.ietf.org/html/rfc3696#section-2)
		//
		// Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		// labels for SMTP clients or servers
		// 	(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		// RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		// 	(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		$dotArray	= preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $domain);
		$partLength	= 0;
		$element	= ''; // Since we use $element after the foreach loop let's make sure it has a value
// revision 1.13: Line above added because PHPLint now checks for Definitely Assigned Variables

		if ($warn && (count($dotArray) === 1))	$return_status = ISEMAIL_TLD;	// The mail host probably isn't a TLD
// version 2.0: downgraded to a warning

		foreach ($dotArray as $arrayMember) {
			$element = (string) $arrayMember;
			// Remove any leading or trailing FWS
			$new_element	= preg_replace("/^$FWS|$FWS\$/", '', $element);
			if ($warn && ($element !== $new_element)) $return_status = ISEMAIL_FWS;	// FWS is unlikely in the real world
			$element = $new_element;
// version 2.0: Warning condition added
			$elementLength	= strlen($element);

			// Each dot-delimited component must be of type atext
			// A zero-length element implies a period at the beginning or end of the
			// domain, or two periods together. Either way it's not allowed.
			if ($elementLength === 0)	return $diagnose ? ISEMAIL_DOMAINEMPTYELEMENT : false;	// Dots in wrong place
// revision 1.15: Speed up the test and get rid of "unitialized string offset" notices from PHP

			// Then we need to remove all valid comments (i.e. those at the start or end of the element)
			if ($element[0] === '(') {
				if ($warn) $return_status = ISEMAIL_COMMENTS;	// Comments are unlikely in the real world
// version 2.0: Warning condition added
				$indexBrace = strpos($element, ')');
				if ($indexBrace !== false) {
					if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0)
						return $diagnose ? ISEMAIL_BADCOMMENT_START : false;	// Illegal characters in comment
// revision 1.17: Fixed name of constant (also spotted by turboflash - thanks!)
					$element	= substr($element, $indexBrace + 1, $elementLength - $indexBrace - 1);
					$elementLength	= strlen($element);
				}
			}

			// The element may now be empty (it consisted solely of a comment), so check
			// the length before reading its last character to avoid an invalid string offset
			// NOTE(review): such an empty label is not rejected by the checks below - confirm whether that is intended
			if ($elementLength > 0 && $element[$elementLength - 1] === ')') {
				if ($warn) $return_status = ISEMAIL_COMMENTS;	// Comments are unlikely in the real world
// version 2.0: Warning condition added
				$indexBrace = strrpos($element, '(');
				if ($indexBrace !== false) {
					if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0)
						return $diagnose ? ISEMAIL_BADCOMMENT_END : false;	// Illegal characters in comment
// revision 1.17: Fixed name of constant (also spotted by turboflash - thanks!)
					$element	= substr($element, 0, $indexBrace);
					$elementLength	= strlen($element);
				}
			}

			// Remove any remaining leading or trailing FWS around the element (having removed any comments)
			$new_element	= preg_replace("/^$FWS|$FWS\$/", '', $element);
			if ($warn && ($element !== $new_element)) $return_status = ISEMAIL_FWS;	// FWS is unlikely in the real world
			$element = $new_element;
// version 2.0: Warning condition added

			// What's left counts towards the maximum length for this part
			if ($partLength > 0) $partLength++;	// for the dot
			$partLength += strlen($element);

			// The DNS defines domain name syntax very generally -- a
			// string of labels each containing up to 63 8-bit octets,
			// separated by dots, and with a maximum total of 255
			// octets.
			// 	(http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
			if ($elementLength > 63)	return $diagnose ? ISEMAIL_DOMAINELEMENTTOOLONG : false;	// Label must be 63 characters or less

			// Any ASCII graphic (printing) character other than the
			// at-sign ("@"), backslash, double quote, comma, or square brackets may
			// appear without quoting.  If any of that list of excluded characters
			// are to appear, they must be quoted
			// 	(http://tools.ietf.org/html/rfc3696#section-3)
			//
			// If the hyphen is used, it is not permitted to appear at
			// either the beginning or end of a label.
			// 	(http://tools.ietf.org/html/rfc3696#section-2)
			//
			// Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
			if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]|^-|-$/', $element) > 0)	return $diagnose ? ISEMAIL_DOMAINBADCHAR : false;	// Illegal character in domain name
		}

		if ($partLength > 255)	return $diagnose ? ISEMAIL_DOMAINTOOLONG : false;	// Domain part must be 255 characters or less (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)

		// $element now holds the last label, i.e. the apparent TLD
		if ($warn && (preg_match('/^[0-9]+$/', $element) > 0))	$return_status = ISEMAIL_TLDNUMERIC;	// TLD probably isn't all-numeric (http://www.apps.ietf.org/rfc/rfc3696.html#sec-2)
// version 2.0: Downgraded to a warning

		// Check DNS? Only in diagnostic mode and only if nothing else has been flagged.
		// Both lookups are always made; a missing MX record takes precedence over a missing A record.
		if ($diagnose && ($return_status === ISEMAIL_VALID) && $checkDNS && function_exists('checkdnsrr')) {
			if (!(checkdnsrr($domain, 'A')))	$return_status = ISEMAIL_DOMAINNOTFOUND;	// 'A' record for domain can't be found
			if (!(checkdnsrr($domain, 'MX')))	$return_status = ISEMAIL_MXNOTFOUND;		// 'MX' record for domain can't be found
		}
	}

	// Eliminate all other factors, and the one which remains must be the truth.
	// 	(Sherlock Holmes, The Sign of Four)
	return $diagnose ? $return_status : true;
// version 2.0: return warning if one is set
}

// Demonstration: validate a sample address and print the verdict as HTML
$email = '[email protected]';

echo "Testing {$email}<br/>";

// is_email() returns a boolean here because no $errorlevel is passed
$negation = is_email($email) ? '' : 'not ';
echo "{$email} is " . $negation . 'a valid email address';
?>

Revision: 11231
at March 12, 2009 09:58 by dominicsayers


Updated Code
<?php
/*
Copyright 2009 Dominic Sayers
	[email protected]
	http://www.dominicsayers.com

Version 1.7

This source file is subject to the Common Public Attribution License Version 1.0 (CPAL) license.
The license terms are available through the world-wide-web at http://www.opensource.org/licenses/cpal_1.0
*/

//	PHPLint modules
/*.
	require_module 'standard';
	require_module 'pcre';
.*/
/*.boolean.*/ function is_email (/*.string.*/ $email, $checkDNS = false) {
	//	Check that $email is a valid address. Read the following RFCs to understand the constraints:
	//		(http://tools.ietf.org/html/rfc5322)
	//		(http://tools.ietf.org/html/rfc3696)
	//		(http://tools.ietf.org/html/rfc5321)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	//		(http://tools.ietf.org/html/rfc1123#section-2.1)
	
	//	the upper limit on address lengths should normally be considered to be 256
	//		(http://www.rfc-editor.org/errata_search.php?rfc=3696)
	//		NB I think John Klensin is misreading RFC 5321 and the limit should actually be 254
	//		However, I will stick to the published number until it is changed.
	//
	//	The maximum total length of a reverse-path or forward-path is 256
	//	characters (including the punctuation and element separators)
	//		(http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3)
	$emailLength = strlen($email);
	if ($emailLength > 256)	return false;	//	Too long

	//	Contemporary email addresses consist of a "local part" separated from
	//	a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	$atIndex		= strrpos($email,'@');

	if ($atIndex === false)			return false;	//	No at-sign
	if ($atIndex === 0)				return false;	//	No local part
	if ($atIndex === $emailLength)	return false;	//	No domain part
	
	//	Sanitize comments
	//	- remove nested comments, quotes and dots in comments
	//	- remove parentheses and dots from quoted strings
	$braceDepth		= 0;
	$inQuote		= false;
	$escapeThisChar	= false;

	for ($i = 0; $i < $emailLength; ++$i) {
		$char = $email[$i];
		$replaceChar = false;

		if ($char === '\\') {
			$escapeThisChar = !$escapeThisChar;	//	Escape the next character?
		} else {
			switch ($char) {
			case '(':
				if ($escapeThisChar) {
					$replaceChar = true;
				} else {
					if ($inQuote) {
						$replaceChar = true;
					} else {
						if ($braceDepth++ > 0) $replaceChar = true;	//	Increment brace depth
					}
				}

				break;
			case ')':
				if ($escapeThisChar) {
					$replaceChar = true;
				} else {
					if ($inQuote) {
						$replaceChar = true;
					} else {
						if (--$braceDepth > 0) $replaceChar = true;	//	Decrement brace depth
						if ($braceDepth < 0) $braceDepth = 0;
					}
				}

				break;
			case '"':
				if ($escapeThisChar) {
					$replaceChar = true;
				} else {
					if ($braceDepth === 0) {
						$inQuote = !$inQuote;	//	Are we inside a quoted string?
					} else {
						$replaceChar = true;
					}
				}

				break;
			case '.':	//	Dots don't help us either
				if ($escapeThisChar) {
					$replaceChar = true;
				} else {
					if ($braceDepth > 0) $replaceChar = true;
				}

				break;
			}

			$escapeThisChar = false;
			if ($replaceChar) $email[$i] = 'x';	//	Replace the offending character with something harmless
		}
	}

	$localPart		= substr($email, 0, $atIndex);
	$domain			= substr($email, $atIndex + 1);
	$FWS			= "(?:(?:(?:[ \\t]*(?:\\r\\n))?[ \\t]+)|(?:[ \\t]+(?:(?:\\r\\n)[ \\t]+)*))";	//	Folding white space
	//	Let's check the local part for RFC compliance...
	//
	//	local-part      =       dot-atom / quoted-string / obs-local-part
	//	obs-local-part  =       word *("." word)
	//		(http://tools.ietf.org/html/rfc5322#section-3.4.1)
	//
	//	Problem: need to distinguish between "first.last" and "first"."last"
	//	(i.e. one element or two). And I suck at regexes.
	$dotArray	= /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $localPart);
	$partLength = 0;

	foreach ($dotArray as $element) {
		//	Remove any leading or trailing FWS
		$element = preg_replace("/^$FWS|$FWS\$/", '', $element);

		//	Then we need to remove all valid comments (i.e. those at the start or end of the element
		$elementLength = strlen($element);

		if ($element[0] === '(') {
			$indexBrace = strpos($element, ')');
			if ($indexBrace !== false) {
				if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
																						return false;	//	Illegal characters in comment
				}
				$element = substr($element, $indexBrace + 1, $elementLength - $indexBrace - 1);
				$elementLength = strlen($element);
			}
		}
		
		if ($element[$elementLength - 1] === ')') {
			$indexBrace = strrpos($element, '(');
			if ($indexBrace !== false) {
				if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0) {
																						return false;	//	Illegal characters in comment
				}
				$element = substr($element, 0, $indexBrace);
				$elementLength = strlen($element);
			}
		}			

		//	Remove any leading or trailing FWS around the element (inside any comments)
		$element = preg_replace("/^$FWS|$FWS\$/", '', $element);

		//	What's left counts towards the maximum length for this part
		if ($partLength > 0) $partLength++;	//	for the dot
		$partLength += strlen($element);

		//	Each dot-delimited component can be an atom or a quoted string
		//	(because of the obs-local-part provision)
		if (preg_match('/^"(?:.)*"$/s', $element) > 0) {
			//	Quoted-string tests:
			//
			//	Remove any FWS
			$element = preg_replace("/(?<!\\\\)$FWS/", '', $element);
			//	My regex skillz aren't up to distinguishing between \" \\" \\\" \\\\" etc.
			//	So remove all \\ from the string first...
			$element = preg_replace('/\\\\\\\\/', ' ', $element);
			if (preg_match('/(?<!\\\\|^)["\\r\\n\\x00](?!$)|\\\\"$|""/', $element) > 0)	return false;	//	", CR, LF and NUL must be escaped, "" is too short
		} else {
			//	Unquoted string tests:
			//
			//	Period (".") may...appear, but may not be used to start or end the
			//	local part, nor may two or more consecutive periods appear.
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	A zero-length element implies a period at the beginning or end of the
			//	local part, or two periods together. Either way it's not allowed.
			if ($element === '')														return false;	//	Dots in wrong place

			//	Any ASCII graphic (printing) character other than the
			//	at-sign ("@"), backslash, double quote, comma, or square brackets may
			//	appear without quoting.  If any of that list of excluded characters
			//	are to appear, they must be quoted
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
			if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]/', $element) > 0)	return false;	//	These characters must be in a quoted string
		}
	}

	if ($partLength > 64) return false;	// Local part must be 64 characters or less

	//	Now let's check the domain part...

	//	The domain name can also be replaced by an IP address in square brackets
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
		//	It's an address-literal
		$addressLiteral = substr($domain, 1, strlen($domain) - 2);
		$matchesIP		= array();
		
		//	Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
			$index = strrpos($addressLiteral, $matchesIP[0]);
			
			if ($index === 0) {
				//	Nothing there except a valid IPv4 address, so...
				return true;
			} else {
				//	Assume it's an attempt at a mixed address (IPv6 + IPv4)
				if ($addressLiteral[$index - 1] !== ':')		return false;	//	Character preceding IPv4 address must be ':'
				if (substr($addressLiteral, 0, 5) !== 'IPv6:')	return false;	//	RFC5321 section 4.1.3

				$IPv6 = substr($addressLiteral, 5, ($index ===7) ? 2 : $index - 6);
				$groupMax = 6;
			}
		} else {
			//	It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:')		return false;	//	RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
			$groupMax = 8;
		}

		$groupCount	= preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
		$index		= strpos($IPv6,'::');

		if ($index === false) {
			//	We need exactly the right number of groups
			if ($groupCount !== $groupMax)						return false;	//	RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6,'::'))					return false;	//	More than one '::'
			$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
			if ($groupCount > $groupMax)						return false;	//	Too many IPv6 groups in address
		}

		//	Check for unmatched characters
		array_multisort($matchesIP[1], SORT_DESC);
		if ($matchesIP[1][0] !== '')							return false;	//	Illegal characters in address

		//	It's a valid IPv6 address, so...
		return true;
	} else {
		//	It's a domain name...

		//	The syntax of a legal Internet host name was specified in RFC-952
		//	One aspect of host name syntax is hereby changed: the
		//	restriction on the first character is relaxed to allow either a
		//	letter or a digit.
		//		(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		//	NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		//	Most common applications, including email and the Web, will generally not
		//	permit...escaped strings
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	the better strategy has now become to make the "at least one period" test,
		//	to verify LDH conformance (including verification that the apparent TLD name
		//	is not all-numeric)
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		//	labels for SMTP clients or servers
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		$dotArray	= /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $domain);
		$partLength = 0;

		if (count($dotArray) === 1)					return false;	//	Mail host can't be a TLD

		foreach ($dotArray as $element) {
			//	Remove any leading or trailing FWS
			$element = preg_replace("/^$FWS|$FWS\$/", '', $element);
	
			//	Then we need to remove all valid comments (i.e. those at the start or end of the element
			$elementLength = strlen($element);
	
			if ($element[0] === '(') {
				$indexBrace = strpos($element, ')');
				if ($indexBrace !== false) {
					if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
													return false;	//	Illegal characters in comment
					}
					$element = substr($element, $indexBrace + 1, $elementLength - $indexBrace - 1);
					$elementLength = strlen($element);
				}
			}
			
			if ($element[$elementLength - 1] === ')') {
				$indexBrace = strrpos($element, '(');
				if ($indexBrace !== false) {
					if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0) {
													return false;	//	Illegal characters in comment
					}
					$element = substr($element, 0, $indexBrace);
					$elementLength = strlen($element);
				}
			}			
	
			//	Remove any leading or trailing FWS around the element (inside any comments)
			$element = preg_replace("/^$FWS|$FWS\$/", '', $element);
	
			//	What's left counts towards the maximum length for this part
			if ($partLength > 0) $partLength++;	//	for the dot
			$partLength += strlen($element);
	
			//	The DNS defines domain name syntax very generally -- a
			//	string of labels each containing up to 63 8-bit octets,
			//	separated by dots, and with a maximum total of 255
			//	octets.
			//		(http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
			if ($elementLength > 63)				return false;	//	Label must be 63 characters or less
	
			//	Each dot-delimited component must be atext
			//	A zero-length element implies a period at the beginning or end of the
			//	local part, or two periods together. Either way it's not allowed.
			if ($elementLength === 0)				return false;	//	Dots in wrong place
	
			//	Any ASCII graphic (printing) character other than the
			//	at-sign ("@"), backslash, double quote, comma, or square brackets may
			//	appear without quoting.  If any of that list of excluded characters
			//	are to appear, they must be quoted
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	If the hyphen is used, it is not permitted to appear at
			//	either the beginning or end of a label.
			//		(http://tools.ietf.org/html/rfc3696#section-2)
			//
			//	Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
			if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]|^-|-$/', $element) > 0) {
													return false;
			}
		}

		if ($partLength > 255) 						return false;	// Local part must be 64 characters or less

		if (preg_match('/^[0-9]+$/', $element) > 0)	return false;	//	TLD can't be all-numeric

		//	Check DNS?
		if ($checkDNS && function_exists('checkdnsrr')) {
			if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
													return false;	//	Domain doesn't actually exist
			}
		}
	}

	//	Eliminate all other factors, and the one which remains must be the truth.
	//		(Sherlock Holmes, The Sign of Four)
	return true;
}
?>

Revision: 11230
at February 26, 2009 12:03 by dominicsayers


Updated Code
<?php
/*
Copyright 2009 Dominic Sayers
	[email protected]
	http://www.dominicsayers.com

Version 1.6

This source file is subject to the Common Public Attribution License Version 1.0 (CPAL) license.
The license terms are available through the world-wide-web at http://www.opensource.org/licenses/cpal_1.0
*/

//	PHPLint modules
/*.
	require_module 'standard';
	require_module 'pcre';
.*/
/**
 * Check that an email address is syntactically valid
 *
 * The address is split at its last at-sign. The local part is checked as a
 * dot-delimited sequence of atoms and quoted strings. The domain is checked
 * either as an address literal in square brackets (IPv4, IPv6 or mixed) or as
 * a dot-delimited host name. A comment at the start or end of an element, and
 * folding white space around an element, are removed before the element is
 * tested and are not counted towards the length limits.
 *
 * @param string	$email		The address to check
 * @param boolean	$checkDNS	If true, and checkdnsrr() is available, a host name must also
 *								have an A or MX record. Address literals are never looked up.
 * @return boolean				True if every check passes, otherwise false
 */
/*.boolean.*/ function is_email (/*.string.*/ $email, $checkDNS = false) {
	//	Check that $email is a valid address. Read the following RFCs to understand the constraints:
	//		(http://tools.ietf.org/html/rfc5322)
	//		(http://tools.ietf.org/html/rfc3696)
	//		(http://tools.ietf.org/html/rfc5321)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	//		(http://tools.ietf.org/html/rfc1123#section-2.1)
	
	//	the upper limit on address lengths should normally be considered to be 256
	//		(http://www.rfc-editor.org/errata_search.php?rfc=3696)
	//		NB I think John Klensin is misreading RFC 5321 and the limit should actually be 254
	//		However, I will stick to the published number until it is changed.
	//
	//	The maximum total length of a reverse-path or forward-path is 256
	//	characters (including the punctuation and element separators)
	//		(http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3)
	$emailLength = strlen($email);
	if ($emailLength > 256)	return false;	//	Too long

	//	Contemporary email addresses consist of a "local part" separated from
	//	a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	$atIndex		= strrpos($email,'@');

	if ($atIndex === false)				return false;	//	No at-sign
	if ($atIndex === 0)					return false;	//	No local part
	//	strrpos() returns a zero-based offset, so a trailing at-sign sits at $emailLength - 1
	if ($atIndex === $emailLength - 1)	return false;	//	No domain part
	
	//	Sanitize comments
	//	- remove nested comments, quotes and dots in comments
	//	- remove parentheses and dots from quoted strings
	//
	//	This is a single pass over the address that tracks comment nesting depth,
	//	whether we are inside a quoted string, and whether the current character
	//	is backslash-escaped. Offending characters are overwritten in place with 'x'
	//	so the string keeps its length and $atIndex stays valid.
	$braceDepth		= 0;
	$inQuote		= false;
	$escapeThisChar	= false;

	for ($i = 0; $i < $emailLength; ++$i) {
		$char = $email[$i];
		$replaceChar = false;

		if ($char === '\\') {
			$escapeThisChar = !$escapeThisChar;	//	Escape the next character?
		} else {
			switch ($char) {
			case '(':
				if ($escapeThisChar) {
					$replaceChar = true;
				} else {
					if ($inQuote) {
						$replaceChar = true;
					} else {
						if ($braceDepth++ > 0) $replaceChar = true;	//	Increment brace depth
					}
				}

				break;
			case ')':
				if ($escapeThisChar) {
					$replaceChar = true;
				} else {
					if ($inQuote) {
						$replaceChar = true;
					} else {
						if (--$braceDepth > 0) $replaceChar = true;	//	Decrement brace depth
						if ($braceDepth < 0) $braceDepth = 0;
					}
				}

				break;
			case '"':
				if ($escapeThisChar) {
					$replaceChar = true;
				} else {
					if ($braceDepth === 0) {
						$inQuote = !$inQuote;	//	Are we inside a quoted string?
					} else {
						$replaceChar = true;
					}
				}

				break;
			case '.':	//	Dots don't help us either
				if ($escapeThisChar) {
					$replaceChar = true;
				} else {
					if ($braceDepth > 0) $replaceChar = true;
				}

				break;
			}

			$escapeThisChar = false;
			if ($replaceChar) $email[$i] = 'x';	//	Replace the offending character with something harmless
		}
	}

	$localPart		= substr($email, 0, $atIndex);
	$domain			= substr($email, $atIndex + 1);
	$FWS			= "(?:(?:(?:[ \\t]*(?:\\r\\n))?[ \\t]+)|(?:[ \\t]+(?:(?:\\r\\n)[ \\t]+)*))";	//	Folding white space
	//	Let's check the local part for RFC compliance...
	//
	//	local-part      =       dot-atom / quoted-string / obs-local-part
	//	obs-local-part  =       word *("." word)
	//		(http://tools.ietf.org/html/rfc5322#section-3.4.1)
	//
	//	Problem: need to distinguish between "first.last" and "first"."last"
	//	(i.e. one element or two). The lookahead only splits on a dot that is
	//	followed by an even number of double quotes, i.e. a dot outside a quoted string.
	$dotArray	= /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $localPart);
	$partLength = 0;

	foreach ($dotArray as $element) {
		//	Remove any leading or trailing FWS
		$element = preg_replace("/^$FWS|$FWS\$/", '', $element);

		//	Then we need to remove all valid comments (i.e. those at the start or end of the element)
		$elementLength = strlen($element);

		//	Only index into the string when it has at least one character:
		//	an empty element (two dots together, leading or trailing dot) has no offset 0
		if ($elementLength > 0 && $element[0] === '(') {
			$indexBrace = strpos($element, ')');
			if ($indexBrace !== false) {
				if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
					return false;	//	Illegal characters in comment
				}
				$element = substr($element, $indexBrace + 1, $elementLength - $indexBrace - 1);
				$elementLength = strlen($element);
			}
		}
		
		//	The element may have become empty if it consisted solely of a leading comment
		if ($elementLength > 0 && $element[$elementLength - 1] === ')') {
			$indexBrace = strrpos($element, '(');
			if ($indexBrace !== false) {
				if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0) {
					return false;	//	Illegal characters in comment
				}
				$element = substr($element, 0, $indexBrace);
				$elementLength = strlen($element);
			}
		}

		//	Remove any leading or trailing FWS around the element (inside any comments)
		$element = preg_replace("/^$FWS|$FWS\$/", '', $element);

		//	What's left counts towards the maximum length for this part
		if ($partLength > 0) $partLength++;	//	for the dot
		$partLength += strlen($element);

		//	Each dot-delimited component can be an atom or a quoted string
		//	(because of the obs-local-part provision)
		//
		//	The D modifier makes $ match only at the very end of the element. Without it
		//	a quoted string followed by a bare newline would be treated as quoted and the
		//	newline would escape every later test.
		if (preg_match('/^"(?:.)*"$/sD', $element) > 0) {
			//	Quoted-string tests:
			//
			//	Remove any FWS
			$element = preg_replace("/(?<!\\\\)$FWS/", '', $element);
			//	My regex skillz aren't up to distinguishing between \" \\" \\\" \\\\" etc.
			//	So remove all \\ from the string first...
			$element = preg_replace('/\\\\\\\\/', ' ', $element);
			if (preg_match('/(?<!\\\\|^)["\\r\\n\\x00](?!$)|\\\\"$|""/', $element) > 0)	return false;	//	", CR, LF and NUL must be escaped, "" is too short
		} else {
			//	Unquoted string tests:
			//
			//	Period (".") may...appear, but may not be used to start or end the
			//	local part, nor may two or more consecutive periods appear.
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	A zero-length element implies a period at the beginning or end of the
			//	local part, or two periods together. Either way it's not allowed.
			if ($element === '')	return false;	//	Dots in wrong place

			//	Any ASCII graphic (printing) character other than the
			//	at-sign ("@"), backslash, double quote, comma, or square brackets may
			//	appear without quoting.  If any of that list of excluded characters
			//	are to appear, they must be quoted
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
			if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]/', $element) > 0)	return false;	//	These characters must be in a quoted string
		}
	}

	if ($partLength > 64) return false;	// Local part must be 64 characters or less

	//	Now let's check the domain part...

	//	The domain name can also be replaced by an IP address in square brackets
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
		//	It's an address-literal
		$addressLiteral = substr($domain, 1, strlen($domain) - 2);
		$matchesIP		= array();
		
		//	Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
			$index = strrpos($addressLiteral, $matchesIP[0]);
			
			if ($index === 0) {
				//	Nothing there except a valid IPv4 address, so...
				return true;
			} else {
				//	Assume it's an attempt at a mixed address (IPv6 + IPv4)
				if ($addressLiteral[$index - 1] !== ':')		return false;	//	Character preceding IPv4 address must be ':'
				if (substr($addressLiteral, 0, 5) !== 'IPv6:')	return false;	//	RFC5321 section 4.1.3

				//	Keep the IPv6 groups only. When the IPv4 part follows 'IPv6:::' directly
				//	($index is 7) the two colons are kept so the '::' test below still sees them.
				$IPv6 = substr($addressLiteral, 5, ($index ===7) ? 2 : $index - 6);
				$groupMax = 6;	//	The IPv4 part stands in for the last two of the eight groups
			}
		} else {
			//	It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:')		return false;	//	RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
			$groupMax = 8;
		}

		//	Each match is one group (the first without, the rest with, a leading colon).
		//	Any character that fits neither form is captured in sub-pattern 1.
		$groupCount	= preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
		$index		= strpos($IPv6,'::');

		if ($index === false) {
			//	We need exactly the right number of groups
			if ($groupCount !== $groupMax)						return false;	//	RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6,'::'))					return false;	//	More than one '::'
			$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
			if ($groupCount > $groupMax)						return false;	//	Too many IPv6 groups in address
		}

		//	Check for unmatched characters
		//	(sorting descending brings any non-empty capture to the front)
		array_multisort($matchesIP[1], SORT_DESC);
		if ($matchesIP[1][0] !== '')							return false;	//	Illegal characters in address

		//	It's a valid IPv6 address, so...
		return true;
	} else {
		//	It's a domain name...

		//	The syntax of a legal Internet host name was specified in RFC-952
		//	One aspect of host name syntax is hereby changed: the
		//	restriction on the first character is relaxed to allow either a
		//	letter or a digit.
		//		(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		//	NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		//	Most common applications, including email and the Web, will generally not
		//	permit...escaped strings
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	the better strategy has now become to make the "at least one period" test,
		//	to verify LDH conformance (including verification that the apparent TLD name
		//	is not all-numeric)
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		//	labels for SMTP clients or servers
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		$dotArray	= /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $domain);
		$partLength = 0;

		if (count($dotArray) === 1)					return false;	//	Mail host can't be a TLD

		foreach ($dotArray as $element) {
			//	Remove any leading or trailing FWS
			$element = preg_replace("/^$FWS|$FWS\$/", '', $element);
	
			//	Then we need to remove all valid comments (i.e. those at the start or end of the element)
			$elementLength = strlen($element);
	
			//	Only index into the string when it has at least one character
			if ($elementLength > 0 && $element[0] === '(') {
				$indexBrace = strpos($element, ')');
				if ($indexBrace !== false) {
					if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
						return false;	//	Illegal characters in comment
					}
					$element = substr($element, $indexBrace + 1, $elementLength - $indexBrace - 1);
					$elementLength = strlen($element);
				}
			}
			
			if ($elementLength > 0 && $element[$elementLength - 1] === ')') {
				$indexBrace = strrpos($element, '(');
				if ($indexBrace !== false) {
					if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0) {
						return false;	//	Illegal characters in comment
					}
					$element = substr($element, 0, $indexBrace);
					$elementLength = strlen($element);
				}
			}
	
			//	Remove any leading or trailing FWS around the element (inside any comments)
			$element = preg_replace("/^$FWS|$FWS\$/", '', $element);

			//	Re-measure: the trim above may have shortened the element, and the
			//	label-length and empty-label tests below must see the final label only
			$elementLength = strlen($element);
	
			//	What's left counts towards the maximum length for this part
			if ($partLength > 0) $partLength++;	//	for the dot
			$partLength += $elementLength;
	
			//	The DNS defines domain name syntax very generally -- a
			//	string of labels each containing up to 63 8-bit octets,
			//	separated by dots, and with a maximum total of 255
			//	octets.
			//		(http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
			if ($elementLength > 63)				return false;	//	Label must be 63 characters or less
	
			//	Each dot-delimited component must be atext
			//	A zero-length element implies a period at the beginning or end of the
			//	domain, or two periods together. Either way it's not allowed.
			if ($elementLength === 0)				return false;	//	Dots in wrong place
	
			//	Any ASCII graphic (printing) character other than the
			//	at-sign ("@"), backslash, double quote, comma, or square brackets may
			//	appear without quoting.  If any of that list of excluded characters
			//	are to appear, they must be quoted
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	If the hyphen is used, it is not permitted to appear at
			//	either the beginning or end of a label.
			//		(http://tools.ietf.org/html/rfc3696#section-2)
			//
			//	Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
			if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]|^-|-$/', $element) > 0) {
				return false;
			}
		}

		if ($partLength > 255) 						return false;	// Domain must be 255 characters or less

		//	$element still holds the last label processed by the loop, i.e. the apparent TLD
		if (preg_match('/^[0-9]+$/', $element) > 0)	return false;	//	TLD can't be all-numeric
	}

	//	Check DNS?
	//	(only reached for host names: the address-literal branch has already returned)
	if ($checkDNS && function_exists('checkdnsrr')) {
		if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
			return false;	//	Domain doesn't actually exist
		}
	}

	//	Eliminate all other factors, and the one which remains must be the truth.
	//		(Sherlock Holmes, The Sign of Four)
	return true;
}
?>

Revision: 11229
at February 25, 2009 06:59 by dominicsayers


Updated Code
<?php
/*
Copyright 2009 Dominic Sayers
	[email protected]
	http://www.dominicsayers.com

Contains some code from RFC3696 Email Parser by Cal Henderson <[email protected]>

Version 1.4

This source file is subject to the Common Public Attribution License Version 1.0 (CPAL) license.
The license terms are available through the world-wide-web at http://www.opensource.org/licenses/cpal_1.0
*/

//	PHPLint modules
/*.
	require_module 'standard';
	require_module 'pcre';
.*/
/**
 * Check that an email address is syntactically valid
 *
 * The address is split at its last at-sign. Comments are stripped from both
 * halves with a regular expression. The local part is then checked as a
 * sequence of atoms and quoted strings, and the domain either as an address
 * literal in square brackets (IPv4, IPv6 or mixed) or as a dot-delimited
 * host name.
 *
 * @param string	$email		The address to check
 * @param boolean	$checkDNS	If true, and checkdnsrr() is available, a host name must also
 *								have an A or MX record. Address literals are never looked up.
 * @return boolean				True if every check passes, otherwise false
 */
/*.boolean.*/ function is_email (/*.string.*/ $email, $checkDNS = false) {
	//	Check that $email is a valid address. Read the following RFCs to understand the constraints:
	//		(http://tools.ietf.org/html/rfc5322)
	//		(http://tools.ietf.org/html/rfc3696)
	//		(http://tools.ietf.org/html/rfc5321)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	//		(http://tools.ietf.org/html/rfc1123#section-2.1)
	
	//	the upper limit on address lengths should normally be considered to be 256
	//		(http://www.rfc-editor.org/errata_search.php?rfc=3696)
	//		NB I think John Klensin is misreading RFC 5321 and the limit should actually be 254
	//		However, I will stick to the published number until it is changed.
	//
	//	The maximum total length of a reverse-path or forward-path is 256
	//	characters (including the punctuation and element separators)
	//		(http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3)
	if (strlen($email) > 256)	return false;	//	Too long

	//	Contemporary email addresses consist of a "local part" separated from
	//	a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	$index = strrpos($email,'@');

	if ($index === false)		return false;	//	No at-sign
	if ($index === 0)			return false;	//	No local part
	if ($index > 64)			return false;	//	Local part too long (measured before comments are stripped)

	$localPart		= substr($email, 0, $index);
	$domain			= substr($email, $index + 1);
	$domainLength	= strlen($domain);
	
	if ($domainLength === 0)	return false;	//	No domain part
	if ($domainLength > 255)	return false;	//	Domain part too long
	
	//	Let's check the local part for RFC compliance...

	//	First we need to remove all valid comments
	//	Comment patterns lifted from Cal Henderson (http://iamcal.com) who doesn't suck at regexes
	//	Each variable below is a regex fragment; they are composed into $comment
	$cr				= "\\x0d";
	$lf				= "\\x0a";
	$obs_char		= "[\\x00-\\x09\\x0b\\x0c\\x0e-\\x7f]";
	$text			= "(?:$lf*$cr*$obs_char$lf*$cr*)";
	$obs_qp			= "(?:\\x5c[\\x00-\\x7f])";
	$quoted_pair	= "(?:\\x5c$text|$obs_qp)";
	$crlf			= "(?:$cr$lf)";
	$no_ws_ctl		= "[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]";
	$wsp			= "[\\x20\\x09]";
	$obs_fws		= "(?:$wsp+(?:$crlf$wsp+)*)";
	$fws			= "(?:(?:(?:$wsp*$crlf)?$wsp+)|$obs_fws)";
	$ctext			= "(?:$no_ws_ctl|[\\x21-\\x27\\x2A-\\x5b\\x5d-\\x7e])";
	$ccontent		= "(?:$ctext|$quoted_pair)";
	$comment		= "(?:\\x28(?:$fws?$ccontent)*$fws?\\x29)";

	//	Comments can be nested
	//	Strip innermost comments repeatedly until a pass changes nothing. The assignment
	//	in the loop condition stores the stripped string; a result that PHP treats as
	//	false (such as the empty string) also ends the loop, with nothing left to strip.
	do {
		$interim = preg_replace("/$comment/", '', $localPart);
		if ($interim === $localPart) break;
	} while ($localPart = $interim);

	//
	//	local-part      =       dot-atom / quoted-string / obs-local-part
	//	obs-local-part  =       word *("." word)
	//		(http://tools.ietf.org/html/rfc5322#section-3.4.1)
	//
	//	Problem: need to distinguish between "first.last" and "first"."last"
	//	(i.e. one element or two). The lookahead only splits on a separator that is
	//	followed by an even number of double quotes, i.e. one outside a quoted string.
	$dotArray	= /*. (array[int]string) .*/ preg_split('/(\\.|\\r\\n|\\n|\\r)(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $localPart);
	foreach ($dotArray as $localElement) {
		//	Each dot-delimited component can be an atom or a quoted string
		//	(because of the obs-local-part provision)
		if (preg_match('/^"(?:.)*"$/s', $localElement) > 0) {
			//	Quoted-string tests:
			//
			//	My regex skillz aren't up to distinguishing between \" \\" \\\" \\\\" etc.
			//	So remove all \\ from the string first...
			//	Also remove valid folding white space unless it's escaped
			$localElement = preg_replace('/\\\\\\\\|(?<!\\\\|^)\\x0D\\x0A[ \\x09]/', ' ', $localElement);
			if (preg_match('/(?<!\\\\|^)["\\x0D\\x0A\\x00](?!$)|\\\\"$|""/', $localElement) > 0)	return false;	//	", CR, LF and NUL must be escaped, "" is too short
		} else {
			//	Unquoted string tests:
			//
			//	Period (".") may...appear, but may not be used to start or end the
			//	local part, nor may two or more consecutive periods appear.
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	A zero-length element implies a period at the beginning or end of the
			//	local part, or two periods together. Either way it's not allowed.
			if ($localElement === '')	return false;	//	Dots in wrong place

			//	Any ASCII graphic (printing) character other than the
			//	at-sign ("@"), backslash, double quote, comma, or square brackets may
			//	appear without quoting.  If any of that list of excluded characters
			//	are to appear, they must be quoted
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	Any excluded characters? i.e. 0x00-0x20, @, [, ], \, ", <comma>, (, ), <, >, :, ;
			if (preg_match('/[\\x00-\\x20@\\[\\]\\\\",\\(\\)<>:;]/', $localElement) > 0)	return false;	//	These characters must be in a quoted string
		}
	}

	//	Now let's check the domain part...

	//	First we need to remove all valid comments
	//	Comments can be nested
	do {
		$interim = preg_replace("/$comment/", '', $domain);
		if ($interim === $domain) break;
	} while ($domain = $interim);

	$domainLength	= strlen($domain);

	//	A domain made up of nothing but comments is empty by now. Reject it here
	//	so the code below never indexes into an empty match list.
	if ($domainLength === 0)	return false;	//	No domain part once comments are removed

	//	The domain name can also be replaced by an IP address in square brackets
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
		//	It's an address-literal
		$addressLiteral = substr($domain, 1, $domainLength - 2);
		$matchesIP		= array();
		
		//	Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
			$index = strrpos($addressLiteral, $matchesIP[0]);
			
			if ($index === 0) {
				//	Nothing there except a valid IPv4 address, so...
				return true;
			} else {
				//	Assume it's an attempt at a mixed address (IPv6 + IPv4)
				if ($addressLiteral[$index - 1] !== ':')		return false;	//	Character preceding IPv4 address must be ':'
				if (substr($addressLiteral, 0, 5) !== 'IPv6:')	return false;	//	RFC5321 section 4.1.3

				//	Keep the IPv6 groups only. When the IPv4 part follows 'IPv6:::' directly
				//	($index is 7) the two colons are kept so the '::' test below still sees them.
				$IPv6 = substr($addressLiteral, 5, ($index ===7) ? 2 : $index - 6);
				$groupMax = 6;	//	The IPv4 part stands in for the last two of the eight groups
			}
		} else {
			//	It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:')		return false;	//	RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
			$groupMax = 8;
		}

		//	Each match is one group (the first without, the rest with, a leading colon).
		//	Any character that fits neither form is captured in sub-pattern 1.
		$groupCount	= preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
		$index		= strpos($IPv6,'::');

		if ($index === false) {
			//	We need exactly the right number of groups
			if ($groupCount !== $groupMax)						return false;	//	RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6,'::'))					return false;	//	More than one '::'
			$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
			if ($groupCount > $groupMax)						return false;	//	Too many IPv6 groups in address
		}

		//	Check for unmatched characters
		//	(sorting descending brings any non-empty capture to the front)
		array_multisort($matchesIP[1], SORT_DESC);
		if ($matchesIP[1][0] !== '')							return false;	//	Illegal characters in address

		//	It's a valid IPv6 address, so...
		return true;
	} else {
		//	It's a domain name...

		//	The syntax of a legal Internet host name was specified in RFC-952
		//	One aspect of host name syntax is hereby changed: the
		//	restriction on the first character is relaxed to allow either a
		//	letter or a digit.
		//		(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		//	NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		//	Most common applications, including email and the Web, will generally not permit...escaped strings
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		//	labels for SMTP clients or servers
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	Each match is either one well-formed label (with its trailing dot, if any) or a
		//	single stray character captured in sub-pattern 1.
		//	The D modifier stops $ matching before a final newline, and the s modifier lets
		//	the catch-all (.) capture newlines, so a newline anywhere in the domain counts
		//	as an illegal character instead of being skipped.
		$matches	= array();
		$groupCount	= preg_match_all('/(?:[0-9a-zA-Z][0-9a-zA-Z-]{0,61}[0-9a-zA-Z]|[a-zA-Z])(?:\\.|$)|(.)/Ds', $domain, $matches);
		$level		= count($matches[0]);

		if ($level === 1)										return false;	//	Mail host can't be a TLD

		$TLD = $matches[0][$level - 1];
		if (substr($TLD, strlen($TLD) - 1, 1) === '.')			return false;	//	TLD can't end in a dot
		if (preg_match('/^[0-9]+$/', $TLD) > 0)					return false;	//	TLD can't be all-numeric

		//	Check for unmatched characters
		//	(sorting descending brings any non-empty capture to the front)
		array_multisort($matches[1], SORT_DESC);
		if ($matches[1][0] !== '')								return false;	//	Illegal characters in domain, or label longer than 63 characters

		//	Check DNS?
		if ($checkDNS && function_exists('checkdnsrr')) {
			if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
				return false;	//	Domain doesn't actually exist
			}
		}

		//	Eliminate all other factors, and the one which remains must be the truth.
		//		(Sherlock Holmes, The Sign of Four)
		return true;
	}
}
?>

Revision: 11228
at February 24, 2009 06:58 by dominicsayers


Updated Code
<?php
/*
Copyright 2009 Dominic Sayers
	[email protected]
	http://www.dominicsayers.com

Version 1.3

This source file is subject to the Common Public Attribution License Version 1.0 (CPAL) license.
The license terms are available through the world-wide-web at http://www.opensource.org/licenses/cpal_1.0
*/
/**
 * Check that an email address is valid.
 *
 * The checks are applied in this order and the function returns false at the first failure:
 * overall length, presence and position of the at-sign, local-part syntax (dot-separated
 * atoms and/or quoted strings), then either address-literal syntax (IPv4, IPv6 or mixed)
 * or domain-name syntax. A domain name can optionally be looked up in DNS as well.
 *
 * Read the following RFCs to understand the constraints:
 *		(http://tools.ietf.org/html/rfc3696)
 *		(http://tools.ietf.org/html/rfc2822)
 *		(http://tools.ietf.org/html/rfc2821#section-4.5.3)
 *		(http://tools.ietf.org/html/rfc5322#section-3.4.1)
 *		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
 *		(http://tools.ietf.org/html/rfc4291#section-2.2)
 *		(http://tools.ietf.org/html/rfc1123#section-2.1)
 *
 * @param string	$email		The address to check
 * @param boolean	$checkDNS	If true, and checkdnsrr() exists, a domain name must also have an A or MX record.
 *								Address literals are never looked up.
 * @return boolean	true if the address passed every check, otherwise false
 */
function is_email ($email, $checkDNS = false) {
	//	the upper limit on address lengths should normally be considered to be 256
	//		(http://www.rfc-editor.org/errata_search.php?rfc=3696)
	//		NB I think John Klensin is misreading RFC 2821 and the limit should actually be 254
	//		However, I will stick to the published number until it is changed.
	//
	//	The maximum total length of a reverse-path or forward-path is 256
	//	characters (including the punctuation and element separators)
	//		(http://tools.ietf.org/html/rfc2821#section-4.5.3)
	if (strlen($email) > 256)	return false;	//	Too long

	//	Contemporary email addresses consist of a "local part" separated from
	//	a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//
	//	The LAST at-sign is the separator: earlier ones may legitimately appear inside a quoted string.
	$index = strrpos($email,'@');

	if ($index === false)		return false;	//	No at-sign
	if ($index === 0)			return false;	//	No local part
	if ($index > 64)			return false;	//	Local part too long

	$localPart		= substr($email, 0, $index);
	$domain			= substr($email, $index + 1);
	$domainLength	= strlen($domain);

	if ($domainLength === 0)	return false;	//	No domain part
	if ($domainLength > 255)	return false;	//	Domain part too long

	//	Let's check the local part for RFC compliance...
	//
	//	local-part      =       dot-atom / quoted-string / obs-local-part
	//	obs-local-part  =       word *("." word)
	//		(http://tools.ietf.org/html/rfc2822#section-3.4.1)
	//
	//	Problem: need to distinguish between "first.last" and "first"."last"
	//	(i.e. one element or two). The lookahead only allows a split on a dot or
	//	line break that is followed by an even number of double quotes, i.e. one
	//	that is not inside a quoted string.
	$dotArray	= preg_split('/(\\.|\\r\\n|\\n|\\r)(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $localPart);
	foreach ($dotArray as $localElement) {
		//	Each dot-delimited component can be an atom or a quoted string
		//	(because of the obs-local-part provision)
		if (preg_match('/^"(?:.)*"$/s', $localElement) > 0) {
			//	Quoted-string tests:
			//
			//	Distinguishing between \" \\" \\\" \\\\" etc. in one regex is awkward,
			//	so replace every \\ in the string with a space first...
			//	Valid folding white space (CRLF followed by space or tab) is replaced the same way
			$localElement = preg_replace('/\\\\\\\\|\\x0D\\x0A[ \\x09]/', ' ', $localElement);
			if (preg_match('/(?<!\\\\|^)["\\x0D\\x0A\\x00](?!$)|\\\\"$|""/', $localElement) > 0)	return false;	//	", CR, LF and NUL must be escaped, "" is too short
		} else {
			//	Unquoted string tests:
			//
			//	Period (".") may...appear, but may not be used to start or end the
			//	local part, nor may two or more consecutive periods appear.
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	A zero-length element implies a period at the beginning or end of the
			//	local part, or two periods together. Either way it's not allowed.
			if ($localElement === '')											return false;	//	Dots in wrong place

			//	Any ASCII graphic (printing) character other than the
			//	at-sign ("@"), backslash, double quote, comma, or square brackets may
			//	appear without quoting.  If any of that list of excluded characters
			//	are to appear, they must be quoted
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	Any excluded characters? i.e. 0x00-0x20, @, [, ], \, ", <comma>, (, ), <, >, :, ;
			if (preg_match('/[\\x00-\\x20@\\[\\]\\\\",\\(\\)<>:;]/', $localElement) > 0)	return false;	//	These characters must be in a quoted string
		}
	}

	//	Now let's check the domain part...

	//	The domain name can also be replaced by an IP address in square brackets
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
		//	It's an address-literal: strip the enclosing square brackets
		$addressLiteral = substr($domain, 1, $domainLength - 2);
		$matchesIP		= array();

		//	Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
			$index = strrpos($addressLiteral, $matchesIP[0]);

			if ($index === 0) {
				//	Nothing there except a valid IPv4 address, so...
				return true;
			} else {
				//	Assume it's an attempt at a mixed address (IPv6 + IPv4)
				if ($addressLiteral[$index - 1] !== ':')		return false;	//	Character preceding IPv4 address must be ':'
				if (substr($addressLiteral, 0, 5) !== 'IPv6:')	return false;	//	RFC5321 section 4.1.3

				//	Keep the IPv6 part without the colon that joins it to the IPv4 part,
				//	unless the IPv6 part is just '::' (IPv4 part starts at offset 7)
				$IPv6 = substr($addressLiteral, 5, ($index === 7) ? 2 : $index - 6);
				$groupMax = 6;	//	The IPv4 part stands in for the last two groups
			}
		} else {
			//	It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:')		return false;	//	RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
			$groupMax = 8;
		}

		//	Each match is either a group of up to four hex digits (the first one
		//	without a leading colon) or, failing that, a single stray character
		//	which is captured in sub-pattern 1
		$groupCount	= preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
		$index		= strpos($IPv6,'::');

		if ($index === false) {
			//	We need exactly the right number of groups
			if ($groupCount !== $groupMax)						return false;	//	RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6,'::'))					return false;	//	More than one '::'
			$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
			if ($groupCount > $groupMax)						return false;	//	Too many IPv6 groups in address
		}

		//	Check for unmatched characters: sub-pattern 1 is empty for every legal group
		if (implode('', $matchesIP[1]) !== '')					return false;	//	Illegal characters in address

		//	It's a valid IPv6 address, so...
		return true;
	} else {
		//	It's a domain name...

		//	The syntax of a legal Internet host name was specified in RFC-952
		//	One aspect of host name syntax is hereby changed: the
		//	restriction on the first character is relaxed to allow either a
		//	letter or a digit.
		//		(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		//	NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		//	Most common applications, including email and the Web, will generally not permit...escaped strings
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		//	labels for SMTP clients or servers
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	The pattern ends a label with \z rather than $ and uses the s modifier:
		//	$ would also match just before a final newline and . would skip the
		//	newline itself, letting "example.com\n" through as a valid domain.
		$matches	= array();
		preg_match_all('/(?:[0-9a-zA-Z][0-9a-zA-Z-]{0,61}[0-9a-zA-Z]|[a-zA-Z])(?:\\.|\\z)|(.)/s', $domain, $matches);
		$level		= count($matches[0]);

		if ($level < 2)											return false;	//	Mail host can't be a TLD

		$TLD = $matches[0][$level - 1];
		if (substr($TLD, -1) === '.')							return false;	//	TLD can't end in a dot
		if (preg_match('/^[0-9]+$/', $TLD) > 0)					return false;	//	TLD can't be all-numeric

		//	Check for unmatched characters: sub-pattern 1 is empty for every legal label
		if (implode('', $matches[1]) !== '')					return false;	//	Illegal characters in domain, or label longer than 63 characters

		//	Check DNS?
		if ($checkDNS && function_exists('checkdnsrr')) {
			if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
																return false;	//	Domain doesn't actually exist
			}
		}

		//	Eliminate all other factors, and the one which remains must be the truth.
		//		(Sherlock Holmes, The Sign of Four)
		return true;
	}
}
?>

Revision: 11227
at February 22, 2009 11:04 by dominicsayers


Updated Code
<?php
/*
Copyright 2009 Dominic Sayers
	[email protected]
	http://www.dominicsayers.com

Version 1.2

This source file is subject to the Common Public Attribution License Version 1.0 (CPAL) license.
The license terms are available through the world-wide-web at http://www.opensource.org/licenses/cpal_1.0
*/
/**
 * Check that an email address is valid.
 *
 * The checks are applied in this order and the function returns false at the first failure:
 * overall length, presence and position of the at-sign, local-part syntax (dot-separated
 * atoms and/or quoted strings), then either address-literal syntax (IPv4, IPv6 or mixed)
 * or domain-name syntax. A domain name can optionally be looked up in DNS as well.
 *
 * Read the following RFCs to understand the constraints:
 *		(http://tools.ietf.org/html/rfc3696)
 *		(http://tools.ietf.org/html/rfc2822)
 *		(http://tools.ietf.org/html/rfc2821#section-4.5.3)
 *		(http://tools.ietf.org/html/rfc5322#section-3.4.1)
 *		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
 *		(http://tools.ietf.org/html/rfc4291#section-2.2)
 *		(http://tools.ietf.org/html/rfc1123#section-2.1)
 *
 * @param string	$email		The address to check
 * @param boolean	$checkDNS	If true, and checkdnsrr() exists, a domain name must also have an A or MX record.
 *								Address literals are never looked up.
 * @return boolean	true if the address passed every check, otherwise false
 */
function is_email ($email, $checkDNS = false) {
	//	the upper limit on address lengths should normally be considered to be 256
	//		(http://www.rfc-editor.org/errata_search.php?rfc=3696)
	//		NB I think John Klensin is misreading RFC 2821 and the limit should actually be 254
	//		However, I will stick to the published number until it is changed.
	//
	//	The maximum total length of a reverse-path or forward-path is 256
	//	characters (including the punctuation and element separators)
	//		(http://tools.ietf.org/html/rfc2821#section-4.5.3)
	if (strlen($email) > 256)	return false;	//	Too long

	//	Contemporary email addresses consist of a "local part" separated from
	//	a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//
	//	The LAST at-sign is the separator: earlier ones may legitimately appear inside a quoted string.
	$index = strrpos($email,'@');

	if ($index === false)		return false;	//	No at-sign
	if ($index === 0)			return false;	//	No local part
	if ($index > 64)			return false;	//	Local part too long

	$localPart		= substr($email, 0, $index);
	$domain			= substr($email, $index + 1);
	$domainLength	= strlen($domain);

	if ($domainLength === 0)	return false;	//	No domain part
	if ($domainLength > 255)	return false;	//	Domain part too long

	//	Let's check the local part for RFC compliance...
	//
	//	local-part      =       dot-atom / quoted-string / obs-local-part
	//	obs-local-part  =       word *("." word)
	//		(http://tools.ietf.org/html/rfc2822#section-3.4.1)
	//
	//	Problem: need to distinguish between "first.last" and "first"."last"
	//	(i.e. one element or two). The lookahead only allows a split on a dot or
	//	line break that is followed by an even number of double quotes, i.e. one
	//	that is not inside a quoted string.
	$dotArray	= preg_split('/(\\.|\\r\\n|\\n|\\r)(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $localPart);
	foreach ($dotArray as $localElement) {
		//	Each dot-delimited component can be an atom or a quoted string
		//	(because of the obs-local-part provision)
		if (preg_match('/^"(?:.)*"$/', $localElement) > 0) {
			//	Quoted-string tests:
			//
			//	Distinguishing between \" \\" \\\" \\\\" etc. in one regex is awkward,
			//	so remove all \\ from the string first...
			$localElement = str_replace('\\\\', '', $localElement);
			if (preg_match('/(?<!\\\\|^)"(?!$)|\\\\"$|""/', $localElement) > 0)	return false;	//	" must be escaped, \ must have a partner, "" is too short
		} else {
			//	Unquoted string tests:
			//
			//	Period (".") may...appear, but may not be used to start or end the
			//	local part, nor may two or more consecutive periods appear.
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	A zero-length element implies a period at the beginning or end of the
			//	local part, or two periods together. Either way it's not allowed.
			if ($localElement === '')											return false;	//	Dots in wrong place

			//	Any ASCII graphic (printing) character other than the
			//	at-sign ("@"), backslash, double quote, comma, or square brackets may
			//	appear without quoting.  If any of that list of excluded characters
			//	are to appear, they must be quoted
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	Any excluded characters? i.e. 0x00-0x20, @, [, ], \, ", <comma>
			if (preg_match('/[\\x00-\\x20@\\[\\]\\\\",]/', $localElement) > 0)	return false;	//	These characters must be in a quoted string
		}
	}

	//	Now let's check the domain part...

	//	The domain name can also be replaced by an IP address in square brackets
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
		//	It's an address-literal: strip the enclosing square brackets
		$addressLiteral = substr($domain, 1, $domainLength - 2);
		$matchesIP		= array();

		//	Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
			$index = strrpos($addressLiteral, $matchesIP[0]);

			if ($index === 0) {
				//	Nothing there except a valid IPv4 address, so...
				return true;
			} else {
				//	Assume it's an attempt at a mixed address (IPv6 + IPv4)
				if ($addressLiteral[$index - 1] !== ':')		return false;	//	Character preceding IPv4 address must be ':'
				if (substr($addressLiteral, 0, 5) !== 'IPv6:')	return false;	//	RFC5321 section 4.1.3

				//	Keep the IPv6 part without the colon that joins it to the IPv4 part,
				//	unless the IPv6 part is just '::' (IPv4 part starts at offset 7)
				$IPv6 = substr($addressLiteral, 5, ($index === 7) ? 2 : $index - 6);
				$groupMax = 6;	//	The IPv4 part stands in for the last two groups
			}
		} else {
			//	It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:')		return false;	//	RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
			$groupMax = 8;
		}

		//	Each match is either a group of up to four hex digits (the first one
		//	without a leading colon) or, failing that, a single stray character
		//	which is captured in sub-pattern 1
		$groupCount	= preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
		$index		= strpos($IPv6,'::');

		if ($index === false) {
			//	We need exactly the right number of groups
			if ($groupCount !== $groupMax)						return false;	//	RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6,'::'))					return false;	//	More than one '::'
			$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
			if ($groupCount > $groupMax)						return false;	//	Too many IPv6 groups in address
		}

		//	Check for unmatched characters: sub-pattern 1 is empty for every legal group
		if (implode('', $matchesIP[1]) !== '')					return false;	//	Illegal characters in address

		//	It's a valid IPv6 address, so...
		return true;
	} else {
		//	It's a domain name...

		//	The syntax of a legal Internet host name was specified in RFC-952
		//	One aspect of host name syntax is hereby changed: the
		//	restriction on the first character is relaxed to allow either a
		//	letter or a digit.
		//		(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		//	NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		//	Most common applications, including email and the Web, will generally not permit...escaped strings
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		//	labels for SMTP clients or servers
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	The pattern ends a label with \z rather than $ and uses the s modifier:
		//	$ would also match just before a final newline and . would skip the
		//	newline itself, letting "example.com\n" through as a valid domain.
		$matches	= array();
		preg_match_all('/(?:[0-9a-zA-Z][0-9a-zA-Z-]{0,61}[0-9a-zA-Z]|[a-zA-Z])(?:\\.|\\z)|(.)/s', $domain, $matches);
		$level		= count($matches[0]);

		if ($level < 2)											return false;	//	Mail host can't be a TLD

		$TLD = $matches[0][$level - 1];
		if (substr($TLD, -1) === '.')							return false;	//	TLD can't end in a dot
		if (preg_match('/^[0-9]+$/', $TLD) > 0)					return false;	//	TLD can't be all-numeric

		//	Check for unmatched characters: sub-pattern 1 is empty for every legal label
		if (implode('', $matches[1]) !== '')					return false;	//	Illegal characters in domain, or label longer than 63 characters

		//	Check DNS?
		if ($checkDNS && function_exists('checkdnsrr')) {
			if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
																return false;	//	Domain doesn't actually exist
			}
		}

		//	Eliminate all other factors, and the one which remains must be the truth.
		//		(Sherlock Holmes, The Sign of Four)
		return true;
	}
}
?>

Revision: 11226
at February 20, 2009 05:57 by dominicsayers


Updated Code
<?php
/*
Copyright 2009 Dominic Sayers
	[email protected]
	http://www.dominicsayers.com

Version 1.0

This source file is subject to the Common Public Attribution License Version 1.0 (CPAL) license.
The license terms are available through the world-wide-web at http://www.opensource.org/licenses/cpal_1.0
*/
/**
 * Check that an email address is valid.
 *
 * The checks are applied in this order and the function returns false at the first failure:
 * overall length, presence and position of the at-sign, local-part syntax (dot-separated
 * atoms and/or quoted strings), then either address-literal syntax (IPv4, IPv6 or mixed)
 * or domain-name syntax. A domain name can optionally be looked up in DNS as well.
 *
 *		(http://tools.ietf.org/html/rfc3696)
 *		(http://tools.ietf.org/html/rfc2822)
 *		(http://tools.ietf.org/html/rfc5322#section-3.4.1)
 *		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
 *		(http://tools.ietf.org/html/rfc4291#section-2.2)
 *		(http://tools.ietf.org/html/rfc1123#section-2.1)
 *
 * @param string	$email		The address to check
 * @param boolean	$checkDNS	If true, and checkdnsrr() exists, a domain name must also have an A or MX record.
 *								Address literals are never looked up.
 * @return boolean	true if the address passed every check, otherwise false
 */
function is_email ($email, $checkDNS = false) {
	//	the upper limit on address lengths should normally be considered to be 256
	//		(http://www.rfc-editor.org/errata_search.php?rfc=3696)
	if (strlen($email) > 256)	return false;	//	Too long

	//	Contemporary email addresses consist of a "local part" separated from
	//	a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//
	//	The LAST at-sign is the separator: earlier ones may legitimately appear inside a quoted string.
	$index = strrpos($email,'@');

	if ($index === false)		return false;	//	No at-sign
	if ($index === 0)			return false;	//	No local part
	if ($index > 64)			return false;	//	Local part too long

	$localPart		= substr($email, 0, $index);
	$domain			= substr($email, $index + 1);
	$domainLength	= strlen($domain);

	if ($domainLength === 0)	return false;	//	No domain part
	if ($domainLength > 255)	return false;	//	Domain part too long

	//	Let's check the local part for RFC compliance...
	//
	//	local-part      =       dot-atom / quoted-string / obs-local-part
	//	obs-local-part  =       word *("." word)
	//		(http://tools.ietf.org/html/rfc2822#section-3.4.1)
	//
	//	A local part that is wrapped in double quotes is treated as a single element;
	//	anything else is split on every dot.
	if (preg_match('/^"(?:.)*"$/', $localPart) > 0) {
		$dotArray	= array($localPart);
	} else {
		$dotArray	= explode('.', $localPart);
	}

	foreach ($dotArray as $localElement) {
		//	Period (".") may...appear, but may not be used to start or end the
		//	local part, nor may two or more consecutive periods appear.
		//		(http://tools.ietf.org/html/rfc3696#section-3)
		//
		//	A zero-length element implies a period at the beginning or end of the
		//	local part, or two periods together. Either way it's not allowed.
		if ($localElement === '')										return false;	//	Dots in wrong place

		//	Each dot-delimited component can be an atom or a quoted string
		//	(because of the obs-local-part provision)
		if (preg_match('/^"(?:.)*"$/', $localElement) > 0) {
			//	Quoted-string tests:
			//
			//	Note that since quoted-pair
			//	is allowed in a quoted-string, the quote and backslash characters may
			//	appear in a quoted-string so long as they appear as a quoted-pair.
			//		(http://tools.ietf.org/html/rfc2822#section-3.2.5)
			//
			//	The enclosing quotes and every quoted-pair match the first alternative;
			//	a lone backslash or quote falls through to sub-pattern 1.
			$matches	= array();
			preg_match_all('/(?:^"|"$|\\\\\\\\|\\\\")|(\\\\|")/', $localElement, $matches);
			if (implode('', $matches[1]) !== '')						return false;	//	Unescaped quote or backslash character inside quoted string
			if (preg_match('/^"\\\\*"$/', $localElement) > 0)			return false;	//	"" and "\" are slipping through - note: must tidy this up
		} else {
			//	Unquoted string tests:
			//
			//	Any ASCII graphic (printing) character other than the
			//	at-sign ("@"), backslash, double quote, comma, or square brackets may
			//	appear without quoting.  If any of that list of excluded characters
			//	are to appear, they must be quoted
			//		(http://tools.ietf.org/html/rfc3696#section-3)
			//
			//	Any excluded characters? i.e. <space>, @, [, ], \, ", <comma>
			if (preg_match('/[ @\\[\\]\\\\",]/', $localElement) > 0) {
				//	Check all excluded characters are escaped: drop every backslash-escaped
				//	one and see whether any excluded character is left over.
				//	(The braces matter: without them the second test ran on an undefined
				//	or stale $stripped whenever this element had no excluded characters.)
				$stripped = preg_replace('/\\\\[ @\\[\\]\\\\",]/', '', $localElement);
				if (preg_match('/[ @\\[\\]\\\\",]/', $stripped) > 0)	return false;	//	Unquoted excluded characters
			}
		}
	}

	//	Now let's check the domain part...

	//	The domain name can also be replaced by an IP address in square brackets
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
		//	It's an address-literal: strip the enclosing square brackets
		$addressLiteral = substr($domain, 1, $domainLength - 2);
		$matchesIP		= array();

		//	Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
			$index = strrpos($addressLiteral, $matchesIP[0]);

			if ($index === 0) {
				//	Nothing there except a valid IPv4 address, so...
				return true;
			} else {
				//	Assume it's an attempt at a mixed address (IPv6 + IPv4)
				if ($addressLiteral[$index - 1] !== ':')			return false;	//	Character preceding IPv4 address must be ':'
				if (substr($addressLiteral, 0, 5) !== 'IPv6:')		return false;	//	RFC5321 section 4.1.3

				//	Keep the IPv6 part without the colon that joins it to the IPv4 part,
				//	unless the IPv6 part is just '::' (IPv4 part starts at offset 7)
				$IPv6 = substr($addressLiteral, 5, ($index === 7) ? 2 : $index - 6);
				$groupMax = 6;	//	The IPv4 part stands in for the last two groups
			}
		} else {
			//	It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:')			return false;	//	RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
			$groupMax = 8;
		}

		//	Each match is either a group of up to four hex digits (the first one
		//	without a leading colon) or, failing that, a single stray character
		//	which is captured in sub-pattern 1
		$groupCount	= preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
		$index		= strpos($IPv6,'::');

		if ($index === false) {
			//	We need exactly the right number of groups
			if ($groupCount !== $groupMax)							return false;	//	RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6,'::'))						return false;	//	More than one '::'
			$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
			if ($groupCount > $groupMax)							return false;	//	Too many IPv6 groups in address
		}

		//	Check for unmatched characters: sub-pattern 1 is empty for every legal group
		if (implode('', $matchesIP[1]) !== '')						return false;	//	Illegal characters in address

		//	It's a valid IPv6 address, so...
		return true;
	} else {
		//	It's a domain name...

		//	The syntax of a legal Internet host name was specified in RFC-952
		//	One aspect of host name syntax is hereby changed: the
		//	restriction on the first character is relaxed to allow either a
		//	letter or a digit.
		//		(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		//	NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		//	Most common applications, including email and the Web, will generally not permit...escaped strings
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		//	labels for SMTP clients or servers
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	The pattern ends a label with \z rather than $ and uses the s modifier:
		//	$ would also match just before a final newline and . would skip the
		//	newline itself, letting "example.com\n" through as a valid domain.
		$matches	= array();
		preg_match_all('/(?:[0-9a-zA-Z][0-9a-zA-Z-]{0,61}[0-9a-zA-Z]|[a-zA-Z])(?:\\.|\\z)|(.)/s', $domain, $matches);
		$level		= count($matches[0]);

		if ($level < 2)												return false;	//	Mail host can't be a TLD

		$TLD = $matches[0][$level - 1];
		if (substr($TLD, -1) === '.')								return false;	//	TLD can't end in a dot
		if (preg_match('/^[0-9]+$/', $TLD) > 0)						return false;	//	TLD can't be all-numeric

		//	Check for unmatched characters: sub-pattern 1 is empty for every legal label
		if (implode('', $matches[1]) !== '')						return false;	//	Illegal characters in domain, or label longer than 63 characters

		//	Check DNS?
		if ($checkDNS && function_exists('checkdnsrr')) {
			if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
																	return false;	//	Domain doesn't actually exist
			}
		}

		//	Eliminate all other factors, and the one which remains must be the truth.
		//		(Sherlock Holmes, The Sign of Four)
		return true;
	}
}
?>

Revision: 11225
at February 11, 2009 07:31 by dominicsayers


Updated Code
<?php
/*
Copyright 2009 Dominic Sayers
	[email protected]
	http://www.dominicsayers.com

Version 0.6

This source file is subject to the Common Public Attribution License Version 1.0 (CPAL) license.
The license terms are available through the world-wide-web at http://www.opensource.org/licenses/cpal_1.0
*/
/**
 * Check that an email address is valid.
 *
 * The checks are applied in this order and the function returns false at the first failure:
 * overall length, presence and position of the at-sign, local-part syntax (dot placement,
 * then quoted-string or unquoted rules), then either address-literal syntax (IPv4, IPv6 or
 * mixed) or domain-name syntax. A domain name can optionally be looked up in DNS as well.
 *
 *		(http://tools.ietf.org/html/rfc3696)
 *		(http://tools.ietf.org/html/rfc5322#section-3.4.1)
 *		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
 *		(http://tools.ietf.org/html/rfc4291#section-2.2)
 *		(http://tools.ietf.org/html/rfc1123#section-2.1)
 *
 * @param string	$email		The address to check
 * @param boolean	$checkDNS	If true, and checkdnsrr() exists, a domain name must also have an A or MX record.
 *								Address literals are never looked up.
 * @return boolean	true if the address passed every check, otherwise false
 */
function is_email ($email, $checkDNS = false) {
	//	the upper limit on address lengths should normally be considered to be 256
	//		(http://www.rfc-editor.org/errata_search.php?rfc=3696)
	if (strlen($email) > 256)	return false;	//	Too long

	//	Contemporary email addresses consist of a "local part" separated from
	//	a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//
	//	The LAST at-sign is the separator: earlier ones may legitimately appear inside a quoted string.
	$index = strrpos($email,'@');

	if ($index === false)		return false;	//	No at-sign
	if ($index === 0)			return false;	//	No local part
	if ($index > 64)			return false;	//	Local part too long

	$localPart		= substr($email, 0, $index);
	$domain			= substr($email, $index + 1);
	$domainLength	= strlen($domain);

	if ($domainLength === 0)	return false;	//	No domain part
	if ($domainLength > 255)	return false;	//	Domain part too long

	//	Let's check the local part for RFC compliance...
	//
	//	Period (".") may...appear, but may not be used to start or end the
	//	local part, nor may two or more consecutive periods appear.
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	if (preg_match('/^\\.|\\.\\.|\\.$/', $localPart) > 0)				return false;	//	Dots in wrong place

	//	Any ASCII graphic (printing) character other than the
	//	at-sign ("@"), backslash, double quote, comma, or square brackets may
	//	appear without quoting.  If any of that list of excluded characters
	//	are to appear, they must be quoted
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	if (preg_match('/^"(?:.)*"$/', $localPart) > 0) {
		//	Local part is a quoted string
		if (preg_match('/(?:.)+[^\\\\]"(?:.)+/', $localPart) > 0)		return false;	//	Unescaped quote character inside quoted string
	} else {
		if (preg_match('/[ @\\[\\]\\\\",]/', $localPart) > 0) {
			//	Check all excluded characters are escaped: drop every backslash-escaped
			//	one and see whether any excluded character is left over.
			//	(The braces matter: without them the second test ran on an undefined
			//	$stripped whenever the local part had no excluded characters.)
			$stripped = preg_replace('/\\\\[ @\\[\\]\\\\",]/', '', $localPart);
			if (preg_match('/[ @\\[\\]\\\\",]/', $stripped) > 0)		return false;	//	Unquoted excluded characters
		}
	}

	//	Now let's check the domain part...

	//	The domain name can also be replaced by an IP address in square brackets
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
		//	It's an address-literal: strip the enclosing square brackets
		$addressLiteral = substr($domain, 1, $domainLength - 2);
		$matchesIP		= array();

		//	Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
			$index = strrpos($addressLiteral, $matchesIP[0]);

			if ($index === 0) {
				//	Nothing there except a valid IPv4 address, so...
				return true;
			} else {
				//	Assume it's an attempt at a mixed address (IPv6 + IPv4)
				if ($addressLiteral[$index - 1] !== ':')			return false;	//	Character preceding IPv4 address must be ':'
				if (substr($addressLiteral, 0, 5) !== 'IPv6:')		return false;	//	RFC5321 section 4.1.3

				//	Keep the IPv6 part without the colon that joins it to the IPv4 part,
				//	unless the IPv6 part is just '::' (IPv4 part starts at offset 7)
				$IPv6 = substr($addressLiteral, 5, ($index === 7) ? 2 : $index - 6);
				$groupMax = 6;	//	The IPv4 part stands in for the last two groups
			}
		} else {
			//	It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:')			return false;	//	RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
			$groupMax = 8;
		}

		//	Each match is either a group of up to four hex digits (the first one
		//	without a leading colon) or, failing that, a single stray character
		//	which is captured in sub-pattern 1
		$groupCount	= preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
		$index		= strpos($IPv6,'::');

		if ($index === false) {
			//	We need exactly the right number of groups
			if ($groupCount !== $groupMax)							return false;	//	RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6,'::'))						return false;	//	More than one '::'
			$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
			if ($groupCount > $groupMax)							return false;	//	Too many IPv6 groups in address
		}

		//	Check for unmatched characters: sub-pattern 1 is empty for every legal group
		if (implode('', $matchesIP[1]) !== '')						return false;	//	Illegal characters in address

		//	It's a valid IPv6 address, so...
		return true;
	} else {
		//	It's a domain name...

		//	The syntax of a legal Internet host name was specified in RFC-952
		//	One aspect of host name syntax is hereby changed: the
		//	restriction on the first character is relaxed to allow either a
		//	letter or a digit.
		//		(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		//	NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		//	Most common applications, including email and the Web, will generally not permit...escaped strings
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		//	labels for SMTP clients or servers
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	The pattern ends a label with \z rather than $ and uses the s modifier:
		//	$ would also match just before a final newline and . would skip the
		//	newline itself, letting "example.com\n" through as a valid domain.
		$matches	= array();
		preg_match_all('/(?:[0-9a-zA-Z][0-9a-zA-Z-]{0,61}[0-9a-zA-Z]|[a-zA-Z])(?:\\.|\\z)|(.)/s', $domain, $matches);
		$level		= count($matches[0]);

		if ($level < 2)												return false;	//	Mail host can't be a TLD

		$TLD = $matches[0][$level - 1];
		if (substr($TLD, -1) === '.')								return false;	//	TLD can't end in a dot
		if (preg_match('/^[0-9]+$/', $TLD) > 0)						return false;	//	TLD can't be all-numeric

		//	Check for unmatched characters: sub-pattern 1 is empty for every legal label
		if (implode('', $matches[1]) !== '')						return false;	//	Illegal characters in domain, or label longer than 63 characters

		//	Check DNS?
		if ($checkDNS && function_exists('checkdnsrr')) {
			if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
																	return false;	//	Domain doesn't actually exist
			}
		}

		//	Eliminate all other factors, and the one which remains must be the truth.
		//		(Sherlock Holmes, The Sign of Four)
		return true;
	}
}
?>

Revision: 11224
at January 29, 2009 08:56 by dominicsayers


Updated Code
/*
Copyright 2009 Dominic Sayers
([email protected])
(http://www.dominicsayers.com)

This source file is subject to the Common Public Attribution License Version 1.0 (CPAL) license.
The license terms are available through the world-wide-web at http://www.opensource.org/licenses/cpal_1.0
*/
/**
 * Check that $email is a syntactically valid email address
 *
 *		(http://tools.ietf.org/html/rfc3696)
 *		(http://tools.ietf.org/html/rfc5322#section-3.4.1)
 *		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
 *		(http://tools.ietf.org/html/rfc4291#section-2.2)
 *		(http://tools.ietf.org/html/rfc1123#section-2.1)
 *
 * @param string	$email		The email address to check
 * @param boolean	$checkDNS	If true, and checkdnsrr() exists, the domain must also have an A or MX record
 * @return boolean			True if the address passes every check, false otherwise
 */
function is_email ($email, $checkDNS = false) {
	//	Contemporary email addresses consist of a "local part" separated from
	//	a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//	The LAST at-sign is taken as the separator; earlier ones belong to the local part.
	$index = strrpos($email,'@');

	if ($index === false)		return false;	//	No at-sign
	if ($index === 0)			return false;	//	No local part
	if ($index > 64)			return false;	//	Local part too long

	$localPart		= substr($email, 0, $index);
	$domain			= substr($email, $index + 1);
	$domainLength	= strlen($domain);

	if ($domainLength === 0)	return false;	//	No domain part
	if ($domainLength > 255)	return false;	//	Domain part too long

	//	Let's check the local part for RFC compliance...
	//
	//	Period (".") may...appear, but may not be used to start or end the
	//	local part, nor may two or more consecutive periods appear.
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	if (preg_match('/^\\.|\\.\\.|\\.$/', $localPart) > 0)				return false;	//	Dots in wrong place

	//	Any ASCII graphic (printing) character other than the
	//	at-sign ("@"), backslash, double quote, comma, or square brackets may
	//	appear without quoting.  If any of that list of excluded characters
	//	are to appear, they must be quoted
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	if (preg_match('/^"(?:.)*"$/', $localPart) > 0) {
		//	Local part is a quoted string
		if (preg_match('/(?:.)+[^\\\\]"(?:.)+/', $localPart) > 0)	return false;	//	Unescaped quote character inside quoted string
	} elseif (preg_match('/[ @\\[\\]\\\\",]/', $localPart) > 0) {
		//	Check all excluded characters are escaped: remove every backslash-escaped
		//	pair, then anything from the excluded set that is left was not escaped.
		//	Both statements sit inside the braces so $stripped is never read before it is set.
		$stripped = preg_replace('/\\\\[ @\\[\\]\\\\",]/', '', $localPart);
		if (preg_match('/[ @\\[\\]\\\\",]/', $stripped) > 0)		return false;	//	Unquoted excluded characters
	}

	//	Now let's check the domain part...

	//	The domain name can also be replaced by an IP address in square brackets
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
		//	It's an address-literal
		$addressLiteral = substr($domain, 1, $domainLength - 2);
		$matchesIP		= array();

		//	Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
			$index = strrpos($addressLiteral, $matchesIP[0]);

			if ($index === 0) {
				//	Nothing there except a valid IPv4 address, so...
				return true;
			} else {
				//	Assume it's an attempt at a mixed address (IPv6 + IPv4)
				if ($addressLiteral[$index - 1] !== ':')			return false;	//	Character preceding IPv4 address must be ':'
				if (substr($addressLiteral, 0, 5) !== 'IPv6:')		return false;	//	RFC5321 section 4.1.3

				//	Offset 7 means the literal is "IPv6:::a.b.c.d", so the IPv6 part is just "::".
				//	Otherwise drop the "IPv6:" tag and the single colon that joins the groups to the IPv4 part.
				$IPv6 = substr($addressLiteral, 5, ($index ===7) ? 2 : $index - 6);
				$groupMax = 6;
			}
		} else {
			//	It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:')			return false;	//	RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
			$groupMax = 8;
		}

		//	Each match is one group; anything that is not part of a group lands in capture 1
		$groupCount	= preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
		$index		= strpos($IPv6,'::');

		if ($index === false) {
			//	We need exactly the right number of groups
			if ($groupCount !== $groupMax)							return false;	//	RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6,'::'))						return false;	//	More than one '::'
			//	A '::' in the middle of the address lowers the permitted number of groups by one
			$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
			if ($groupCount > $groupMax)							return false;	//	Too many IPv6 groups in address
		}

		//	Check for unmatched characters (sorting descending puts any non-empty capture first)
		array_multisort($matchesIP[1], SORT_DESC);
		if ($matchesIP[1][0] !== '')									return false;	//	Illegal characters in address

		//	It's a valid IPv6 address, so...
		return true;
	} else {
		//	It's a domain name...

		//	The syntax of a legal Internet host name was specified in RFC-952
		//	One aspect of host name syntax is hereby changed: the
		//	restriction on the first character is relaxed to allow either a
		//	letter or a digit.
		//		(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		//	NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		//	Most common applications, including email and the Web, will generally not permit...escaped strings
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		//	labels for SMTP clients or servers
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	The D modifier stops '$' matching before a trailing newline and the s modifier lets
		//	'.' capture a newline, so a line break in the domain is reported as an illegal character.
		$matches	= array();
		preg_match_all('/(?:[0-9a-zA-Z][0-9a-zA-Z-]{0,61}[0-9a-zA-Z]|[a-zA-Z])(?:\\.|$)|(.)/Ds', $domain, $matches);
		$level		= count($matches[0]);

		if ($level === 1)											return false;	//	Mail host can't be a TLD

		$TLD = $matches[0][$level - 1];
		if (substr($TLD, strlen($TLD) - 1, 1) === '.')				return false;	//	TLD can't end in a dot
		if (preg_match('/^[0-9]+$/', $TLD) > 0)						return false;	//	TLD can't be all-numeric

		//	Check for unmatched characters (sorting descending puts any non-empty capture first)
		array_multisort($matches[1], SORT_DESC);
		if ($matches[1][0] !== '')							return false;	//	Illegal characters in domain, or label longer than 63 characters

		//	Check DNS?
		if ($checkDNS && function_exists('checkdnsrr')) {
			if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
																	return false;	//	Domain doesn't actually exist
			}
		}

		//	Eliminate all other factors, and the one which remains must be the truth.
		//		(Sherlock Holmes, The Sign of Four)
		return true;
	}
}

/**
 * Run is_email() on one address and describe the outcome as a line of HTML
 *
 * @param string	$email	The address to test
 * @param string	$reason	Empty if the address is expected to be valid, otherwise why it should be rejected
 * @return string		One HTML line; a result that differs from the expectation is flagged in bold
 */
function unitTest ($email, $reason = '') {
	//	Supplying a reason is what marks a test case as "should be rejected"
	$shouldPass	= ($reason === '');
	$isValid	= is_email($email);

	$line = "The address <i>$email</i> is";
	if (!$isValid)					$line .= ' not';
	$line .= ' valid.';
	if ($isValid !== $shouldPass)	$line .= ' <b>This was unexpected!</b>';
	if ($reason !== '')				$line .= " Reason: $reason";

	return $line . "<br />\n";
}

//	Email validator test cases (Dominic Sayers, January 2009)
//	Each call echoes one line of HTML built by unitTest(). A call with a single
//	argument expects is_email() to accept the address; a call that also passes a
//	reason expects the address to be rejected. unitTest() marks any mismatch as unexpected.
//	NOTE(review): the literal '[email protected]' looks like an address that was masked
//	when this page was captured - confirm against the original test data.
//	Valid addresses
echo unitTest('[email protected]');
echo unitTest('1234567890123456789012345678901234567890123456789012345678901234@example.com');
echo unitTest('"first last"@example.com');
echo unitTest('"first\\"last"@example.com');	//	Not totally sure whether this is valid or not
echo unitTest('first\\@[email protected]');
echo unitTest('"first@last"@example.com');
echo unitTest('first\\\\[email protected]');	//	Note that \ is escaped even in single-quote strings, so this is testing "first\\last"@example.com
echo unitTest('first.last@x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x2345');
echo unitTest('first.last@[12.34.56.78]');
echo unitTest('first.last@[IPv6:::12.34.56.78]');
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:12.34.56.78]');
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:12.34.56.78]');
echo unitTest('first.last@[IPv6:::1111:2222:3333:4444:5555:6666]');
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:5555:6666]');
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666::]');
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888]');
echo unitTest('first.last@x23456789012345678901234567890123456789012345678901234567890123.example.com');
echo unitTest('[email protected]');
echo unitTest('[email protected]');

//	Invalid addresses (the second argument states why each one should be rejected)
echo unitTest('first.last', "No @");
echo unitTest('@example.com', "No local part");
echo unitTest('12345678901234567890123456789012345678901234567890123456789012345@example.com', "Local part more than 64 characters");
echo unitTest('[email protected]', "Local part starts with a dot");
echo unitTest('[email protected]', "Local part ends with a dot");
echo unitTest('[email protected]', "Local part has consecutive dots");
echo unitTest('"first"last"@example.com', "Local part contains unescaped excluded characters");
echo unitTest('first\\\\@[email protected]', "Local part contains unescaped excluded characters");
echo unitTest('first.last@', "No domain");
echo unitTest('first.last@x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456', "Domain exceeds 255 chars");
echo unitTest('first.last@[.12.34.56.78]', "Only char that can precede IPv4 address is ':'");
echo unitTest('first.last@[12.34.56.789]', "Can't be interpreted as IPv4 so IPv6 tag is missing");
echo unitTest('first.last@[::12.34.56.78]', "IPv6 tag is missing");
echo unitTest('first.last@[IPv5:::12.34.56.78]', "IPv6 tag is wrong");
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:5555:12.34.56.78]', "Too many IPv6 groups (4 max)");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:12.34.56.78]', "Not enough IPv6 groups");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:12.34.56.78]', "Too many IPv6 groups (6 max)");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777]', "Not enough IPv6 groups");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888:9999]', "Too many IPv6 groups (8 max)");
echo unitTest('first.last@[IPv6:1111:2222::3333::4444:5555:6666]', "Too many '::' (can be none or one)");
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:5555:6666:7777]', "Too many IPv6 groups (6 max)");
echo unitTest('first.last@[IPv6:1111:2222:333x::4444:5555]', "x is not valid in an IPv6 address");
echo unitTest('first.last@[IPv6:1111:2222:33333::4444:5555]', "33333 is not a valid group in an IPv6 address");
echo unitTest('[email protected]', "TLD can't be all digits");
echo unitTest('first.last@com', "Mail host must be second- or lower level");
echo unitTest('[email protected]', "Label can't begin with a hyphen");
echo unitTest('[email protected]', "Label can't end with a hyphen");
echo unitTest('first.last@x234567890123456789012345678901234567890123456789012345678901234.example.com', "Label can't be longer than 63 octets");

//	Test cases from RFC3696 (February 2004, http://tools.ietf.org/html/rfc3696#section-3)
//	All of these are expected to be valid
echo unitTest('Abc\\@[email protected]');
echo unitTest('Fred\\ [email protected]');
echo unitTest('Joe.\\\\[email protected]');
echo unitTest('"Abc@def"@example.com');
echo unitTest('"Fred Bloggs"@example.com');
echo unitTest('[email protected]');
echo unitTest('customer/[email protected]');
echo unitTest('[email protected]');
echo unitTest('!def!xyz%[email protected]');
echo unitTest('[email protected]');

//	Test cases from Doug Lovell (LinuxJournal, June 2007, http://www.linuxjournal.com/article/9585)
//	These use double-quoted PHP strings, so \\ is one backslash and \" is a literal quote.
//	Calls without a reason are expected to be valid; the rest are expected to fail.
echo unitTest("[email protected]");
echo unitTest("abc\\@[email protected]");
echo unitTest("abc\\\\@example.com");
echo unitTest("Fred\\ [email protected]");
echo unitTest("Joe.\\\\[email protected]");
echo unitTest("\"Abc@def\"@example.com");
echo unitTest("\"Fred Bloggs\"@example.com");
echo unitTest("customer/[email protected]");
echo unitTest("\[email protected]");
echo unitTest("!def!xyz%[email protected]");
echo unitTest("[email protected]");
echo unitTest("[email protected]");
echo unitTest("[email protected]");
echo unitTest("Doug\\ \\\"Ace\\\"\\ [email protected]");
echo unitTest("\"Doug \\\"Ace\\\" L.\"@example.com");
echo unitTest("abc@[email protected]", "Doug Lovell says this should fail");
echo unitTest("abc\\\\@[email protected]", "Doug Lovell says this should fail");
echo unitTest("abc\\@example.com", "Doug Lovell says this should fail");
echo unitTest("@example.com", "Doug Lovell says this should fail");
echo unitTest("doug@", "Doug Lovell says this should fail");
echo unitTest("\"[email protected]", "Doug Lovell says this should fail");
echo unitTest("ote\"@example.com", "Doug Lovell says this should fail");
echo unitTest("[email protected]", "Doug Lovell says this should fail");
echo unitTest("[email protected]", "Doug Lovell says this should fail");
echo unitTest("[email protected]", "Doug Lovell says this should fail");
echo unitTest("\"Doug \"Ace\" L.\"@example.com", "Doug Lovell says this should fail");
echo unitTest("Doug\\ \\\"Ace\\\"\\ L\\[email protected]", "Doug Lovell says this should fail");
echo unitTest("hello [email protected]", "Doug Lovell says this should fail");
echo unitTest("[email protected].", "Doug Lovell says this should fail");

Revision: 11223
at January 29, 2009 07:27 by dominicsayers


Updated Code
/*
Copyright 2009 Dominic Sayers
([email protected])
(http://www.dominicsayers.com)

This source file is subject to the Common Public Attribution License Version 1.0 (CPAL) license.
The license terms are available through the world-wide-web at http://www.opensource.org/licenses/cpal_1.0
*/
/**
 * Check that $email is a syntactically valid email address
 *
 *		(http://tools.ietf.org/html/rfc3696)
 *		(http://tools.ietf.org/html/rfc5322#section-3.4.1)
 *		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
 *		(http://tools.ietf.org/html/rfc4291#section-2.2)
 *		(http://tools.ietf.org/html/rfc1123#section-2.1)
 *
 * @param string	$email		The email address to check
 * @param boolean	$checkDNS	If true, and checkdnsrr() exists, the domain must also have an A or MX record
 * @return boolean			True if the address passes every check, false otherwise
 */
function is_email ($email, $checkDNS = false) {
	//	Contemporary email addresses consist of a "local part" separated from
	//	a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//	The LAST at-sign is taken as the separator; earlier ones belong to the local part.
	$index = strrpos($email,'@');

	if ($index === false)		return false;	//	No at-sign
	if ($index === 0)			return false;	//	No local part
	if ($index > 64)			return false;	//	Local part too long

	$localPart		= substr($email, 0, $index);
	$domain			= substr($email, $index + 1);
	$domainLength	= strlen($domain);

	if ($domainLength === 0)	return false;	//	No domain part
	if ($domainLength > 255)	return false;	//	Domain part too long

	//	Let's check the local part for RFC compliance...
	//
	//	Period (".") may...appear, but may not be used to start or end the
	//	local part, nor may two or more consecutive periods appear.
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	if (preg_match('/^\\.|\\.\\.|\\.$/', $localPart) > 0)				return false;	//	Dots in wrong place

	//	Any ASCII graphic (printing) character other than the
	//	at-sign ("@"), backslash, double quote, comma, or square brackets may
	//	appear without quoting.  If any of that list of excluded characters
	//	are to appear, they must be quoted
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	if (preg_match('/^"(?:.)*"$/', $localPart) > 0) {
		//	Local part is a quoted string
		if (preg_match('/(?:.)+[^\\\\]"(?:.)+/', $localPart) > 0)	return false;	//	Unescaped quote character inside quoted string
	} elseif (preg_match('/[ @\\[\\]\\\\",]/', $localPart) > 0) {
		//	Check all excluded characters are escaped: remove every backslash-escaped
		//	pair, then anything from the excluded set that is left was not escaped.
		//	Both statements sit inside the braces so $stripped is never read before it is set.
		$stripped = preg_replace('/\\\\[ @\\[\\]\\\\",]/', '', $localPart);
		if (preg_match('/[ @\\[\\]\\\\",]/', $stripped) > 0)		return false;	//	Unquoted excluded characters
	}

	//	Now let's check the domain part...
	//
	//	The domain name can also be replaced by an IP address in square brackets
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
		//	It's an address-literal
		$addressLiteral = substr($domain, 1, $domainLength - 2);
		$matches = array();

		//	Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matches) > 0) {
			$index = strrpos($addressLiteral, $matches[0]);

			if ($index === 0) {
				//	Nothing there except a valid IPv4 address, so...
				return true;
			} else {
				//	Assume it's an attempt at a mixed address (IPv6 + IPv4)
				if ($addressLiteral[$index - 1] !== ':')			return false;	//	Character preceding IPv4 address must be ':'
				if (substr($addressLiteral, 0, 5) !== 'IPv6:')		return false;	//	RFC5321 section 4.1.3

				//	Offset 7 means the literal is "IPv6:::a.b.c.d", so the IPv6 part is just "::".
				//	Otherwise drop the "IPv6:" tag and the single colon that joins the groups to the IPv4 part.
				$IPv6 = substr($addressLiteral, 5, ($index ===7) ? 2 : $index - 6);
				$groupMax = 6;
			}
		} else {
			//	It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:')			return false;	//	RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
			$groupMax = 8;
		}

		//	Each match is one group; anything that is not part of a group lands in capture 1
		$groupCount	= preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matches);
		$index		= strpos($IPv6,'::');

		if ($index === false) {
			//	We need exactly the right number of groups
			if ($groupCount !== $groupMax)							return false;	//	RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6,'::'))						return false;	//	More than one '::'
			//	A '::' in the middle of the address lowers the permitted number of groups by one
			$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
			if ($groupCount > $groupMax)							return false;	//	Too many IPv6 groups in address
		}

		//	Check for unmatched characters (reverse sorting puts any non-empty capture first)
		rsort($matches[1]);
		if ($matches[1][0] !== '')									return false;	//	Illegal characters in address

		//	It's a valid IPv6 address, so...
		return true;
	} else {
		//	It's a domain name...

		//	The syntax of a legal Internet host name was specified in RFC-952
		//	One aspect of host name syntax is hereby changed: the
		//	restriction on the first character is relaxed to allow either a
		//	letter or a digit.
		//		(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		//	NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		//	Most common applications, including email and the Web, will generally not permit...escaped strings
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		//	labels for SMTP clients or servers
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	The D modifier stops '$' matching before a trailing newline and the s modifier lets
		//	'.' capture a newline, so a line break in the domain is reported as an illegal character.
		$matches	= array();
		preg_match_all('/(?:[0-9a-zA-Z][0-9a-zA-Z-]{0,61}[0-9a-zA-Z]|[a-zA-Z])(?:\\.|$)|(.)/Ds', $domain, $matches);
		$level		= count($matches[0]);

		if ($level === 1)											return false;	//	Mail host can't be a TLD

		$TLD = $matches[0][$level - 1];
		if ($TLD[strlen($TLD) - 1] === '.')							return false;	//	TLD can't end in a dot
		if (preg_match('/^[0-9]+$/', $TLD) > 0)						return false;	//	TLD can't be all-numeric

		//	Check for unmatched characters (reverse sorting puts any non-empty capture first)
		rsort($matches[1]);
		if ($matches[1][0] !== '')									return false;	//	Illegal characters in domain, or label longer than 63 characters

		//	Check DNS?
		if ($checkDNS && function_exists('checkdnsrr')) {
			if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
																	return false;	//	Domain doesn't actually exist
			}
		}

		//	Eliminate all other factors, and the one which remains must be the truth.
		//		(Sherlock Holmes, The Sign of Four)
		return true;
	}
}

/**
 * Run is_email() on one address and describe the outcome as a line of HTML
 *
 * @param string	$email	The address to test
 * @param string	$reason	Empty if the address is expected to be valid, otherwise why it should be rejected
 * @return string		One HTML line; a result that differs from the expectation is flagged in bold
 */
function unitTest ($email, $reason = '') {
	//	Supplying a reason is what marks a test case as "should be rejected"
	$shouldPass	= ($reason === '');
	$isValid	= is_email($email);

	$line = "The address <i>$email</i> is";
	if (!$isValid)					$line .= ' not';
	$line .= ' valid.';
	if ($isValid !== $shouldPass)	$line .= ' <b>This was unexpected!</b>';
	if ($reason !== '')				$line .= " Reason: $reason";

	return $line . "<br />\n";
}

//	Email validator test cases (Dominic Sayers, January 2009)
//	Each call echoes one line of HTML built by unitTest(). A call with a single
//	argument expects is_email() to accept the address; a call that also passes a
//	reason expects the address to be rejected. unitTest() marks any mismatch as unexpected.
//	NOTE(review): the literal '[email protected]' looks like an address that was masked
//	when this page was captured - confirm against the original test data.
//	Valid addresses
echo unitTest('[email protected]');
echo unitTest('1234567890123456789012345678901234567890123456789012345678901234@example.com');
echo unitTest('"first last"@example.com');
echo unitTest('"first\\"last"@example.com');	//	Not totally sure whether this is valid or not
echo unitTest('first\\@[email protected]');
echo unitTest('"first@last"@example.com');
echo unitTest('first\\\\[email protected]');	//	Note that \ is escaped even in single-quote strings, so this is testing "first\\last"@example.com
echo unitTest('first.last@x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x2345');
echo unitTest('first.last@[12.34.56.78]');
echo unitTest('first.last@[IPv6:::12.34.56.78]');
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:12.34.56.78]');
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:12.34.56.78]');
echo unitTest('first.last@[IPv6:::1111:2222:3333:4444:5555:6666]');
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:5555:6666]');
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666::]');
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888]');
echo unitTest('first.last@x23456789012345678901234567890123456789012345678901234567890123.example.com');
echo unitTest('[email protected]');
echo unitTest('[email protected]');

//	Invalid addresses (the second argument states why each one should be rejected)
echo unitTest('first.last', "No @");
echo unitTest('@example.com', "No local part");
echo unitTest('12345678901234567890123456789012345678901234567890123456789012345@example.com', "Local part more than 64 characters");
echo unitTest('[email protected]', "Local part starts with a dot");
echo unitTest('[email protected]', "Local part ends with a dot");
echo unitTest('[email protected]', "Local part has consecutive dots");
echo unitTest('"first"last"@example.com', "Local part contains unescaped excluded characters");
echo unitTest('first\\\\@[email protected]', "Local part contains unescaped excluded characters");
echo unitTest('first.last@', "No domain");
echo unitTest('first.last@x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456', "Domain exceeds 255 chars");
echo unitTest('first.last@[.12.34.56.78]', "Only char that can precede IPv4 address is ':'");
echo unitTest('first.last@[12.34.56.789]', "Can't be interpreted as IPv4 so IPv6 tag is missing");
echo unitTest('first.last@[::12.34.56.78]', "IPv6 tag is missing");
echo unitTest('first.last@[IPv5:::12.34.56.78]', "IPv6 tag is wrong");
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:5555:12.34.56.78]', "Too many IPv6 groups (4 max)");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:12.34.56.78]', "Not enough IPv6 groups");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:12.34.56.78]', "Too many IPv6 groups (6 max)");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777]', "Not enough IPv6 groups");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888:9999]', "Too many IPv6 groups (8 max)");
echo unitTest('first.last@[IPv6:1111:2222::3333::4444:5555:6666]', "Too many '::' (can be none or one)");
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:5555:6666:7777]', "Too many IPv6 groups (6 max)");
echo unitTest('first.last@[IPv6:1111:2222:333x::4444:5555]', "x is not valid in an IPv6 address");
echo unitTest('first.last@[IPv6:1111:2222:33333::4444:5555]', "33333 is not a valid group in an IPv6 address");
echo unitTest('[email protected]', "TLD can't be all digits");
echo unitTest('first.last@com', "Mail host must be second- or lower level");
echo unitTest('[email protected]', "Label can't begin with a hyphen");
echo unitTest('[email protected]', "Label can't end with a hyphen");
echo unitTest('first.last@x234567890123456789012345678901234567890123456789012345678901234.example.com', "Label can't be longer than 63 octets");

//	Test cases from RFC3696 (February 2004, http://tools.ietf.org/html/rfc3696#section-3)
//	All of these are expected to be valid
echo unitTest('Abc\\@[email protected]');
echo unitTest('Fred\\ [email protected]');
echo unitTest('Joe.\\\\[email protected]');
echo unitTest('"Abc@def"@example.com');
echo unitTest('"Fred Bloggs"@example.com');
echo unitTest('[email protected]');
echo unitTest('customer/[email protected]');
echo unitTest('[email protected]');
echo unitTest('!def!xyz%[email protected]');
echo unitTest('[email protected]');

//	Test cases from Doug Lovell (LinuxJournal, June 2007, http://www.linuxjournal.com/article/9585)
//	These use double-quoted PHP strings, so \\ is one backslash and \" is a literal quote.
//	Calls without a reason are expected to be valid; the rest are expected to fail.
echo unitTest("[email protected]");
echo unitTest("abc\\@[email protected]");
echo unitTest("abc\\\\@example.com");
echo unitTest("Fred\\ [email protected]");
echo unitTest("Joe.\\\\[email protected]");
echo unitTest("\"Abc@def\"@example.com");
echo unitTest("\"Fred Bloggs\"@example.com");
echo unitTest("customer/[email protected]");
echo unitTest("\[email protected]");
echo unitTest("!def!xyz%[email protected]");
echo unitTest("[email protected]");
echo unitTest("[email protected]");
echo unitTest("[email protected]");
echo unitTest("Doug\\ \\\"Ace\\\"\\ [email protected]");
echo unitTest("\"Doug \\\"Ace\\\" L.\"@example.com");
echo unitTest("abc@[email protected]", "Doug Lovell says this should fail");
echo unitTest("abc\\\\@[email protected]", "Doug Lovell says this should fail");
echo unitTest("abc\\@example.com", "Doug Lovell says this should fail");
echo unitTest("@example.com", "Doug Lovell says this should fail");
echo unitTest("doug@", "Doug Lovell says this should fail");
echo unitTest("\"[email protected]", "Doug Lovell says this should fail");
echo unitTest("ote\"@example.com", "Doug Lovell says this should fail");
echo unitTest("[email protected]", "Doug Lovell says this should fail");
echo unitTest("[email protected]", "Doug Lovell says this should fail");
echo unitTest("[email protected]", "Doug Lovell says this should fail");
echo unitTest("\"Doug \"Ace\" L.\"@example.com", "Doug Lovell says this should fail");
echo unitTest("Doug\\ \\\"Ace\\\"\\ L\\[email protected]", "Doug Lovell says this should fail");
echo unitTest("hello [email protected]", "Doug Lovell says this should fail");
echo unitTest("[email protected].", "Doug Lovell says this should fail");

Revision: 11222
at January 28, 2009 13:14 by dominicsayers


Initial Code
/*
Copyright 2009 Dominic Sayers
([email protected])
(http://www.dominicsayers.com)

This source file is subject to the Common Public Attribution License Version 1.0 (CPAL) license.
The license terms are available through the world-wide-web at http://www.opensource.org/licenses/cpal_1.0
*/
/**
 * Check that $email is a syntactically valid email address
 *
 *		(http://tools.ietf.org/html/rfc3696)
 *		(http://tools.ietf.org/html/rfc5322#section-3.4.1)
 *		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
 *		(http://tools.ietf.org/html/rfc4291#section-2.2)
 *		(http://tools.ietf.org/html/rfc1123#section-2.1)
 *
 * @param string	$email		The email address to check
 * @param boolean	$checkDNS	If true, and checkdnsrr() exists, the domain must also have an A or MX record
 * @return boolean			True if the address passes every check, false otherwise
 */
function is_email ($email, $checkDNS = false) {
	//	Contemporary email addresses consist of a "local part" separated from
	//	a "domain part" (a fully-qualified domain name) by an at-sign ("@").
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//	The LAST at-sign is taken as the separator; earlier ones belong to the local part.
	$index = strrpos($email,'@');

	if ($index === false)		return false;	//	No at-sign
	if ($index === 0)			return false;	//	No local part
	if ($index > 64)			return false;	//	Local part too long

	$localPart		= substr($email, 0, $index);
	$domain			= substr($email, $index + 1);
	$domainLength	= strlen($domain);

	if ($domainLength === 0)	return false;	//	No domain part
	if ($domainLength > 255)	return false;	//	Domain part too long

	//	Let's check the local part for RFC compliance...
	//
	//	Period (".") may...appear, but may not be used to start or end the
	//	local part, nor may two or more consecutive periods appear.
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	if (preg_match('/^\.|\.\.|\.$/', $localPart) > 0)				return false;	//	Dots in wrong place

	//	Any ASCII graphic (printing) character other than the
	//	at-sign ("@"), backslash, double quote, comma, or square brackets may
	//	appear without quoting.  If any of that list of excluded characters
	//	are to appear, they must be quoted
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	if (preg_match('/^"(?:.)*"$/', $localPart) > 0) {
		//	Local part is a quoted string
		if (preg_match('/(?:.)+[^\\\\]"(?:.)+/', $localPart) > 0)	return false;	//	Unescaped quote character inside quoted string
	} elseif (preg_match('/[ @\[\]\\\\",]/', $localPart) > 0) {
		//	Check all excluded characters are escaped: remove every backslash-escaped
		//	pair, then anything from the excluded set that is left was not escaped.
		//	Both statements sit inside the braces so $stripped is never read before it is set.
		$stripped = preg_replace('/\\\\[ @\[\]\\\\",]/', '', $localPart);
		if (preg_match('/[ @\[\]\\\\",]/', $stripped) > 0)		return false;	//	Unquoted excluded characters
	}

	//	Now let's check the domain part...
	//
	//	The domain name can also be replaced by an IP address in square brackets
	//		(http://tools.ietf.org/html/rfc3696#section-3)
	//		(http://tools.ietf.org/html/rfc5321#section-4.1.3)
	//		(http://tools.ietf.org/html/rfc4291#section-2.2)
	if (preg_match('/^\[(.)+]$/', $domain) === 1) {
		//	It's an address-literal
		$addressLiteral = substr($domain, 1, $domainLength - 2);
		$matches = array();

		//	Extract IPv4 part from the end of the address-literal (if there is one)
		if (preg_match('/\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matches) > 0) {
			$index = strrpos($addressLiteral, $matches[0]);

			if ($index === 0) {
				//	Nothing there except a valid IPv4 address, so...
				return true;
			} else {
				//	Assume it's an attempt at a mixed address (IPv6 + IPv4)
				if ($addressLiteral[$index - 1] !== ':')			return false;	//	Character preceding IPv4 address must be ':'
				if (substr($addressLiteral, 0, 5) !== 'IPv6:')		return false;	//	RFC5321 section 4.1.3

				//	Offset 7 means the literal is "IPv6:::a.b.c.d", so the IPv6 part is just "::".
				//	Otherwise drop the "IPv6:" tag and the single colon that joins the groups to the IPv4 part.
				$IPv6 = substr($addressLiteral, 5, ($index ===7) ? 2 : $index - 6);
				$groupMax = 6;
			}
		} else {
			//	It must be an attempt at pure IPv6
			if (substr($addressLiteral, 0, 5) !== 'IPv6:')			return false;	//	RFC5321 section 4.1.3
			$IPv6 = substr($addressLiteral, 5);
			$groupMax = 8;
		}

		//	Each match is one group; anything that is not part of a group lands in capture 1
		$groupCount	= preg_match_all('/^[0-9a-fA-F]{0,4}|\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matches);
		$index		= strpos($IPv6,'::');

		if ($index === false) {
			//	We need exactly the right number of groups
			if ($groupCount !== $groupMax)							return false;	//	RFC5321 section 4.1.3
		} else {
			if ($index !== strrpos($IPv6,'::'))						return false;	//	More than one '::'
			//	A '::' in the middle of the address lowers the permitted number of groups by one
			$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
			if ($groupCount > $groupMax)							return false;	//	Too many IPv6 groups in address
		}

		//	Check for unmatched characters (reverse sorting puts any non-empty capture first)
		rsort($matches[1]);
		if ($matches[1][0] !== '')									return false;	//	Illegal characters in address

		//	It's a valid IPv6 address, so...
		return true;
	} else {
		//	It's a domain name...

		//	The syntax of a legal Internet host name was specified in RFC-952
		//	One aspect of host name syntax is hereby changed: the
		//	restriction on the first character is relaxed to allow either a
		//	letter or a digit.
		//		(http://tools.ietf.org/html/rfc1123#section-2.1)
		//
		//	NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
		//
		//	Most common applications, including email and the Web, will generally not permit...escaped strings
		//		(http://tools.ietf.org/html/rfc3696#section-2)
		//
		//	Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
		//	labels for SMTP clients or servers
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
		//		(http://tools.ietf.org/html/rfc5321#section-4.1.2)
		//
		//	The D modifier stops '$' matching before a trailing newline and the s modifier lets
		//	'.' capture a newline, so a line break in the domain is reported as an illegal character.
		$matches	= array();
		preg_match_all('/(?:[0-9a-zA-Z][0-9a-zA-Z-]{0,61}[0-9a-zA-Z]|[a-zA-Z])(?:\.|$)|(.)/Ds', $domain, $matches);
		$level		= count($matches[0]);

		if ($level === 1)											return false;	//	Mail host can't be a TLD

		$TLD = $matches[0][$level - 1];
		if ($TLD[strlen($TLD) - 1] === '.')							return false;	//	TLD can't end in a dot
		if (preg_match('/^[0-9]+$/', $TLD) > 0)						return false;	//	TLD can't be all-numeric

		//	Check for unmatched characters (reverse sorting puts any non-empty capture first)
		rsort($matches[1]);
		if ($matches[1][0] !== '')									return false;	//	Illegal characters in domain, or label longer than 63 characters

		//	Check DNS?
		if ($checkDNS && function_exists('checkdnsrr')) {
			if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
																	return false;	//	Domain doesn't actually exist
			}
		}

		//	Eliminate all other factors, and the one which remains must be the truth.
		//		(Sherlock Holmes, The Sign of Four)
		return true;
	}
}

/**
 * Run is_email() against one address and describe the outcome as a line of HTML.
 *
 * An empty $reason means the address is expected to be valid. Any other value
 * means the address is expected to be rejected, and the text is appended to the
 * output as the explanation.
 *
 * @param string $email  Address to pass to is_email()
 * @param string $reason Why the address should be rejected ('' if it should be accepted)
 * @return string HTML fragment terminated by "<br />\n"
 */
function unitTest ($email, $reason = '') {
	$expected	= ($reason === '');
	$valid		= is_email($email);
	$not		= ($valid) ? '' : ' not';
	$unexpected	= ($valid !== $expected) ? ' <b>This was unexpected!</b>' : '';

	//	The result is echoed into an HTML page, and test addresses routinely contain
	//	quotes and other HTML metacharacters, so escape both caller-supplied values.
	//	ENT_SUBSTITUTE only exists from PHP 5.4, hence the defined() guard.
	$flags		= ENT_QUOTES | (defined('ENT_SUBSTITUTE') ? ENT_SUBSTITUTE : 0);
	$emailHtml	= htmlspecialchars($email, $flags, 'UTF-8');
	$reasonHtml	= ($reason === '') ? '' : ' Reason: ' . htmlspecialchars($reason, $flags, 'UTF-8');

	return "The address <i>$emailHtml</i> is$not valid.$unexpected$reasonHtml<br />\n";
}

//	Email validator test cases (Dominic Sayers, January 2009)
//
//	Each call prints one line of HTML saying whether is_email() accepted the address.
//	unitTest() treats a call with no second argument as "expected to be valid" and
//	marks the line as unexpected if is_email() disagrees.
//
//	NOTE(review): several literals below read '[email protected]'. This looks like a
//	placeholder substituted by e-mail obfuscation when the snippet was published, not
//	the address the author wrote - TODO restore the real addresses from the upstream
//	test suite before trusting these results.
//
//	Valid addresses
echo unitTest('[email protected]');
echo unitTest('1234567890123456789012345678901234567890123456789012345678901234@example.com');
echo unitTest('"first last"@example.com');
echo unitTest('"first\"last"@example.com');	//	Not totally sure whether this is valid or not
echo unitTest('first\@[email protected]');
echo unitTest('"first@last"@example.com');
echo unitTest('first\\\\[email protected]');	//	Note that \ is escaped even in single-quote strings, so this is testing "first\\last"@example.com
echo unitTest('first.last@x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x2345');
//	Address literals: bare IPv4, IPv6 with an embedded IPv4 tail, and pure IPv6 forms
echo unitTest('first.last@[12.34.56.78]');
echo unitTest('first.last@[IPv6:::12.34.56.78]');
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:12.34.56.78]');
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:12.34.56.78]');
echo unitTest('first.last@[IPv6:::1111:2222:3333:4444:5555:6666]');
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:5555:6666]');
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666::]');
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888]');
echo unitTest('first.last@x23456789012345678901234567890123456789012345678901234567890123.example.com');
echo unitTest('[email protected]');
echo unitTest('[email protected]');

//	Invalid addresses
//
//	The second argument is the reason the address should be rejected. Passing a
//	non-empty reason makes unitTest() expect is_email() to return false and flag
//	the line as unexpected otherwise; the reason is appended to the output.
//
//	NOTE(review): several literals below read '[email protected]', which does not
//	exercise the condition named in the accompanying reason. This looks like a
//	placeholder substituted by e-mail obfuscation when the snippet was published -
//	TODO restore the real addresses from the upstream test suite.
echo unitTest('first.last', "No @");
echo unitTest('@example.com', "No local part");
echo unitTest('12345678901234567890123456789012345678901234567890123456789012345@example.com', "Local part more than 64 characters");
echo unitTest('[email protected]', "Local part starts with a dot");
echo unitTest('[email protected]', "Local part ends with a dot");
echo unitTest('[email protected]', "Local part has consecutive dots");
echo unitTest('"first"last"@example.com', "Local part contains unescaped excluded characters");
echo unitTest('first\\\\@[email protected]', "Local part contains unescaped excluded characters");
echo unitTest('first.last@', "No domain");
echo unitTest('first.last@x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456', "Domain exceeds 255 chars");
//	Malformed address literals
echo unitTest('first.last@[.12.34.56.78]', "Only char that can precede IPv4 address is ':'");
echo unitTest('first.last@[12.34.56.789]', "Can't be interpreted as IPv4 so IPv6 tag is missing");
echo unitTest('first.last@[::12.34.56.78]', "IPv6 tag is missing");
echo unitTest('first.last@[IPv5:::12.34.56.78]', "IPv6 tag is wrong");
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:5555:12.34.56.78]', "Too many IPv6 groups (4 max)");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:12.34.56.78]', "Not enough IPv6 groups");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:12.34.56.78]', "Too many IPv6 groups (6 max)");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777]', "Not enough IPv6 groups");
echo unitTest('first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888:9999]', "Too many IPv6 groups (8 max)");
echo unitTest('first.last@[IPv6:1111:2222::3333::4444:5555:6666]', "Too many '::' (can be none or one)");
echo unitTest('first.last@[IPv6:1111:2222:3333::4444:5555:6666:7777]', "Too many IPv6 groups (6 max)");
echo unitTest('first.last@[IPv6:1111:2222:333x::4444:5555]', "x is not valid in an IPv6 address");
echo unitTest('first.last@[IPv6:1111:2222:33333::4444:5555]', "33333 is not a valid group in an IPv6 address");
//	Malformed domain names
echo unitTest('[email protected]', "TLD can't be all digits");
echo unitTest('first.last@com', "Mail host must be second- or lower level");
echo unitTest('[email protected]', "Label can't begin with a hyphen");
echo unitTest('[email protected]', "Label can't end with a hyphen");
echo unitTest('first.last@x234567890123456789012345678901234567890123456789012345678901234.example.com', "Label can't be longer than 63 octets");
//	Test cases from RFC3696 (February 2004, http://tools.ietf.org/html/rfc3696#section-3)
//
//	All of these are called without a reason, so unitTest() expects is_email() to
//	accept every one of them.
//	The literals are single-quoted: '\\\\' reaches is_email() as two backslashes,
//	while '\@' and '\ ' are passed through as a backslash followed by the character.
//
//	NOTE(review): several literals below read '[email protected]'. This looks like a
//	placeholder substituted by e-mail obfuscation when the snippet was published -
//	TODO restore the real addresses from the RFC before trusting these results.
echo unitTest('Abc\@[email protected]');
echo unitTest('Fred\ [email protected]');
echo unitTest('Joe.\\\\[email protected]');
echo unitTest('"Abc@def"@example.com');
echo unitTest('"Fred Bloggs"@example.com');
echo unitTest('[email protected]');
echo unitTest('customer/[email protected]');
echo unitTest('[email protected]');
echo unitTest('!def!xyz%[email protected]');
echo unitTest('[email protected]');

//	Test cases from Doug Lovell (LinuxJournal, June 2007, http://www.linuxjournal.com/article/9585)
//
//	Unlike the earlier groups these literals are double-quoted, so PHP processes the
//	escapes before is_email() sees the address: "\\" is one backslash and "\"" is a
//	double quote.
//
//	NOTE(review): several literals below read '[email protected]'. This looks like a
//	placeholder substituted by e-mail obfuscation when the snippet was published -
//	TODO restore the real addresses from the article before trusting these results.
//
//	Expected to be valid (no reason given)
echo unitTest("[email protected]");
echo unitTest("abc\\@[email protected]");
echo unitTest("abc\\\\@example.com");
echo unitTest("Fred\\ [email protected]");
echo unitTest("Joe.\\\\[email protected]");
echo unitTest("\"Abc@def\"@example.com");
echo unitTest("\"Fred Bloggs\"@example.com");
echo unitTest("customer/[email protected]");
echo unitTest("\[email protected]");
echo unitTest("!def!xyz%[email protected]");
echo unitTest("[email protected]");
echo unitTest("[email protected]");
echo unitTest("[email protected]");
echo unitTest("Doug\\ \\\"Ace\\\"\\ [email protected]");
echo unitTest("\"Doug \\\"Ace\\\" L.\"@example.com");
//	Expected to be rejected (the reason string is passed as the second argument)
echo unitTest("abc@[email protected]", "Doug Lovell says this should fail");
echo unitTest("abc\\\\@[email protected]", "Doug Lovell says this should fail");
echo unitTest("abc\\@example.com", "Doug Lovell says this should fail");
echo unitTest("@example.com", "Doug Lovell says this should fail");
echo unitTest("doug@", "Doug Lovell says this should fail");
echo unitTest("\"[email protected]", "Doug Lovell says this should fail");
echo unitTest("ote\"@example.com", "Doug Lovell says this should fail");
echo unitTest("[email protected]", "Doug Lovell says this should fail");
echo unitTest("[email protected]", "Doug Lovell says this should fail");
echo unitTest("[email protected]", "Doug Lovell says this should fail");
echo unitTest("\"Doug \"Ace\" L.\"@example.com", "Doug Lovell says this should fail");
echo unitTest("Doug\\ \\\"Ace\\\"\\ L\\[email protected]", "Doug Lovell says this should fail");
echo unitTest("hello [email protected]", "Doug Lovell says this should fail");
echo unitTest("[email protected].", "Doug Lovell says this should fail");

Initial URL
http://www.dominicsayers.com/isemail/

Initial Description
A PHP function that correctly validates all parts of a given email address, according to RFCs 5322, 5321, 1123, 2396, 3696, 4291, 4343, 2821 & 2822. I’ve released it under a license that allows you to use it royalty-free in commercial or non-commercial work. The test cases and the latest version of the code will always be here: http://code.google.com/p/isemail/source/browse/#svn/trunk

Initial Title
RFC-compliant email address validator

Initial Tags
email, validation

Initial Language
PHP