Login | ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/javautils/ViaThinkSoft Java Utils/src/com/dominicsayers/isemail/is_email.php
Revision: 16
Committed: Mon Jun 14 19:26:08 2010 UTC (10 years, 1 month ago) by daniel-marschall
File size: 19186 byte(s)
Log Message:
typos

File Contents

# Content
1 <?php
2
3 // This file was edited by Daniel Marschall
4 // - Fixes: 4 typos
5
6 /**
7 * @package isemail
8 * @author Dominic Sayers <dominic_sayers@hotmail.com>
9 * @copyright 2010 Dominic Sayers
10 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
11 * @link http://www.dominicsayers.com/isemail
12 * @version 1.17 - Upper length limit corrected to 254 characters
13 */
14
15 /*
16 Copyright (c) 2008-2010, Dominic Sayers
17 All rights reserved.
18
19 Redistribution and use in source and binary forms, with or without modification,
20 are permitted provided that the following conditions are met:
21
22 * Redistributions of source code must retain the above copyright notice, this
23 list of conditions and the following disclaimer.
24 * Redistributions in binary form must reproduce the above copyright notice,
25 this list of conditions and the following disclaimer in the documentation
26 and/or other materials provided with the distribution.
27 * Neither the name of Dominic Sayers nor the names of its contributors may be
28 used to endorse or promote products derived from this software without
29 specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
32 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
33 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
34 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
35 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
36 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
38 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
40 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42
43 /*.
44 require_module 'standard';
45 require_module 'pcre';
46 .*/
/**
 * Check whether a string is a syntactically valid email address.
 *
 * The address is split at its LAST at-sign into a local part and a domain.
 * The local part is validated as a sequence of dot-separated atoms and/or
 * quoted strings (comments at either end of an element are stripped first).
 * The domain is validated either as a bracketed address literal (IPv4,
 * IPv6 or mixed) or as a dot-separated host name with at least two labels.
 *
 * Side effect: on first call the ISEMAIL_* diagnostic constants are defined.
 *
 * @param string $email    The address to check.
 * @param bool   $checkDNS When true (and checkdnsrr() exists), a host-name
 *                         domain must also have an A or MX record. Address
 *                         literals are never looked up.
 * @param bool   $diagnose When true, return an ISEMAIL_* integer code
 *                         (ISEMAIL_VALID is 0) instead of a boolean.
 * @return bool|int        true/false when $diagnose is false, otherwise the
 *                         ISEMAIL_* code describing the first problem found.
 */
/*.mixed.*/ function is_email (/*.string.*/ $email, $checkDNS = false, $diagnose = false) {
    // Check that $email is a valid address. Read the following RFCs to understand the constraints:
    //  (http://tools.ietf.org/html/rfc5322)
    //  (http://tools.ietf.org/html/rfc3696)
    //  (http://tools.ietf.org/html/rfc5321)
    //  (http://tools.ietf.org/html/rfc4291#section-2.2)
    //  (http://tools.ietf.org/html/rfc1123#section-2.1)

    if (!defined('ISEMAIL_VALID')) {
        define('ISEMAIL_VALID'                  , 0);
        define('ISEMAIL_TOOLONG'                , 1);
        define('ISEMAIL_NOAT'                   , 2);
        define('ISEMAIL_NOLOCALPART'            , 3);
        define('ISEMAIL_NODOMAIN'               , 4);
        define('ISEMAIL_ZEROLENGTHELEMENT'      , 5);
        define('ISEMAIL_BADCOMMENT_START'       , 6);
        define('ISEMAIL_BADCOMMENT_END'         , 7);
        define('ISEMAIL_UNESCAPEDDELIM'         , 8);
        define('ISEMAIL_EMPTYELEMENT'           , 9);
        define('ISEMAIL_UNESCAPEDSPECIAL'       , 10);
        define('ISEMAIL_LOCALTOOLONG'           , 11);
        define('ISEMAIL_IPV4BADPREFIX'          , 12);
        define('ISEMAIL_IPV6BADPREFIXMIXED'     , 13);
        define('ISEMAIL_IPV6BADPREFIX'          , 14);
        define('ISEMAIL_IPV6GROUPCOUNT'         , 15);
        define('ISEMAIL_IPV6DOUBLEDOUBLECOLON'  , 16);
        define('ISEMAIL_IPV6BADCHAR'            , 17);
        define('ISEMAIL_IPV6TOOMANYGROUPS'      , 18);
        define('ISEMAIL_TLD'                    , 19);
        define('ISEMAIL_DOMAINEMPTYELEMENT'     , 20);
        define('ISEMAIL_DOMAINELEMENTTOOLONG'   , 21);
        define('ISEMAIL_DOMAINBADCHAR'          , 22);
        define('ISEMAIL_DOMAINTOOLONG'          , 23);
        define('ISEMAIL_TLDNUMERIC'             , 24);
        define('ISEMAIL_DOMAINNOTFOUND'         , 25);
        define('ISEMAIL_NOTDEFINED'             , 99);
    }

    // The upper limit on address lengths should normally be considered to be 254
    //  (http://www.rfc-editor.org/errata_search.php?rfc=3696)
    //
    // The maximum total length of a reverse-path or forward-path is 256
    // characters (including the punctuation and element separators)
    //  (http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3)
    // NB There is a mandatory 2-character wrapper round the actual address
    $emailLength = strlen($email);
    if ($emailLength > 254) return $diagnose ? ISEMAIL_TOOLONG : false; // Too long

    // Contemporary email addresses consist of a "local part" separated from
    // a "domain part" (a fully-qualified domain name) by an at-sign ("@").
    //  (http://tools.ietf.org/html/rfc3696#section-3)
    $atIndex = strrpos($email, '@');

    if ($atIndex === false)            return $diagnose ? ISEMAIL_NOAT        : false; // No at-sign
    if ($atIndex === 0)                return $diagnose ? ISEMAIL_NOLOCALPART : false; // No local part
    if ($atIndex === $emailLength - 1) return $diagnose ? ISEMAIL_NODOMAIN    : false; // No domain part

    // Sanitize comments and quoted strings so the later dot-splitting is not confused:
    //  - nested parentheses, quotes and dots inside comments become 'x'
    //  - parentheses inside quoted strings become 'x'
    //  - any of ( ) " . preceded by a backslash becomes 'x'
    // Replacement is one-for-one, so $emailLength and $atIndex stay valid.
    $braceDepth     = 0;
    $inQuote        = false;
    $escapeThisChar = false;

    for ($i = 0; $i < $emailLength; ++$i) {
        $char        = $email[$i];
        $replaceChar = false;

        if ($char === '\\') {
            $escapeThisChar = !$escapeThisChar; // Escape the next character?
        } else {
            switch ($char) {
                case '(':
                    if ($escapeThisChar) {
                        $replaceChar = true;
                    } else {
                        if ($inQuote) {
                            $replaceChar = true;
                        } else {
                            if ($braceDepth++ > 0) $replaceChar = true; // Increment brace depth
                        }
                    }

                    break;
                case ')':
                    if ($escapeThisChar) {
                        $replaceChar = true;
                    } else {
                        if ($inQuote) {
                            $replaceChar = true;
                        } else {
                            if (--$braceDepth > 0) $replaceChar = true; // Decrement brace depth
                            if ($braceDepth < 0) $braceDepth = 0;
                        }
                    }

                    break;
                case '"':
                    if ($escapeThisChar) {
                        $replaceChar = true;
                    } else {
                        if ($braceDepth === 0) {
                            $inQuote = !$inQuote; // Are we inside a quoted string?
                        } else {
                            $replaceChar = true;
                        }
                    }

                    break;
                case '.': // Dots don't help us either
                    if ($escapeThisChar) {
                        $replaceChar = true;
                    } else {
                        if ($braceDepth > 0) $replaceChar = true;
                    }

                    break;
                default:
            }

            $escapeThisChar = false;
            // substr_replace() rather than $email[$i] = 'x' because PHPLint doesn't like the latter
            if ($replaceChar) $email = (string) substr_replace($email, 'x', $i, 1); // Replace the offending character with something harmless
        }
    }

    $localPart = substr($email, 0, $atIndex);
    $domain    = substr($email, $atIndex + 1);
    $FWS       = "(?:(?:(?:[ \\t]*(?:\\r\\n))?[ \\t]+)|(?:[ \\t]+(?:(?:\\r\\n)[ \\t]+)*))"; // Folding white space
    $trimFWS   = "/^$FWS|$FWS\$/"; // Leading or trailing folding white space

    // Let's check the local part for RFC compliance...
    //
    // local-part     = dot-atom / quoted-string / obs-local-part
    // obs-local-part = word *("." word)
    //  (http://tools.ietf.org/html/rfc5322#section-3.4.1)
    //
    // Problem: need to distinguish between "first.last" and "first"."last"
    // (i.e. one element or two), so only split on dots that are outside quotes.
    $dotArray   = /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $localPart);
    $partLength = 0;

    foreach ($dotArray as $element) {
        // Remove any leading or trailing FWS
        $element       = preg_replace($trimFWS, '', $element);
        $elementLength = strlen($element);

        if ($elementLength === 0) return $diagnose ? ISEMAIL_ZEROLENGTHELEMENT : false; // Can't have empty element (consecutive dots or dots at the start or end)

        // We need to remove any valid comments (i.e. those at the start or end of the element)
        if ($element[0] === '(') {
            $indexBrace = strpos($element, ')');
            if ($indexBrace !== false) {
                if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
                    return $diagnose ? ISEMAIL_BADCOMMENT_START : false; // Illegal characters in comment
                }
                $element       = substr($element, $indexBrace + 1, $elementLength - $indexBrace - 1);
                $elementLength = strlen($element);
            }
        }

        // The element may now be empty (it consisted solely of a comment), so
        // guard the offset read to avoid an "uninitialized string offset" warning.
        if ($elementLength > 0 && $element[$elementLength - 1] === ')') {
            $indexBrace = strrpos($element, '(');
            if ($indexBrace !== false) {
                if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0) {
                    return $diagnose ? ISEMAIL_BADCOMMENT_END : false; // Illegal characters in comment
                }
                $element       = substr($element, 0, $indexBrace);
                $elementLength = strlen($element);
            }
        }

        // Remove any leading or trailing FWS around the element (inside any comments)
        $element = preg_replace($trimFWS, '', $element);

        // What's left counts towards the maximum length for this part
        if ($partLength > 0) $partLength++; // for the dot
        $partLength += strlen($element);

        // Each dot-delimited component can be an atom or a quoted string
        // (because of the obs-local-part provision)
        if (preg_match('/^"(?:.)*"$/s', $element) > 0) {
            // Quoted-string tests:
            //
            // Remove any FWS
            $element = preg_replace("/(?<!\\\\)$FWS/", '', $element);
            // Telling \" from \\" from \\\" with a single regex is awkward,
            // so neutralise every escaped backslash first...
            $element = preg_replace('/\\\\\\\\/', ' ', $element);
            if (preg_match('/(?<!\\\\|^)["\\r\\n\\x00](?!$)|\\\\"$|""/', $element) > 0) return $diagnose ? ISEMAIL_UNESCAPEDDELIM : false; // ", CR, LF and NUL must be escaped, "" is too short
        } else {
            // Unquoted string tests:
            //
            // Period (".") may...appear, but may not be used to start or end the
            // local part, nor may two or more consecutive periods appear.
            //  (http://tools.ietf.org/html/rfc3696#section-3)
            //
            // A zero-length element implies a period at the beginning or end of the
            // local part, two periods together, or an element that was nothing but
            // a comment. Either way it's not allowed.
            if ($element === '') return $diagnose ? ISEMAIL_EMPTYELEMENT : false; // Dots in wrong place

            // Any ASCII graphic (printing) character other than the
            // at-sign ("@"), backslash, double quote, comma, or square brackets may
            // appear without quoting. If any of that list of excluded characters
            // are to appear, they must be quoted
            //  (http://tools.ietf.org/html/rfc3696#section-3)
            //
            // Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
            if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]/', $element) > 0) return $diagnose ? ISEMAIL_UNESCAPEDSPECIAL : false; // These characters must be in a quoted string
        }
    }

    if ($partLength > 64) return $diagnose ? ISEMAIL_LOCALTOOLONG : false; // Local part must be 64 characters or less

    // Now let's check the domain part...

    // The domain name can also be replaced by an IP address in square brackets
    //  (http://tools.ietf.org/html/rfc3696#section-3)
    //  (http://tools.ietf.org/html/rfc5321#section-4.1.3)
    //  (http://tools.ietf.org/html/rfc4291#section-2.2)
    if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
        // It's an address-literal
        $addressLiteral = substr($domain, 1, strlen($domain) - 2);
        $matchesIP      = array();

        // Extract IPv4 part from the end of the address-literal (if there is one)
        if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
            $index = strrpos($addressLiteral, $matchesIP[0]);

            if ($index === 0) {
                // Nothing there except a valid IPv4 address, so...
                return $diagnose ? ISEMAIL_VALID : true;
            } else {
                // Assume it's an attempt at a mixed address (IPv6 + IPv4)
                if ($addressLiteral[$index - 1] !== ':')       return $diagnose ? ISEMAIL_IPV4BADPREFIX      : false; // Character preceding IPv4 address must be ':'
                if (substr($addressLiteral, 0, 5) !== 'IPv6:') return $diagnose ? ISEMAIL_IPV6BADPREFIXMIXED : false; // RFC5321 section 4.1.3

                // Keep the "::" when the IPv4 part follows it directly ("IPv6:::1.2.3.4"),
                // otherwise drop the single colon that separates the two parts.
                $IPv6     = substr($addressLiteral, 5, ($index === 7) ? 2 : $index - 6);
                $groupMax = 6;
            }
        } else {
            // It must be an attempt at pure IPv6
            if (substr($addressLiteral, 0, 5) !== 'IPv6:') return $diagnose ? ISEMAIL_IPV6BADPREFIX : false; // RFC5321 section 4.1.3
            $IPv6     = substr($addressLiteral, 5);
            $groupMax = 8;
        }

        // Each match is one group; capture group 1 only participates for a
        // character that fits neither group alternative (i.e. an illegal one).
        $groupCount = preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
        $index      = strpos($IPv6, '::');

        if ($index === false) {
            // We need exactly the right number of groups
            if ($groupCount !== $groupMax) return $diagnose ? ISEMAIL_IPV6GROUPCOUNT : false; // RFC5321 section 4.1.3
        } else {
            if ($index !== strrpos($IPv6, '::')) return $diagnose ? ISEMAIL_IPV6DOUBLEDOUBLECOLON : false; // More than one '::'
            $groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
            if ($groupCount > $groupMax) return $diagnose ? ISEMAIL_IPV6TOOMANYGROUPS : false; // Too many IPv6 groups in address
        }

        // Check for unmatched characters: group 1 is '' for every legal match,
        // so any non-empty concatenation means an illegal character was seen.
        if (implode('', $matchesIP[1]) !== '') return $diagnose ? ISEMAIL_IPV6BADCHAR : false; // Illegal characters in address

        // It's a valid IPv6 address, so...
        return $diagnose ? ISEMAIL_VALID : true;
    } else {
        // It's a domain name...

        // The syntax of a legal Internet host name was specified in RFC-952
        // One aspect of host name syntax is hereby changed: the
        // restriction on the first character is relaxed to allow either a
        // letter or a digit.
        //  (http://tools.ietf.org/html/rfc1123#section-2.1)
        //
        // NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
        //
        // Most common applications, including email and the Web, will generally not
        // permit...escaped strings
        //  (http://tools.ietf.org/html/rfc3696#section-2)
        //
        // the better strategy has now become to make the "at least one period" test,
        // to verify LDH conformance (including verification that the apparent TLD name
        // is not all-numeric)
        //  (http://tools.ietf.org/html/rfc3696#section-2)
        //
        // Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
        // labels for SMTP clients or servers
        //  (http://tools.ietf.org/html/rfc5321#section-4.1.2)
        //
        // RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
        //  (http://tools.ietf.org/html/rfc5321#section-4.1.2)
        $dotArray   = /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $domain);
        $partLength = 0;
        $element    = ''; // Used after the foreach loop (TLD test), so make sure it has a value

        if (count($dotArray) === 1) return $diagnose ? ISEMAIL_TLD : false; // Mail host can't be a TLD (cite? What about localhost?)

        foreach ($dotArray as $element) {
            // Remove any leading or trailing FWS
            $element       = preg_replace($trimFWS, '', $element);
            $elementLength = strlen($element);

            // A zero-length element implies a period at the beginning or end of the
            // domain, or two periods together. Either way it's not allowed.
            if ($elementLength === 0) return $diagnose ? ISEMAIL_DOMAINEMPTYELEMENT : false; // Dots in wrong place

            // Then we need to remove all valid comments (i.e. those at the start or end of the element)
            if ($element[0] === '(') {
                $indexBrace = strpos($element, ')');
                if ($indexBrace !== false) {
                    if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
                        return $diagnose ? ISEMAIL_BADCOMMENT_START : false; // Illegal characters in comment
                    }
                    $element       = substr($element, $indexBrace + 1, $elementLength - $indexBrace - 1);
                    $elementLength = strlen($element);
                }
            }

            // Guard against an element that was nothing but a leading comment
            if ($elementLength > 0 && $element[$elementLength - 1] === ')') {
                $indexBrace = strrpos($element, '(');
                if ($indexBrace !== false) {
                    if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0) {
                        return $diagnose ? ISEMAIL_BADCOMMENT_END : false; // Illegal characters in comment
                    }
                    $element       = substr($element, 0, $indexBrace);
                    $elementLength = strlen($element);
                }
            }

            // Remove any leading or trailing FWS around the element (inside any comments)
            $element       = preg_replace($trimFWS, '', $element);
            $elementLength = strlen($element);

            // A label that consisted only of comments and/or FWS is an empty label
            if ($elementLength === 0) return $diagnose ? ISEMAIL_DOMAINEMPTYELEMENT : false;

            // What's left counts towards the maximum length for this part
            if ($partLength > 0) $partLength++; // for the dot
            $partLength += $elementLength;

            // The DNS defines domain name syntax very generally -- a
            // string of labels each containing up to 63 8-bit octets,
            // separated by dots, and with a maximum total of 255
            // octets.
            //  (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
            if ($elementLength > 63) return $diagnose ? ISEMAIL_DOMAINELEMENTTOOLONG : false; // Label must be 63 characters or less

            // Any ASCII graphic (printing) character other than the
            // at-sign ("@"), backslash, double quote, comma, or square brackets may
            // appear without quoting. If any of that list of excluded characters
            // are to appear, they must be quoted
            //  (http://tools.ietf.org/html/rfc3696#section-3)
            //
            // If the hyphen is used, it is not permitted to appear at
            // either the beginning or end of a label.
            //  (http://tools.ietf.org/html/rfc3696#section-2)
            //
            // Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
            if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]|^-|-$/', $element) > 0) {
                return $diagnose ? ISEMAIL_DOMAINBADCHAR : false;
            }
        }

        if ($partLength > 255) return $diagnose ? ISEMAIL_DOMAINTOOLONG : false; // Domain part must be 255 characters or less (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)

        // $element now holds the last label, i.e. the apparent TLD
        if (preg_match('/^[0-9]+$/', $element) > 0) return $diagnose ? ISEMAIL_TLDNUMERIC : false; // TLD can't be all-numeric (http://www.apps.ietf.org/rfc/rfc3696.html#sec-2)

        // Check DNS?
        if ($checkDNS && function_exists('checkdnsrr')) {
            if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
                return $diagnose ? ISEMAIL_DOMAINNOTFOUND : false; // Domain doesn't actually exist
            }
        }
    }

    // Eliminate all other factors, and the one which remains must be the truth.
    //  (Sherlock Holmes, The Sign of Four)
    return $diagnose ? ISEMAIL_VALID : true;
}
430 ?>