Subversion Repositories javautils

Compare Revisions

No changes between revisions

Regard whitespace Rev 3 → Rev 4

/ViaThinkSoft Java Utils/src/com/dominicsayers/isemail/EMailSyntaxDiagnosis.java
0,0 → 1,7
package com.dominicsayers.isemail;
 
/**
 * Result codes reported by the e-mail syntax check. The declaration order is
 * kept exactly as it was, so ordinals do not change.
 */
public enum EMailSyntaxDiagnosis {

    // --- overall result / address structure ---
    ISEMAIL_VALID,                 // address passed every check
    ISEMAIL_TOOLONG,               // whole address is too long
    ISEMAIL_NOAT,                  // no at-sign
    ISEMAIL_NOLOCALPART,           // nothing in front of the at-sign
    ISEMAIL_NODOMAIN,              // nothing behind the at-sign

    // --- local part ---
    ISEMAIL_ZEROLENGTHELEMENT,     // empty dot-separated element
    ISEMAIL_BADCOMMENT_START,      // illegal characters in a leading comment
    ISEMAIL_BADCOMMENT_END,        // illegal characters in a trailing comment
    ISEMAIL_UNESCAPEDDELIM,        // unescaped delimiter inside a quoted string
    ISEMAIL_EMPTYELEMENT,          // dots in the wrong place
    ISEMAIL_UNESCAPEDSPECIAL,      // special character outside a quoted string
    ISEMAIL_LOCALTOOLONG,          // local part is too long

    // --- address literal ---
    ISEMAIL_IPV4BADPREFIX,         // IPv4 part is not preceded by ':'
    ISEMAIL_IPV6BADPREFIXMIXED,    // mixed literal does not start with "IPv6:"
    ISEMAIL_IPV6BADPREFIX,         // IPv6 literal does not start with "IPv6:"
    ISEMAIL_IPV6GROUPCOUNT,        // wrong number of IPv6 groups
    ISEMAIL_IPV6DOUBLEDOUBLECOLON, // more than one "::"
    ISEMAIL_IPV6BADCHAR,           // unmatched characters in the IPv6 part
    ISEMAIL_IPV6TOOMANYGROUPS,     // too many IPv6 groups

    // --- domain name ---
    ISEMAIL_TLD,                   // domain has no dot
    ISEMAIL_DOMAINEMPTYELEMENT,    // empty domain label
    ISEMAIL_DOMAINELEMENTTOOLONG,  // domain label is too long
    ISEMAIL_DOMAINBADCHAR,         // illegal character in a domain label
    ISEMAIL_DOMAINTOOLONG,         // domain part is too long
    ISEMAIL_TLDNUMERIC,            // top-level label is all-numeric
    ISEMAIL_DOMAINNOTFOUND         // DNS lookup did not find the domain

    // ISEMAIL_NOTDEFINED is deliberately not declared in this enum.
}
Property changes:
Added: svn:mime-type
+text/plain
\ No newline at end of property
/ViaThinkSoft Java Utils/src/com/dominicsayers/isemail/PHPFunctions.java
0,0 → 1,98
package com.dominicsayers.isemail;
 
import java.util.regex.Matcher;
import java.util.regex.Pattern;
 
/**
* IMPORTANT NOTE! These functions were developed during the translation process
* of the E-Mail-Address verification class for Dominic Sayers. These functions
* are NEITHER IDENTICAL TO NOR AN OFFICIAL EQUIVALENT OF PHP's functions. They
* offer only as much functionality as my initial purpose needed. Special cases
* are usually not implemented. Please also note that you have to use the JAVA
* REGULAR EXPRESSION syntax! PHP's PCRE IS NOT INTERPRETED OR CONVERTED!
*/
 
public final class PHPFunctions {

    /**
     * Tests whether a regular expression matches anywhere in the input.
     *
     * @param regex
     *            The pattern in Java regular expression syntax.
     * @param input
     *            The text to search.
     * @return 1 if the pattern was found at least once, otherwise 0.
     */
    public static int preg_match(String regex, String input) {
        // preg_match() stops at the first match, so one find() is enough.
        return Pattern.compile(regex).matcher(input).find() ? 1 : 0;
    }

    /**
     * Returns the groups of the first match of a regular expression.
     *
     * @param regex
     *            The pattern in Java regular expression syntax.
     * @param input
     *            The text to search.
     * @return Index 0 holds the whole match, index n holds capturing group n
     *         ({@code null} if that group did not take part in the match). An
     *         empty array is returned when there is no match.
     */
    public static String[] preg_match_to_array(String regex, String input) {
        Matcher m = Pattern.compile(regex).matcher(input);

        if (!m.find()) {
            return new String[0];
        }

        String[] result = new String[m.groupCount() + 1];
        for (int i = 0; i < result.length; i++) {
            result[i] = m.group(i);
        }
        return result;
    }

    /**
     * Splits the input around matches of a regular expression. Trailing empty
     * strings are kept.
     *
     * @param regex
     *            The delimiter pattern in Java regular expression syntax.
     * @param input
     *            The text to split.
     * @return The parts of the input between the delimiters.
     */
    public static String[] preg_split(String regex, String input) {
        return input.split(regex, -1);
    }

    /**
     * Returns a copy of an array with one more element at the end.
     *
     * @param ary
     *            The array to extend; {@code null} is treated as empty.
     * @param append
     *            The element to add.
     * @return A new array one element longer than {@code ary}.
     */
    private static String[] appendToStringArray(String[] ary, String append) {
        if (ary == null) {
            ary = new String[0];
        }
        String[] ary2 = new String[ary.length + 1];
        System.arraycopy(ary, 0, ary2, 0, ary.length);
        ary2[ary.length] = append;
        return ary2;
    }

    /**
     * Collects the groups of every match of a regular expression.
     *
     * @param regex
     *            The pattern in Java regular expression syntax.
     * @param input
     *            The text to search.
     * @return An array indexed as [group#][match#]; group 0 is the whole
     *         match. Groups that did not take part in a match are
     *         {@code null}. A 0x0 array is returned when there is no match.
     */
    public static String[][] preg_match_all(String regex, String input) {
        Matcher m = Pattern.compile(regex).matcher(input);

        if (!m.find()) {
            return new String[0][0];
        }

        String[][] result = new String[m.groupCount() + 1][];
        do {
            for (int i = 0; i < result.length; i++) {
                result[i] = appendToStringArray(result[i], m.group(i));
            }
        } while (m.find());

        return result;
    }

    /**
     * Returns the rest of the input from a start position.
     *
     * @param input
     *            The text to cut.
     * @param start
     *            The start offset. A negative value counts from the end of
     *            the input; values outside the input are clamped.
     * @return The requested part, or an empty string if start lies behind the
     *         end of the input.
     */
    public static String substr(String input, int start) {
        return input.substring(clampStart(input, start));
    }

    /**
     * Returns a part of the input.
     *
     * @param input
     *            The text to cut.
     * @param start
     *            The start offset. A negative value counts from the end of
     *            the input; values outside the input are clamped.
     * @param length
     *            The maximum number of characters to return. A negative value
     *            means "stop that many characters before the end of the
     *            input", as in PHP.
     * @return The requested part; an empty string if the resulting range is
     *         empty. Never throws for out-of-range arguments.
     */
    public static String substr(String input, int start, int length) {
        int total = input.length();
        int from = clampStart(input, start);

        int to;
        if (length < 0) {
            to = total + length;
        } else {
            // Compare against the remaining length to avoid int overflow.
            to = (length > total - from) ? total : from + length;
        }

        if (to <= from) {
            return "";
        }
        return input.substring(from, to);
    }

    /**
     * Translates a PHP-style start offset into a valid Java index in the
     * range 0..input.length().
     */
    private static int clampStart(String input, int start) {
        int total = input.length();
        if (start < 0) {
            return Math.max(total + start, 0);
        }
        return Math.min(start, total);
    }

    /**
     * Replaces every match of a regular expression.
     *
     * @param pattern
     *            The pattern in Java regular expression syntax.
     * @param replacement
     *            The replacement in Java replacement syntax.
     * @param subject
     *            The text to work on.
     * @return The text with all matches replaced.
     */
    public static String preg_replace(String pattern, String replacement,
            String subject) {
        return subject.replaceAll(pattern, replacement);
    }

    private PHPFunctions() {
    }

}
Property changes:
Added: svn:mime-type
+text/plain
\ No newline at end of property
/ViaThinkSoft Java Utils/src/com/dominicsayers/isemail/IsEMail.java
0,0 → 1,648
package com.dominicsayers.isemail;
 
import javax.naming.NamingException;
 
/**
* @package isemail
* @author Dominic Sayers <dominic_sayers@hotmail.com>; Translated from PHP into
* Java by Daniel Marschall [www.daniel-marschall.de]
* @copyright 2010 Dominic Sayers
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @link http://www.dominicsayers.com/isemail
* @version 1.17 - Upper length limit corrected to 254 characters;
* Java-Translation 2010-06-13
*/
 
/*
* Copyright (c) 2008-2010, Dominic Sayers All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution. Neither the name of Dominic Sayers nor the names of
* its contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
 
public class IsEMail {
 
/**
* Checks the syntax of an email address without DNS check.
*
* @param email
* The email address to be checked.
* @return True if the email address is valid.
*/
public static boolean is_email(String email) {
return (is_email_diagnosis(email, false) == EMailSyntaxDiagnosis.ISEMAIL_VALID);
}
 
/**
* Checks the syntax of an email address.
*
* @param email
* The email address to be checked.
* @param checkDNS
* Whether a DNS check should be performed or not.
* @return True if the email address is valid.
*/
public static boolean is_email(String email, boolean checkDNS) {
return (is_email_diagnosis(email, checkDNS) == EMailSyntaxDiagnosis.ISEMAIL_VALID);
}
 
/**
* Checks the syntax of an email address with diagnosis and without DNS
* check.
*
* @param email
* The email address to be checked.
* @return A diagnosis of the email syntax.
*/
public static EMailSyntaxDiagnosis is_email_diagnosis(String email) {
return is_email_diagnosis(email, false);
}
 
/**
* Checks the syntax of an email address with diagnosis.
*
* @param email
* The email address to be checked.
* @param checkDNS
* Whether a DNS check should be performed or not.
* @return A diagnosis of the email syntax.
*/
public static EMailSyntaxDiagnosis is_email_diagnosis(String email,
boolean checkDNS) {
 
if (email == null)
email = "";
 
// Check that 'email' is a valid address. Read the following RFCs to
// understand the constraints:
// (http://tools.ietf.org/html/rfc5322)
// (http://tools.ietf.org/html/rfc3696)
// (http://tools.ietf.org/html/rfc5321)
// (http://tools.ietf.org/html/rfc4291#section-2.2)
// (http://tools.ietf.org/html/rfc1123#section-2.1)
 
// the upper limit on address lengths should normally be considered to
// be 254
// (http://www.rfc-editor.org/errata_search.php?rfc=3696)
// NB My erratum has now been verified by the IETF so the correct answer
// is 254
//
// The maximum total length of a reverse-path or forward-path is 256
// characters (including the punctuation and element separators)
// (http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3)
// NB There is a mandatory 2-character wrapper round the actual address
int emailLength = email.length();
// revision 1.17: Max length reduced to 254 (see above)
if (emailLength > 254) {
return EMailSyntaxDiagnosis.ISEMAIL_TOOLONG; // Too long
}
 
// Contemporary email addresses consist of a "local part" separated from
// a "domain part" (a fully-qualified domain name) by an at-sign ("@").
// (http://tools.ietf.org/html/rfc3696#section-3)
int atIndex = email.lastIndexOf('@');
 
if (atIndex == -1) {
return EMailSyntaxDiagnosis.ISEMAIL_NOAT; // No at-sign
}
if (atIndex == 0) {
return EMailSyntaxDiagnosis.ISEMAIL_NOLOCALPART; // No local part
}
if (atIndex == emailLength - 1) {
// No domain part
return EMailSyntaxDiagnosis.ISEMAIL_NODOMAIN;
// revision 1.14: Length test bug suggested by Andrew Campbell of
// Gloucester, MA
}
 
// Sanitize comments
// - remove nested comments, quotes and dots in comments
// - remove parentheses and dots from quoted strings
int braceDepth = 0;
boolean inQuote = false;
boolean escapeThisChar = false;
 
for (int i = 0; i < emailLength; ++i) {
char charX = email.charAt(i);
boolean replaceChar = false;
 
if (charX == '\\') {
escapeThisChar = !escapeThisChar; // Escape the next character?
} else {
switch (charX) {
case '(':
if (escapeThisChar) {
replaceChar = true;
} else {
if (inQuote) {
replaceChar = true;
} else {
if (braceDepth++ > 0) {
replaceChar = true; // Increment brace depth
}
}
}
 
break;
case ')':
if (escapeThisChar) {
replaceChar = true;
} else {
if (inQuote) {
replaceChar = true;
} else {
if (--braceDepth > 0)
replaceChar = true; // Decrement brace depth
if (braceDepth < 0) {
braceDepth = 0;
}
}
}
 
break;
case '"':
if (escapeThisChar) {
replaceChar = true;
} else {
if (braceDepth == 0) {
// Are we inside a quoted string?
inQuote = !inQuote;
} else {
replaceChar = true;
}
}
 
break;
case '.': // Dots don't help us either
if (escapeThisChar) {
replaceChar = true;
} else {
if (braceDepth > 0)
replaceChar = true;
}
 
break;
default:
}
 
escapeThisChar = false;
if (replaceChar) {
// Replace the offending character with something harmless
// revision 1.12: Line above replaced because PHPLint
// doesn't like that syntax
email = replaceCharAt(email, i, 'x');
}
 
}
}
 
String localPart = PHPFunctions.substr(email, 0, atIndex);
String domain = PHPFunctions.substr(email, atIndex + 1);
// Folding white space
final String FWS = "(?:(?:(?:[ \\t]*(?:\\r\\n))?[ \\t]+)|(?:[ \\t]+(?:(?:\\r\\n)[ \\t]+)*))";
// Let's check the local part for RFC compliance...
//
// local-part = dot-atom / quoted-string / obs-local-part
// obs-local-part = word *("." word)
// (http://tools.ietf.org/html/rfc5322#section-3.4.1)
//
// Problem: need to distinguish between "first.last" and "first"."last"
// (i.e. one element or two). And I suck at regexes.
 
String[] dotArray = PHPFunctions.preg_split(
"(?m)\\.(?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*(?![^\\\"]*\\\"))",
localPart);
int partLength = 0;
 
for (String element : dotArray) {
// Remove any leading or trailing FWS
element = PHPFunctions.preg_replace("^" + FWS + "|" + FWS + "$",
"", element);
int elementLength = element.length();
 
if (elementLength == 0) {
// Can't have empty element (consecutive dots or
// dots at the start or end)
return EMailSyntaxDiagnosis.ISEMAIL_ZEROLENGTHELEMENT;
}
// revision 1.15: Speed up the test and get rid of
// "unitialized string offset" notices from PHP
 
// We need to remove any valid comments (i.e. those at the start or
// end of the element)
if (element.charAt(0) == '(') {
int indexBrace = element.indexOf(')');
if (indexBrace != -1) {
if (PHPFunctions.preg_match("(?<!\\\\)[\\(\\)]",
PHPFunctions.substr(element, 1, indexBrace - 1)) > 0) {
// Illegal characters in comment
return EMailSyntaxDiagnosis.ISEMAIL_BADCOMMENT_START;
}
element = PHPFunctions.substr(element, indexBrace + 1,
elementLength - indexBrace - 1);
elementLength = element.length();
}
}
 
if (element.charAt(elementLength - 1) == ')') {
int indexBrace = element.lastIndexOf('(');
if (indexBrace != -1) {
if (PHPFunctions.preg_match("(?<!\\\\)(?:[\\(\\)])",
PHPFunctions.substr(element, indexBrace + 1,
elementLength - indexBrace - 2)) > 0) {
// Illegal characters in comment
return EMailSyntaxDiagnosis.ISEMAIL_BADCOMMENT_END;
}
element = PHPFunctions.substr(element, 0, indexBrace);
elementLength = element.length();
}
}
 
// Remove any leading or trailing FWS around the element (inside any
// comments)
element = PHPFunctions.preg_replace("^" + FWS + "|" + FWS + "$",
"", element);
 
// What's left counts towards the maximum length for this part
if (partLength > 0)
partLength++; // for the dot
partLength += element.length();
 
// Each dot-delimited component can be an atom or a quoted string
// (because of the obs-local-part provision)
 
if (PHPFunctions.preg_match("(?s)^\"(?:.)*\"$", element) > 0) {
// Quoted-string tests:
//
// Remove any FWS
element = PHPFunctions.preg_replace("(?<!\\\\)" + FWS, "",
element);
// My regex skillz aren't up to distinguishing between \" \\"
// \\\" \\\\" etc.
// So remove all \\ from the string first...
element = PHPFunctions.preg_replace("\\\\\\\\", " ", element);
if (PHPFunctions
.preg_match(
"(?<!\\\\|^)[\"\\r\\n\\x00](?!$)|\\\\\"$|\"\"",
element) > 0) {
// ", CR, LF and NUL must be escaped, "" is too short
return EMailSyntaxDiagnosis.ISEMAIL_UNESCAPEDDELIM;
}
} else {
// Unquoted string tests:
//
// Period (".") may...appear, but may not be used to start or
// end the
// local part, nor may two or more consecutive periods appear.
// (http://tools.ietf.org/html/rfc3696#section-3)
//
// A zero-length element implies a period at the beginning or
// end of the
// local part, or two periods together. Either way it's not
// allowed.
if (element.equals("")) {
// Dots in wrong place
return EMailSyntaxDiagnosis.ISEMAIL_EMPTYELEMENT;
}
 
// Any ASCII graphic (printing) character other than the
// at-sign ("@"), backslash, double quote, comma, or square
// brackets may
// appear without quoting. If any of that list of excluded
// characters
// are to appear, they must be quoted
// (http://tools.ietf.org/html/rfc3696#section-3)
//
// Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :,
// ;, @, \, comma, period, "
if (PHPFunctions.preg_match(
"[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\.\"]", element) > 0) {
// These characters must be in a quoted string
return EMailSyntaxDiagnosis.ISEMAIL_UNESCAPEDSPECIAL;
}
}
}
 
if (partLength > 64) {
// Local part must be 64 characters or less
return EMailSyntaxDiagnosis.ISEMAIL_LOCALTOOLONG;
}
 
// Now let's check the domain part...
 
// The domain name can also be replaced by an IP address in square
// brackets
// (http://tools.ietf.org/html/rfc3696#section-3)
// (http://tools.ietf.org/html/rfc5321#section-4.1.3)
// (http://tools.ietf.org/html/rfc4291#section-2.2)
 
if (PHPFunctions.preg_match("^\\[(.)+]$", domain) == 1) {
// It's an address-literal
String addressLiteral = PHPFunctions.substr(domain, 1, domain
.length() - 2);
 
String IPv6;
int groupMax;
 
// Extract IPv4 part from the end of the address-literal (if there
// is one)
String[] matchesIP = PHPFunctions
.preg_match_to_array(
"\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$",
addressLiteral);
if (matchesIP.length > 0) {
int index = addressLiteral.lastIndexOf(matchesIP[0]);
 
if (index == 0) {
// Nothing there except a valid IPv4 address, so...
return EMailSyntaxDiagnosis.ISEMAIL_VALID;
} else {
// Assume it's an attempt at a mixed address (IPv6 + IPv4)
if (addressLiteral.charAt(index - 1) != ':') {
// Character preceding IPv4 address must be ':'
return EMailSyntaxDiagnosis.ISEMAIL_IPV4BADPREFIX;
}
if (!addressLiteral.startsWith("IPv6:")) {
// RFC5321 section 4.1.3
return EMailSyntaxDiagnosis.ISEMAIL_IPV6BADPREFIXMIXED;
}
 
IPv6 = PHPFunctions.substr(addressLiteral, 5,
(index == 7) ? 2 : index - 6);
groupMax = 6;
}
} else {
// It must be an attempt at pure IPv6
if (!addressLiteral.startsWith("IPv6:")) {
// RFC5321 section 4.1.3
return EMailSyntaxDiagnosis.ISEMAIL_IPV6BADPREFIX;
}
IPv6 = PHPFunctions.substr(addressLiteral, 5);
groupMax = 8;
}
 
String[][] matchesIP6 = PHPFunctions.preg_match_all(
"^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)", IPv6);
int groupCount = 0;
if (matchesIP6.length > 0) {
groupCount = matchesIP6[0].length;
} // else: Undefined state (should never be reached)
int index = IPv6.indexOf("::");
 
if (index == -1) {
// We need exactly the right number of groups
if (groupCount != groupMax) {
// RFC5321 section 4.1.3
return EMailSyntaxDiagnosis.ISEMAIL_IPV6GROUPCOUNT;
}
} else {
if (index != IPv6.lastIndexOf("::")) {
// More than one '::'
return EMailSyntaxDiagnosis.ISEMAIL_IPV6DOUBLEDOUBLECOLON;
}
groupMax = (index == 0 || index == (IPv6.length() - 2)) ? groupMax
: groupMax - 1;
if (groupCount > groupMax) {
// Too many IPv6 groups in address
return EMailSyntaxDiagnosis.ISEMAIL_IPV6TOOMANYGROUPS;
}
}
 
// Daniel Marschall: For the Java translation, I optimized
// the process. Instead of sorting the array (which needs
// null-pointer checks and array-length checks) and then
// checking element [0], I decided to directly check every
// element.
 
// Check for unmatched characters
// array_multisort(matchesIP6[1], SORT_DESC);
// if ($matchesIP6[1][0] !== '')) {
// return EMailResultState.ISEMAIL_IPV6BADCHAR;
// }
 
// Check for unmatched characters
if (matchesIP6.length > 1) {
for (String s : matchesIP6[1]) {
if ((s != null) && (!s.equals(""))) {
return EMailSyntaxDiagnosis.ISEMAIL_IPV6BADCHAR;
}
}
} // else: Undefined state (should never be reached)
 
// It's a valid IPv6 address, so...
return EMailSyntaxDiagnosis.ISEMAIL_VALID;
} else {
// It's a domain name...
 
// The syntax of a legal Internet host name was specified in RFC-952
// One aspect of host name syntax is hereby changed: the
// restriction on the first character is relaxed to allow either a
// letter or a digit.
// (http://tools.ietf.org/html/rfc1123#section-2.1)
//
// NB RFC 1123 updates RFC 1035, but this is not currently apparent
// from reading RFC 1035.
//
// Most common applications, including email and the Web, will
// generally not
// permit...escaped strings
// (http://tools.ietf.org/html/rfc3696#section-2)
//
// the better strategy has now become to make the
// "at least one period" test,
// to verify LDH conformance (including verification that the
// apparent TLD name
// is not all-numeric)
// (http://tools.ietf.org/html/rfc3696#section-2)
//
// Characters outside the set of alphabetic characters, digits, and
// hyphen MUST NOT appear in domain name
// labels for SMTP clients or servers
// (http://tools.ietf.org/html/rfc5321#section-4.1.2)
//
// RFC5321 precludes the use of a trailing dot in a domain name for
// SMTP purposes
// (http://tools.ietf.org/html/rfc5321#section-4.1.2)
 
dotArray = PHPFunctions.preg_split(
"(?m)\\.(?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*(?![^\\\"]*\\\"))",
domain);
partLength = 0;
// Since we use 'element' after the foreach
// loop let's make sure it has a value
String lastElement = "";
// revision 1.13: Line above added because PHPLint now checks for
// Definitely Assigned Variables
 
if (dotArray.length == 1) {
// Mail host can't be a TLD (cite? What about localhost?)
return EMailSyntaxDiagnosis.ISEMAIL_TLD;
}
 
for (String element : dotArray) {
lastElement = element;
// Remove any leading or trailing FWS
element = PHPFunctions.preg_replace(
"^" + FWS + "|" + FWS + "$", "", element);
int elementLength = element.length();
 
// Each dot-delimited component must be of type atext
// A zero-length element implies a period at the beginning or
// end of the
// local part, or two periods together. Either way it's not
// allowed.
if (elementLength == 0) {
// Dots in wrong place
return EMailSyntaxDiagnosis.ISEMAIL_DOMAINEMPTYELEMENT;
}
// revision 1.15: Speed up the test and get rid of
// "unitialized string offset" notices from PHP
 
// Then we need to remove all valid comments (i.e. those at the
// start or end of the element
if (element.charAt(0) == '(') {
int indexBrace = element.indexOf(')');
if (indexBrace != -1) {
if (PHPFunctions
.preg_match("(?<!\\\\)[\\(\\)]", PHPFunctions
.substr(element, 1, indexBrace - 1)) > 0) {
// revision 1.17: Fixed name of constant (also
// spotted by turboflash - thanks!)
// Illegal characters in comment
return EMailSyntaxDiagnosis.ISEMAIL_BADCOMMENT_START;
}
element = PHPFunctions.substr(element, indexBrace + 1,
elementLength - indexBrace - 1);
elementLength = element.length();
}
}
 
if (element.charAt(elementLength - 1) == ')') {
int indexBrace = element.lastIndexOf('(');
if (indexBrace != -1) {
if (PHPFunctions.preg_match("(?<!\\\\)(?:[\\(\\)])",
PHPFunctions.substr(element, indexBrace + 1,
elementLength - indexBrace - 2)) > 0) {
// revision 1.17: Fixed name of constant (also
// spotted by turboflash - thanks!)
// Illegal characters in comment
return EMailSyntaxDiagnosis.ISEMAIL_BADCOMMENT_END;
}
 
element = PHPFunctions.substr(element, 0, indexBrace);
elementLength = element.length();
}
}
 
// Remove any leading or trailing FWS around the element (inside
// any comments)
element = PHPFunctions.preg_replace(
"^" + FWS + "|" + FWS + "$", "", element);
 
// What's left counts towards the maximum length for this part
if (partLength > 0)
partLength++; // for the dot
partLength += element.length();
 
// The DNS defines domain name syntax very generally -- a
// string of labels each containing up to 63 8-bit octets,
// separated by dots, and with a maximum total of 255
// octets.
// (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
if (elementLength > 63) {
// Label must be 63 characters or less
return EMailSyntaxDiagnosis.ISEMAIL_DOMAINELEMENTTOOLONG;
}
 
// Any ASCII graphic (printing) character other than the
// at-sign ("@"), backslash, double quote, comma, or square
// brackets may
// appear without quoting. If any of that list of excluded
// characters
// are to appear, they must be quoted
// (http://tools.ietf.org/html/rfc3696#section-3)
//
// If the hyphen is used, it is not permitted to appear at
// either the beginning or end of a label.
// (http://tools.ietf.org/html/rfc3696#section-2)
//
// Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :,
// ;, @, \, comma, period, "
 
if (PHPFunctions.preg_match(
"[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\.\"]|^-|-$",
element) > 0) {
return EMailSyntaxDiagnosis.ISEMAIL_DOMAINBADCHAR;
}
}
 
if (partLength > 255) {
// Domain part must be 255 characters or less
// (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
return EMailSyntaxDiagnosis.ISEMAIL_DOMAINTOOLONG;
}
 
if (PHPFunctions.preg_match("^[0-9]+$", lastElement) > 0) {
// TLD can't be all-numeric
// (http://www.apps.ietf.org/rfc/rfc3696.html#sec-2)
return EMailSyntaxDiagnosis.ISEMAIL_TLDNUMERIC;
}
 
// Check DNS?
if (checkDNS) {
try {
if (!((DNSLookup.doLookup(domain, DNSType.A) > 0) || (DNSLookup
.doLookup(domain, DNSType.MX) > 0))) {
// Domain doesn't actually exist
return EMailSyntaxDiagnosis.ISEMAIL_DOMAINNOTFOUND;
}
} catch (NamingException e) {
return EMailSyntaxDiagnosis.ISEMAIL_DOMAINNOTFOUND;
}
}
}
 
// Eliminate all other factors, and the one which remains must be the
// truth. (Sherlock Holmes, The Sign of Four)
return EMailSyntaxDiagnosis.ISEMAIL_VALID;
}
 
/**
* Replaces a char in a String
*
* @param s
* The input string
* @param pos
* The position of the char to be replaced
* @param c
* The new char
* @return The new String
* @see http://www.rgagnon.com/javadetails/java-0030.html
*/
public static String replaceCharAt(String s, int pos, char c) {
return s.substring(0, pos) + c + s.substring(pos + 1);
}
 
private IsEMail() {
}
}
Property changes:
Added: svn:mime-type
+text/plain
\ No newline at end of property
/ViaThinkSoft Java Utils/src/com/dominicsayers/isemail/DNSType.java
0,0 → 1,7
package com.dominicsayers.isemail;
 
/**
 * DNS record types that can be named in a lookup. The declaration order is
 * kept exactly as it was, so ordinals do not change.
 */
public enum DNSType {
    A,
    MX,
    NS,
    SOA,
    PTR,
    CNAME,
    AAAA,
    A6,
    SRV,
    NAPTR,
    TXT,
    ANY
}
Property changes:
Added: svn:mime-type
+text/plain
\ No newline at end of property
/ViaThinkSoft Java Utils/src/com/dominicsayers/isemail/is_email.php
0,0 → 1,426
<?php
/**
* @package isemail
* @author Dominic Sayers <dominic_sayers@hotmail.com>
* @copyright 2010 Dominic Sayers
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @link http://www.dominicsayers.com/isemail
* @version 1.17 - Upper length limit corrected to 254 characters
*/
 
/*
Copyright (c) 2008-2010, Dominic Sayers
All rights reserved.
 
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
 
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Dominic Sayers nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.
 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
 
/*.
require_module 'standard';
require_module 'pcre';
.*/
/*.mixed.*/ function is_email (/*.string.*/ $email, $checkDNS = false, $diagnose = false) {
// Check that $email is a valid address. Read the following RFCs to understand the constraints:
// (http://tools.ietf.org/html/rfc5322)
// (http://tools.ietf.org/html/rfc3696)
// (http://tools.ietf.org/html/rfc5321)
// (http://tools.ietf.org/html/rfc4291#section-2.2)
// (http://tools.ietf.org/html/rfc1123#section-2.1)
 
if (!defined('ISEMAIL_VALID')) {
define('ISEMAIL_VALID' , 0);
define('ISEMAIL_TOOLONG' , 1);
define('ISEMAIL_NOAT' , 2);
define('ISEMAIL_NOLOCALPART' , 3);
define('ISEMAIL_NODOMAIN' , 4);
define('ISEMAIL_ZEROLENGTHELEMENT' , 5);
define('ISEMAIL_BADCOMMENT_START' , 6);
define('ISEMAIL_BADCOMMENT_END' , 7);
define('ISEMAIL_UNESCAPEDDELIM' , 8);
define('ISEMAIL_EMPTYELEMENT' , 9);
define('ISEMAIL_UNESCAPEDSPECIAL' , 10);
define('ISEMAIL_LOCALTOOLONG' , 11);
define('ISEMAIL_IPV4BADPREFIX' , 12);
define('ISEMAIL_IPV6BADPREFIXMIXED' , 13);
define('ISEMAIL_IPV6BADPREFIX' , 14);
define('ISEMAIL_IPV6GROUPCOUNT' , 15);
define('ISEMAIL_IPV6DOUBLEDOUBLECOLON' , 16);
define('ISEMAIL_IPV6BADCHAR' , 17);
define('ISEMAIL_IPV6TOOMANYGROUPS' , 18);
define('ISEMAIL_TLD' , 19);
define('ISEMAIL_DOMAINEMPTYELEMENT' , 20);
define('ISEMAIL_DOMAINELEMENTTOOLONG' , 21);
define('ISEMAIL_DOMAINBADCHAR' , 22);
define('ISEMAIL_DOMAINTOOLONG' , 23);
define('ISEMAIL_TLDNUMERIC' , 24);
define('ISEMAIL_DOMAINNOTFOUND' , 25);
define('ISEMAIL_NOTDEFINED' , 99);
}
 
// the upper limit on address lengths should normally be considered to be 254
// (http://www.rfc-editor.org/errata_search.php?rfc=3696)
// NB My erratum has now been verified by the IETF so the correct answer is 254
//
// The maximum total length of a reverse-path or forward-path is 256
// characters (including the punctuation and element separators)
// (http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3)
// NB There is a mandatory 2-character wrapper round the actual address
$emailLength = strlen($email);
// revision 1.17: Max length reduced to 254 (see above)
if ($emailLength > 254) return $diagnose ? ISEMAIL_TOOLONG : false; // Too long
 
// Contemporary email addresses consist of a "local part" separated from
// a "domain part" (a fully-qualified domain name) by an at-sign ("@").
// (http://tools.ietf.org/html/rfc3696#section-3)
$atIndex = strrpos($email,'@');
 
if ($atIndex === false) return $diagnose ? ISEMAIL_NOAT : false; // No at-sign
if ($atIndex === 0) return $diagnose ? ISEMAIL_NOLOCALPART : false; // No local part
if ($atIndex === $emailLength - 1) return $diagnose ? ISEMAIL_NODOMAIN : false; // No domain part
// revision 1.14: Length test bug suggested by Andrew Campbell of Gloucester, MA
// Sanitize comments
// - remove nested comments, quotes and dots in comments
// - remove parentheses and dots from quoted strings
$braceDepth = 0;
$inQuote = false;
$escapeThisChar = false;
 
for ($i = 0; $i < $emailLength; ++$i) {
$char = $email[$i];
$replaceChar = false;
 
if ($char === '\\') {
$escapeThisChar = !$escapeThisChar; // Escape the next character?
} else {
switch ($char) {
case '(':
if ($escapeThisChar) {
$replaceChar = true;
} else {
if ($inQuote) {
$replaceChar = true;
} else {
if ($braceDepth++ > 0) $replaceChar = true; // Increment brace depth
}
}
 
break;
case ')':
if ($escapeThisChar) {
$replaceChar = true;
} else {
if ($inQuote) {
$replaceChar = true;
} else {
if (--$braceDepth > 0) $replaceChar = true; // Decrement brace depth
if ($braceDepth < 0) $braceDepth = 0;
}
}
 
break;
case '"':
if ($escapeThisChar) {
$replaceChar = true;
} else {
if ($braceDepth === 0) {
$inQuote = !$inQuote; // Are we inside a quoted string?
} else {
$replaceChar = true;
}
}
 
break;
case '.': // Dots don't help us either
if ($escapeThisChar) {
$replaceChar = true;
} else {
if ($braceDepth > 0) $replaceChar = true;
}
 
break;
default:
}
 
$escapeThisChar = false;
// if ($replaceChar) $email[$i] = 'x'; // Replace the offending character with something harmless
// revision 1.12: Line above replaced because PHPLint doesn't like that syntax
if ($replaceChar) $email = (string) substr_replace($email, 'x', $i, 1); // Replace the offending character with something harmless
}
}
 
$localPart = substr($email, 0, $atIndex);
$domain = substr($email, $atIndex + 1);
$FWS = "(?:(?:(?:[ \\t]*(?:\\r\\n))?[ \\t]+)|(?:[ \\t]+(?:(?:\\r\\n)[ \\t]+)*))"; // Folding white space
// Let's check the local part for RFC compliance...
//
// local-part = dot-atom / quoted-string / obs-local-part
// obs-local-part = word *("." word)
// (http://tools.ietf.org/html/rfc5322#section-3.4.1)
//
// Problem: need to distinguish between "first.last" and "first"."last"
// (i.e. one element or two). And I suck at regexes.
$dotArray = /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $localPart);
$partLength = 0;
 
foreach ($dotArray as $element) {
// Remove any leading or trailing FWS
$element = preg_replace("/^$FWS|$FWS\$/", '', $element);
$elementLength = strlen($element);
 
if ($elementLength === 0) return $diagnose ? ISEMAIL_ZEROLENGTHELEMENT : false; // Can't have empty element (consecutive dots or dots at the start or end)
// revision 1.15: Speed up the test and get rid of "unitialized string offset" notices from PHP
 
// We need to remove any valid comments (i.e. those at the start or end of the element)
if ($element[0] === '(') {
$indexBrace = strpos($element, ')');
if ($indexBrace !== false) {
if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
return $diagnose ? ISEMAIL_BADCOMMENT_START : false; // Illegal characters in comment
}
$element = substr($element, $indexBrace + 1, $elementLength - $indexBrace - 1);
$elementLength = strlen($element);
}
}
if ($element[$elementLength - 1] === ')') {
$indexBrace = strrpos($element, '(');
if ($indexBrace !== false) {
if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0) {
return $diagnose ? ISEMAIL_BADCOMMENT_END : false; // Illegal characters in comment
}
$element = substr($element, 0, $indexBrace);
$elementLength = strlen($element);
}
}
 
// Remove any leading or trailing FWS around the element (inside any comments)
$element = preg_replace("/^$FWS|$FWS\$/", '', $element);
 
// What's left counts towards the maximum length for this part
if ($partLength > 0) $partLength++; // for the dot
$partLength += strlen($element);
 
// Each dot-delimited component can be an atom or a quoted string
// (because of the obs-local-part provision)
if (preg_match('/^"(?:.)*"$/s', $element) > 0) {
// Quoted-string tests:
//
// Remove any FWS
$element = preg_replace("/(?<!\\\\)$FWS/", '', $element);
// My regex skillz aren't up to distinguishing between \" \\" \\\" \\\\" etc.
// So remove all \\ from the string first...
$element = preg_replace('/\\\\\\\\/', ' ', $element);
if (preg_match('/(?<!\\\\|^)["\\r\\n\\x00](?!$)|\\\\"$|""/', $element) > 0) return $diagnose ? ISEMAIL_UNESCAPEDDELIM : false; // ", CR, LF and NUL must be escaped, "" is too short
} else {
// Unquoted string tests:
//
// Period (".") may...appear, but may not be used to start or end the
// local part, nor may two or more consecutive periods appear.
// (http://tools.ietf.org/html/rfc3696#section-3)
//
// A zero-length element implies a period at the beginning or end of the
// local part, or two periods together. Either way it's not allowed.
if ($element === '') return $diagnose ? ISEMAIL_EMPTYELEMENT : false; // Dots in wrong place
 
// Any ASCII graphic (printing) character other than the
// at-sign ("@"), backslash, double quote, comma, or square brackets may
// appear without quoting. If any of that list of excluded characters
// are to appear, they must be quoted
// (http://tools.ietf.org/html/rfc3696#section-3)
//
// Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]/', $element) > 0) return $diagnose ? ISEMAIL_UNESCAPEDSPECIAL : false; // These characters must be in a quoted string
}
}
 
if ($partLength > 64) return $diagnose ? ISEMAIL_LOCALTOOLONG : false; // Local part must be 64 characters or less
 
// Now let's check the domain part...
 
// The domain name can also be replaced by an IP address in square brackets
// (http://tools.ietf.org/html/rfc3696#section-3)
// (http://tools.ietf.org/html/rfc5321#section-4.1.3)
// (http://tools.ietf.org/html/rfc4291#section-2.2)
if (preg_match('/^\\[(.)+]$/', $domain) === 1) {
// It's an address-literal
$addressLiteral = substr($domain, 1, strlen($domain) - 2);
$matchesIP = array();
// Extract IPv4 part from the end of the address-literal (if there is one)
if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressLiteral, $matchesIP) > 0) {
$index = strrpos($addressLiteral, $matchesIP[0]);
if ($index === 0) {
// Nothing there except a valid IPv4 address, so...
return $diagnose ? ISEMAIL_VALID : true;
} else {
// Assume it's an attempt at a mixed address (IPv6 + IPv4)
if ($addressLiteral[$index - 1] !== ':') return $diagnose ? ISEMAIL_IPV4BADPREFIX : false; // Character preceding IPv4 address must be ':'
if (substr($addressLiteral, 0, 5) !== 'IPv6:') return $diagnose ? ISEMAIL_IPV6BADPREFIXMIXED : false; // RFC5321 section 4.1.3
 
$IPv6 = substr($addressLiteral, 5, ($index ===7) ? 2 : $index - 6);
$groupMax = 6;
}
} else {
// It must be an attempt at pure IPv6
if (substr($addressLiteral, 0, 5) !== 'IPv6:') return $diagnose ? ISEMAIL_IPV6BADPREFIX : false; // RFC5321 section 4.1.3
$IPv6 = substr($addressLiteral, 5);
$groupMax = 8;
}
 
$groupCount = preg_match_all('/^[0-9a-fA-F]{0,4}|\\:[0-9a-fA-F]{0,4}|(.)/', $IPv6, $matchesIP);
$index = strpos($IPv6,'::');
 
if ($index === false) {
// We need exactly the right number of groups
if ($groupCount !== $groupMax) return $diagnose ? ISEMAIL_IPV6GROUPCOUNT : false; // RFC5321 section 4.1.3
} else {
if ($index !== strrpos($IPv6,'::')) return $diagnose ? ISEMAIL_IPV6DOUBLEDOUBLECOLON : false; // More than one '::'
$groupMax = ($index === 0 || $index === (strlen($IPv6) - 2)) ? $groupMax : $groupMax - 1;
if ($groupCount > $groupMax) return $diagnose ? ISEMAIL_IPV6TOOMANYGROUPS : false; // Too many IPv6 groups in address
}
 
// Check for unmatched characters
array_multisort($matchesIP[1], SORT_DESC);
if ($matchesIP[1][0] !== '') return $diagnose ? ISEMAIL_IPV6BADCHAR : false; // Illegal characters in address
 
// It's a valid IPv6 address, so...
return $diagnose ? ISEMAIL_VALID : true;
} else {
// It's a domain name...
 
// The syntax of a legal Internet host name was specified in RFC-952
// One aspect of host name syntax is hereby changed: the
// restriction on the first character is relaxed to allow either a
// letter or a digit.
// (http://tools.ietf.org/html/rfc1123#section-2.1)
//
// NB RFC 1123 updates RFC 1035, but this is not currently apparent from reading RFC 1035.
//
// Most common applications, including email and the Web, will generally not
// permit...escaped strings
// (http://tools.ietf.org/html/rfc3696#section-2)
//
// the better strategy has now become to make the "at least one period" test,
// to verify LDH conformance (including verification that the apparent TLD name
// is not all-numeric)
// (http://tools.ietf.org/html/rfc3696#section-2)
//
// Characters outside the set of alphabetic characters, digits, and hyphen MUST NOT appear in domain name
// labels for SMTP clients or servers
// (http://tools.ietf.org/html/rfc5321#section-4.1.2)
//
// RFC5321 precludes the use of a trailing dot in a domain name for SMTP purposes
// (http://tools.ietf.org/html/rfc5321#section-4.1.2)
$dotArray = /*. (array[int]string) .*/ preg_split('/\\.(?=(?:[^\\"]*\\"[^\\"]*\\")*(?![^\\"]*\\"))/m', $domain);
$partLength = 0;
$element = ''; // Since we use $element after the foreach loop let's make sure it has a value
// revision 1.13: Line above added because PHPLint now checks for Definitely Assigned Variables
 
if (count($dotArray) === 1) return $diagnose ? ISEMAIL_TLD : false; // Mail host can't be a TLD (cite? What about localhost?)
 
foreach ($dotArray as $element) {
// Remove any leading or trailing FWS
$element = preg_replace("/^$FWS|$FWS\$/", '', $element);
$elementLength = strlen($element);
// Each dot-delimited component must be of type atext
// A zero-length element implies a period at the beginning or end of the
// local part, or two periods together. Either way it's not allowed.
if ($elementLength === 0) return $diagnose ? ISEMAIL_DOMAINEMPTYELEMENT : false; // Dots in wrong place
// revision 1.15: Speed up the test and get rid of "unitialized string offset" notices from PHP
// Then we need to remove all valid comments (i.e. those at the start or end of the element
if ($element[0] === '(') {
$indexBrace = strpos($element, ')');
if ($indexBrace !== false) {
if (preg_match('/(?<!\\\\)[\\(\\)]/', substr($element, 1, $indexBrace - 1)) > 0) {
// revision 1.17: Fixed name of constant (also spotted by turboflash - thanks!)
return $diagnose ? ISEMAIL_BADCOMMENT_START : false; // Illegal characters in comment
}
$element = substr($element, $indexBrace + 1, $elementLength - $indexBrace - 1);
$elementLength = strlen($element);
}
}
if ($element[$elementLength - 1] === ')') {
$indexBrace = strrpos($element, '(');
if ($indexBrace !== false) {
if (preg_match('/(?<!\\\\)(?:[\\(\\)])/', substr($element, $indexBrace + 1, $elementLength - $indexBrace - 2)) > 0)
// revision 1.17: Fixed name of constant (also spotted by turboflash - thanks!)
return $diagnose ? ISEMAIL_BADCOMMENT_END : false; // Illegal characters in comment
 
$element = substr($element, 0, $indexBrace);
$elementLength = strlen($element);
}
}
// Remove any leading or trailing FWS around the element (inside any comments)
$element = preg_replace("/^$FWS|$FWS\$/", '', $element);
// What's left counts towards the maximum length for this part
if ($partLength > 0) $partLength++; // for the dot
$partLength += strlen($element);
// The DNS defines domain name syntax very generally -- a
// string of labels each containing up to 63 8-bit octets,
// separated by dots, and with a maximum total of 255
// octets.
// (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
if ($elementLength > 63) return $diagnose ? ISEMAIL_DOMAINELEMENTTOOLONG : false; // Label must be 63 characters or less
// Any ASCII graphic (printing) character other than the
// at-sign ("@"), backslash, double quote, comma, or square brackets may
// appear without quoting. If any of that list of excluded characters
// are to appear, they must be quoted
// (http://tools.ietf.org/html/rfc3696#section-3)
//
// If the hyphen is used, it is not permitted to appear at
// either the beginning or end of a label.
// (http://tools.ietf.org/html/rfc3696#section-2)
//
// Any excluded characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
if (preg_match('/[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\."]|^-|-$/', $element) > 0) {
return $diagnose ? ISEMAIL_DOMAINBADCHAR : false;
}
}
 
if ($partLength > 255) return $diagnose ? ISEMAIL_DOMAINTOOLONG : false; // Domain part must be 255 characters or less (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
 
if (preg_match('/^[0-9]+$/', $element) > 0) return $diagnose ? ISEMAIL_TLDNUMERIC : false; // TLD can't be all-numeric (http://www.apps.ietf.org/rfc/rfc3696.html#sec-2)
 
// Check DNS?
if ($checkDNS && function_exists('checkdnsrr')) {
if (!(checkdnsrr($domain, 'A') || checkdnsrr($domain, 'MX'))) {
return $diagnose ? ISEMAIL_DOMAINNOTFOUND : false; // Domain doesn't actually exist
}
}
}
 
// Eliminate all other factors, and the one which remains must be the truth.
// (Sherlock Holmes, The Sign of Four)
return $diagnose ? ISEMAIL_VALID : true;
}
?>
/ViaThinkSoft Java Utils/src/com/dominicsayers/isemail/DNSLookup.java
0,0 → 1,28
package com.dominicsayers.isemail;
 
import java.util.Hashtable;
import javax.naming.*;
import javax.naming.directory.*;
 
// Source: http://www.rgagnon.com/javadetails/java-0452.html
// Modified
 
public class DNSLookup {
public static int doLookup(String hostName, DNSType type)
throws NamingException {
Hashtable<String, String> env = new Hashtable<String, String>();
env.put("java.naming.factory.initial",
"com.sun.jndi.dns.DnsContextFactory");
DirContext ictx = new InitialDirContext(env);
Attributes attrs = ictx.getAttributes(hostName, new String[] { type
.toString() });
Attribute attr = attrs.get(type.toString());
if (attr == null) {
return 0;
}
return attr.size();
}
 
private DNSLookup() {
}
}
Property changes:
Added: svn:mime-type
+text/plain
\ No newline at end of property