Rev 3 | Rev 17 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3 | daniel-mar | 1 | package de.viathinksoft.utils.mail.syntaxchecker; |
2 | |||
3 | import java.util.Arrays; |
||
4 | import java.util.HashSet; |
||
5 | import java.util.regex.Pattern; |
||
6 | |||
7 | import de.viathinksoft.utils.mail.EMailAddress; |
||
8 | |||
9 | /** |
||
9 | daniel-mar | 10 | * This class is not stable. For a good syntax check, please use the classes of |
11 | * Dominic Sayers or Cal Henderson. |
||
3 | daniel-mar | 12 | * |
13 | * @author Daniel Marschall |
||
9 | daniel-mar | 14 | * @version 0.1 |
3 | daniel-mar | 15 | * |
16 | */ |
||
17 | public class MailSyntaxChecker { |
||
9 | daniel-mar | 18 | |
3 | daniel-mar | 19 | private static final String REGEX_IP = "\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\b"; |
20 | |||
21 | // Führt eine Prüfung der E-Mail-Adresse gemäß SMTP-Spezifikation RFC 5321 |
||
22 | // aus |
||
23 | private static final boolean CHECK_SMTP_SIZE_LIMITS = false; |
||
24 | |||
25 | // Führt eine Prüfung der TLD gemäß IANA-Daten aus |
||
26 | private static final boolean CHECK_TLD_RECOGNIZED = true; |
||
27 | |||
28 | // Führt eine DNS-Prüfung durch |
||
29 | private static final boolean CHECK_DNS = true; |
||
30 | |||
31 | // http://data.iana.org/TLD/tlds-alpha-by-domain.txt |
||
32 | // Version 2010052500, Last Updated Tue May 25 14:07:02 2010 UTC |
||
33 | private static final HashSet<String> RECOGNIZED_TLDS_PUNYCODE = hmaker(new String[] { |
||
34 | "AC", "AD", "AE", "AERO", "AF", "AG", "AI", "AL", "AM", "AN", "AO", |
||
35 | "AQ", "AR", "ARPA", "AS", "ASIA", "AT", "AU", "AW", "AX", "AZ", |
||
36 | "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BIZ", "BJ", "BM", |
||
37 | "BN", "BO", "BR", "BS", "BT", "BV", "BW", "BY", "BZ", "CA", "CAT", |
||
38 | "CC", "CD", "CF", "CG", "CH", "CI", "CK", "CL", "CM", "CN", "CO", |
||
39 | "COM", "COOP", "CR", "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", |
||
40 | "DK", "DM", "DO", "DZ", "EC", "EDU", "EE", "EG", "ER", "ES", "ET", |
||
41 | "EU", "FI", "FJ", "FK", "FM", "FO", "FR", "GA", "GB", "GD", "GE", |
||
42 | "GF", "GG", "GH", "GI", "GL", "GM", "GN", "GOV", "GP", "GQ", "GR", |
||
43 | "GS", "GT", "GU", "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", |
||
44 | "ID", "IE", "IL", "IM", "IN", "INFO", "INT", "IO", "IQ", "IR", |
||
45 | "IS", "IT", "JE", "JM", "JO", "JOBS", "JP", "KE", "KG", "KH", "KI", |
||
46 | "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", "LB", "LC", "LI", |
||
47 | "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "ME", |
||
48 | "MG", "MH", "MIL", "MK", "ML", "MM", "MN", "MO", "MOBI", "MP", |
||
49 | "MQ", "MR", "MS", "MT", "MU", "MUSEUM", "MV", "MW", "MX", "MY", |
||
50 | "MZ", "NA", "NAME", "NC", "NE", "NET", "NF", "NG", "NI", "NL", |
||
51 | "NO", "NP", "NR", "NU", "NZ", "OM", "ORG", "PA", "PE", "PF", "PG", |
||
52 | "PH", "PK", "PL", "PM", "PN", "PR", "PRO", "PS", "PT", "PW", "PY", |
||
53 | "QA", "RE", "RO", "RS", "RU", "RW", "SA", "SB", "SC", "SD", "SE", |
||
54 | "SG", "SH", "SI", "SJ", "SK", "SL", "SM", "SN", "SO", "SR", "ST", |
||
55 | "SU", "SV", "SY", "SZ", "TC", "TD", "TEL", "TF", "TG", "TH", "TJ", |
||
56 | "TK", "TL", "TM", "TN", "TO", "TP", "TR", "TRAVEL", "TT", "TV", |
||
57 | "TW", "TZ", "UA", "UG", "UK", "US", "UY", "UZ", "VA", "VC", "VE", |
||
58 | "VG", "VI", "VN", "VU", "WF", "WS", "XN--0ZWM56D", |
||
59 | "XN--11B5BS3A9AJ6G", "XN--80AKHBYKNJ4F", "XN--9T4B11YI5A", |
||
60 | "XN--DEBA0AD", "XN--G6W251D", "XN--HGBK6AJ7F53BBA", |
||
61 | "XN--HLCJ6AYA9ESC7A", "XN--JXALPDLP", "XN--KGBECHTV", |
||
62 | "XN--MGBAAM7A8H", "XN--MGBERP4A5D4AR", "XN--P1AI", "XN--WGBH1C", |
||
63 | "XN--ZCKZAH", "YE", "YT", "ZA", "ZM", "ZW", }); |
||
64 | |||
65 | private static boolean checkSmtpSizeLimits(EMailAddress email) { |
||
66 | // RFC 5321: 4.5.3.1.1. Local-part Längenbegrenzung bei SMTP: 64 |
||
67 | // Byte |
||
68 | // QUE: Soll das auch als Punicode-Variante geprüft werden? |
||
69 | if ((email.getLocalPart().length() > 64) |
||
70 | || (email.getLocalPart().length() < 1)) { |
||
71 | return false; |
||
72 | } |
||
73 | |||
74 | // RFC 5321: 4.5.3.1.2. Domain-part Längenbegrenzung bei SMTP: 255 |
||
75 | // Byte |
||
76 | if ((email.getDomainPartPunycode().length() > 255) |
||
77 | || (email.getDomainPartPunycode().length() < 1)) { |
||
78 | return false; |
||
79 | } |
||
80 | |||
81 | // RFC 5321: 4.5.3.1.5. Reply-Line Längenbegrenzung bei SMTP: 512 |
||
82 | // Byte. Laut |
||
83 | // http://de.wikipedia.org/wiki/E-Mail-Adresse#L.C3.A4nge_der_E-Mail-Adresse |
||
84 | // folgt daraus: Länge der MailAddresse ist 254 Bytes. |
||
85 | if (email.getMailAddressPunycodedDomain().length() > 254) { |
||
86 | return false; |
||
87 | } |
||
88 | |||
89 | return true; |
||
90 | } |
||
91 | |||
92 | private static boolean checkTldRecognized(EMailAddress email) { |
||
93 | // TODO: Mailadressen sind aber auch als ...@[IP] gültig. Dann keine |
||
94 | // TLD! |
||
95 | return RECOGNIZED_TLDS_PUNYCODE.contains(email.getTldPunycode() |
||
96 | .toUpperCase()); |
||
97 | } |
||
98 | |||
99 | private static boolean preg_match(String regex, String data) { |
||
100 | return Pattern.compile(regex).matcher(data).matches(); |
||
101 | } |
||
9 | daniel-mar | 102 | |
3 | daniel-mar | 103 | private static boolean checkDns(String domainOrIP) { |
104 | // TODO |
||
9 | daniel-mar | 105 | |
3 | daniel-mar | 106 | return true; |
107 | } |
||
108 | |||
9 | daniel-mar | 109 | public static boolean isMailValid(String email) { |
3 | daniel-mar | 110 | return isMailValid(new EMailAddress(email)); |
111 | } |
||
112 | |||
113 | /** |
||
114 | * Checks if an E-Mail-Address is valid |
||
115 | * |
||
116 | * @param email |
||
117 | * @return |
||
118 | */ |
||
119 | public static boolean isMailValid(EMailAddress email) { |
||
120 | if (CHECK_SMTP_SIZE_LIMITS) { |
||
121 | if (!checkSmtpSizeLimits(email)) |
||
122 | return false; |
||
123 | } |
||
124 | |||
125 | // Begin RFC-Checks |
||
126 | |||
127 | final String address = email.getMailAddressUnicode(); |
||
128 | final String localPart = email.getLocalPart(); |
||
129 | final String domainPart = email.getDomainPartPunycode(); |
||
130 | |||
131 | // Weder localPart noch domainPart dürfen zwei aufeinanderfolgende |
||
132 | // Punkte besitzen. |
||
133 | |||
134 | if (address.contains("..")) { |
||
135 | return false; |
||
136 | } |
||
137 | |||
138 | // localPart darf keine Punkte am Anfang oder Ende besitzen |
||
9 | daniel-mar | 139 | |
140 | if (localPart.length() == 0) { |
||
3 | daniel-mar | 141 | return false; |
142 | } |
||
9 | daniel-mar | 143 | if (localPart.startsWith(".") || localPart.endsWith(".")) { |
144 | return false; |
||
145 | } |
||
3 | daniel-mar | 146 | |
147 | // domainPart darf keine Punkte am Anfang oder Ende besitzen |
||
148 | |||
9 | daniel-mar | 149 | if (domainPart.startsWith(".") || domainPart.endsWith(".")) { |
3 | daniel-mar | 150 | return false; |
151 | } |
||
152 | |||
153 | // domainPart prüfen |
||
9 | daniel-mar | 154 | |
155 | if (preg_match("^" + REGEX_IP + "$", domainPart)) { |
||
3 | daniel-mar | 156 | // domainPart is <IP> |
157 | // QUE: Ist das überhaupt gemäß RFC gültig? |
||
9 | daniel-mar | 158 | |
3 | daniel-mar | 159 | String ip = ""; // TODO |
9 | daniel-mar | 160 | |
3 | daniel-mar | 161 | if (CHECK_DNS) { |
9 | daniel-mar | 162 | if (!checkDns(ip)) |
163 | return false; |
||
3 | daniel-mar | 164 | } |
9 | daniel-mar | 165 | } else if (preg_match("^\\[" + REGEX_IP + "\\]$", domainPart)) { |
3 | daniel-mar | 166 | // domainPart is [<IP>] |
9 | daniel-mar | 167 | |
3 | daniel-mar | 168 | String ip = ""; // TODO |
9 | daniel-mar | 169 | |
3 | daniel-mar | 170 | if (CHECK_DNS) { |
9 | daniel-mar | 171 | if (!checkDns(ip)) |
172 | return false; |
||
3 | daniel-mar | 173 | } |
174 | } else { |
||
175 | if (!preg_match("^[A-Za-z0-9\\-\\.]+$", domainPart)) { |
||
176 | return false; |
||
177 | } |
||
178 | |||
179 | if (CHECK_TLD_RECOGNIZED) { |
||
180 | if (!checkTldRecognized(email)) |
||
181 | return false; |
||
182 | } |
||
9 | daniel-mar | 183 | |
3 | daniel-mar | 184 | if (CHECK_DNS) { |
9 | daniel-mar | 185 | if (!checkDns(domainPart)) |
186 | return false; |
||
3 | daniel-mar | 187 | } |
188 | } |
||
9 | daniel-mar | 189 | |
3 | daniel-mar | 190 | // localPart prüfen |
9 | daniel-mar | 191 | |
3 | daniel-mar | 192 | if (!preg_match("^(\\\\.|[A-Za-z0-9!#%&`_=\\/$\'*+?^{}|~.-])+$", |
9 | daniel-mar | 193 | localPart.replaceAll("\\\\", "").replaceAll("@", ""))) { |
3 | daniel-mar | 194 | // character not valid in local part unless |
195 | // local part is quoted |
||
9 | daniel-mar | 196 | if (!preg_match("^\"(\\\\\"|[^\"])+\"$", localPart.replaceAll( |
197 | "\\\\", "").replaceAll("@", ""))) { |
||
3 | daniel-mar | 198 | return false; |
199 | } |
||
200 | } |
||
9 | daniel-mar | 201 | |
3 | daniel-mar | 202 | // TODO: Weitere Tests gemäß RFC? |
9 | daniel-mar | 203 | |
3 | daniel-mar | 204 | return true; |
205 | } |
||
206 | |||
207 | /** |
||
208 | * build a HashSet from a array of String literals. |
||
209 | * |
||
210 | * @param list |
||
211 | * array of strings |
||
212 | * |
||
213 | * @return HashSet you can use to test if a string is in the set. |
||
214 | */ |
||
215 | private static HashSet<String> hmaker(String[] list) { |
||
216 | HashSet<String> map = new HashSet<String>(Math.max( |
||
217 | (int) (list.length / .75f) + 1, 16)); |
||
218 | map.addAll(Arrays.asList(list)); |
||
219 | return map; |
||
220 | } |
||
221 | } |