Rev 9 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 9 | Rev 17 | ||
---|---|---|---|
1 | package de.viathinksoft.utils.mail.syntaxchecker; |
1 | package de.viathinksoft.utils.mail.syntaxchecker; |
2 | 2 | ||
3 | import java.util.Arrays; |
3 | import java.util.Arrays; |
4 | import java.util.HashSet; |
4 | import java.util.HashSet; |
5 | import java.util.regex.Pattern; |
5 | import java.util.regex.Pattern; |
6 | 6 | ||
7 | import de.viathinksoft.utils.mail.EMailAddress; |
7 | import de.viathinksoft.utils.mail.address.EMailAddress; |
8 | 8 | ||
9 | /** |
9 | /** |
10 | * This class is not stable. For a good syntax check, please use the classes of |
10 | * This class is not stable. For a good syntax check, please use the classes of |
11 | * Dominic Sayers or Cal Henderson. |
11 | * Dominic Sayers or Cal Henderson. |
12 | * |
12 | * |
13 | * @author Daniel Marschall |
13 | * @author Daniel Marschall |
14 | * @version 0.1 |
14 | * @version 0.1 |
15 | * |
15 | * |
16 | */ |
16 | */ |
17 | public class MailSyntaxChecker { |
17 | public class MailSyntaxChecker { |
18 | 18 | ||
19 | private static final String REGEX_IP = "\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\b"; |
19 | private static final String REGEX_IP = "\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\b"; |
20 | 20 | ||
21 | // Führt eine Prüfung der E-Mail-Adresse gemäß SMTP-Spezifikation RFC 5321 |
21 | // Führt eine Prüfung der E-Mail-Adresse gemäß SMTP-Spezifikation RFC 5321 |
22 | // aus |
22 | // aus |
23 | private static final boolean CHECK_SMTP_SIZE_LIMITS = false; |
23 | private static final boolean CHECK_SMTP_SIZE_LIMITS = false; |
24 | 24 | ||
25 | // Führt eine Prüfung der TLD gemäß IANA-Daten aus |
25 | // Führt eine Prüfung der TLD gemäß IANA-Daten aus |
26 | private static final boolean CHECK_TLD_RECOGNIZED = true; |
26 | private static final boolean CHECK_TLD_RECOGNIZED = true; |
27 | 27 | ||
28 | // Führt eine DNS-Prüfung durch |
28 | // Führt eine DNS-Prüfung durch |
29 | private static final boolean CHECK_DNS = true; |
29 | private static final boolean CHECK_DNS = true; |
30 | 30 | ||
31 | // http://data.iana.org/TLD/tlds-alpha-by-domain.txt |
31 | // http://data.iana.org/TLD/tlds-alpha-by-domain.txt |
32 | // Version 2010052500, Last Updated Tue May 25 14:07:02 2010 UTC |
32 | // Version 2010052500, Last Updated Tue May 25 14:07:02 2010 UTC |
33 | private static final HashSet<String> RECOGNIZED_TLDS_PUNYCODE = hmaker(new String[] { |
33 | private static final HashSet<String> RECOGNIZED_TLDS_PUNYCODE = hmaker(new String[] { |
34 | "AC", "AD", "AE", "AERO", "AF", "AG", "AI", "AL", "AM", "AN", "AO", |
34 | "AC", "AD", "AE", "AERO", "AF", "AG", "AI", "AL", "AM", "AN", "AO", |
35 | "AQ", "AR", "ARPA", "AS", "ASIA", "AT", "AU", "AW", "AX", "AZ", |
35 | "AQ", "AR", "ARPA", "AS", "ASIA", "AT", "AU", "AW", "AX", "AZ", |
36 | "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BIZ", "BJ", "BM", |
36 | "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BIZ", "BJ", "BM", |
37 | "BN", "BO", "BR", "BS", "BT", "BV", "BW", "BY", "BZ", "CA", "CAT", |
37 | "BN", "BO", "BR", "BS", "BT", "BV", "BW", "BY", "BZ", "CA", "CAT", |
38 | "CC", "CD", "CF", "CG", "CH", "CI", "CK", "CL", "CM", "CN", "CO", |
38 | "CC", "CD", "CF", "CG", "CH", "CI", "CK", "CL", "CM", "CN", "CO", |
39 | "COM", "COOP", "CR", "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", |
39 | "COM", "COOP", "CR", "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", |
40 | "DK", "DM", "DO", "DZ", "EC", "EDU", "EE", "EG", "ER", "ES", "ET", |
40 | "DK", "DM", "DO", "DZ", "EC", "EDU", "EE", "EG", "ER", "ES", "ET", |
41 | "EU", "FI", "FJ", "FK", "FM", "FO", "FR", "GA", "GB", "GD", "GE", |
41 | "EU", "FI", "FJ", "FK", "FM", "FO", "FR", "GA", "GB", "GD", "GE", |
42 | "GF", "GG", "GH", "GI", "GL", "GM", "GN", "GOV", "GP", "GQ", "GR", |
42 | "GF", "GG", "GH", "GI", "GL", "GM", "GN", "GOV", "GP", "GQ", "GR", |
43 | "GS", "GT", "GU", "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", |
43 | "GS", "GT", "GU", "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", |
44 | "ID", "IE", "IL", "IM", "IN", "INFO", "INT", "IO", "IQ", "IR", |
44 | "ID", "IE", "IL", "IM", "IN", "INFO", "INT", "IO", "IQ", "IR", |
45 | "IS", "IT", "JE", "JM", "JO", "JOBS", "JP", "KE", "KG", "KH", "KI", |
45 | "IS", "IT", "JE", "JM", "JO", "JOBS", "JP", "KE", "KG", "KH", "KI", |
46 | "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", "LB", "LC", "LI", |
46 | "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", "LB", "LC", "LI", |
47 | "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "ME", |
47 | "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "ME", |
48 | "MG", "MH", "MIL", "MK", "ML", "MM", "MN", "MO", "MOBI", "MP", |
48 | "MG", "MH", "MIL", "MK", "ML", "MM", "MN", "MO", "MOBI", "MP", |
49 | "MQ", "MR", "MS", "MT", "MU", "MUSEUM", "MV", "MW", "MX", "MY", |
49 | "MQ", "MR", "MS", "MT", "MU", "MUSEUM", "MV", "MW", "MX", "MY", |
50 | "MZ", "NA", "NAME", "NC", "NE", "NET", "NF", "NG", "NI", "NL", |
50 | "MZ", "NA", "NAME", "NC", "NE", "NET", "NF", "NG", "NI", "NL", |
51 | "NO", "NP", "NR", "NU", "NZ", "OM", "ORG", "PA", "PE", "PF", "PG", |
51 | "NO", "NP", "NR", "NU", "NZ", "OM", "ORG", "PA", "PE", "PF", "PG", |
52 | "PH", "PK", "PL", "PM", "PN", "PR", "PRO", "PS", "PT", "PW", "PY", |
52 | "PH", "PK", "PL", "PM", "PN", "PR", "PRO", "PS", "PT", "PW", "PY", |
53 | "QA", "RE", "RO", "RS", "RU", "RW", "SA", "SB", "SC", "SD", "SE", |
53 | "QA", "RE", "RO", "RS", "RU", "RW", "SA", "SB", "SC", "SD", "SE", |
54 | "SG", "SH", "SI", "SJ", "SK", "SL", "SM", "SN", "SO", "SR", "ST", |
54 | "SG", "SH", "SI", "SJ", "SK", "SL", "SM", "SN", "SO", "SR", "ST", |
55 | "SU", "SV", "SY", "SZ", "TC", "TD", "TEL", "TF", "TG", "TH", "TJ", |
55 | "SU", "SV", "SY", "SZ", "TC", "TD", "TEL", "TF", "TG", "TH", "TJ", |
56 | "TK", "TL", "TM", "TN", "TO", "TP", "TR", "TRAVEL", "TT", "TV", |
56 | "TK", "TL", "TM", "TN", "TO", "TP", "TR", "TRAVEL", "TT", "TV", |
57 | "TW", "TZ", "UA", "UG", "UK", "US", "UY", "UZ", "VA", "VC", "VE", |
57 | "TW", "TZ", "UA", "UG", "UK", "US", "UY", "UZ", "VA", "VC", "VE", |
58 | "VG", "VI", "VN", "VU", "WF", "WS", "XN--0ZWM56D", |
58 | "VG", "VI", "VN", "VU", "WF", "WS", "XN--0ZWM56D", |
59 | "XN--11B5BS3A9AJ6G", "XN--80AKHBYKNJ4F", "XN--9T4B11YI5A", |
59 | "XN--11B5BS3A9AJ6G", "XN--80AKHBYKNJ4F", "XN--9T4B11YI5A", |
60 | "XN--DEBA0AD", "XN--G6W251D", "XN--HGBK6AJ7F53BBA", |
60 | "XN--DEBA0AD", "XN--G6W251D", "XN--HGBK6AJ7F53BBA", |
61 | "XN--HLCJ6AYA9ESC7A", "XN--JXALPDLP", "XN--KGBECHTV", |
61 | "XN--HLCJ6AYA9ESC7A", "XN--JXALPDLP", "XN--KGBECHTV", |
62 | "XN--MGBAAM7A8H", "XN--MGBERP4A5D4AR", "XN--P1AI", "XN--WGBH1C", |
62 | "XN--MGBAAM7A8H", "XN--MGBERP4A5D4AR", "XN--P1AI", "XN--WGBH1C", |
63 | "XN--ZCKZAH", "YE", "YT", "ZA", "ZM", "ZW", }); |
63 | "XN--ZCKZAH", "YE", "YT", "ZA", "ZM", "ZW", }); |
64 | 64 | ||
65 | private static boolean checkSmtpSizeLimits(EMailAddress email) { |
65 | private static boolean checkSmtpSizeLimits(EMailAddress email) { |
66 | // RFC 5321: 4.5.3.1.1. Local-part Längenbegrenzung bei SMTP: 64 |
66 | // RFC 5321: 4.5.3.1.1. Local-part Längenbegrenzung bei SMTP: 64 |
67 | // Byte |
67 | // Byte |
68 | // QUE: Soll das auch als Punicode-Variante geprüft werden? |
68 | // QUE: Soll das auch als Punicode-Variante geprüft werden? |
69 | if ((email.getLocalPart().length() > 64) |
69 | if ((email.getLocalPart().length() > 64) |
70 | || (email.getLocalPart().length() < 1)) { |
70 | || (email.getLocalPart().length() < 1)) { |
71 | return false; |
71 | return false; |
72 | } |
72 | } |
73 | 73 | ||
74 | // RFC 5321: 4.5.3.1.2. Domain-part Längenbegrenzung bei SMTP: 255 |
74 | // RFC 5321: 4.5.3.1.2. Domain-part Längenbegrenzung bei SMTP: 255 |
75 | // Byte |
75 | // Byte |
76 | if ((email.getDomainPartPunycode().length() > 255) |
76 | if ((email.getDomainPartPunycode().length() > 255) |
77 | || (email.getDomainPartPunycode().length() < 1)) { |
77 | || (email.getDomainPartPunycode().length() < 1)) { |
78 | return false; |
78 | return false; |
79 | } |
79 | } |
80 | 80 | ||
81 | // RFC 5321: 4.5.3.1.5. Reply-Line Längenbegrenzung bei SMTP: 512 |
81 | // RFC 5321: 4.5.3.1.5. Reply-Line Längenbegrenzung bei SMTP: 512 |
82 | // Byte. Laut |
82 | // Byte. Laut |
83 | // http://de.wikipedia.org/wiki/E-Mail-Adresse#L.C3.A4nge_der_E-Mail-Adresse |
83 | // http://de.wikipedia.org/wiki/E-Mail-Adresse#L.C3.A4nge_der_E-Mail-Adresse |
84 | // folgt daraus: Länge der MailAddresse ist 254 Bytes. |
84 | // folgt daraus: Länge der MailAddresse ist 254 Bytes. |
85 | if (email.getMailAddressPunycodedDomain().length() > 254) { |
85 | if (email.getMailAddressPunycodedDomain().length() > 254) { |
86 | return false; |
86 | return false; |
87 | } |
87 | } |
88 | 88 | ||
89 | return true; |
89 | return true; |
90 | } |
90 | } |
91 | 91 | ||
92 | private static boolean checkTldRecognized(EMailAddress email) { |
92 | private static boolean checkTldRecognized(EMailAddress email) { |
93 | // TODO: Mailadressen sind aber auch als ...@[IP] gültig. Dann keine |
93 | // TODO: Mailadressen sind aber auch als ...@[IP] gültig. Dann keine |
94 | // TLD! |
94 | // TLD! |
95 | return RECOGNIZED_TLDS_PUNYCODE.contains(email.getTldPunycode() |
95 | return RECOGNIZED_TLDS_PUNYCODE.contains(email.getTldPunycode() |
96 | .toUpperCase()); |
96 | .toUpperCase()); |
97 | } |
97 | } |
98 | 98 | ||
99 | private static boolean preg_match(String regex, String data) { |
99 | private static boolean preg_match(String regex, String data) { |
100 | return Pattern.compile(regex).matcher(data).matches(); |
100 | return Pattern.compile(regex).matcher(data).matches(); |
101 | } |
101 | } |
102 | 102 | ||
103 | private static boolean checkDns(String domainOrIP) { |
103 | private static boolean checkDns(String domainOrIP) { |
104 | // TODO |
104 | // TODO |
105 | 105 | ||
106 | return true; |
106 | return true; |
107 | } |
107 | } |
108 | 108 | ||
109 | public static boolean isMailValid(String email) { |
109 | public static boolean isMailValid(String email) { |
110 | return isMailValid(new EMailAddress(email)); |
110 | return isMailValid(new EMailAddress(email)); |
111 | } |
111 | } |
112 | 112 | ||
113 | /** |
113 | /** |
114 | * Checks if an E-Mail-Address is valid |
114 | * Checks if an E-Mail-Address is valid |
115 | * |
115 | * |
116 | * @param email |
116 | * @param email |
117 | * @return |
117 | * @return |
118 | */ |
118 | */ |
119 | public static boolean isMailValid(EMailAddress email) { |
119 | public static boolean isMailValid(EMailAddress email) { |
120 | if (CHECK_SMTP_SIZE_LIMITS) { |
120 | if (CHECK_SMTP_SIZE_LIMITS) { |
121 | if (!checkSmtpSizeLimits(email)) |
121 | if (!checkSmtpSizeLimits(email)) |
122 | return false; |
122 | return false; |
123 | } |
123 | } |
124 | 124 | ||
125 | // Begin RFC-Checks |
125 | // Begin RFC-Checks |
126 | 126 | ||
127 | final String address = email.getMailAddressUnicode(); |
127 | final String address = email.getMailAddressUnicode(); |
128 | final String localPart = email.getLocalPart(); |
128 | final String localPart = email.getLocalPart(); |
129 | final String domainPart = email.getDomainPartPunycode(); |
129 | final String domainPart = email.getDomainPartPunycode(); |
130 | 130 | ||
131 | // Weder localPart noch domainPart dürfen zwei aufeinanderfolgende |
131 | // Weder localPart noch domainPart dürfen zwei aufeinanderfolgende |
132 | // Punkte besitzen. |
132 | // Punkte besitzen. |
133 | 133 | ||
134 | if (address.contains("..")) { |
134 | if (address.contains("..")) { |
135 | return false; |
135 | return false; |
136 | } |
136 | } |
137 | 137 | ||
138 | // localPart darf keine Punkte am Anfang oder Ende besitzen |
138 | // localPart darf keine Punkte am Anfang oder Ende besitzen |
139 | 139 | ||
140 | if (localPart.length() == 0) { |
140 | if (localPart.length() == 0) { |
141 | return false; |
141 | return false; |
142 | } |
142 | } |
143 | if (localPart.startsWith(".") || localPart.endsWith(".")) { |
143 | if (localPart.startsWith(".") || localPart.endsWith(".")) { |
144 | return false; |
144 | return false; |
145 | } |
145 | } |
146 | 146 | ||
147 | // domainPart darf keine Punkte am Anfang oder Ende besitzen |
147 | // domainPart darf keine Punkte am Anfang oder Ende besitzen |
148 | 148 | ||
149 | if (domainPart.startsWith(".") || domainPart.endsWith(".")) { |
149 | if (domainPart.startsWith(".") || domainPart.endsWith(".")) { |
150 | return false; |
150 | return false; |
151 | } |
151 | } |
152 | 152 | ||
153 | // domainPart prüfen |
153 | // domainPart prüfen |
154 | 154 | ||
155 | if (preg_match("^" + REGEX_IP + "$", domainPart)) { |
155 | if (preg_match("^" + REGEX_IP + "$", domainPart)) { |
156 | // domainPart is <IP> |
156 | // domainPart is <IP> |
157 | // QUE: Ist das überhaupt gemäß RFC gültig? |
157 | // QUE: Ist das überhaupt gemäß RFC gültig? |
158 | 158 | ||
159 | String ip = ""; // TODO |
159 | String ip = ""; // TODO |
160 | 160 | ||
161 | if (CHECK_DNS) { |
161 | if (CHECK_DNS) { |
162 | if (!checkDns(ip)) |
162 | if (!checkDns(ip)) |
163 | return false; |
163 | return false; |
164 | } |
164 | } |
165 | } else if (preg_match("^\\[" + REGEX_IP + "\\]$", domainPart)) { |
165 | } else if (preg_match("^\\[" + REGEX_IP + "\\]$", domainPart)) { |
166 | // domainPart is [<IP>] |
166 | // domainPart is [<IP>] |
167 | 167 | ||
168 | String ip = ""; // TODO |
168 | String ip = ""; // TODO |
169 | 169 | ||
170 | if (CHECK_DNS) { |
170 | if (CHECK_DNS) { |
171 | if (!checkDns(ip)) |
171 | if (!checkDns(ip)) |
172 | return false; |
172 | return false; |
173 | } |
173 | } |
174 | } else { |
174 | } else { |
175 | if (!preg_match("^[A-Za-z0-9\\-\\.]+$", domainPart)) { |
175 | if (!preg_match("^[A-Za-z0-9\\-\\.]+$", domainPart)) { |
176 | return false; |
176 | return false; |
177 | } |
177 | } |
178 | 178 | ||
179 | if (CHECK_TLD_RECOGNIZED) { |
179 | if (CHECK_TLD_RECOGNIZED) { |
180 | if (!checkTldRecognized(email)) |
180 | if (!checkTldRecognized(email)) |
181 | return false; |
181 | return false; |
182 | } |
182 | } |
183 | 183 | ||
184 | if (CHECK_DNS) { |
184 | if (CHECK_DNS) { |
185 | if (!checkDns(domainPart)) |
185 | if (!checkDns(domainPart)) |
186 | return false; |
186 | return false; |
187 | } |
187 | } |
188 | } |
188 | } |
189 | 189 | ||
190 | // localPart prüfen |
190 | // localPart prüfen |
191 | 191 | ||
192 | if (!preg_match("^(\\\\.|[A-Za-z0-9!#%&`_=\\/$\'*+?^{}|~.-])+$", |
192 | if (!preg_match("^(\\\\.|[A-Za-z0-9!#%&`_=\\/$\'*+?^{}|~.-])+$", |
193 | localPart.replaceAll("\\\\", "").replaceAll("@", ""))) { |
193 | localPart.replaceAll("\\\\", "").replaceAll("@", ""))) { |
194 | // character not valid in local part unless |
194 | // character not valid in local part unless |
195 | // local part is quoted |
195 | // local part is quoted |
196 | if (!preg_match("^\"(\\\\\"|[^\"])+\"$", localPart.replaceAll( |
196 | if (!preg_match("^\"(\\\\\"|[^\"])+\"$", localPart.replaceAll( |
197 | "\\\\", "").replaceAll("@", ""))) { |
197 | "\\\\", "").replaceAll("@", ""))) { |
198 | return false; |
198 | return false; |
199 | } |
199 | } |
200 | } |
200 | } |
201 | 201 | ||
202 | // TODO: Weitere Tests gemäß RFC? |
202 | // TODO: Weitere Tests gemäß RFC? |
203 | 203 | ||
204 | return true; |
204 | return true; |
205 | } |
205 | } |
206 | 206 | ||
207 | /** |
207 | /** |
208 | * build a HashSet from a array of String literals. |
208 | * build a HashSet from a array of String literals. |
209 | * |
209 | * |
210 | * @param list |
210 | * @param list |
211 | * array of strings |
211 | * array of strings |
212 | * |
212 | * |
213 | * @return HashSet you can use to test if a string is in the set. |
213 | * @return HashSet you can use to test if a string is in the set. |
214 | */ |
214 | */ |
215 | private static HashSet<String> hmaker(String[] list) { |
215 | private static HashSet<String> hmaker(String[] list) { |
216 | HashSet<String> map = new HashSet<String>(Math.max( |
216 | HashSet<String> map = new HashSet<String>(Math.max( |
217 | (int) (list.length / .75f) + 1, 16)); |
217 | (int) (list.length / .75f) + 1, 16)); |
218 | map.addAll(Arrays.asList(list)); |
218 | map.addAll(Arrays.asList(list)); |
219 | return map; |
219 | return map; |
220 | } |
220 | } |
221 | } |
221 | } |
222 | 222 |