Rev 4 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 4 | Rev 26 | ||
---|---|---|---|
1 | /* |
1 | /* |
2 | * @(#)EmailSyntaxValidator.java |
2 | * @(#)EmailSyntaxValidator.java |
3 | * |
3 | * |
4 | * Summary: Validate syntax of email addresses. |
4 | * Summary: Validate syntax of email addresses. |
5 | * |
5 | * |
6 | * Copyright: (c) 2002-2010 Roedy Green, Canadian Mind Products, http://mindprod.com |
6 | * Copyright: (c) 2002-2010 Roedy Green, Canadian Mind Products, http://mindprod.com |
7 | * |
7 | * |
8 | * Licence: This software may be copied and used freely for any purpose but military. |
8 | * Licence: This software may be copied and used freely for any purpose but military. |
9 | * http://mindprod.com/contact/nonmil.html |
9 | * http://mindprod.com/contact/nonmil.html |
10 | * |
10 | * |
11 | * Requires: JDK 1.5+ |
11 | * Requires: JDK 1.5+ |
12 | * |
12 | * |
13 | * Created with: IntelliJ IDEA IDE. |
13 | * Created with: IntelliJ IDEA IDE. |
14 | * |
14 | * |
15 | * Version History: |
15 | * Version History: |
16 | * 1.7 2007-08-21 |
16 | * 1.7 2007-08-21 |
17 | */ |
17 | */ |
18 | package com.mindprod.bulk; |
18 | package com.mindprod.bulk; |
19 | 19 | ||
20 | // Download newest version here: |
20 | // Download newest version here: |
21 | // http://mindprod.com/products1.html#BULK |
21 | // http://mindprod.com/products1.html#BULK |
22 | // SVN: |
22 | // SVN: |
23 | // http://wush.net/svn/mindprod/com/mindprod/bulk/EmailSyntaxValidator.java |
23 | // http://wush.net/svn/mindprod/com/mindprod/bulk/EmailSyntaxValidator.java |
24 | 24 | ||
25 | // TODO: E-Mail-Aufbereiter... Puny, Trim |
- | |
26 | // TODO: BAD TLDS + PSEUDO (TOR: EXIT ETC) |
25 | // TODO: BAD TLDS + PSEUDO (TOR: EXIT ETC) |
27 | // TODO: Awaiting bulk comit |
26 | // TODO: Awaiting official commit for this Patch |
28 | 27 | ||
29 | // CHANGELOG BY DANIEL MARSCHALL |
28 | // CHANGELOG BY DANIEL MARSCHALL |
30 | // |
29 | // |
31 | //Added ccTLDs |
30 | //Added ccTLDs |
32 | // |
31 | // |
33 | //.ax = Aland Islands |
32 | //.ax = Aland Islands |
34 | //.eu = European Union |
33 | //.eu = European Union |
35 | //.me = Montenegro |
34 | //.me = Montenegro |
36 | //.rs = Serbia |
35 | //.rs = Serbia |
37 | //.su = Soviet Union (being phased out) |
36 | //.su = Soviet Union (being phased out) |
38 | //.tl = Timor-Leste |
37 | //.tl = Timor-Leste |
39 | // |
38 | // |
40 | //Deleted ccTLDs |
39 | //Deleted ccTLDs |
41 | // |
40 | // |
42 | //.bv = Bouvet Island [Allocated/unused] |
41 | //.bv = Bouvet Island [Allocated/unused] |
43 | //.eh = Western Sahara [Reserved/unassigned] |
42 | //.eh = Western Sahara [Reserved/unassigned] |
44 | //.fx = UNKNOWN |
43 | //.fx = UNKNOWN |
45 | //.gb = United Kingdom [Allocated/unused] |
44 | //.gb = United Kingdom [Allocated/unused] |
46 | //.pm = Saint Pierre and Miquelon [Allocated/unused] |
45 | //.pm = Saint Pierre and Miquelon [Allocated/unused] |
47 | //.sj = Svalbard and Jan Mayen [Allocated/unused] |
46 | //.sj = Svalbard and Jan Mayen [Allocated/unused] |
48 | //.so = Somalia [Allocated/unused] |
47 | //.so = Somalia [Allocated/unused] |
49 | //.um = United States Minor Outlying Islands [Reserved/unassigned] |
48 | //.um = United States Minor Outlying Islands [Reserved/unassigned] |
50 | //.yt = Mayotte [Allocated/unused] |
49 | //.yt = Mayotte [Allocated/unused] |
51 | //.yu = Yugoslavia [Deleted/retired] |
50 | //.yu = Yugoslavia [Deleted/retired] |
52 | // |
51 | // |
53 | //Added BAD TLDs |
52 | //Added BAD TLDs |
54 | // |
53 | // |
55 | //.example (RFC 2606) |
54 | //.example (RFC 2606) |
56 | //.localhost (RFC 2606) |
55 | //.localhost (RFC 2606) |
57 | //.test (RFC 2606) |
56 | //.test (RFC 2606) |
58 | // |
57 | // |
59 | //Added official TLDs |
58 | //Added official TLDs |
60 | // |
59 | // |
61 | //.arpa (infrastructure TLD) |
60 | //.arpa (infrastructure TLD) |
62 | //.tel (sponsored TLD) -- official TLD or rare TLD? |
61 | //.tel (sponsored TLD) -- official TLD or rare TLD? |
63 | //.mobi (sponsored TLD) -- official TLD or rare TLD? |
62 | //.mobi (sponsored TLD) -- official TLD or rare TLD? |
64 | //.jobs (sponsored TLD) -- official TLD or rare TLD? |
63 | //.jobs (sponsored TLD) -- official TLD or rare TLD? |
65 | //.cat (sponsored TLD) -- official TLD or rare TLD? |
64 | //.cat (sponsored TLD) -- official TLD or rare TLD? |
66 | // |
65 | // |
67 | //Other changes |
66 | //Other changes |
68 | // |
67 | // |
69 | //* Commented out unused debugging stuff |
68 | //* Commented out unused debugging stuff |
70 | //* Removed main procedure and syso import |
69 | //* Removed main procedure and syso import |
71 | 70 | ||
72 | import javax.mail.internet.AddressException; |
71 | import javax.mail.internet.AddressException; |
73 | import javax.mail.internet.InternetAddress; |
72 | import javax.mail.internet.InternetAddress; |
74 | import java.util.Arrays; |
73 | import java.util.Arrays; |
75 | import java.util.HashSet; |
74 | import java.util.HashSet; |
76 | import java.util.Locale; |
75 | import java.util.Locale; |
77 | import java.util.regex.Matcher; |
76 | import java.util.regex.Matcher; |
78 | import java.util.regex.Pattern; |
77 | import java.util.regex.Pattern; |
79 | 78 | ||
80 | /** |
79 | /** |
81 | * Validate syntax of email addresses. |
80 | * Validate syntax of email addresses. |
82 | * <p/> |
81 | * <p/> |
83 | * Does not probe to see if mailserver exists in DNS or online. See MailProber |
82 | * Does not probe to see if mailserver exists in DNS or online. See MailProber |
84 | * for that. See ValidateEmailFile for an example of how to use this class. |
83 | * for that. See ValidateEmailFile for an example of how to use this class. |
85 | * |
84 | * |
86 | * @author Roedy Green, Canadian Mind Products |
85 | * @author Roedy Green, Canadian Mind Products |
87 | * @version 1.7 2007-08-21 |
86 | * @version 1.7 2007-08-21 |
88 | * @since 2002 |
87 | * @since 2002 |
89 | */ |
88 | */ |
90 | // TODO: @version check validity of & in first part of email address. Appears in |
89 | // TODO: @version check validity of & in first part of email address. Appears in |
91 | // practice. |
90 | // practice. |
92 | 91 | ||
93 | public final class EmailSyntaxValidator { |
92 | public final class EmailSyntaxValidator { |
94 | // ------------------------------ CONSTANTS ------------------------------ |
93 | // ------------------------------ CONSTANTS ------------------------------ |
95 | 94 | ||
96 | /** |
95 | /** |
97 | * True if want extra debugging output. |
96 | * True if want extra debugging output. |
98 | */ |
97 | */ |
99 | // @SuppressWarnings( { "UnusedDeclaration" }) |
98 | // @SuppressWarnings( { "UnusedDeclaration" }) |
100 | // private static final boolean DEBUGGING = false; |
99 | // private static final boolean DEBUGGING = false; |
101 | 100 | ||
102 | /** |
101 | /** |
103 | * Country where this program is running. |
102 | * Country where this program is running. |
104 | */ |
103 | */ |
105 | private static final String THIS_COUNTRY = Locale.getDefault().getCountry() |
104 | private static final String THIS_COUNTRY = Locale.getDefault().getCountry() |
106 | .toLowerCase(); |
105 | .toLowerCase(); |
107 | 106 | ||
108 | /** |
107 | /** |
109 | * Bad top level domains -- ones never valid. |
108 | * Bad top level domains -- ones never valid. |
110 | */ |
109 | */ |
111 | private static final HashSet<String> BAD_TLDS = hmaker(new String[] { |
110 | private static final HashSet<String> BAD_TLDS = hmaker(new String[] { |
112 | "invalid", "nowhere", "noone", "test", "example", "localhost", }); |
111 | "invalid", "nowhere", "noone", "test", "example", "localhost", }); |
113 | 112 | ||
114 | /** |
113 | /** |
115 | * Top level domains for countries. |
114 | * Top level domains for countries. |
116 | */ |
115 | */ |
117 | private static final HashSet<String> NATIONAL_TLDS = hmaker(new String[] { |
116 | private static final HashSet<String> NATIONAL_TLDS = hmaker(new String[] { |
118 | "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", |
117 | "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", |
119 | "ar", "as", "at", "au", "aw", "ax", "az", "ba", "bb", "bd", "be", |
118 | "ar", "as", "at", "au", "aw", "ax", "az", "ba", "bb", "bd", "be", |
120 | "bf", "bg", "bh", "bi", "bj", "bm", "bn", "bo", "br", "bs", "bt", |
119 | "bf", "bg", "bh", "bi", "bj", "bm", "bn", "bo", "br", "bs", "bt", |
121 | "bw", "by", "bz", "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", |
120 | "bw", "by", "bz", "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", |
122 | "cl", "cm", "cn", "co", "cr", "cu", "cv", "cx", "cy", "cz", "de", |
121 | "cl", "cm", "cn", "co", "cr", "cu", "cv", "cx", "cy", "cz", "de", |
123 | "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "er", "es", "et", |
122 | "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "er", "es", "et", |
124 | "eu", "eu", "fi", "fj", "fk", "fm", "fo", "fr", "ga", "gd", "ge", |
123 | "eu", "eu", "fi", "fj", "fk", "fm", "fo", "fr", "ga", "gd", "ge", |
125 | "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", |
124 | "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", |
126 | "gt", "gu", "gw", "gy", "hk", "hm", "hn", "hr", "ht", "hu", "id", |
125 | "gt", "gu", "gw", "gy", "hk", "hm", "hn", "hr", "ht", "hu", "id", |
127 | "ie", "il", "im", "in", "io", "iq", "ir", "is", "it", "je", "jm", |
126 | "ie", "il", "im", "in", "io", "iq", "ir", "is", "it", "je", "jm", |
128 | "jo", "jp", "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw", |
127 | "jo", "jp", "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw", |
129 | "ky", "kz", "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", |
128 | "ky", "kz", "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", |
130 | "lv", "ly", "ma", "mc", "md", "me", "mg", "mh", "mk", "ml", "mm", |
129 | "lv", "ly", "ma", "mc", "md", "me", "mg", "mh", "mk", "ml", "mm", |
131 | "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", |
130 | "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", |
132 | "my", "mz", "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", |
131 | "my", "mz", "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", |
133 | "nr", "nu", "nz", "om", "pa", "pe", "pf", "pg", "ph", "pk", "pl", |
132 | "nr", "nu", "nz", "om", "pa", "pe", "pf", "pg", "ph", "pk", "pl", |
134 | "pn", "pr", "ps", "pt", "pw", "py", "qa", "re", "ro", "rs", "ru", |
133 | "pn", "pr", "ps", "pt", "pw", "py", "qa", "re", "ro", "rs", "ru", |
135 | "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sk", "sl", |
134 | "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sk", "sl", |
136 | "sm", "sn", "sr", "st", "su", "sv", "sy", "sz", "tc", "td", "tf", |
135 | "sm", "sn", "sr", "st", "su", "sv", "sy", "sz", "tc", "td", "tf", |
137 | "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tp", "tr", "tt", |
136 | "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tp", "tr", "tt", |
138 | "tv", "tw", "tz", "ua", "ug", "uk", "us", "uy", "uz", "va", "vc", |
137 | "tv", "tw", "tz", "ua", "ug", "uk", "us", "uy", "uz", "va", "vc", |
139 | "ve", "vg", "vi", "vn", "vu", "wf", "ws", "ye", "za", "zm", "zw", }); |
138 | "ve", "vg", "vi", "vn", "vu", "wf", "ws", "ye", "za", "zm", "zw", }); |
140 | 139 | ||
141 | /** |
140 | /** |
142 | * Official top level domains. |
141 | * Official top level domains. |
143 | */ |
142 | */ |
144 | private static final HashSet<String> OFFICIAL_TLDS = hmaker(new String[] { |
143 | private static final HashSet<String> OFFICIAL_TLDS = hmaker(new String[] { |
145 | "aero", "biz", "coop", "com", "edu", "gov", "info", "mil", |
144 | "aero", "biz", "coop", "com", "edu", "gov", "info", "mil", |
146 | "museum", "name", "net", "org", "pro", "tel", "mobi", "jobs", |
145 | "museum", "name", "net", "org", "pro", "tel", "mobi", "jobs", |
147 | "cat", "arpa", }); |
146 | "cat", "arpa", }); |
148 | 147 | ||
149 | /** |
148 | /** |
150 | * Rarely used top level domains |
149 | * Rarely used top level domains |
151 | */ |
150 | */ |
152 | private static final HashSet<String> RARE_TLDS = hmaker(new String[] { |
151 | private static final HashSet<String> RARE_TLDS = hmaker(new String[] { |
153 | "cam", "mp3", "agent", "art", "arts", "asia", "auction", "aus", |
152 | "cam", "mp3", "agent", "art", "arts", "asia", "auction", "aus", |
154 | "bank", "cam", "chat", "church", "club", "corp", "dds", "design", |
153 | "bank", "cam", "chat", "church", "club", "corp", "dds", "design", |
155 | "dns2go", "e", "email", "exp", "fam", "family", "faq", "fed", |
154 | "dns2go", "e", "email", "exp", "fam", "family", "faq", "fed", |
156 | "film", "firm", "free", "fun", "g", "game", "games", "gay", "ger", |
155 | "film", "firm", "free", "fun", "g", "game", "games", "gay", "ger", |
157 | "globe", "gmbh", "golf", "gov", "help", "hola", "i", "inc", "int", |
156 | "globe", "gmbh", "golf", "gov", "help", "hola", "i", "inc", "int", |
158 | "jpn", "k12", "kids", "law", "learn", "llb", "llc", "llp", "lnx", |
157 | "jpn", "k12", "kids", "law", "learn", "llb", "llc", "llp", "lnx", |
159 | "love", "ltd", "mag", "mail", "med", "media", "mp3", "netz", "nic", |
158 | "love", "ltd", "mag", "mail", "med", "media", "mp3", "netz", "nic", |
160 | "nom", "npo", "per", "pol", "prices", "radio", "rsc", "school", |
159 | "nom", "npo", "per", "pol", "prices", "radio", "rsc", "school", |
161 | "scifi", "sea", "service", "sex", "shop", "sky", "soc", "space", |
160 | "scifi", "sea", "service", "sex", "shop", "sky", "soc", "space", |
162 | "sport", "tech", "tour", "travel", "usvi", "video", "web", "wine", |
161 | "sport", "tech", "tour", "travel", "usvi", "video", "web", "wine", |
163 | "wir", "wired", "zine", "zoo", }); |
162 | "wir", "wired", "zine", "zoo", }); |
164 | 163 | ||
165 | /** |
164 | /** |
166 | * regex to allow dots anywhere, but not at start of domain name, no + |
165 | * regex to allow dots anywhere, but not at start of domain name, no + |
167 | */ |
166 | */ |
168 | private static final Pattern p3 = Pattern |
167 | private static final Pattern p3 = Pattern |
169 | .compile("[a-z0-9\\-_\\.]++@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++"); |
168 | .compile("[a-z0-9\\-_\\.]++@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++"); |
170 | 169 | ||
171 | /** |
170 | /** |
172 | * regex IP style names, no + |
171 | * regex IP style names, no + |
173 | */ |
172 | */ |
174 | private static final Pattern p4 = Pattern |
173 | private static final Pattern p4 = Pattern |
175 | .compile("[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)*@\\[([0-9]{1,3}\\.){3}[0-9]{1,3}\\]"); |
174 | .compile("[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)*@\\[([0-9]{1,3}\\.){3}[0-9]{1,3}\\]"); |
176 | 175 | ||
177 | /** |
176 | /** |
178 | * regex to allow - _ dots in name, no + |
177 | * regex to allow - _ dots in name, no + |
179 | */ |
178 | */ |
180 | private static final Pattern p5 = Pattern |
179 | private static final Pattern p5 = Pattern |
181 | .compile("[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)*@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++"); |
180 | .compile("[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)*@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++"); |
182 | 181 | ||
183 | /** |
182 | /** |
184 | * regex to allow _ - in name, lead and trailing ones are filtered later, no |
183 | * regex to allow _ - in name, lead and trailing ones are filtered later, no |
185 | * +. |
184 | * +. |
186 | */ |
185 | */ |
187 | private static final Pattern p9 = Pattern |
186 | private static final Pattern p9 = Pattern |
188 | .compile("[a-z0-9\\-_]++@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++"); |
187 | .compile("[a-z0-9\\-_]++@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++"); |
189 | 188 | ||
190 | /** |
189 | /** |
191 | * regex to split into fields |
190 | * regex to split into fields |
192 | */ |
191 | */ |
193 | private static final Pattern splitter = Pattern.compile("[@\\.]"); |
192 | private static final Pattern splitter = Pattern.compile("[@\\.]"); |
194 | 193 | ||
195 | // -------------------------- PUBLIC STATIC METHODS |
194 | // -------------------------- PUBLIC STATIC METHODS |
196 | // -------------------------- |
195 | // -------------------------- |
197 | 196 | ||
198 | /** |
197 | /** |
199 | * Check how likely an email address is to be valid. The higher the number |
198 | * Check how likely an email address is to be valid. The higher the number |
200 | * returned, the more likely the address is valid. This method does not |
199 | * returned, the more likely the address is valid. This method does not |
201 | * probe the internet in any way to see if the corresponding mail server or |
200 | * probe the internet in any way to see if the corresponding mail server or |
202 | * domain exists. |
201 | * domain exists. |
203 | * |
202 | * |
204 | * @param email |
203 | * @param email |
205 | * bare computer email address. e.g. roedyg@mindprod.com No |
204 | * bare computer email address. e.g. roedyg@mindprod.com No |
206 | * "Roedy Green" <roedyg@mindprod.com> style addresses. No local |
205 | * "Roedy Green" <roedyg@mindprod.com> style addresses. No local |
207 | * addresses, e.g. roedy. |
206 | * addresses, e.g. roedy. |
208 | * |
207 | * |
209 | * @return <ul> |
208 | * @return <ul> |
210 | * <li>0 = email address is definitely malformed, e.g. missing |
209 | * <li>0 = email address is definitely malformed, e.g. missing |
211 | * {@literal @}, or ends in .invalid</li> <li>1 = address does not meet one of the valid |
210 | * {@literal @}, or ends in .invalid</li> <li>1 = address does not meet one of the valid |
212 | * patterns below. It still might be ok according to some obscure rule in |
211 | * patterns below. It still might be ok according to some obscure rule in |
213 | * RFC 822. Java InternetAddress accepts it as valid.</li> <li>2 = unknown |
212 | * RFC 822. Java InternetAddress accepts it as valid.</li> <li>2 = unknown |
214 | * top level domain.</li> <li>3 = dots at beginning or end, doubled in |
213 | * top level domain.</li> <li>3 = dots at beginning or end, doubled in |
215 | * name.</li> <li>4 = address of form xxx@[209.139.205.2] using IP</li> |
214 | * name.</li> <li>4 = address of form xxx@[209.139.205.2] using IP</li> |
216 | * <li>5 = address of form xxx.xxx.xxx@xxx.xxx.xxx Dots _ or - in first |
215 | * <li>5 = address of form xxx.xxx.xxx@xxx.xxx.xxx Dots _ or - in first |
217 | * part of name</li> <li>6 = address of form xxx@xxx.xxx.xxx rare, but |
216 | * part of name</li> <li>6 = address of form xxx@xxx.xxx.xxx rare, but |
218 | * known, domain</li> <li>7 = address of form xxx@xxx.xxx.ca or any |
217 | * known, domain</li> <li>7 = address of form xxx@xxx.xxx.ca or any |
219 | * national suffix.</li> <li>8 = address of form xxx@xxx.xxx.xx where xx is |
218 | * national suffix.</li> <li>8 = address of form xxx@xxx.xxx.xx where xx is |
220 | * the national suffix of the default locale's country, e.g. .ca in Canada, .de in Germany</li> |
219 | * the national suffix of the default locale's country, e.g. .ca in Canada, .de in Germany</li> |
221 | * <li>9 = address of form xxx@xxx.xxx.com .org .net .edu .gov .biz -- |
220 | * <li>9 = address of form xxx@xxx.xxx.com .org .net .edu .gov .biz -- |
222 | * official domains</li> |
221 | * official domains</li> |
223 | * </ul> |
222 | * </ul> |
224 | */ |
223 | */ |
225 | public static int howValid(String email) { |
224 | public static int howValid(String email) { |
226 | if (email == null) { |
225 | if (email == null) { |
227 | return 0; |
226 | return 0; |
228 | } |
227 | } |
229 | email = email.trim().toLowerCase(); |
228 | email = email.trim().toLowerCase(); |
230 | int dotPlace = email.lastIndexOf('.'); |
229 | int dotPlace = email.lastIndexOf('.'); |
231 | if (0 < dotPlace && dotPlace < email.length() - 1) { |
230 | if (0 < dotPlace && dotPlace < email.length() - 1) { |
232 | String tld = email.substring(dotPlace + 1); |
231 | String tld = email.substring(dotPlace + 1); |
233 | if (BAD_TLDS.contains(tld)) { |
232 | if (BAD_TLDS.contains(tld)) { |
234 | /* deliberate invalid address */ |
233 | /* deliberate invalid address */ |
235 | return 0; |
234 | return 0; |
236 | } |
235 | } |
237 | // make sure none of fragments start or end in _ or - |
236 | // make sure none of fragments start or end in _ or - |
238 | String[] fragments = splitter.split(email); |
237 | String[] fragments = splitter.split(email); |
239 | boolean clean = true; |
238 | boolean clean = true; |
240 | for (String fragment : fragments) { |
239 | for (String fragment : fragments) { |
241 | if (fragment.startsWith("_") || fragment.endsWith("_") |
240 | if (fragment.startsWith("_") || fragment.endsWith("_") |
242 | || fragment.startsWith("-") || fragment.endsWith("-")) { |
241 | || fragment.startsWith("-") || fragment.endsWith("-")) { |
243 | clean = false; |
242 | clean = false; |
244 | break; |
243 | break; |
245 | } |
244 | } |
246 | }// end for |
245 | }// end for |
247 | if (clean) { |
246 | if (clean) { |
248 | Matcher m9 = p9.matcher(email); |
247 | Matcher m9 = p9.matcher(email); |
249 | if (m9.matches()) { |
248 | if (m9.matches()) { |
250 | if (OFFICIAL_TLDS.contains(tld)) { |
249 | if (OFFICIAL_TLDS.contains(tld)) { |
251 | return 9; |
250 | return 9; |
252 | } else if (THIS_COUNTRY.equals(tld)) { |
251 | } else if (THIS_COUNTRY.equals(tld)) { |
253 | return 8; |
252 | return 8; |
254 | } else if (NATIONAL_TLDS.contains(tld)) { |
253 | } else if (NATIONAL_TLDS.contains(tld)) { |
255 | return 7; |
254 | return 7; |
256 | } else if (RARE_TLDS.contains(tld)) { |
255 | } else if (RARE_TLDS.contains(tld)) { |
257 | return 6; |
256 | return 6; |
258 | } else { |
257 | } else { |
259 | // TODO: Why is that 3 and not 2? |
258 | // TODO: Why is that 3 and not 2? |
260 | return 3;/* unknown tld */ |
259 | return 3;/* unknown tld */ |
261 | } |
260 | } |
262 | } |
261 | } |
263 | // allow dots in name |
262 | // allow dots in name |
264 | Matcher m5 = p5.matcher(email); |
263 | Matcher m5 = p5.matcher(email); |
265 | if (m5.matches()) { |
264 | if (m5.matches()) { |
266 | if (OFFICIAL_TLDS.contains(tld)) { |
265 | if (OFFICIAL_TLDS.contains(tld)) { |
267 | return 5; |
266 | return 5; |
268 | } else if (THIS_COUNTRY.equals(tld)) { |
267 | } else if (THIS_COUNTRY.equals(tld)) { |
269 | return 5; |
268 | return 5; |
270 | } else if (NATIONAL_TLDS.contains(tld)) { |
269 | } else if (NATIONAL_TLDS.contains(tld)) { |
271 | return 5; |
270 | return 5; |
272 | } else if (RARE_TLDS.contains(tld)) { |
271 | } else if (RARE_TLDS.contains(tld)) { |
273 | return 5; |
272 | return 5; |
274 | } else { |
273 | } else { |
275 | return 2;/* unknown tld */ |
274 | return 2;/* unknown tld */ |
276 | } |
275 | } |
277 | } |
276 | } |
278 | 277 | ||
279 | // IP |
278 | // IP |
280 | Matcher m4 = p4.matcher(email); |
279 | Matcher m4 = p4.matcher(email); |
281 | if (m4.matches()) { |
280 | if (m4.matches()) { |
282 | return 4;/* can't tell TLD */ |
281 | return 4;/* can't tell TLD */ |
283 | } |
282 | } |
284 | 283 | ||
285 | // allow even lead/trail dots in name, except at start of domain |
284 | // allow even lead/trail dots in name, except at start of domain |
286 | Matcher m3 = p3.matcher(email); |
285 | Matcher m3 = p3.matcher(email); |
287 | if (m3.matches()) { |
286 | if (m3.matches()) { |
288 | if (OFFICIAL_TLDS.contains(tld)) { |
287 | if (OFFICIAL_TLDS.contains(tld)) { |
289 | return 3; |
288 | return 3; |
290 | } else if (THIS_COUNTRY.equals(tld)) { |
289 | } else if (THIS_COUNTRY.equals(tld)) { |
291 | return 3; |
290 | return 3; |
292 | } else if (NATIONAL_TLDS.contains(tld)) { |
291 | } else if (NATIONAL_TLDS.contains(tld)) { |
293 | return 3; |
292 | return 3; |
294 | } else if (RARE_TLDS.contains(tld)) { |
293 | } else if (RARE_TLDS.contains(tld)) { |
295 | return 3; |
294 | return 3; |
296 | } else { |
295 | } else { |
297 | return 2;/* unknown domain */ |
296 | return 2;/* unknown domain */ |
298 | } |
297 | } |
299 | } |
298 | } |
300 | }// end if clean |
299 | }// end if clean |
301 | } |
300 | } |
302 | // allow even unclean addresses, and addresses without a TLD to have a |
301 | // allow even unclean addresses, and addresses without a TLD to have a |
303 | // whack at passing RFC:822 |
302 | // whack at passing RFC:822 |
304 | try { |
303 | try { |
305 | /* |
304 | /* |
306 | * see if InternetAddress likes it, it follows RFC:822. It will |
305 | * see if InternetAddress likes it, it follows RFC:822. It will |
307 | * accept names without domains though. |
306 | * accept names without domains though. |
308 | */ |
307 | */ |
309 | InternetAddress.parse(email, true/* strict */); |
308 | InternetAddress.parse(email, true/* strict */); |
310 | // it liked it, no exception happened. Seems very sloppy. |
309 | // it liked it, no exception happened. Seems very sloppy. |
311 | return 1; |
310 | return 1; |
312 | } catch (AddressException e) { |
311 | } catch (AddressException e) { |
313 | // it did not like it |
312 | // it did not like it |
314 | return 0; |
313 | return 0; |
315 | } |
314 | } |
316 | } |
315 | } |
317 | 316 | ||
318 | // -------------------------- STATIC METHODS -------------------------- |
317 | // -------------------------- STATIC METHODS -------------------------- |
319 | 318 | ||
320 | /** |
319 | /** |
321 | * build a HashSet from an array of String literals. |
320 | * build a HashSet from an array of String literals. |
322 | * |
321 | * |
323 | * @param list |
322 | * @param list |
324 | * array of strings |
323 | * array of strings |
325 | * |
324 | * |
326 | * @return HashSet you can use to test if a string is in the set. |
325 | * @return HashSet you can use to test if a string is in the set. |
327 | */ |
326 | */ |
328 | private static HashSet<String> hmaker(String[] list) { |
327 | private static HashSet<String> hmaker(String[] list) { |
329 | HashSet<String> map = new HashSet<String>(Math.max( |
328 | HashSet<String> map = new HashSet<String>(Math.max( |
330 | (int) (list.length / .75f) + 1, 16)); |
329 | (int) (list.length / .75f) + 1, 16)); |
331 | map.addAll(Arrays.asList(list)); |
330 | map.addAll(Arrays.asList(list)); |
332 | return map; |
331 | return map; |
333 | } |
332 | } |
334 | 333 | ||
335 | // --------------------------- main() method --------------------------- |
334 | // --------------------------- main() method --------------------------- |
336 | 335 | ||
337 | /** |
336 | /** |
338 | * main debugging harness. |
337 | * main debugging harness. |
339 | * |
338 | * |
340 | * @param args |
339 | * @param args |
341 | * not used |
340 | * not used |
342 | */ |
341 | */ |
343 | // public static void main(String[] args) { |
342 | // public static void main(String[] args) { |
344 | // out.println(howValid("kellizer@.hotmail.com")); |
343 | // out.println(howValid("kellizer@.hotmail.com")); |
345 | // } |
344 | // } |
346 | } |
345 | } |
347 | 346 |