Subversion Repositories javautils

Rev

Rev 4 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4 Rev 26
1
/*
1
/*
2
 * @(#)EmailSyntaxValidator.java
2
 * @(#)EmailSyntaxValidator.java
3
 *
3
 *
4
 * Summary: Validate syntax of email addresses.
4
 * Summary: Validate syntax of email addresses.
5
 *
5
 *
6
 * Copyright: (c) 2002-2010 Roedy Green, Canadian Mind Products, http://mindprod.com
6
 * Copyright: (c) 2002-2010 Roedy Green, Canadian Mind Products, http://mindprod.com
7
 *
7
 *
8
 * Licence: This software may be copied and used freely for any purpose but military.
8
 * Licence: This software may be copied and used freely for any purpose but military.
9
 *          http://mindprod.com/contact/nonmil.html
9
 *          http://mindprod.com/contact/nonmil.html
10
 *
10
 *
11
 * Requires: JDK 1.5+
11
 * Requires: JDK 1.5+
12
 *
12
 *
13
 * Created with: IntelliJ IDEA IDE.
13
 * Created with: IntelliJ IDEA IDE.
14
 *
14
 *
15
 * Version History:
15
 * Version History:
16
 *  1.7 2007-08-21
16
 *  1.7 2007-08-21
17
 */
17
 */
18
package com.mindprod.bulk;
18
package com.mindprod.bulk;
19
 
19
 
20
// Download newest version here:
20
// Download newest version here:
21
// http://mindprod.com/products1.html#BULK
21
// http://mindprod.com/products1.html#BULK
22
// SVN:
22
// SVN:
23
// http://wush.net/svn/mindprod/com/mindprod/bulk/EmailSyntaxValidator.java
23
// http://wush.net/svn/mindprod/com/mindprod/bulk/EmailSyntaxValidator.java
24
 
24
 
25
// TODO: E-Mail-Aufbereiter... Puny, Trim
-
 
26
// TODO: BAD TLDS + PSEUDO (TOR: EXIT ETC)
25
// TODO: BAD TLDS + PSEUDO (TOR: EXIT ETC)
27
// TODO: Awaiting bulk commit
26
// TODO: Awaiting official commit for this Patch
28
 
27
 
29
// CHANGELOG BY DANIEL MARSCHALL
28
// CHANGELOG BY DANIEL MARSCHALL
30
//
29
//
31
//Added ccTLDs
30
//Added ccTLDs
32
//
31
//
33
//.ax = Aland Islands
32
//.ax = Aland Islands
34
//.eu = European Union
33
//.eu = European Union
35
//.me = Montenegro
34
//.me = Montenegro
36
//.rs = Serbia
35
//.rs = Serbia
37
//.su = Soviet Union (being phased out)
36
//.su = Soviet Union (being phased out)
38
//.tl = Timor-Leste
37
//.tl = Timor-Leste
39
//
38
//
40
//Deleted ccTLDs
39
//Deleted ccTLDs
41
//
40
//
42
//.bv = Bouvet Island [Allocated/unused]
41
//.bv = Bouvet Island [Allocated/unused]
43
//.eh = Western Sahara [Reserved/unassigned]
42
//.eh = Western Sahara [Reserved/unassigned]
44
//.fx = UNKNOWN
43
//.fx = UNKNOWN
45
//.gb = United Kingdom [Allocated/unused]
44
//.gb = United Kingdom [Allocated/unused]
46
//.pm = Saint Pierre and Miquelon [Allocated/unused]
45
//.pm = Saint Pierre and Miquelon [Allocated/unused]
47
//.sj = Svalbard and Jan Mayen [Allocated/unused]
46
//.sj = Svalbard and Jan Mayen [Allocated/unused]
48
//.so = Somalia [Allocated/unused]
47
//.so = Somalia [Allocated/unused]
49
//.um = United States Minor Outlying Islands [Reserved/unassigned]
48
//.um = United States Minor Outlying Islands [Reserved/unassigned]
50
//.yt = Mayotte [Allocated/unused]
49
//.yt = Mayotte [Allocated/unused]
51
//.yu = Yugoslavia [Deleted/retired]
50
//.yu = Yugoslavia [Deleted/retired]
52
//
51
//
53
//Added BAD TLDs
52
//Added BAD TLDs
54
//
53
//
55
//.example (RFC 2606)
54
//.example (RFC 2606)
56
//.localhost (RFC 2606)
55
//.localhost (RFC 2606)
57
//.test (RFC 2606)
56
//.test (RFC 2606)
58
//
57
//
59
//Added official TLDs
58
//Added official TLDs
60
//
59
//
61
//.arpa (infrastructure TLD)
60
//.arpa (infrastructure TLD)
62
//.tel (sponsored TLD)               -- official TLD or rare TLD?
61
//.tel (sponsored TLD)               -- official TLD or rare TLD?
63
//.mobi (sponsored TLD)              -- official TLD or rare TLD?
62
//.mobi (sponsored TLD)              -- official TLD or rare TLD?
64
//.jobs (sponsored TLD)              -- official TLD or rare TLD?
63
//.jobs (sponsored TLD)              -- official TLD or rare TLD?
65
//.cat (sponsored TLD)               -- official TLD or rare TLD?
64
//.cat (sponsored TLD)               -- official TLD or rare TLD?
66
//
65
//
67
//Other changes
66
//Other changes
68
//
67
//
69
//* Commented out unused debugging stuff
68
//* Commented out unused debugging stuff
70
//* Removed main procedure and syso import
69
//* Removed main procedure and syso import
71
 
70
 
72
import javax.mail.internet.AddressException;
71
import javax.mail.internet.AddressException;
73
import javax.mail.internet.InternetAddress;
72
import javax.mail.internet.InternetAddress;
74
import java.util.Arrays;
73
import java.util.Arrays;
75
import java.util.HashSet;
74
import java.util.HashSet;
76
import java.util.Locale;
75
import java.util.Locale;
77
import java.util.regex.Matcher;
76
import java.util.regex.Matcher;
78
import java.util.regex.Pattern;
77
import java.util.regex.Pattern;
79
 
78
 
80
/**
79
/**
81
 * Validate syntax of email addresses.
80
 * Validate syntax of email addresses.
82
 * <p/>
81
 * <p/>
83
 * Does not probe to see if mailserver exists in DNS or online. See MailProber
82
 * Does not probe to see if mailserver exists in DNS or online. See MailProber
84
 * for that. See ValidateEmailFile for an example of how to use this class.
83
 * for that. See ValidateEmailFile for an example of how to use this class.
85
 *
84
 *
86
 * @author Roedy Green, Canadian Mind Products
85
 * @author Roedy Green, Canadian Mind Products
87
 * @version 1.7 2007-08-21
86
 * @version 1.7 2007-08-21
88
 * @since 2002
87
 * @since 2002
89
 */
88
 */
90
// TODO: @version check validity of & in first part of email address. Appears in
89
// TODO: @version check validity of & in first part of email address. Appears in
91
// practice.
90
// practice.
92
 
91
 
93
public final class EmailSyntaxValidator {
92
public final class EmailSyntaxValidator {
94
        // ------------------------------ CONSTANTS ------------------------------
93
        // ------------------------------ CONSTANTS ------------------------------
95
 
94
 
96
        /**
95
        /**
97
         * True if want extra debugging output.
96
         * True if want extra debugging output.
98
         */
97
         */
99
        // @SuppressWarnings( { "UnusedDeclaration" })
98
        // @SuppressWarnings( { "UnusedDeclaration" })
100
        // private static final boolean DEBUGGING = false;
99
        // private static final boolean DEBUGGING = false;
101
 
100
 
102
        /**
101
        /**
103
         * Country where this program is running.
102
         * Country where this program is running.
104
         */
103
         */
105
        private static final String THIS_COUNTRY = Locale.getDefault().getCountry()
104
        private static final String THIS_COUNTRY = Locale.getDefault().getCountry()
106
                        .toLowerCase();
105
                        .toLowerCase();
107
 
106
 
108
        /**
107
        /**
109
         * Bad top level domains -- ones never valid.
108
         * Bad top level domains -- ones never valid.
110
         */
109
         */
111
        private static final HashSet<String> BAD_TLDS = hmaker(new String[] {
110
        private static final HashSet<String> BAD_TLDS = hmaker(new String[] {
112
                        "invalid", "nowhere", "noone", "test", "example", "localhost", });
111
                        "invalid", "nowhere", "noone", "test", "example", "localhost", });
113
 
112
 
114
        /**
113
        /**
115
         * Top level domains for countries.
114
         * Top level domains for countries.
116
         */
115
         */
117
        private static final HashSet<String> NATIONAL_TLDS = hmaker(new String[] {
116
        private static final HashSet<String> NATIONAL_TLDS = hmaker(new String[] {
118
                        "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq",
117
                        "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq",
119
                        "ar", "as", "at", "au", "aw", "ax", "az", "ba", "bb", "bd", "be",
118
                        "ar", "as", "at", "au", "aw", "ax", "az", "ba", "bb", "bd", "be",
120
                        "bf", "bg", "bh", "bi", "bj", "bm", "bn", "bo", "br", "bs", "bt",
119
                        "bf", "bg", "bh", "bi", "bj", "bm", "bn", "bo", "br", "bs", "bt",
121
                        "bw", "by", "bz", "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck",
120
                        "bw", "by", "bz", "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck",
122
                        "cl", "cm", "cn", "co", "cr", "cu", "cv", "cx", "cy", "cz", "de",
121
                        "cl", "cm", "cn", "co", "cr", "cu", "cv", "cx", "cy", "cz", "de",
123
                        "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "er", "es", "et",
122
                        "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "er", "es", "et",
124
                        "eu", "eu", "fi", "fj", "fk", "fm", "fo", "fr", "ga", "gd", "ge",
123
                        "eu", "eu", "fi", "fj", "fk", "fm", "fo", "fr", "ga", "gd", "ge",
125
                        "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs",
124
                        "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs",
126
                        "gt", "gu", "gw", "gy", "hk", "hm", "hn", "hr", "ht", "hu", "id",
125
                        "gt", "gu", "gw", "gy", "hk", "hm", "hn", "hr", "ht", "hu", "id",
127
                        "ie", "il", "im", "in", "io", "iq", "ir", "is", "it", "je", "jm",
126
                        "ie", "il", "im", "in", "io", "iq", "ir", "is", "it", "je", "jm",
128
                        "jo", "jp", "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw",
127
                        "jo", "jp", "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw",
129
                        "ky", "kz", "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu",
128
                        "ky", "kz", "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu",
130
                        "lv", "ly", "ma", "mc", "md", "me", "mg", "mh", "mk", "ml", "mm",
129
                        "lv", "ly", "ma", "mc", "md", "me", "mg", "mh", "mk", "ml", "mm",
131
                        "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx",
130
                        "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx",
132
                        "my", "mz", "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np",
131
                        "my", "mz", "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np",
133
                        "nr", "nu", "nz", "om", "pa", "pe", "pf", "pg", "ph", "pk", "pl",
132
                        "nr", "nu", "nz", "om", "pa", "pe", "pf", "pg", "ph", "pk", "pl",
134
                        "pn", "pr", "ps", "pt", "pw", "py", "qa", "re", "ro", "rs", "ru",
133
                        "pn", "pr", "ps", "pt", "pw", "py", "qa", "re", "ro", "rs", "ru",
135
                        "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sk", "sl",
134
                        "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sk", "sl",
136
                        "sm", "sn", "sr", "st", "su", "sv", "sy", "sz", "tc", "td", "tf",
135
                        "sm", "sn", "sr", "st", "su", "sv", "sy", "sz", "tc", "td", "tf",
137
                        "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tp", "tr", "tt",
136
                        "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tp", "tr", "tt",
138
                        "tv", "tw", "tz", "ua", "ug", "uk", "us", "uy", "uz", "va", "vc",
137
                        "tv", "tw", "tz", "ua", "ug", "uk", "us", "uy", "uz", "va", "vc",
139
                        "ve", "vg", "vi", "vn", "vu", "wf", "ws", "ye", "za", "zm", "zw", });
138
                        "ve", "vg", "vi", "vn", "vu", "wf", "ws", "ye", "za", "zm", "zw", });
140
 
139
 
141
        /**
140
        /**
142
         * Official top level domains.
141
         * Official top level domains.
143
         */
142
         */
144
        private static final HashSet<String> OFFICIAL_TLDS = hmaker(new String[] {
143
        private static final HashSet<String> OFFICIAL_TLDS = hmaker(new String[] {
145
                        "aero", "biz", "coop", "com", "edu", "gov", "info", "mil",
144
                        "aero", "biz", "coop", "com", "edu", "gov", "info", "mil",
146
                        "museum", "name", "net", "org", "pro", "tel", "mobi", "jobs",
145
                        "museum", "name", "net", "org", "pro", "tel", "mobi", "jobs",
147
                        "cat", "arpa", });
146
                        "cat", "arpa", });
148
 
147
 
149
        /**
148
        /**
150
         * Rarely used top level domains
149
         * Rarely used top level domains
151
         */
150
         */
152
        private static final HashSet<String> RARE_TLDS = hmaker(new String[] {
151
        private static final HashSet<String> RARE_TLDS = hmaker(new String[] {
153
                        "cam", "mp3", "agent", "art", "arts", "asia", "auction", "aus",
152
                        "cam", "mp3", "agent", "art", "arts", "asia", "auction", "aus",
154
                        "bank", "cam", "chat", "church", "club", "corp", "dds", "design",
153
                        "bank", "cam", "chat", "church", "club", "corp", "dds", "design",
155
                        "dns2go", "e", "email", "exp", "fam", "family", "faq", "fed",
154
                        "dns2go", "e", "email", "exp", "fam", "family", "faq", "fed",
156
                        "film", "firm", "free", "fun", "g", "game", "games", "gay", "ger",
155
                        "film", "firm", "free", "fun", "g", "game", "games", "gay", "ger",
157
                        "globe", "gmbh", "golf", "gov", "help", "hola", "i", "inc", "int",
156
                        "globe", "gmbh", "golf", "gov", "help", "hola", "i", "inc", "int",
158
                        "jpn", "k12", "kids", "law", "learn", "llb", "llc", "llp", "lnx",
157
                        "jpn", "k12", "kids", "law", "learn", "llb", "llc", "llp", "lnx",
159
                        "love", "ltd", "mag", "mail", "med", "media", "mp3", "netz", "nic",
158
                        "love", "ltd", "mag", "mail", "med", "media", "mp3", "netz", "nic",
160
                        "nom", "npo", "per", "pol", "prices", "radio", "rsc", "school",
159
                        "nom", "npo", "per", "pol", "prices", "radio", "rsc", "school",
161
                        "scifi", "sea", "service", "sex", "shop", "sky", "soc", "space",
160
                        "scifi", "sea", "service", "sex", "shop", "sky", "soc", "space",
162
                        "sport", "tech", "tour", "travel", "usvi", "video", "web", "wine",
161
                        "sport", "tech", "tour", "travel", "usvi", "video", "web", "wine",
163
                        "wir", "wired", "zine", "zoo", });
162
                        "wir", "wired", "zine", "zoo", });
164
 
163
 
165
        /**
164
        /**
166
         * regex to allow dots anywhere, but not at start of domain name, no +
165
         * regex to allow dots anywhere, but not at start of domain name, no +
167
         */
166
         */
168
        private static final Pattern p3 = Pattern
167
        private static final Pattern p3 = Pattern
169
                        .compile("[a-z0-9\\-_\\.]++@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++");
168
                        .compile("[a-z0-9\\-_\\.]++@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++");
170
 
169
 
171
        /**
170
        /**
172
         * regex IP style names, no +
171
         * regex IP style names, no +
173
         */
172
         */
174
        private static final Pattern p4 = Pattern
173
        private static final Pattern p4 = Pattern
175
                        .compile("[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)*@\\[([0-9]{1,3}\\.){3}[0-9]{1,3}\\]");
174
                        .compile("[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)*@\\[([0-9]{1,3}\\.){3}[0-9]{1,3}\\]");
176
 
175
 
177
        /**
176
        /**
178
         * regex to allow - _ dots in name, no +
177
         * regex to allow - _ dots in name, no +
179
         */
178
         */
180
        private static final Pattern p5 = Pattern
179
        private static final Pattern p5 = Pattern
181
                        .compile("[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)*@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++");
180
                        .compile("[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)*@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++");
182
 
181
 
183
        /**
182
        /**
184
         * regex to allow _ - in name, lead and trailing ones are filtered later, no
183
         * regex to allow _ - in name, lead and trailing ones are filtered later, no
185
         * +.
184
         * +.
186
         */
185
         */
187
        private static final Pattern p9 = Pattern
186
        private static final Pattern p9 = Pattern
188
                        .compile("[a-z0-9\\-_]++@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++");
187
                        .compile("[a-z0-9\\-_]++@[a-z0-9\\-_]++(\\.[a-z0-9\\-_]++)++");
189
 
188
 
190
        /**
189
        /**
191
         * regex to split into fields
190
         * regex to split into fields
192
         */
191
         */
193
        private static final Pattern splitter = Pattern.compile("[@\\.]");
192
        private static final Pattern splitter = Pattern.compile("[@\\.]");
194
 
193
 
195
        // -------------------------- PUBLIC STATIC METHODS
194
        // -------------------------- PUBLIC STATIC METHODS
196
        // --------------------------
195
        // --------------------------
197
 
196
 
198
        /**
197
        /**
199
         * Check how likely an email address is to be valid. The higher the number
198
         * Check how likely an email address is to be valid. The higher the number
200
         * returned, the more likely the address is valid. This method does not
199
         * returned, the more likely the address is valid. This method does not
201
         * probe the internet in any way to see if the corresponding mail server or
200
         * probe the internet in any way to see if the corresponding mail server or
202
         * domain exists.
201
         * domain exists.
203
         *
202
         *
204
         * @param email
203
         * @param email
205
         *            bare computer email address. e.g. roedyg@mindprod.com No
204
         *            bare computer email address. e.g. roedyg@mindprod.com No
206
         *            "Roedy Green" <roedyg@mindprod.com> style addresses. No local
205
         *            "Roedy Green" <roedyg@mindprod.com> style addresses. No local
207
         *            addresses, e.g. roedy.
206
         *            addresses, e.g. roedy.
208
         *
207
         *
209
         * @return <ul>
208
         * @return <ul>
210
         *         <li>0 = email address is definitely malformed, e.g. missing
209
         *         <li>0 = email address is definitely malformed, e.g. missing
211
         * the @ sign, or ends in .invalid</li> <li>1 = address does not meet one of the valid
210
         * the @ sign, or ends in .invalid</li> <li>1 = address does not meet one of the valid
212
         *    patterns below. It still might be ok according to some obscure rule in
211
         *    patterns below. It still might be ok according to some obscure rule in
213
         *    RFC 822 Java InternetAddress accepts it as valid.</li> <li>2 = unknown
212
         *    RFC 822 Java InternetAddress accepts it as valid.</li> <li>2 = unknown
214
         *    top level domain.</li> <li>3 = dots at beginning or end, doubled in
213
         *    top level domain.</li> <li>3 = dots at beginning or end, doubled in
215
         *    name.</li> <li>4 = address of form xxx@[209.139.205.2] using IP</li>
214
         *    name.</li> <li>4 = address of form xxx@[209.139.205.2] using IP</li>
216
         *    <li>5 = address of form xxx.xxx.xxx@xxx.xxx.xxx Dots _ or - in first
215
         *    <li>5 = address of form xxx.xxx.xxx@xxx.xxx.xxx Dots _ or - in first
217
         *    part of name</li> <li>6 = address of form xxx@xxx.xxx.xxx rare, but
216
         *    part of name</li> <li>6 = address of form xxx@xxx.xxx.xxx rare, but
218
         *    known, domain</li> <li>7 = address of form xxx@xxx.xxx.ca or any
217
         *    known, domain</li> <li>7 = address of form xxx@xxx.xxx.ca or any
219
         *    national suffix.</li> <li>8 = address of form xxx@xxx.xxx.xx the
218
         *    national suffix.</li> <li>8 = address of form xxx@xxx.xxx.xx the
220
         *    matching this national suffix, e.g. .ca in Canada, .de in Germany</li>
219
         *    matching this national suffix, e.g. .ca in Canada, .de in Germany</li>
221
         *    <li>9 = address of form xxx@xxx.xxx.com .org .net .edu .gov .biz --
220
         *    <li>9 = address of form xxx@xxx.xxx.com .org .net .edu .gov .biz --
222
         *    official domains</li>
221
         *    official domains</li>
223
         *    </ul>
222
         *    </ul>
224
         */
223
         */
225
        public static int howValid(String email) {
224
        public static int howValid(String email) {
226
                if (email == null) {
225
                if (email == null) {
227
                        return 0;
226
                        return 0;
228
                }
227
                }
229
                email = email.trim().toLowerCase();
228
                email = email.trim().toLowerCase();
230
                int dotPlace = email.lastIndexOf('.');
229
                int dotPlace = email.lastIndexOf('.');
231
                if (0 < dotPlace && dotPlace < email.length() - 1) {
230
                if (0 < dotPlace && dotPlace < email.length() - 1) {
232
                        String tld = email.substring(dotPlace + 1);
231
                        String tld = email.substring(dotPlace + 1);
233
                        if (BAD_TLDS.contains(tld)) {
232
                        if (BAD_TLDS.contains(tld)) {
234
                                /* deliberate invalid address */
233
                                /* deliberate invalid address */
235
                                return 0;
234
                                return 0;
236
                        }
235
                        }
237
                        // make sure none of fragments start or end in _ or -
236
                        // make sure none of fragments start or end in _ or -
238
                        String[] fragments = splitter.split(email);
237
                        String[] fragments = splitter.split(email);
239
                        boolean clean = true;
238
                        boolean clean = true;
240
                        for (String fragment : fragments) {
239
                        for (String fragment : fragments) {
241
                                if (fragment.startsWith("_") || fragment.endsWith("_")
240
                                if (fragment.startsWith("_") || fragment.endsWith("_")
242
                                                || fragment.startsWith("-") || fragment.endsWith("-")) {
241
                                                || fragment.startsWith("-") || fragment.endsWith("-")) {
243
                                        clean = false;
242
                                        clean = false;
244
                                        break;
243
                                        break;
245
                                }
244
                                }
246
                        }// end for
245
                        }// end for
247
                        if (clean) {
246
                        if (clean) {
248
                                Matcher m9 = p9.matcher(email);
247
                                Matcher m9 = p9.matcher(email);
249
                                if (m9.matches()) {
248
                                if (m9.matches()) {
250
                                        if (OFFICIAL_TLDS.contains(tld)) {
249
                                        if (OFFICIAL_TLDS.contains(tld)) {
251
                                                return 9;
250
                                                return 9;
252
                                        } else if (THIS_COUNTRY.equals(tld)) {
251
                                        } else if (THIS_COUNTRY.equals(tld)) {
253
                                                return 8;
252
                                                return 8;
254
                                        } else if (NATIONAL_TLDS.contains(tld)) {
253
                                        } else if (NATIONAL_TLDS.contains(tld)) {
255
                                                return 7;
254
                                                return 7;
256
                                        } else if (RARE_TLDS.contains(tld)) {
255
                                        } else if (RARE_TLDS.contains(tld)) {
257
                                                return 6;
256
                                                return 6;
258
                                        } else {
257
                                        } else {
259
                                                // TODO: Why is that 3 and not 2?
258
                                                // TODO: Why is that 3 and not 2?
260
                                                return 3;/* unknown tld */
259
                                                return 3;/* unknown tld */
261
                                        }
260
                                        }
262
                                }
261
                                }
263
                                // allow dots in name
262
                                // allow dots in name
264
                                Matcher m5 = p5.matcher(email);
263
                                Matcher m5 = p5.matcher(email);
265
                                if (m5.matches()) {
264
                                if (m5.matches()) {
266
                                        if (OFFICIAL_TLDS.contains(tld)) {
265
                                        if (OFFICIAL_TLDS.contains(tld)) {
267
                                                return 5;
266
                                                return 5;
268
                                        } else if (THIS_COUNTRY.equals(tld)) {
267
                                        } else if (THIS_COUNTRY.equals(tld)) {
269
                                                return 5;
268
                                                return 5;
270
                                        } else if (NATIONAL_TLDS.contains(tld)) {
269
                                        } else if (NATIONAL_TLDS.contains(tld)) {
271
                                                return 5;
270
                                                return 5;
272
                                        } else if (RARE_TLDS.contains(tld)) {
271
                                        } else if (RARE_TLDS.contains(tld)) {
273
                                                return 5;
272
                                                return 5;
274
                                        } else {
273
                                        } else {
275
                                                return 2;/* unknown tld */
274
                                                return 2;/* unknown tld */
276
                                        }
275
                                        }
277
                                }
276
                                }
278
 
277
 
279
                                // IP
278
                                // IP
280
                                Matcher m4 = p4.matcher(email);
279
                                Matcher m4 = p4.matcher(email);
281
                                if (m4.matches()) {
280
                                if (m4.matches()) {
282
                                        return 4;/* can't tell TLD */
281
                                        return 4;/* can't tell TLD */
283
                                }
282
                                }
284
 
283
 
285
                                // allow even lead/trail dots in name, except at start of domain
284
                                // allow even lead/trail dots in name, except at start of domain
286
                                Matcher m3 = p3.matcher(email);
285
                                Matcher m3 = p3.matcher(email);
287
                                if (m3.matches()) {
286
                                if (m3.matches()) {
288
                                        if (OFFICIAL_TLDS.contains(tld)) {
287
                                        if (OFFICIAL_TLDS.contains(tld)) {
289
                                                return 3;
288
                                                return 3;
290
                                        } else if (THIS_COUNTRY.equals(tld)) {
289
                                        } else if (THIS_COUNTRY.equals(tld)) {
291
                                                return 3;
290
                                                return 3;
292
                                        } else if (NATIONAL_TLDS.contains(tld)) {
291
                                        } else if (NATIONAL_TLDS.contains(tld)) {
293
                                                return 3;
292
                                                return 3;
294
                                        } else if (RARE_TLDS.contains(tld)) {
293
                                        } else if (RARE_TLDS.contains(tld)) {
295
                                                return 3;
294
                                                return 3;
296
                                        } else {
295
                                        } else {
297
                                                return 2;/* unknown domain */
296
                                                return 2;/* unknown domain */
298
                                        }
297
                                        }
299
                                }
298
                                }
300
                        }// end if clean
299
                        }// end if clean
301
                }
300
                }
302
                // allow even unclean addresses, and addresses without a TLD to have a
301
                // allow even unclean addresses, and addresses without a TLD to have a
303
                // whack at passing RFC:822
302
                // whack at passing RFC:822
304
                try {
303
                try {
305
                        /*
304
                        /*
306
                         * see if InternetAddress likes it, it follows RFC:822. It will
305
                         * see if InternetAddress likes it, it follows RFC:822. It will
307
                         * accept names without domains though.
306
                         * accept names without domains though.
308
                         */
307
                         */
309
                        InternetAddress.parse(email, true/* strict */);
308
                        InternetAddress.parse(email, true/* strict */);
310
                        // it liked it, no exception happened. Seems very sloppy.
309
                        // it liked it, no exception happened. Seems very sloppy.
311
                        return 1;
310
                        return 1;
312
                } catch (AddressException e) {
311
                } catch (AddressException e) {
313
                        // it did not like it
312
                        // it did not like it
314
                        return 0;
313
                        return 0;
315
                }
314
                }
316
        }
315
        }
317
 
316
 
318
        // -------------------------- STATIC METHODS --------------------------
317
        // -------------------------- STATIC METHODS --------------------------
319
 
318
 
320
        /**
319
        /**
321
         * build a HashSet from an array of String literals.
320
         * build a HashSet from an array of String literals.
322
         *
321
         *
323
         * @param list
322
         * @param list
324
         *            array of strings
323
         *            array of strings
325
         *
324
         *
326
         * @return HashSet you can use to test if a string is in the set.
325
         * @return HashSet you can use to test if a string is in the set.
327
         */
326
         */
328
        private static HashSet<String> hmaker(String[] list) {
327
        private static HashSet<String> hmaker(String[] list) {
329
                HashSet<String> map = new HashSet<String>(Math.max(
328
                HashSet<String> map = new HashSet<String>(Math.max(
330
                                (int) (list.length / .75f) + 1, 16));
329
                                (int) (list.length / .75f) + 1, 16));
331
                map.addAll(Arrays.asList(list));
330
                map.addAll(Arrays.asList(list));
332
                return map;
331
                return map;
333
        }
332
        }
334
 
333
 
335
        // --------------------------- main() method ---------------------------
334
        // --------------------------- main() method ---------------------------
336
 
335
 
337
        /**
336
        /**
338
         * main debugging harness.
337
         * main debugging harness.
339
         *
338
         *
340
         * @param args
339
         * @param args
341
         *            not used
340
         *            not used
342
         */
341
         */
343
        // public static void main(String[] args) {
342
        // public static void main(String[] args) {
344
        // out.println(howValid("kellizer@.hotmail.com"));
343
        // out.println(howValid("kellizer@.hotmail.com"));
345
        // }
344
        // }
346
}
345
}
347
 
346