Rev 7 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2 | daniel-mar | 1 | #!/usr/bin/php |
2 | <?php |
||
3 | |||
4 | # |
||
11 | daniel-mar | 5 | # VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois) |
5 | daniel-mar | 6 | # Maintenance / Developer utilities |
2 | daniel-mar | 7 | # |
5 | daniel-mar | 8 | # (c) 2012-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de> |
2 | daniel-mar | 9 | # |
5 | daniel-mar | 10 | # License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2) |
2 | daniel-mar | 11 | # |
12 | |||
# TODO: Automatically convert ":notice||Whois server unknown (2014-03-15)" entries in pattern_newgtld into ":whois|" entries once IANA lists a whois server for the TLD.
||
14 | |||
// Report every kind of diagnostic. E_ALL already contains E_NOTICE, E_STRICT
// and E_DEPRECATED (E_STRICT since PHP 5.4), and naming E_STRICT explicitly
// raises a deprecation notice on PHP 8.4+.
error_reporting(E_ALL);

// NOTE(review): the '/--/' path segment differs from the '/../../' style used
// by the two includes below - confirm it is not a corrupted '/../'.
require_once __DIR__ . '/--/config.inc.phps';
require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.phps';
require_once __DIR__ . '/../../shared/php_includes/idna_convert.class.php';

# ---

// Lines of the IANA TLD registry; filled on first use by Step 2 / Step 3.
$iana_tld_data = null;
||
24 | |||
// Step 1:
// Check if in the meantime TLDs without an official whois server were updated to have one
// Attention/TODO: A change of the whois name still needs manual intervention!

$newgtld_cont_original = file_get_contents(NEWGTLD_PATTERN_FILE);

// Matches the three-line block that Step 2 writes for a TLD without a known
// whois server:
//   # TODO: Entry generated automatically. Needs manual check.
//   :notice||Whois server unknown (<date of last check>)
//   \.<tld>$
// The backslash in front of the dot is optional in the regex, so both the
// escaped form (written by Step 2, searched by does_exist()) and an
// unescaped ".<tld>$" line are recognised.
$newgtld_cont_new = preg_replace_callback(
    '@# TODO: Entry generated automatically\. Needs manual check\.\n:notice\|\|Whois server unknown \((.*)\)\n\\\\?\.(.*)\$\n@imU',
    function ($treffer) {
        $in_all = $treffer[0]; // the complete matched entry
        $in_ts  = $treffer[1]; // date of the last check
        $in_tld = $treffer[2]; // TLD without the leading dot

        $days_passed = (time() - strtotime($in_ts)) / (60 * 60 * 24);
        if ($days_passed < NEWGTLD_RECHECK_MISSING_WHOIS_SERVERS) {
            return $in_all; // checked recently enough: leave everything unchanged
        }

        $whois_serv = find_rootzone_whois_server($in_tld);

        if (!$whois_serv) {
            // Nothing found. Just update last check date.
            return str_replace($in_ts, date('Y-m-d'), $in_all);
        }

        // Replace the placeholder by a real entry. The TLD line is written as
        // "\.<tld>$", the same form Step 2 emits and does_exist() searches for.
        return ":whois|$whois_serv\n\\.{$in_tld}\$\n";
    },
    $newgtld_cont_original
);

// preg_replace_callback() yields null on a PCRE error and file_get_contents()
// yields false on a read error; never overwrite the pattern file in those cases.
if (is_string($newgtld_cont_original)
    && is_string($newgtld_cont_new)
    && $newgtld_cont_new !== $newgtld_cont_original
) {
    file_put_contents(NEWGTLD_PATTERN_FILE, $newgtld_cont_new);
    gwi_update_newgtld_patternfile();
}
||
60 | |||
// Step 2:
// Search for new gTLDs which are not in our pattern file

foreach (gwi_newgtld_get_all_delegated_strings() as $delegation) {
    // Row layout: [0] delegation date, [1] raw table cell, [2] TLD,
    // [3] Unicode TLD, [4] explanation. Only 0, 2 and 4 are needed here.
    $delegated_on = $delegation[0];
    $tld          = $delegation[2];
    $remark       = $delegation[4];

    // Is it already in our pattern file?
    if (does_exist($tld)) {
        # echo "Info: $tld is already in pattern.\n";
        continue;
    }

    // Only add the TLD to our pattern file if it has also been published by IANA.
    // The reason is that the newGTLD page had temporary typos many times (e.g. calogne instead of cologne)
    if (!isset($iana_tld_data)) {
        $iana_tld_data = array_map('trim', explode("\n", cached_file(IANA_TLD_REGISTRY, CACHE_FILE_DIR)));
    }
    $published_by_iana = in_array(strtoupper($tld), $iana_tld_data);
    if (!$published_by_iana) {
        echo "Info: Will not add $tld, since it is not yet added in IANA's registry.\n";
        continue;
    }

    // Assemble the new pattern entry: comment lines, the action line, the TLD regex line.
    $entry = "\n" . "# Delegated on $delegated_on\n";
    if ($remark) {
        $entry .= "# $remark\n";
    }

    $whois_serv = find_rootzone_whois_server($tld);
    $entry .= $whois_serv
        ? ":whois|$whois_serv\n"
        : "# TODO: Entry generated automatically. Needs manual check.\n" .
          ":notice||Whois server unknown (" . date('Y-m-d') . ")\n";

    $entry .= '\\.' . $tld . "\$\n";

    file_put_contents(NEWGTLD_PATTERN_FILE, $entry, FILE_APPEND);

    // Refresh the "#: version" header after every appended entry.
    gwi_update_newgtld_patternfile();
}
||
109 | |||
110 | |||
111 | |||
112 | |||
113 | |||
114 | |||
// TODO:
// Step 3: Check if there are IANA TLDs which are not in our pattern files yet
if (!isset($iana_tld_data)) {
    $iana_tld_data = array_map('trim', explode("\n", cached_file(IANA_TLD_REGISTRY, CACHE_FILE_DIR)));
}

foreach ($iana_tld_data as $tld) {
    if (!does_exist($tld)) { # TODO: look in all pattern files
        # echo "Does not exist: $tld\n";
    }

    // Step 3 is not implemented yet: every TLD is skipped here, so the
    // remainder of the loop body is an unreachable sketch of the plan.
    continue;

    $newgtld_res = count_newgtld_applications($tld);
    if ($newgtld_res === false) {
        // TODO: ignore?
    } elseif ($newgtld_res > 0) {
        // TODO: add to newgtld pattern file
    } else {
        // TODO: add to normal pattern file
    }
}
||
142 | |||
143 | |||
144 | |||
145 | # ------------------------------------------------------ |
||
146 | |||
/**
 * Stamps the new gTLD pattern file with today's date.
 *
 * The first "#: version <something>" line in NEWGTLD_PATTERN_FILE is replaced
 * by "#: version <Ymd>". If the file has no such line, a complete header block
 * is put in front of the existing content. The file is only rewritten when its
 * content actually changed.
 *
 * @return void
 */
function gwi_update_newgtld_patternfile() {
    $now = date('Ymd');

    $pcont_original = file_get_contents(NEWGTLD_PATTERN_FILE);

    $count = 0;
    $pcont = preg_replace("@#: version (\\S+)@i", "#: version $now", $pcont_original, 1, $count);
    if ($pcont === null) {
        // PCRE error: leave the file alone rather than overwriting it with nothing.
        return;
    }

    if ($count == 0) {
        // Add header
        $pcont = "#: version $now\n".
                 "# New gTLD\n".
                 "# see: http://newgtlds.icann.org/en/program-status/delegated-strings\n".
                 "# This file can be updated by running \"vgwhois-pattern-update\", but it does only ADD new \"New gTLDs\"\n".
                 "# --------------------------------------------------------------------\n".
                 "\n".$pcont;
    }

    if ($pcont !== $pcont_original) {
        file_put_contents(NEWGTLD_PATTERN_FILE, $pcont);
    }
}
||
170 | |||
/**
 * Determines the whois server of a TLD.
 *
 * The server returned by iana_get_rootzone_whois_server() wins. If there is
 * none and TRY_FINDING_HIDDEN_WHOIS_SERVERS is enabled, a few conventional
 * host names are probed on TCP port 43 and the first one that is open is used.
 *
 * @param string $tld TLD without leading dot
 * @return string|false the whois host name, or the falsy IANA result if nothing was found
 */
function find_rootzone_whois_server($tld) {
    $whois_serv = iana_get_rootzone_whois_server($tld);

    if (!TRY_FINDING_HIDDEN_WHOIS_SERVERS) {
        return $whois_serv;
    }

    // Try to find "secret whois servers"
    // TODO: also try out to use the URL of the homepage (in IANAs root DB)
    $candidates = array("whois.nic.$tld", "whois.$tld", "$tld");
    foreach ($candidates as $check_server) {
        if ($whois_serv) {
            break; // already have a server: do not probe any further
        }
        if (gwitc_is_port_open($check_server, 43)) {
            $whois_serv = $check_server;
        }
    }

    return $whois_serv;
}
||
199 | |||
/**
 * Asks whois.iana.org for a TLD and extracts the value of the "whois:" field
 * from the response.
 *
 * @param string $tld TLD without leading dot (lower-cased before the query)
 * @return string|false the first non-whitespace token after "whois:", or false if the field is absent or empty
 */
function iana_get_rootzone_whois_server($tld) {
    $response = QueryWhoisServer('whois.iana.org', strtolower($tld));

    $matches = array();
    $found = preg_match('@whois:\\s*(\\S+)@i', $response, $matches);

    return $found ? $matches[1] : false;
}
||
206 | |||
/**
 * Tells whether NEWGTLD_PATTERN_FILE contains a line "\.<tld>$" for the TLD.
 *
 * The comparison is case-insensitive. The line has to be preceded and
 * followed by a line feed to be found.
 *
 * @param string $tld TLD without leading dot
 * @return bool
 */
function does_exist($tld) {
    $needle   = "\n\\." . strtolower($tld) . "\$\n";
    $haystack = strtolower(file_get_contents(NEWGTLD_PATTERN_FILE));

    return strpos($haystack, $needle) !== false;
}
||
213 | |||
/**
 * Downloads ICANN's "delegated strings" page and parses its table.
 *
 * Every table row with two cells becomes one result entry. The rows are
 * returned in reverse page order.
 *
 * @return array list of array($date, $string, $tld, $tld_uc, $explanation):
 *               $date        text of the first cell,
 *               $string      normalised text of the second cell,
 *               $tld         TLD (taken from the parenthesised second token if the cell has several tokens),
 *               $tld_uc      first token of the cell (same as $tld for single-token cells),
 *               $explanation remaining text of the cell, may be empty.
 *               An empty array is returned if the page cannot be downloaded
 *               or does not contain the expected table header.
 */
function gwi_newgtld_get_all_delegated_strings() {
    $cont = file_get_contents('http://newgtlds.icann.org/en/program-status/delegated-strings');
    if ($cont === false) {
        return array();
    }

    // Convert Unicode stuff
    $cont = str_replace('xn'.unichr(0x2013), 'xn--', $cont);
    $cont = str_replace('xn'.unichr(0x2015), 'xn--', $cont); // used in Samsung TLD
    $cont = str_replace(unichr(0x2013), '-', $cont); // used in most explanations
    $cont = str_replace(unichr(0x2015), '-', $cont);
    $cont = str_replace(unichr(0x00fc), 'ue', $cont); // German umlaut ue (used in .koeln)
    // UTF-8 -> ISO-8859-1; replaces utf8_decode(), which is deprecated since PHP 8.2.
    $cont = mb_convert_encoding($cont, 'ISO-8859-1', 'UTF-8');

    // Do some minor corrections
    $cont = str_replace('game (s)', 'game(s)', $cont);

    // Everything after the "STRING" column header is the table body.
    $parts = explode('STRING</th>', $cont, 2);
    if (count($parts) < 2) {
        return array(); // header not found, nothing to parse
    }
    $cont = $parts[1];

    $m = array();
    preg_match_all('@<tr>\s*<td[^>]*>(.*)</td>\s*<td[^>]*>(.*)</td>\s*</tr>@ismU', $cont, $m, PREG_SET_ORDER);

    $m = array_reverse($m);

    $out = array();
    foreach ($m as $data) {
        $date   = html_entity_decode(strip_tags($data[1]));
        $string = html_entity_decode(strip_tags($data[2]));

        $string = str_replace('(', ' (', $string);
        // Collapse every run of blanks into a single blank.
        $string = preg_replace('@ {2,}@', ' ', $string);

        # Fixing some misplaced white spaces
        $string = preg_replace('@\.\s+@m', '.', $string);
        $string = preg_replace('@\(\s+@m', '(', $string);
        $string = preg_replace('@\s+\)@m', ')', $string);

        // Split into "<unicode tld> (<punycode tld>) <explanation>". Surrounding
        // white space is ignored so that it cannot produce empty tokens.
        $ary = explode(' ', trim($string), 3);
        if (count($ary) > 1) {
            $tld_uc = trim($ary[0]); // Unicode TLD

            $tld = trim($ary[1]); // Punycode TLD
            $tld = (string)substr($tld, 1, strlen($tld)-2); // strip the enclosing parentheses

            // The explanation is optional; if present, drop its leading "-".
            $explanation = isset($ary[2]) ? trim($ary[2]) : '';
            $explanation = trim((string)substr($explanation, 1));
        } else {
            $tld = trim(strtolower($string));
            $tld_uc = $tld;
            $explanation = '';
        }

        $out[] = array($date, $string, $tld, $tld_uc, $explanation);
    }

    return $out;
}
||
269 | |||
/**
 * Return the UTF-8 encoded character for a Unicode code point.
 *
 * mb_chr() is used where available (PHP 7.2+ with mbstring). Otherwise, and
 * for code points mb_chr() rejects, the numeric HTML entity is decoded with
 * html_entity_decode(). The former 'HTML-ENTITIES' mbstring conversion is
 * deprecated since PHP 8.2 and therefore no longer used.
 *
 * NOTE(review): in the html_entity_decode() fallback, code points that HTML
 * does not permit as numeric references (e.g. 0) presumably come back as the
 * undecoded "&#N;" text - confirm if such inputs ever matter.
 *
 * @param int $u Unicode code point
 * @return string
 * @source http://www.php.net/manual/de/function.chr.php#88611
 */
function unichr($u) {
    $codepoint = intval($u);

    if (function_exists('mb_chr')) {
        $char = mb_chr($codepoint, 'UTF-8');
        if ($char !== false) {
            return $char;
        }
    }

    return html_entity_decode('&#' . $codepoint . ';', ENT_QUOTES, 'UTF-8');
}
||
280 | |||
/**
 * Counts the applications listed for a string in ICANN's new gTLD application
 * status search (gtldresult.icann.org).
 *
 * The search is driven through the command line tool "curl" in three steps:
 * fetch the search page (session cookie + form token), post the search form,
 * then fetch the result page and count the links to application details whose
 * link text equals the string.
 *
 * Side effect: prints a "<string> = <count>" debug line.
 *
 * @param string $string TLD to look for; a punycode name is decoded first if the IDNA converter can decode it
 * @return int|false number of matching applications, or false if a curl call
 *                   fails or the session id / form token cannot be found
 */
function count_newgtld_applications($string) {
    // A fresh converter is created on every call.
    $punycoder = new idna_convert();
    $ua = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0';

    if (($x = $punycoder->decode(strtolower($string))) !== false) $string = $x;

    // 1) Open the search page to obtain a session cookie and the form token.
    $out = array();
    exec('curl -i -s https://gtldresult.icann.org/application-result/applicationstatus/viewstatus -H "Host: gtldresult.icann.org" -H '.escapeshellarg('User-Agent: '.$ua), $out, $code);
    if ($code != 0) return false;
    $html = implode("\n", $out);

    if (!preg_match('@JSESSIONID=(.+);@ismU', $html, $m)) return false;
    $jsessionid = $m[1];

    if (!preg_match('@<input value="([^"]+)" name="t:formdata"@ismU', $html, $m)) return false;
    $formdata = $m[1];
    $formdata = str_replace('+', '%2B', $formdata);
    $formdata = str_replace('/', '%2F', $formdata);

    // 2) Submit the search form. The search term is URL-encoded: after punycode
    //    decoding it can contain non-ASCII characters, which are neither valid
    //    in a form-encoded body nor guaranteed to survive escapeshellarg().
    $post_body = 't%3Aformdata='.$formdata.
                 '&t%3Asubmit=%5B%22searchButton%22%2C%22searchButton%22%5D'.
                 '&searchField='.urlencode($string).
                 '&searchButton=Search&status=&updates=&objections=&gacew=&similar=&pic=';

    $out = array();
    exec('curl -s "https://gtldresult.icann.org/application-result/applicationstatus/viewstatus.applicationstatusform" -H "Host: gtldresult.icann.org" -H '.escapeshellarg('User-Agent: '.$ua).' -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" -H "Accept-Language: de,en-US;q=0.7,en;q=0.3" -H "Referer: https://gtldresult.icann.org/application-result/applicationstatus/viewstatus" -H '.escapeshellarg('Cookie: JSESSIONID='.$jsessionid).' -H "Connection: keep-alive" --data '.escapeshellarg($post_body), $out, $code);
    if ($code != 0) return false;

    // 3) Load the result page that belongs to the session.
    $out = array();
    exec('curl -s "https://gtldresult.icann.org/application-result/applicationstatus/viewstatus" -H "Host: gtldresult.icann.org" -H '.escapeshellarg('User-Agent: '.$ua).' -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" -H "Accept-Language: de,en-US;q=0.7,en;q=0.3" -H "Referer: https://gtldresult.icann.org/application-result/applicationstatus/viewstatus" -H '.escapeshellarg('Cookie: JSESSIONID='.$jsessionid).' -H "DNT: 1" -H "Connection: keep-alive"', $out, $code);
    if ($code != 0) return false;

    $html = implode("\n", $out);

    // One application-details link per application; the link text must equal the string.
    preg_match_all('@href="/application-result/applicationstatus/viewstatus:viewapplicationdetails/(\d+)">'.preg_quote($string, '@').'</a>@ismU', $html, $m);

    # DEBUG
    echo "$string = ".count($m[1])."\n";

    return count($m[1]);
}
||
321 | |||
322 | #assert(count_newgtld_applications('shopping') == 2); |