Subversion Repositories vgwhois

Rev

Rev 7 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 daniel-mar 1
#!/usr/bin/php
2
<?php
3
 
4
#
11 daniel-mar 5
#  VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
5 daniel-mar 6
#  Maintenance / Developer utilities
2 daniel-mar 7
#
5 daniel-mar 8
#  (c) 2012-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
2 daniel-mar 9
#
5 daniel-mar 10
#  License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
2 daniel-mar 11
#
12
 
13
# TODO: Automatically convert ":notice||Whois server unknown (2014-03-15)" entries in pattern_newgtld into ":whois|" entries as soon as IANA lists a whois server.
14
 
15
// Report everything. E_ALL already contains E_NOTICE, E_STRICT and E_DEPRECATED
// (PHP >= 5.4); spelling out E_STRICT is itself deprecated as of PHP 8.4.
error_reporting(E_ALL);
16
 
4 daniel-mar 17
require_once __DIR__ . '/--/config.inc.phps';
2 daniel-mar 18
require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.phps';
19
require_once __DIR__ . '/../../shared/php_includes/idna_convert.class.php';
20
 
21
# ---
22
 
23
$iana_tld_data = null;
24
 
25
// Step 1:
26
// Check if in the meantime TLDs without an official whois server were updated to have one
27
// Attention/TODO: A change of the whois name still needs manual intervention!
28
 
29
// Re-check every automatically generated "Whois server unknown" entry of the
// new-gTLD pattern file and upgrade it to a ":whois|" entry once a server is found.
$newgtld_cont_original = file_get_contents(NEWGTLD_PATTERN_FILE);

$newgtld_cont_new = preg_replace_callback(
	// Entries are appended as "\.tld$" (backslash + dot) further below and looked up
	// in that form by does_exist(), so a literal backslash must be accepted in front
	// of the dot. It is optional so that entries without it are still recognised.
	'@# TODO: Entry generated automatically\. Needs manual check\.\n:notice\|\|Whois server unknown \((.*)\)\n\\\\?\.(.*)\$\n@imU',
	function ($match) {
		$entry      = $match[0]; // the complete three-line entry
		$checked_on = $match[1]; // date of the last check, as written in the notice
		$tld        = $match[2];

		// An unparsable date makes strtotime() return false, which counts as 0 here
		// and therefore forces a re-check.
		$days_passed = (time() - strtotime($checked_on)) / (60*60*24);
		if ($days_passed < NEWGTLD_RECHECK_MISSING_WHOIS_SERVERS) {
			return $entry; // leave everything unchanged
		}

		$whois_serv = find_rootzone_whois_server($tld);

		if (!$whois_serv) {
			// Nothing found. Just update the last check date.
			return str_replace($checked_on, date('Y-m-d'), $entry);
		}

		// Replace the notice entry, keeping the "\.tld$" pattern format used everywhere else.
		return ":whois|$whois_serv\n\\.{$tld}\$\n";
	},
	$newgtld_cont_original
);

// Only write back real changes. A failed read (false) or a PCRE error (null) must
// never overwrite the pattern file.
if (is_string($newgtld_cont_original) && is_string($newgtld_cont_new) && ($newgtld_cont_original !== $newgtld_cont_new)) {
	file_put_contents(NEWGTLD_PATTERN_FILE, $newgtld_cont_new);
	gwi_update_newgtld_patternfile();
}
60
 
61
// Step 2:
62
// Search for new gTLDs which are not in our pattern file
63
 
64
// Walk through all delegated strings and append every TLD that is missing in
// the pattern file, provided that IANA lists it as well.
$newgtld_data = gwi_newgtld_get_all_delegated_strings();
foreach ($newgtld_data as $data) {
	list($date, $string, $tld, $tld_uc, $explanation) = $data;

	// Is it already in our pattern file?
	if (does_exist($tld)) {
#		echo "Info: $tld is already in pattern.\n";
		continue;
	}

	// Only add the TLD to our pattern file if it has also been published by IANA.
	// The reason is that the newGTLD page had temporary typos many times (e.g. calogne instead of cologne)
	if (!isset($iana_tld_data)) {
		// Loaded lazily, on the first TLD that is not in the pattern file yet
		$iana_tld_data = array_map('trim', explode("\n", cached_file(IANA_TLD_REGISTRY, CACHE_FILE_DIR)));
	}
	if (!in_array(strtoupper($tld), $iana_tld_data)) {
		echo "Info: Will not add $tld, since it is not yet added in IANA's registry.\n";
		continue;
	}

	// Collect the lines of the new entry; a leading empty element yields the blank separator line
	$entry_lines = array('', "# Delegated on $date");
	if ($explanation) {
		$entry_lines[] = "# $explanation";
	}

	$whois_serv = find_rootzone_whois_server($tld);

	if (!$whois_serv) {
		// No server known yet: mark the entry so that it is re-checked later
		$entry_lines[] = "# TODO: Entry generated automatically. Needs manual check.";
		$entry_lines[] = ":notice||Whois server unknown (".date('Y-m-d').")";
	} else {
		$entry_lines[] = ":whois|$whois_serv";
	}
	$entry_lines[] = "\\.$tld$";

	$to_append = implode("\n", $entry_lines)."\n";

	file_put_contents(NEWGTLD_PATTERN_FILE, $to_append, FILE_APPEND);

	// Refresh the version stamp after every appended entry
	gwi_update_newgtld_patternfile();
}
109
 
110
 
111
 
112
 
113
 
114
 
115
// TODO:
116
// Step 3: Check if there are IANA TLDs which are not in our pattern files yet
3 daniel-mar 117
// The IANA TLD list may not have been loaded yet at this point
if (!isset($iana_tld_data)) {
	$iana_tld_data = array_map('trim', explode("\n", cached_file(IANA_TLD_REGISTRY, CACHE_FILE_DIR)));
}
foreach ($iana_tld_data as $tld) {
	// TODO: look into all pattern files
	if (does_exist($tld)) {
		continue;
	}

	# echo "Does not exist: $tld\n";

	// Step 3 is not implemented yet: every remaining TLD is skipped, and the
	// classification below is never reached.
	continue;

	$newgtld_res = count_newgtld_applications($tld);
	if ($newgtld_res === false) {
		// TODO: ignore?
	} else if ($newgtld_res > 0) {
		// TODO: add to newgtld pattern file
	} else {
		// TODO: add to normal pattern file
	}
}
142
 
143
 
144
 
145
# ------------------------------------------------------
146
 
147
/**
 * Refreshes the "#: version YYYYMMDD" stamp of the new-gTLD pattern file.
 *
 * The first "#: version ..." line found in NEWGTLD_PATTERN_FILE is rewritten
 * to today's date. If the file has no such line, a complete header block is
 * prepended instead. The file is only written when its content changed.
 *
 * @return void
 */
function gwi_update_newgtld_patternfile() {
	$now = date('Ymd');

	$pcont_original = file_get_contents(NEWGTLD_PATTERN_FILE);
	$pcont = $pcont_original;

	// Replace the first version stamp only; $count tells whether one was present
	$count = 0;
	$pcont = preg_replace("@#: version (\\S+)@i", "#: version $now", $pcont, 1, $count);

	if ($count == 0) {
		// Add header
		// (the quotes around the tool name have to be escaped inside the double-quoted string)
		$pcont = "#: version $now\n".
		         "# New gTLD\n".
		         "# see: http://newgtlds.icann.org/en/program-status/delegated-strings\n".
		         "# This file can be updated by running \"vgwhois-pattern-update\", but it does only ADD new \"New gTLDs\"\n".
		         "# --------------------------------------------------------------------\n".
		         "\n".$pcont;
	}

	if ($pcont != $pcont_original) {
		file_put_contents(NEWGTLD_PATTERN_FILE, $pcont);
	}
}
170
 
171
/**
 * Determines the whois server of a TLD.
 *
 * The server announced by IANA is preferred. If there is none and
 * TRY_FINDING_HIDDEN_WHOIS_SERVERS is enabled, a few conventional host names
 * are probed on port 43 and the first reachable one is returned.
 *
 * @param string $tld TLD without leading dot
 * @return string|false Host name of the whois server, or the (falsy) IANA result if nothing was found
 */
function find_rootzone_whois_server($tld) {
	$whois_serv = iana_get_rootzone_whois_server($tld);

	// Probing is optional and only needed if IANA did not name a server
	if (!TRY_FINDING_HIDDEN_WHOIS_SERVERS || $whois_serv) {
		return $whois_serv;
	}

	// Try to find "secret whois servers"
	// TODO: also try out to use the URL of the homepage (in IANAs root DB)
	$candidates = array("whois.nic.$tld", "whois.$tld", "$tld");
	foreach ($candidates as $check_server) {
		if (gwitc_is_port_open($check_server, 43)) {
			return $check_server;
		}
	}

	return $whois_serv;
}
199
 
200
/**
 * Asks whois.iana.org for the whois server of a TLD.
 *
 * @param string $tld TLD without leading dot (it is lower-cased before the query)
 * @return string|false Value of the first "whois:" field of the response, or false if there is none
 */
function iana_get_rootzone_whois_server($tld) {
	$response = QueryWhoisServer('whois.iana.org', strtolower($tld));

	$found = preg_match('@whois:\\s*(\\S+)@i', $response, $m);

	return $found ? $m[1] : false;
}
206
 
207
/**
 * Checks whether the new-gTLD pattern file already contains an entry for a TLD.
 *
 * The comparison is case-insensitive and looks for a complete pattern line
 * of the form "\.tld$", preceded and followed by a line break.
 *
 * @param string $tld TLD without leading dot
 * @return bool true if a matching pattern line exists
 */
function does_exist($tld) {
	$haystack = strtolower(file_get_contents(NEWGTLD_PATTERN_FILE));
	$needle   = "\n\\." . strtolower($tld) . "\$\n";

	return strpos($haystack, $needle) !== false;
}
213
 
214
/**
 * Downloads ICANN's "delegated strings" page and extracts all listed new gTLDs.
 *
 * The rows are returned in the reverse order of the table on the page.
 *
 * @return array List of array($date, $string, $tld, $tld_uc, $explanation):
 *               $date and $string are the two table cells as text, $tld is the
 *               (Punycode) TLD, $tld_uc the first word of the cell and
 *               $explanation the remaining text (empty if there is none).
 *               An empty array is returned if the page could not be fetched or
 *               does not contain the expected table.
 */
function gwi_newgtld_get_all_delegated_strings() {
	$cont = file_get_contents('http://newgtlds.icann.org/en/program-status/delegated-strings');
	if (($cont === false) || ($cont === '')) {
		// Download failed: there is nothing to parse
		return array();
	}

	// Convert Unicode stuff
	$cont = str_replace('xn'.unichr(0x2013), 'xn--', $cont);
	$cont = str_replace('xn'.unichr(0x2015), 'xn--', $cont); // used in Samsung TLD
	$cont = str_replace(unichr(0x2013), '-', $cont); // used in most explanations
	$cont = str_replace(unichr(0x2015), '-', $cont);
	$cont = str_replace(unichr(0x00fc), 'ue', $cont); // German umlaut ue (used in .koeln)
	// UTF-8 -> ISO-8859-1, as utf8_decode() did (that function is deprecated since PHP 8.2)
	$cont = mb_convert_encoding($cont, 'ISO-8859-1', 'UTF-8');

	// Do some minor corrections
	$cont = str_replace('game (s)', 'game(s)', $cont);

	// Everything in front of the "STRING" column header is irrelevant
	$parts = explode('STRING</th>', $cont, 2);
	if (count($parts) < 2) {
		// Page layout changed: the table header was not found
		return array();
	}
	$cont = $parts[1];

	preg_match_all('@<tr>\s*<td[^>]*>(.*)</td>\s*<td[^>]*>(.*)</td>\s*</tr>@ismU', $cont, $m, PREG_SET_ORDER);

	$m = array_reverse($m);

	$out = array();
	foreach ($m as $data) {
		$date     = html_entity_decode(strip_tags($data[1]));
		$string   = html_entity_decode(strip_tags($data[2]));

		// Make sure there is a space in front of "(" and collapse runs of spaces
		$string = str_replace('(', ' (', $string);
		while (strpos($string, '  ') !== false) $string = str_replace('  ', ' ', $string);

		# Fixing some misplaced white spaces
		$string = preg_replace('@\.\s+@m', '.', $string);
		$string = preg_replace('@\(\s+@m', '(', $string);
		$string = preg_replace('@\s+\)@m', ')', $string);

		$ary = explode(' ', $string, 3);
		if (count($ary) > 1) {
			$tld_uc      = trim($ary[0]); // Unicode TLD

			$tld         = trim($ary[1]); // Punycode TLD
			$tld         = substr($tld, 1, strlen($tld)-2); // strip first and last character (the parentheses)

			// The explanation is optional; without it explode() yields only two parts
			$explanation = isset($ary[2]) ? trim($ary[2]) : '';
			$explanation = (string)substr($explanation, 1); // remove "-"
			$explanation = trim($explanation);
		} else {
			$tld         = trim(strtolower($string));
			$tld_uc      = $tld;
			$explanation = '';
		}

		$out[] = array($date, $string, $tld, $tld_uc, $explanation);
	}

	return $out;
}
269
 
270
/**
 * Return the UTF-8 encoded character for a Unicode code point
 *
 * @param int $u Unicode code point, e.g. 0x2013
 * @return string The character as UTF-8 byte sequence
 * @source http://www.php.net/manual/de/function.chr.php#88611
 */
function unichr($u) {
	$codepoint = intval($u);

	// mb_chr() is available since PHP 7.2. The mbstring "HTML-ENTITIES" encoding
	// used previously is deprecated since PHP 8.2.
	if (function_exists('mb_chr')) {
		$char = mb_chr($codepoint, 'UTF-8');
		if ($char !== false) {
			return $char;
		}
	}

	// Fallback without mbstring: decode the numeric character reference
	return html_entity_decode('&#' . $codepoint . ';', ENT_QUOTES, 'UTF-8');
}
280
 
281
/**
 * Counts the new-gTLD applications ICANN lists for a string.
 *
 * Three curl calls are made against gtldresult.icann.org:
 * 1. fetch the status page to obtain a JSESSIONID cookie and the "t:formdata" token,
 * 2. submit the search form with that cookie,
 * 3. fetch the status page again with the same cookie and count the result links
 *    whose link text equals the search string.
 *
 * @param string $string TLD to search for; a Punycode name is converted with idna_convert::decode() first
 * @return int|false Number of matching applications, or false if a curl call failed
 *                   or the session cookie / form token could not be extracted
 */
function count_newgtld_applications($string) {
	$punycoder = new idna_convert();
	$ua = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0';

	if (($x = $punycoder->decode(strtolower($string))) !== false) $string = $x;

	$status_url = 'https://gtldresult.icann.org/application-result/applicationstatus/viewstatus';

	// Header arguments shared by the requests
	$base_headers    = ' -H "Host: gtldresult.icann.org" -H '.escapeshellarg('User-Agent: '.$ua);
	$browser_headers = ' -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"'.
	                   ' -H "Accept-Language: de,en-US;q=0.7,en;q=0.3"'.
	                   ' -H '.escapeshellarg('Referer: '.$status_url);

	// Request 1: response headers (-i) are needed for the session cookie
	$out = array();
	exec('curl -i -s '.escapeshellarg($status_url).$base_headers, $out, $code);
	if ($code != 0) return false;
	$html = implode("\n", $out);

	if (!preg_match('@JSESSIONID=(.+);@ismU', $html, $m)) return false;
	$jsessionid = $m[1];
	$cookie_header = ' -H '.escapeshellarg('Cookie: JSESSIONID='.$jsessionid);

	if (!preg_match('@<input value="([^"]+)" name="t:formdata"@ismU', $html, $m)) return false;
	$formdata = $m[1];
	$formdata = str_replace('+', '%2B', $formdata);
	$formdata = str_replace('/', '%2F', $formdata);

	// The search term is part of a form-urlencoded body, so it has to be URL encoded.
	// Otherwise spaces, "&", "#" or non-ASCII characters would corrupt the request.
	$post_body = 't%3Aformdata='.$formdata.
	             '&t%3Asubmit=%5B%22searchButton%22%2C%22searchButton%22%5D'.
	             '&searchField='.urlencode($string).
	             '&searchButton=Search&status=&updates=&objections=&gacew=&similar=&pic=';

	// Request 2: submit the search form. Its output is not evaluated.
	$out = array();
	exec('curl -s '.escapeshellarg($status_url.'.applicationstatusform').$base_headers.$browser_headers.$cookie_header.' -H "Connection: keep-alive" --data '.escapeshellarg($post_body), $out, $code);
	if ($code != 0) return false;

	// Request 3: fetch the status page again with the same session cookie
	$out = array();
	exec('curl -s '.escapeshellarg($status_url).$base_headers.$browser_headers.$cookie_header.' -H "DNT: 1" -H "Connection: keep-alive"', $out, $code);
	if ($code != 0) return false;

	$html = implode("\n", $out);

	// Count the detail links whose link text is exactly the search string
	preg_match_all('@href="/application-result/applicationstatus/viewstatus:viewapplicationdetails/(\d+)">'.preg_quote($string, '@').'</a>@ismU', $html, $m);

	# DEBUG
	echo "$string = ".count($m[1])."\n";

	# return (count($m[1]) > 0);
	return count($m[1]);
}
321
 
322
#assert(count_newgtld_applications('shopping') == 2);