Subversion Repositories vgwhois

Rev

Rev 2 | Rev 4 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 daniel-mar 1
#!/usr/bin/php
2
<?php
3
 
4
#
5
#  generic Whois - Automatic Pattern Generator: TLDs
6
#
7
#  (c) 2012-2015 Daniel Marschall, ViaThinkSoft [www.viathinksoft.de]
8
#
9
#  Distribution, usage etc. pp. regulated by the current version of GPL.
10
#
11
#
12
#  Version 2015-07-13
13
#
14
 
15
require_once __DIR__ . '/config.inc.php';
16
require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.php';
17
 
18
// Locations of the pattern files and of the download cache, relative to this script.
define('PATTERN_DIR', __DIR__ . '/../../main/pattern');
define('DOMAINS_PATTERN_FILE', PATTERN_DIR.'/domains');
define('CACHE_FILE_DIR', __DIR__ . '/../.cache/cache');

// Report every error level. -1 is the documented "everything" value; it avoids
// naming E_STRICT (deprecated as of PHP 8.4) while still covering it on old
// PHP versions where E_ALL did not include it.
error_reporting(-1);

// Lines of the IANA TLD list; filled lazily in step 2.
$iana_tld_data = null;
25
 
26
// Step 1:
// Check if in the meantime TLDs without an official whois server were updated to have one
// Attention/TODO: A change of the whois name still needs manual intervention!

$domains_cont_original = file_get_contents(DOMAINS_PATTERN_FILE);

// Each match is one automatically generated three-line entry:
//   # TODO: Entry generated automatically. Needs manual check.
//   :notice||Whois server unknown (<date of last check>)
//   \.<tld>$
$domains_cont_new = preg_replace_callback(
	'@# TODO: Entry generated automatically\. Needs manual check\.\n:notice\|\|Whois server unknown \((.*)\)\n\\\.(.*)\$\n@imU',
	function ($treffer) {
		$in_all = $treffer[0]; // the complete three-line entry
		$in_ts  = $treffer[1]; // date of the last check
		$in_tld = $treffer[2]; // TLD without the leading "\."

		$days_passed = (time()-strtotime($in_ts))/(60*60*24);
		if ($days_passed < DOMAINS_RECHECK_MISSING_WHOIS_SERVERS) {
			return $in_all; // leave everything unchanged
		}

		$whois_serv = find_rootzone_whois_server($in_tld);

		if (!$whois_serv) {
			// Nothing found. Just update last check date.
			return str_replace($in_ts, date('Y-m-d'), $in_all);
		}

		// Update the entry. The dot has to stay escaped ("\.tld$"), exactly as
		// step 2 writes it and as does_exist() searches for it.
		return ":whois|$whois_serv\n\\.{$in_tld}\$\n";
	},
	$domains_cont_original
);

// preg_replace_callback() returns null on a PCRE error. Writing that back
// would truncate the pattern file, so only write a real, changed result.
if (($domains_cont_new !== null) && ($domains_cont_original != $domains_cont_new)) {
	file_put_contents(DOMAINS_PATTERN_FILE, $domains_cont_new);
	gwi_update_domains_patternfile();
}
61
 
62
// Step 2:
// Search for new TLDs which are not in our pattern file

if (!isset($iana_tld_data)) {
	// One trimmed line per element of the (cached) IANA TLD list.
	$iana_tld_data = array_map('trim', explode("\n", cached_file(IANA_TLD_REGISTRY, CACHE_FILE_DIR)));
}

foreach ($iana_tld_data as $tld) {
	// Skip empty lines and comment lines.
	if (($tld == '') || ($tld[0] == '#')) continue;

	$tld = strtolower($tld);

	// Already covered by the pattern files: nothing to do.
	if (does_exist($tld)) continue;

	$server = find_rootzone_whois_server($tld);

	if ($server) {
		$entry_head = ":whois|$server\n";
	} else {
		// No server known: leave a dated marker which step 1 re-checks later.
		$entry_head = "# TODO: Entry generated automatically. Needs manual check.\n"
		            . ":notice||Whois server unknown (".date('Y-m-d').")\n";
	}

	// Blank separator line, entry head, then the escaped TLD pattern line.
	file_put_contents(DOMAINS_PATTERN_FILE, "\n" . $entry_head . "\\.$tld$\n", FILE_APPEND);

	gwi_update_domains_patternfile();

	echo "Added: $tld\n";
}
96
 
97
# ------------------------------------------------------
98
 
99
/**
 * Checks whether a pattern line for the given TLD is already present.
 *
 * The text returned by get_united_pattern() (defined elsewhere) is searched
 * case-insensitively for the line "\.<tld>$" after removing everything from
 * a '#' up to and including the next newline, and after dropping all
 * parentheses.
 *
 * @param string $tld TLD without leading dot
 * @return bool true if such a line was found
 */
function does_exist($tld) {
	$needle = "\\." . strtolower($tld) . "\$\n";

	// Strip comments first, then normalize case and remove grouping parentheses.
	$haystack = preg_replace('@#[^\n]*\n@ismU', '', get_united_pattern());
	$haystack = str_replace(array('(', ')'), '', strtolower($haystack));

	return strpos($haystack, $needle) !== false;
}
107
 
108
/**
 * Stamps the domains pattern file with today's date as its version.
 *
 * The first "#: version ..." marker is replaced by "#: version <Ymd>".
 * If the file has no such marker, a complete header block is prepended.
 * The file is only rewritten when its content actually changed.
 *
 * @return void
 */
function gwi_update_domains_patternfile() {
	$stamp  = date('Ymd');
	$before = file_get_contents(DOMAINS_PATTERN_FILE);

	// Replace at most one version marker and remember whether one was found.
	$replaced = 0;
	$after = preg_replace("@#: version (\\S+)@i", "#: version $stamp", $before, 1, $replaced);

	if ($replaced == 0) {
		// Add header
		$header  = "#: version $stamp\n";
		$header .= "# Domains\n";
		$header .= "# This file can be updated by ".__DIR__."/generate_domains (only additions of new entries)\n";
		$header .= "# --------------------------------------------------------------------\n";
		$header .= "\n";
		$after = $header.$after;
	}

	if ($after != $before) {
		file_put_contents(DOMAINS_PATTERN_FILE, $after);
	}
}
130
 
131
/**
 * Determines the whois server of a TLD.
 *
 * The server announced by IANA is preferred. If there is none and
 * TRY_FINDING_HIDDEN_WHOIS_SERVERS is enabled, the host names
 * "whois.nic.<tld>", "whois.<tld>" and "<tld>" are probed in this order and
 * the first one with an open port 43 is returned.
 *
 * @param string $tld TLD without leading dot
 * @return string|false host name, or the falsy IANA result if nothing was found
 */
function find_rootzone_whois_server($tld) {
	$whois_serv = iana_get_rootzone_whois_server($tld);

	// Done if probing is disabled or IANA already gave us a server.
	if (!TRY_FINDING_HIDDEN_WHOIS_SERVERS || $whois_serv) {
		return $whois_serv;
	}

	// Try to find undocumented whois servers
	// TODO: also try out to use the URL of the homepage (in IANAs root DB)
	foreach (array("whois.nic.$tld", "whois.$tld", "$tld") as $candidate) {
		if (gwitc_is_port_open($candidate, 43)) {
			return $candidate;
		}
	}

	return $whois_serv;
}
159
 
160
/**
 * Asks whois.iana.org for the whois server of a TLD.
 *
 * The response is scanned for a "whois:" field; the first non-whitespace
 * token following it on the same line is returned.
 *
 * @param string $tld TLD without leading dot (case-insensitive)
 * @return string|false whois host name, or false if the response names none
 */
function iana_get_rootzone_whois_server($tld) {
	$tld  = strtolower($tld);
	$cont = QueryWhoisServer('whois.iana.org', $tld);

	// Only blanks and tabs may separate "whois:" from its value. "\s*" would
	// also skip newlines, so an empty "whois:" field would yield the first
	// token of a following line (e.g. "status:") as the server name.
	if (!preg_match('@whois:[ \\t]*(\\S+)@i', (string)$cont, $m)) return false;

	return $m[1];
}