Rev 3 | Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2 | daniel-mar | 1 | #!/usr/bin/php |
2 | <?php |
||
3 | |||
4 | # |
||
5 | # generic Whois - Automatic Pattern Generator: TLDs |
||
6 | # |
||
7 | # (c) 2012-2015 Daniel Marschall, ViaThinkSoft [www.viathinksoft.de] |
||
8 | # |
||
9 | # Distribution, usage etc. pp. regulated by the current version of GPL. |
||
10 | # |
||
11 | # |
||
12 | # Version 2015-07-13 |
||
13 | # |
||
14 | |||
15 | require_once __DIR__ . '/iana_functions.inc.php'; |
||
16 | require_once __DIR__ . '/config.inc.php'; |
||
17 | require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.php'; |
||
18 | |||
// Directory holding the pattern files, and the domain pattern file this script maintains.
define('PATTERN_DIR', __DIR__ . '/../../main/pattern');
define('DOMAINS_PATTERN_FILE', PATTERN_DIR.'/domains');

// Report every diagnostic level. -1 turns on all levels on every PHP version,
// and avoids naming E_STRICT, which is deprecated as of PHP 8.4.
error_reporting(-1);

// TLD list; filled on demand from get_iana_tld_data() in step 2.
$iana_tld_data = null;
||
25 | |||
// Step 1:
// Check if in the meantime TLDs without an official whois server were updated to have one
// Attention/TODO: A change of the whois name still needs manual intervention!

$domains_cont_original = file_get_contents(DOMAINS_PATTERN_FILE);
if ($domains_cont_original === false) {
    // Pattern file unreadable or not created yet: nothing to re-check.
    // Step 2 creates the file when it appends the first entry.
    $domains_cont_original = '';
}

$domains_cont_new = preg_replace_callback(
    // Matches the three-line entry that step 2 writes for a TLD without a known
    // whois server: TODO marker, notice with the date of the last check, TLD pattern.
    '@# TODO: Entry generated automatically\. Needs manual check\.\n:notice\|\|Whois server unknown \((.*)\)\n\\\.(.*)\$\n@imU',
    function ($treffer) {
        $in_all = $treffer[0]; // the complete matched entry
        $in_ts  = $treffer[1]; // date of the last check, as written into the notice
        $in_tld = $treffer[2]; // TLD name without the leading "\."

        // An unparsable date makes strtotime() return false, which counts as 0 here,
        // so such entries are always treated as due for a re-check.
        $days_passed = (time()-strtotime($in_ts))/(60*60*24);
        if ($days_passed < DOMAINS_RECHECK_MISSING_WHOIS_SERVERS) {
            return $in_all; // leave everything unchanged
        }

        $whois_serv = find_rootzone_whois_server($in_tld);

        if (!$whois_serv) {
            // Nothing found. Just update last check date.
            return str_replace($in_ts, date('Y-m-d'), $in_all);
        }

        // Update the entry. The dot must stay escaped ("\.tld$") so that the result
        // has the same shape as the entries appended in step 2 and is recognized by
        // does_exist(); otherwise step 2 would add the TLD a second time.
        return ":whois|$whois_serv\n\\.{$in_tld}\$\n";
    },
    $domains_cont_original
);

if ($domains_cont_new === null) {
    // PCRE failure: do not write anything, otherwise the pattern file would be emptied.
    trigger_error('Re-check of '.DOMAINS_PATTERN_FILE.' failed (PCRE error code '.preg_last_error().'); file left unchanged', E_USER_WARNING);
} elseif ($domains_cont_original !== $domains_cont_new) {
    file_put_contents(DOMAINS_PATTERN_FILE, $domains_cont_new);
    gwi_update_domains_patternfile();
}
||
61 | |||
// Step 2:
// Search for new TLDs which are not in our pattern file

if (!isset($iana_tld_data)) $iana_tld_data = get_iana_tld_data();

foreach ($iana_tld_data as $tld) {
    // Normalize before any check, so stray whitespace or line endings never
    // end up inside the generated pattern.
    $tld = strtolower(trim($tld));

    // Skip empty elements (indexing them would raise a warning) and comment lines.
    if ($tld === '' || $tld[0] == '#') continue;

    if (!does_exist($tld)) {
        $whois_serv = find_rootzone_whois_server($tld);

        // Build the new entry: either a whois directive, or a dated TODO/notice
        // pair which step 1 picks up again on later runs.
        $to_append = "\n";
        if ($whois_serv) {
            $to_append .= ":whois|$whois_serv\n";
        } else {
            $to_append .= "# TODO: Entry generated automatically. Needs manual check.\n";
            $to_append .= ":notice||Whois server unknown (".date('Y-m-d').")\n";
        }
        $to_append .= "\\.$tld$\n";

        file_put_contents(DOMAINS_PATTERN_FILE, $to_append, FILE_APPEND);

        // Refresh the "#: version" header of the pattern file after the change.
        gwi_update_domains_patternfile();

        echo "Added: $tld\n";
    }
}
||
92 | |||
93 | # ------------------------------------------------------ |
||
94 | |||
/**
 * Tells whether the united pattern already contains an entry for a TLD.
 *
 * The pattern text is taken from get_united_pattern(). Everything from a "#"
 * up to and including the next line break is dropped, the text is lowercased
 * and round brackets are removed. The TLD counts as present when the
 * remaining text contains "\.<tld>$" followed by a line break.
 *
 * @param string $tld TLD name without leading dot; compared case-insensitively.
 * @return bool true if such an entry was found, false otherwise.
 */
function does_exist($tld) {
    $needle = '\\.' . strtolower($tld) . "\$\n";

    $patterns = preg_replace('@#[^\n]*\n@ismU', '', get_united_pattern());
    $patterns = str_replace(array('(', ')'), '', strtolower($patterns));

    return strpos($patterns, $needle) !== false;
}
||
103 | |||
/**
 * Stamps the domain pattern file with today's date as its version.
 *
 * The first "#: version <token>" occurrence (case-insensitive) in
 * DOMAINS_PATTERN_FILE is replaced by "#: version <Ymd>". If the file has no
 * such line, a complete header block is put in front of the existing content.
 * The file is only written when the resulting text differs from what was read.
 *
 * @return void
 */
function gwi_update_domains_patternfile() {
    $stamp  = '#: version ' . date('Ymd');
    $before = file_get_contents(DOMAINS_PATTERN_FILE);

    $replaced = 0;
    $after = preg_replace("@#: version (\\S+)@i", $stamp, $before, 1, $replaced);

    if ($replaced == 0) {
        // No version line yet: prepend the standard header, followed by a blank line.
        $header = implode("\n", array(
            $stamp,
            '# Domains',
            '# This file can be updated by ' . __DIR__ . '/generate_domains (only additions of new entries)',
            '# --------------------------------------------------------------------',
            '',
        )) . "\n";
        $after = $header . $after;
    }

    if ($after == $before) {
        return; // already up to date, avoid a needless write
    }

    file_put_contents(DOMAINS_PATTERN_FILE, $after);
}
||
126 | |||
/**
 * Determines the whois server for a TLD.
 *
 * The server returned by iana_get_rootzone_whois_server() is used when there
 * is one. Otherwise, and only if TRY_FINDING_HIDDEN_WHOIS_SERVERS is enabled,
 * the host names "whois.nic.<tld>", "whois.<tld>" and "<tld>" are probed in
 * this order on port 43; the first one with an open port wins.
 *
 * @param string $tld TLD name without leading dot.
 * @return mixed The server host name, or the falsy result of the IANA lookup
 *               when no server could be determined.
 */
function find_rootzone_whois_server($tld) {
    $whois_serv = iana_get_rootzone_whois_server($tld);

    // Try to find undocumented whois servers
    if (TRY_FINDING_HIDDEN_WHOIS_SERVERS) {
        // TODO: also try out to use the URL of the homepage (in IANAs root DB)
        $candidates = array("whois.nic.$tld", "whois.$tld", "$tld");

        foreach ($candidates as $check_server) {
            if ($whois_serv) {
                break; // a server is already known, no further probing
            }
            if (gwitc_is_port_open($check_server, 43)) {
                $whois_serv = $check_server;
            }
        }
    }

    return $whois_serv;
}
||
155 | |||
/**
 * Looks up the whois server that IANA lists for a TLD.
 *
 * Sends the lowercased TLD to whois.iana.org via QueryWhoisServer() and
 * extracts the value of the "whois:" field from the reply.
 *
 * @param string $tld TLD name without leading dot.
 * @return string|false Host name from the "whois:" field, or false if the
 *                      reply is not a string, has no such field, or the field
 *                      is empty.
 */
function iana_get_rootzone_whois_server($tld) {
    $tld = strtolower($tld);
    $cont = QueryWhoisServer('whois.iana.org', $tld);

    // NOTE(review): assumes QueryWhoisServer() returns the raw reply text; anything
    // else (e.g. false on a failed query) is treated as "no server listed".
    if (!is_string($cont)) return false;

    // The field must start its own line, and only spaces/tabs may separate the
    // label from the value. With a bare \s* an empty "whois:" field would swallow
    // the line break and return the label of the following line as host name.
    if (!preg_match('@^[ \t]*whois:[ \t]*(\S+)@im', $cont, $m)) return false;

    return $m[1];
}