Rev 11 | Rev 17 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
11 | daniel-mar | 1 | #!/usr/bin/perl |
2 | |||
3 | # |
||
4 | # VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois) |
||
5 | # Main program |
||
6 | # |
||
7 | # (c) 2010-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de> |
||
8 | # based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de> |
||
9 | # Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de> |
||
10 | # |
||
11 | # License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2) |
||
12 | # |
||
13 | |||
14 | # TODO: print whois parameters at "querying..." |
||
15 | # TODO: lynx injection sicherheitslücke? => quotemeta() |
||
16 | # TODO: regularly check https://bugs.debian.org/cgi-bin/pkgreport.cgi?src=gwhois |
||
17 | |||
18 | # TODO: "%" am Anfang jeder Meldung ausgeben |
||
19 | |||
20 | # TODO: lynx wird manchmal auch ausgeführt ohne -L ... |
||
21 | # TODO: Alle "!!" entfernen |
||
22 | # TODO: print -> $result .= ? |
||
23 | |||
24 | use warnings; |
||
25 | use strict; |
||
26 | |||
27 | use FindBin; |
||
28 | use lib "$FindBin::RealBin/lib/"; |
||
29 | |||
30 | use VGWhoIs::Core; |
||
31 | use VGWhoIs::Utils; |
||
32 | use VGWhoIs::IPv4; |
||
33 | use VGWhoIs::IPv6; |
||
34 | use VGWhoIs::OID; |
||
35 | |||
36 | # install with "cpan Net::IP" or "aptitude install libnet-ip-perl" |
||
37 | use Net::IP; |
||
38 | |||
39 | use Net::LibIDN; |
||
40 | use Encode; |
||
41 | # use Encode::Detect::Detector; # requires Debian package libencode-detect-perl |
||
42 | |||
43 | #use encoding ':locale'; |
||
44 | |||
45 | #use utf8; |
||
46 | |||
47 | |||
48 | # Examples for output of the different hosts: |
||
49 | # ------------------------------------------------------------- |
||
50 | # Host Example Output BOM |
||
51 | # ------------------------------------------------------------- |
||
52 | # whois.viathinksoft.de oid:2.999 UTF-8 if required (existing BOMs will be removed) |
||
53 | # cnnic.cn cnnic.cn UTF-8 no |
||
54 | # whois.ati.tn ati.tn UTF-8 no |
||
55 | # whois.kr whois.kr UTF-8 no |
||
56 | # whois.denic.de denic.de ISO-8859-1 no |
||
57 | # oldwhois.kisa.or.kr (obsolete) whois.kr EUC-KR no |
||
58 | # whois.nic.ch domian.ch UTF-8 no |
||
59 | # vgwhois UTF-8 yes (existing BOMs will be removed?) |
||
60 | # gwhois (like server) (like server) |
||
61 | # ------------------------------------------------------------- |
||
62 | |||
63 | |||
64 | # TODO: for this diagram: check if existing BOMs will be removed, e.g. by LWP. |
||
65 | # TODO: how to stop LWP's auto-detect magic? |
||
66 | # TODO: only output bom if required? doesn't work, otherwise we would need to buffer stderr and stdout, and then their order is wrong again. |
||
67 | |||
68 | |||
69 | $ENV{'HOME'}='/var/home/whois' unless defined $ENV{'HOME'}; |
||
70 | |||
71 | # Nicht nach VGWhoIs::Core auslagern |
||
72 | # TODO: die $version auch von den .pm Modulen anzeigen? |
||
73 | my $version = '20190521'; |
||
74 | my $fixwhoishost; |
||
75 | my $rawoutput = 0; |
||
76 | |||
77 | $| = 1; # buffer flushing = autoflush |
||
78 | |||
79 | while ($ARGV[0]) { |
||
80 | if ($ARGV[0] eq '--help' || $ARGV[0] eq '-?') { |
||
81 | print "VGWhoIs - ViaThinkSoft Global WhoIs\n", |
||
82 | "Version $version\n\n", |
||
83 | "Usage: vgwhois {options} [query]\n", |
||
84 | " Try find information about the query (might be multiple words).\n", |
||
85 | " If no query is given, use the first line from stdin\n\n", |
||
86 | " Options:\n", |
||
13 | daniel-mar | 87 | " -C dir Setting an alternate configuration directory\n", |
88 | " default: $VGWhoIs::Core::confdir\n", |
||
11 | daniel-mar | 89 | " -h host Selecting a fixed whois server for this query\n", |
90 | " -m method:host mirror Defining a mirror for a given method and host.\n", |
||
91 | " -L Use lynx -source instead of LWP::Simple\n", |
||
92 | " -e Do not protect eMail addresses\n", |
||
93 | " -c Do not try to convert to UTF-8. Output server's stream.\n", |
||
94 | " -v Output version of pattern table(s)\n", |
||
95 | " -?, --help Printing this text\n\n"; |
||
96 | exit; |
||
13 | daniel-mar | 97 | } elsif ($ARGV[0] eq '-C') { |
98 | shift; |
||
99 | $VGWhoIs::Core::confdir = shift; |
||
11 | daniel-mar | 100 | } elsif ($ARGV[0] eq '-c') { |
101 | shift; |
||
102 | $rawoutput = 1; |
||
103 | $VGWhoIs::Core::useLWP = 0; # TODO: geht irgendwie nicht anders |
||
104 | } elsif ($ARGV[0] eq '-h') { |
||
105 | shift; |
||
106 | $fixwhoishost = shift; |
||
107 | } elsif ($ARGV[0] eq '-L') { |
||
108 | shift; |
||
109 | $VGWhoIs::Core::useLWP = 0; |
||
110 | } elsif ($ARGV[0] eq '-m') { |
||
111 | shift; |
||
112 | $_ = shift; |
||
113 | s/://; |
||
114 | $VGWhoIs::Core::mirror{$_} = shift; |
||
115 | } elsif ($ARGV[0] eq '-e') { |
||
116 | shift; |
||
117 | $VGWhoIs::Core::antispam = 0; |
||
118 | } elsif ($ARGV[0] eq '-v') { |
||
119 | print "VGWhoIs - ViaThinkSoft Global WhoIs\n\n", |
||
120 | "program version: $version\n", |
||
121 | "pattern tables: "; |
||
122 | foreach my $patternfile (VGWhoIs::Core::getpatternfiles()) { |
||
123 | if (!open(PATTERN,"<$patternfile")) { |
||
124 | warn "Cannot open $patternfile. STOP.\n"; |
||
125 | exit 1; |
||
126 | } |
||
127 | |||
128 | my $line = <PATTERN>; |
||
129 | close(PATTERN); |
||
130 | |||
131 | my $patternversion; |
||
132 | if (defined($line)) { |
||
133 | ($patternversion) = $line =~ /#:\s+version\s+(\S+)/; |
||
134 | $patternversion = 'unknown' if !defined($patternversion); |
||
135 | } else { |
||
136 | $patternversion = 'unknown'; |
||
137 | } |
||
138 | print "$patternversion\t($patternfile)\n "; |
||
139 | } |
||
140 | print "\n"; |
||
141 | exit 0; |
||
142 | } elsif ($ARGV[0] eq '--') { |
||
143 | shift; |
||
144 | last; |
||
145 | } else { |
||
146 | last; |
||
147 | } |
||
148 | } |
||
149 | |||
150 | if ($rawoutput) { |
||
151 | binmode(STDOUT, ":bytes"); |
||
152 | binmode(STDERR, ":bytes"); |
||
153 | } else { |
||
154 | binmode(STDOUT, ":utf8"); |
||
155 | binmode(STDERR, ":utf8"); |
||
156 | } |
||
157 | |||
158 | if (defined $ARGV[0]) { |
||
159 | $_ = join(' ', @ARGV); |
||
160 | } else { |
||
161 | # If no parameter is given, await an input from STDIN |
||
162 | $_ = <>; |
||
163 | chomp; |
||
164 | } |
||
165 | |||
166 | print "\x{FEFF}" if !$rawoutput; # BOM |
||
167 | exit main($_); |
||
168 | |||
169 | # ----------------------------------------------------------------------------------------- |
||
170 | |||
171 | sub main { |
||
172 | my $query = shift; |
||
173 | |||
174 | $query = '' if !defined $query; |
||
175 | |||
176 | if (VGWhoIs::Utils::is_utf8($query)) { |
||
177 | $query = Encode::decode('utf8', $query); |
||
178 | } |
||
179 | $query = VGWhoIs::Utils::trim($query); |
||
180 | |||
181 | if ($query eq '') { |
||
182 | warn "Query is empty.\n"; |
||
183 | exit 2; |
||
184 | } |
||
185 | |||
186 | my ($method,$host,$additional); |
||
187 | |||
188 | my $query_utf8 = VGWhoIs::Utils::enforce_utf8($query); |
||
189 | print "Process query: '$query_utf8'\n\n"; |
||
190 | |||
191 | if ( $fixwhoishost ) { |
||
192 | # QUE: soll das immer gelten, oder nur, wenn ermittelt wurde, dass whois benötigt wird (nicht aber cgi, etc?) |
||
193 | ($method,$host,$additional) = ('whois',$fixwhoishost,''); |
||
194 | } else { |
||
195 | # if ($query !~ /[^0-9\.]/) { # ipv4 |
||
196 | if ($query =~ /^[0-9\.]*$/) { |
||
197 | my ($a, $b, $c, $d, $e) = $query =~ /(\d+)\.(\d+)\.(\d+)\.(\d+)(.*)/; |
||
198 | $a = 256 if !defined $a; |
||
199 | $b = 256 if !defined $b; |
||
200 | $c = 256 if !defined $c; |
||
201 | $d = 256 if !defined $d; |
||
202 | $e = '' if !defined $e; |
||
203 | if ($a > 255 || $b > 255 || $c > 255 || $d > 255 || $e ne '') { |
||
204 | warn "'$query' is no valid IP address, ASN, OID or domain name.\n"; |
||
205 | exit 2; |
||
206 | } |
||
207 | print "Query recognized as IPv4.\n"; |
||
208 | |||
209 | ($method,$host,$additional) = VGWhoIs::IPv4::getmethodv4($a,$b,$c,$d); |
||
210 | # } elsif ( lc($query) !~ /[^0-9a-f:]/i ) { # ipv6 |
||
211 | # } elsif ( $query !~ /[0-9a-f:]*/ ) { |
||
212 | } elsif (($query =~ /:/ ) && ( Net::IP::ip_expand_address($query, 6) =~ /^[0-9a-f:]*:[0-9a-f:]*$/ )) { # at least one ":" so that e.g. "ac" is recognized as TLD and not as IPv6 |
||
213 | # check and correct v6 address |
||
214 | if ( $query =~ /[0-9a-f]{5}/ || $query =~ /:::/ ) { |
||
215 | warn "'$query' is an invalid IPv6 address.\n"; |
||
216 | exit 2; |
||
217 | } |
||
218 | |||
219 | my $orig_query = $query; |
||
220 | #$query =~ s/:?$/::/ if ( $query !~ /(.*:){7}/ && $query !~ /::/ ); |
||
221 | $query = Net::IP::ip_expand_address($query, 6); |
||
222 | |||
223 | print "Query recognized as IPv6.\n"; |
||
224 | print "Address expanded to '$query'.\n" if $orig_query ne $query; |
||
225 | |||
226 | ($method,$host,$additional) = VGWhoIs::IPv6::getmethodv6($query); |
||
227 | } elsif ($query =~ /^(urn:){0,1}oid:/i ) { # OID |
||
228 | print "Query recognized as OID.\n"; |
||
229 | |||
230 | # preliminarily remove urn: and oid: from query |
||
231 | # we need a dot so that we can use "oid:." in our patternfile too |
||
232 | $query = VGWhoIs::OID::normalize_oid($query); |
||
233 | |||
234 | my @arcs = split(/\./, $query); # TODO: warum geht split('.',$oid) nicht? |
||
235 | |||
236 | ($method,$host,$additional) = VGWhoIs::OID::getmethodoid(@arcs); |
||
237 | |||
238 | # Whois OID query syntax definition by ViaThinkSoft (TODO: Apply for RFC): |
||
239 | # - urn:oid:2.999 or oid:2.999 |
||
240 | # - Case insensitive |
||
241 | # - Leading dot should be tolerated (urn:oid:.2.999) |
||
242 | # - Leading zeros should be tolerated (urn:oid:.002.00999) |
||
243 | # Idea: Should "oid:" be optional? Since 2.999 cannot be an IP ... But 1.2.3.4 could be one ... |
||
244 | |||
245 | # There are many possibilities. We choose "oid:.2.999" |
||
246 | $query = 'oid:' . VGWhoIs::OID::normalize_oid($query); |
||
247 | } else { |
||
248 | # Last resort: Query is probably a TLD, domain or handle, but we are not sure! |
||
249 | # print "Query recognized as domain.\n"; |
||
250 | |||
251 | # Dot exists? Type? Punycode? Filtering? |
||
252 | # ------------------------------------------------ |
||
253 | # Yes Domain Yes Yes |
||
254 | # No TLD Yes Yes |
||
255 | # No Handle No* Maybe |
||
256 | # ------------------------------------------------ |
||
257 | # * = but it is unlikely that a handle contains non-latin characters |
||
258 | |||
259 | # Filtering |
||
260 | $query =~ y/[\x{00A0}-\x{FFFF}]a-zA-Z0-9:.,+_ -//cd; |
||
261 | $query =~ s/\.$//; |
||
262 | my $query_utf8_filtered = VGWhoIs::Utils::enforce_utf8($query); |
||
263 | if ( $query_utf8 ne $query_utf8_filtered ) { |
||
264 | # QUE: warn or print? |
||
265 | warn "Attention: Query was filtered to '$query_utf8_filtered'.\n\n"; |
||
266 | } |
||
267 | |||
268 | # Punycode decoding |
||
269 | # my $ascii_query = Net::LibIDN::idn_to_ascii($query, 'utf-8') |
||
270 | # We separate between spaces, so that "tld <unicode>" can be processed |
||
271 | my @query_split = split(' ', $query); |
||
272 | @query_split = map { Net::LibIDN::idn_to_ascii($_, 'utf-8') || '' } @query_split; |
||
273 | my $ascii_query = join(' ', @query_split); |
||
274 | |||
275 | # Query valid? |
||
276 | if (!$ascii_query) { # e.g. $query = ".x" |
||
277 | warn "'$query_utf8' is an invalid domain name.\n"; |
||
278 | return 2; |
||
279 | } |
||
280 | |||
281 | # Just information for the user |
||
282 | if (index($query, ".") != -1) { |
||
283 | print "Query recognized as domain.\n\n"; # TODO: aber wenn kein IDN? |
||
284 | } else { |
||
285 | print "Query is probably a handle or TLD.\n\n"; |
||
286 | } |
||
287 | |||
288 | ($method,$host,$additional) = VGWhoIs::Core::getmethodother($ascii_query); |
||
289 | } |
||
290 | } |
||
291 | |||
292 | if ($method eq '') { |
||
293 | warn "I don't know where to query that.\n"; |
||
294 | warn "If this is a valid domainname or handle, please file a bug report.\n"; |
||
295 | return 1; |
||
296 | } |
||
297 | |||
298 | # Wird in getmethod*() bereits ausgeführt. |
||
299 | # Grund: Dann kann auch bei redirectwhois() dementsprechend in jedem Zwischenschritt gehandelt werden. |
||
300 | # $host = $VGWhoIs::Core::mirror{$method.$host} if defined $VGWhoIs::Core::mirror{$method.$host}; |
||
301 | |||
302 | my ($result, $exitcode) = VGWhoIs::Core::doquery($query,$method,$host,$additional); |
||
303 | $result = '' if !defined $result; # should not happen! |
||
304 | |||
305 | my $antispam_replacements = 0; |
||
306 | if ($VGWhoIs::Core::antispam) { |
||
307 | # Protect email addresses (to allow e.g. "RIPE -B" for public services) |
||
308 | # Note: eMail addresses have a much more complex structure, see http://code.google.com/p/isemail/ |
||
309 | # But this Regex should still prevent spammers from filtering eMail addresses, |
||
310 | # even if e.g. the "wrong" (e.g. escaped) "@" is protected. |
||
311 | $antispam_replacements = $result =~ s/(\S+)@(\S+)\.([^.\s]+)/$1 (at) $2 (dot) $3/g; |
||
312 | # Alternative solution: |
||
313 | # $antispam_replacements = $result =~ s/(\S+)@(\S+)\.([^.\s]+)/$1&$2.$3/g; |
||
314 | } |
||
315 | |||
316 | # We try to get $result to wide-string. Functions like LWP::Simple automatically convert UTF-8 into Unicode |
||
317 | # (even without BOM sent through the whois gopher channel!), while subprograms and other methods are providing |
||
318 | # raw UTF-8 data. |
||
319 | $result = Encode::decode('utf8', VGWhoIs::Utils::trim($result), Encode::FB_CROAK) if !$rawoutput && VGWhoIs::Utils::is_utf8($result); |
||
320 | |||
321 | # Don't allow DOS format |
||
322 | $result =~ s/(\012|\015\012?)/\n/g; |
||
323 | |||
324 | # Output everything |
||
325 | print VGWhoIs::Utils::trim($result), "\n\n"; |
||
326 | |||
327 | if ($antispam_replacements > 0) { |
||
328 | print "Note: The output has been modified by VGWhoIs.\n"; |
||
329 | print "$antispam_replacements eMail addresses have been anti-spam protected.\n"; |
||
330 | print "(Disable protection with \"vgwhois -e\")\n"; |
||
331 | print "\n"; |
||
332 | } |
||
333 | |||
334 | # Footer |
||
335 | print "--\n To resolve one of the above handles:"; |
||
336 | |||
337 | if ($method =~ /whois/) { |
||
338 | print "\n whois -h $host"; |
||
339 | print ":$1" if ( $additional =~ /port=(\d+)/ ); |
||
340 | print " -- HANDLE\n"; |
||
341 | } |
||
342 | elsif ($method eq "cgipost") { |
||
343 | print "\n POST $host\n"; |
||
344 | print " $additional\n"; |
||
345 | } |
||
346 | elsif ($method eq "cgi") { |
||
347 | print "\n $host\n"; |
||
348 | } |
||
349 | elsif ($method eq "program") { |
||
350 | print "\n $host HANDLE\n"; |
||
351 | } |
||
352 | # elsif ($method eq "wwwgrep") { |
||
353 | else { |
||
354 | # todo: add cgipostcurl etc |
||
355 | print "\n hmm. not sure (method = $method).\n"; |
||
356 | } |
||
357 | |||
358 | print " OTOH globally unique handles should be recognised directly by VGWhoIs.\n"; |
||
359 | print " Please report errors or misfits via the Debian bug tracking system.\n"; |
||
360 | |||
361 | return $exitcode; |
||
362 | } |