Subversion Repositories vgwhois

Rev

Rev 11 | Rev 17 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
11 daniel-mar 1
#!/usr/bin/perl
2
 
3
#
4
#  VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
5
#  Main program
6
#
7
#  (c) 2010-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
8
#  based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de>
9
#  Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
10
#
11
#  License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
12
#
13
 
14
# TODO: print whois parameters at "querying..."
15
# TODO: lynx injection sicherheitslücke? => quotemeta()
16
# TODO: regularly check https://bugs.debian.org/cgi-bin/pkgreport.cgi?src=gwhois
17
 
18
# TODO: "%" am Anfang jeder Meldung ausgeben
19
 
20
# TODO: lynx wird manchmal auch ausgeführt ohne -L ...
21
# TODO: Alle "!!" entfernen
22
# TODO: print -> $result .= ?
23
 
24
use warnings;
25
use strict;
26
 
27
use FindBin;
28
use lib "$FindBin::RealBin/lib/";
29
 
30
use VGWhoIs::Core;
31
use VGWhoIs::Utils;
32
use VGWhoIs::IPv4;
33
use VGWhoIs::IPv6;
34
use VGWhoIs::OID;
35
 
36
# install with "cpan Net::IP" or "aptitude install libnet-ip-perl"
37
use Net::IP;
38
 
39
use Net::LibIDN;
40
use Encode;
41
# use Encode::Detect::Detector; # requires Debian package libencode-detect-perl
42
 
43
#use encoding ':locale';
44
 
45
#use utf8;
46
 
47
 
48
# Examples for output of the different hosts:
49
# -------------------------------------------------------------
50
# Host                            Example    Output         BOM
51
# -------------------------------------------------------------
52
# whois.viathinksoft.de           oid:2.999  UTF-8          if required (existing BOMs will be removed)
53
# cnnic.cn                        cnnic.cn   UTF-8          no
54
# whois.ati.tn                    ati.tn     UTF-8          no
55
# whois.kr                        whois.kr   UTF-8          no
56
# whois.denic.de                  denic.de   ISO-8859-1     no
57
# oldwhois.kisa.or.kr (obsolete)  whois.kr   EUC-KR         no
58
# whois.nic.ch                    domian.ch  UTF-8          no
59
# vgwhois                                    UTF-8          yes (existing BOMs will be removed?)
60
# gwhois                                     (like server)  (like server)
61
# -------------------------------------------------------------
62
 
63
 
64
# TODO: for this diagram: check if existing BOMs will be removed, e.g. by LWP.
65
# TODO: how to stop LWP's auto-detect magic?
66
# TODO: only output bom if required? doesn't work, otherwise we would need to buffer stderr and stdout, and then their order is wrong again.
67
 
68
 
69
$ENV{'HOME'}='/var/home/whois' unless defined $ENV{'HOME'};
70
 
71
# Nicht nach VGWhoIs::Core auslagern
72
# TODO: die $version auch von den .pm Modulen anzeigen?
73
my $version = '20190521';
74
my $fixwhoishost;
75
my $rawoutput = 0;
76
 
77
$| = 1; # buffer flushing = autoflush
78
 
79
while ($ARGV[0]) {
80
	if ($ARGV[0] eq '--help' || $ARGV[0] eq '-?') {
81
		print "VGWhoIs - ViaThinkSoft Global WhoIs\n",
82
		"Version $version\n\n",
83
		"Usage: vgwhois {options} [query]\n",
84
		" Try find information about the query (might be multiple words).\n",
85
		" If no query is given, use the first line from stdin\n\n",
86
		" Options:\n",
13 daniel-mar 87
		"   -C dir                  Setting an alternate configuration directory\n",
88
		"                           default: $VGWhoIs::Core::confdir\n",
11 daniel-mar 89
		"   -h host                 Selecting a fixed whois server for this query\n",
90
		"   -m method:host mirror   Defining a mirror for a given method and host.\n",
91
		"   -L                      Use lynx -source instead of LWP::Simple\n",
92
		"   -e                      Do not protect eMail addresses\n",
93
		"   -c                      Do not try to convert to UTF-8. Output server's stream.\n",
94
		"   -v                      Output version of pattern table(s)\n",
95
		"   -?, --help              Printing this text\n\n";
96
		exit;
13 daniel-mar 97
	} elsif ($ARGV[0] eq '-C') {
98
		shift;
99
		$VGWhoIs::Core::confdir = shift;
11 daniel-mar 100
	} elsif ($ARGV[0] eq '-c') {
101
		shift;
102
		$rawoutput = 1;
103
		$VGWhoIs::Core::useLWP = 0; # TODO: geht irgendwie nicht anders
104
	} elsif ($ARGV[0] eq '-h') {
105
		shift;
106
		$fixwhoishost = shift;
107
	} elsif ($ARGV[0] eq '-L') {
108
		shift;
109
		$VGWhoIs::Core::useLWP = 0;
110
	} elsif ($ARGV[0] eq '-m') {
111
		shift;
112
		$_ = shift;
113
		s/://;
114
		$VGWhoIs::Core::mirror{$_} = shift;
115
	} elsif ($ARGV[0] eq '-e') {
116
		shift;
117
		$VGWhoIs::Core::antispam = 0;
118
	} elsif ($ARGV[0] eq '-v') {
119
		print "VGWhoIs - ViaThinkSoft Global WhoIs\n\n",
120
		"program version:  $version\n",
121
		"pattern tables:   ";
122
		foreach my $patternfile (VGWhoIs::Core::getpatternfiles()) {
123
			if (!open(PATTERN,"<$patternfile")) {
124
				warn "Cannot open $patternfile. STOP.\n";
125
				exit 1;
126
			}
127
 
128
			my $line = <PATTERN>;
129
			close(PATTERN);
130
 
131
			my $patternversion;
132
			if (defined($line)) {
133
				($patternversion) = $line =~ /#:\s+version\s+(\S+)/;
134
				$patternversion = 'unknown' if !defined($patternversion);
135
			} else {
136
				$patternversion = 'unknown';
137
			}
138
			print "$patternversion\t($patternfile)\n                  ";
139
		}
140
		print "\n";
141
		exit 0;
142
	} elsif ($ARGV[0] eq '--') {
143
		shift;
144
		last;
145
	} else {
146
		last;
147
	}
148
}
149
 
150
if ($rawoutput) {
151
	binmode(STDOUT, ":bytes");
152
	binmode(STDERR, ":bytes");
153
} else {
154
	binmode(STDOUT, ":utf8");
155
	binmode(STDERR, ":utf8");
156
}
157
 
158
if (defined $ARGV[0]) {
159
	$_ = join(' ', @ARGV);
160
} else {
161
	# If no parameter is given, await an input from STDIN
162
	$_ = <>;
163
	chomp;
164
}
165
 
166
print "\x{FEFF}" if !$rawoutput; # BOM
167
exit main($_);
168
 
169
# -----------------------------------------------------------------------------------------
170
 
171
sub main {
172
	my $query = shift;
173
 
174
	$query = '' if !defined $query;
175
 
176
	if (VGWhoIs::Utils::is_utf8($query)) {
177
		$query = Encode::decode('utf8', $query);
178
	}
179
	$query = VGWhoIs::Utils::trim($query);
180
 
181
	if ($query eq '') {
182
		warn "Query is empty.\n";
183
		exit 2;
184
	}
185
 
186
	my ($method,$host,$additional);
187
 
188
	my $query_utf8 = VGWhoIs::Utils::enforce_utf8($query);
189
	print "Process query: '$query_utf8'\n\n";
190
 
191
	if ( $fixwhoishost ) {
192
		# QUE: soll das immer gelten, oder nur, wenn ermittelt wurde, dass whois benötigt wird (nicht aber cgi, etc?)
193
		($method,$host,$additional) = ('whois',$fixwhoishost,'');
194
	} else {
195
		# if ($query !~ /[^0-9\.]/) { # ipv4
196
		if ($query =~ /^[0-9\.]*$/) {
197
			my ($a, $b, $c, $d, $e) = $query =~ /(\d+)\.(\d+)\.(\d+)\.(\d+)(.*)/;
198
			$a = 256 if !defined $a;
199
			$b = 256 if !defined $b;
200
			$c = 256 if !defined $c;
201
			$d = 256 if !defined $d;
202
			$e = ''  if !defined $e;
203
			if ($a > 255 || $b > 255 || $c > 255 || $d > 255 || $e ne '') {
204
				warn "'$query' is no valid IP address, ASN, OID or domain name.\n";
205
				exit 2;
206
			}
207
			print "Query recognized as IPv4.\n";
208
 
209
			($method,$host,$additional) = VGWhoIs::IPv4::getmethodv4($a,$b,$c,$d);
210
		# } elsif ( lc($query) !~ /[^0-9a-f:]/i ) { # ipv6
211
		# } elsif ( $query !~ /[0-9a-f:]*/ ) {
212
		} elsif (($query =~ /:/ ) && ( Net::IP::ip_expand_address($query, 6) =~ /^[0-9a-f:]*:[0-9a-f:]*$/ )) { # at least one ":" so that e.g. "ac" is recognized as TLD and not as IPv6
213
			# check and correct v6 address
214
			if ( $query =~ /[0-9a-f]{5}/ || $query =~ /:::/ ) {
215
				warn "'$query' is an invalid IPv6 address.\n";
216
				exit 2;
217
			}
218
 
219
			my $orig_query = $query;
220
			#$query =~ s/:?$/::/ if ( $query !~ /(.*:){7}/ && $query !~ /::/ );
221
			$query = Net::IP::ip_expand_address($query, 6);
222
 
223
			print "Query recognized as IPv6.\n";
224
			print "Address expanded to '$query'.\n" if $orig_query ne $query;
225
 
226
			($method,$host,$additional) = VGWhoIs::IPv6::getmethodv6($query);
227
		} elsif ($query =~ /^(urn:){0,1}oid:/i ) { # OID
228
			print "Query recognized as OID.\n";
229
 
230
			# preliminarily remove urn: and oid: from query
231
			# we need a dot so that we can use "oid:." in our patternfile too
232
			$query = VGWhoIs::OID::normalize_oid($query);
233
 
234
			my @arcs = split(/\./, $query); # TODO: warum geht split('.',$oid) nicht?
235
 
236
			($method,$host,$additional) = VGWhoIs::OID::getmethodoid(@arcs);
237
 
238
			# Whois OID query syntax definition by ViaThinkSoft (TODO: Apply for RFC):
239
			# - urn:oid:2.999 or oid:2.999
240
			# - Case insensitive
241
			# - Leading dot should be tolerated (urn:oid:.2.999)
242
			# - Leading zeros should be tolerated (urn:oid:.002.00999)
243
			# Idea: Should "oid:" be optional? Since 2.999 cannot be an IP ... But 1.2.3.4 could be one ...
244
 
245
			# There are many possibilities. We choose "oid:.2.999"
246
			$query = 'oid:' . VGWhoIs::OID::normalize_oid($query);
247
		} else {
248
			# Last resort: Query is probably a TLD, domain or handle, but we are not sure!
249
			# print "Query recognized as domain.\n";
250
 
251
			# Dot exists?	Type?	Punycode?	Filtering?
252
			# ------------------------------------------------
253
			# Yes		Domain	Yes		Yes
254
			# No		TLD	Yes		Yes
255
			# No		Handle	No*		Maybe
256
			# ------------------------------------------------
257
			# * = but it is unlikely that a handle contains non-latin characters
258
 
259
			# Filtering
260
			$query =~ y/[\x{00A0}-\x{FFFF}]a-zA-Z0-9:.,+_ -//cd;
261
			$query =~ s/\.$//;
262
			my $query_utf8_filtered = VGWhoIs::Utils::enforce_utf8($query);
263
			if ( $query_utf8 ne $query_utf8_filtered ) {
264
				# QUE: warn or print?
265
				warn "Attention: Query was filtered to '$query_utf8_filtered'.\n\n";
266
			}
267
 
268
			# Punycode decoding
269
			# my $ascii_query = Net::LibIDN::idn_to_ascii($query, 'utf-8')
270
			# We separate between spaces, so that "tld <unicode>" can be processed
271
			my @query_split = split(' ', $query);
272
			@query_split = map { Net::LibIDN::idn_to_ascii($_, 'utf-8') || '' } @query_split;
273
			my $ascii_query = join(' ', @query_split);
274
 
275
			# Query valid?
276
			if (!$ascii_query) { # e.g. $query = ".x"
277
				warn "'$query_utf8' is an invalid domain name.\n";
278
				return 2;
279
			}
280
 
281
			# Just information for the user
282
			if (index($query, ".") != -1) {
283
				print "Query recognized as domain.\n\n"; # TODO: aber wenn kein IDN?
284
			} else {
285
				print "Query is probably a handle or TLD.\n\n";
286
			}
287
 
288
			($method,$host,$additional) = VGWhoIs::Core::getmethodother($ascii_query);
289
		}
290
	}
291
 
292
	if ($method eq '') {
293
		warn "I don't know where to query that.\n";
294
		warn "If this is a valid domainname or handle, please file a bug report.\n";
295
		return 1;
296
	}
297
 
298
	# Wird in getmethod*() bereits ausgeführt.
299
	# Grund: Dann kann auch bei redirectwhois() dementsprechend in jedem Zwischenschritt gehandelt werden.
300
	# $host = $VGWhoIs::Core::mirror{$method.$host} if defined $VGWhoIs::Core::mirror{$method.$host};
301
 
302
	my ($result, $exitcode) = VGWhoIs::Core::doquery($query,$method,$host,$additional);
303
	$result = '' if !defined $result; # should not happen!
304
 
305
	my $antispam_replacements = 0;
306
	if ($VGWhoIs::Core::antispam) {
307
		# Protect email addresses (to allow e.g. "RIPE -B" for public services)
308
		# Note: eMail addresses have a much more complex structure, see http://code.google.com/p/isemail/
309
		# But this Regex should still prevent spammers from filtering eMail addresses,
310
		# even if e.g. the "wrong" (e.g. escaped) "@" is protected.
311
		$antispam_replacements = $result =~ s/(\S+)@(\S+)\.([^.\s]+)/$1 (at) $2 (dot) $3/g;
312
		# Alternative solution:
313
		# $antispam_replacements = $result =~ s/(\S+)@(\S+)\.([^.\s]+)/$1&$2.$3/g;
314
	}
315
 
316
	# We try to get $result to wide-string. Functions like LWP::Simple automatically convert UTF-8 into Unicode
317
	# (even without BOM sent through the whois gopher channel!), while subprograms and other methods are providing
318
	# raw UTF-8 data.
319
	$result = Encode::decode('utf8', VGWhoIs::Utils::trim($result), Encode::FB_CROAK) if !$rawoutput && VGWhoIs::Utils::is_utf8($result);
320
 
321
	# Don't allow DOS format
322
	$result =~ s/(\012|\015\012?)/\n/g;
323
 
324
	# Output everything
325
	print VGWhoIs::Utils::trim($result), "\n\n";
326
 
327
	if ($antispam_replacements > 0) {
328
		print "Note: The output has been modified by VGWhoIs.\n";
329
		print "$antispam_replacements eMail addresses have been anti-spam protected.\n";
330
		print "(Disable protection with \"vgwhois -e\")\n";
331
		print "\n";
332
	}
333
 
334
	# Footer
335
	print "--\n  To resolve one of the above handles:";
336
 
337
	if ($method =~ /whois/) {
338
		print "\n     whois -h $host";
339
		print ":$1" if ( $additional =~ /port=(\d+)/ );
340
		print " -- HANDLE\n";
341
	}
342
	elsif ($method eq "cgipost") {
343
		print "\n     POST $host\n";
344
		print "     $additional\n";
345
	}
346
	elsif ($method eq "cgi") {
347
		print "\n     $host\n";
348
	}
349
	elsif ($method eq "program") {
350
		print "\n     $host HANDLE\n";
351
	}
352
	# elsif ($method eq "wwwgrep") {
353
	else {
354
		# todo: add cgipostcurl etc
355
		print "\n     hmm. not sure (method = $method).\n";
356
	}
357
 
358
	print "  OTOH globally unique handles should be recognised directly by VGWhoIs.\n";
359
	print "  Please report errors or misfits via the Debian bug tracking system.\n";
360
 
361
	return $exitcode;
362
}