Login | ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/vgwhois/trunk/main/vgwhois
Revision: 13
Committed: Thu May 23 11:06:58 2019 UTC (14 months, 3 weeks ago) by daniel-marschall
File size: 12228 byte(s)

File Contents

# Content
1 #!/usr/bin/perl
2
3 #
4 # VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
5 # Main program
6 #
7 # (c) 2010-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
8 # based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de>
9 # Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
10 #
11 # License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
12 #
13
14 # TODO: print whois parameters at "querying..."
15 # TODO: lynx injection sicherheitslücke? => quotemeta()
16 # TODO: regularly check https://bugs.debian.org/cgi-bin/pkgreport.cgi?src=gwhois
17
18 # TODO: "%" am Anfang jeder Meldung ausgeben
19
20 # TODO: lynx wird manchmal auch ausgeführt ohne -L ...
21 # TODO: Alle "!!" entfernen
22 # TODO: print -> $result .= ?
23
24 use warnings;
25 use strict;
26
27 use FindBin;
28 use lib "$FindBin::RealBin/lib/";
29
30 use VGWhoIs::Core;
31 use VGWhoIs::Utils;
32 use VGWhoIs::IPv4;
33 use VGWhoIs::IPv6;
34 use VGWhoIs::OID;
35
36 # install with "cpan Net::IP" or "aptitude install libnet-ip-perl"
37 use Net::IP;
38
39 use Net::LibIDN;
40 use Encode;
41 # use Encode::Detect::Detector; # requires Debian package libencode-detect-perl
42
43 #use encoding ':locale';
44
45 #use utf8;
46
47
48 # Examples for output of the different hosts:
49 # -------------------------------------------------------------
50 # Host Example Output BOM
51 # -------------------------------------------------------------
52 # whois.viathinksoft.de oid:2.999 UTF-8 if required (existing BOMs will be removed)
53 # cnnic.cn cnnic.cn UTF-8 no
54 # whois.ati.tn ati.tn UTF-8 no
55 # whois.kr whois.kr UTF-8 no
56 # whois.denic.de denic.de ISO-8859-1 no
57 # oldwhois.kisa.or.kr (obsolete) whois.kr EUC-KR no
58 # whois.nic.ch domian.ch UTF-8 no
59 # vgwhois UTF-8 yes (existing BOMs will be removed?)
60 # gwhois (like server) (like server)
61 # -------------------------------------------------------------
62
63
64 # TODO: for this diagram: check if existing BOMs will be removed, e.g. by LWP.
65 # TODO: how to stop LWP's auto-detect magic?
66 # TODO: only output bom if required? doesn't work, otherwise we would need to buffer stderr and stdout, and then their order is wrong again.
67
68
69 $ENV{'HOME'}='/var/home/whois' unless defined $ENV{'HOME'};
70
71 # Nicht nach VGWhoIs::Core auslagern
72 # TODO: die $version auch von den .pm Modulen anzeigen?
73 my $version = '20190521';
74 my $fixwhoishost;
75 my $rawoutput = 0;
76
77 $| = 1; # buffer flushing = autoflush
78
79 while ($ARGV[0]) {
80 if ($ARGV[0] eq '--help' || $ARGV[0] eq '-?') {
81 print "VGWhoIs - ViaThinkSoft Global WhoIs\n",
82 "Version $version\n\n",
83 "Usage: vgwhois {options} [query]\n",
84 " Try find information about the query (might be multiple words).\n",
85 " If no query is given, use the first line from stdin\n\n",
86 " Options:\n",
87 " -C dir Setting an alternate configuration directory\n",
88 " default: $VGWhoIs::Core::confdir\n",
89 " -h host Selecting a fixed whois server for this query\n",
90 " -m method:host mirror Defining a mirror for a given method and host.\n",
91 " -L Use lynx -source instead of LWP::Simple\n",
92 " -e Do not protect eMail addresses\n",
93 " -c Do not try to convert to UTF-8. Output server's stream.\n",
94 " -v Output version of pattern table(s)\n",
95 " -?, --help Printing this text\n\n";
96 exit;
97 } elsif ($ARGV[0] eq '-C') {
98 shift;
99 $VGWhoIs::Core::confdir = shift;
100 } elsif ($ARGV[0] eq '-c') {
101 shift;
102 $rawoutput = 1;
103 $VGWhoIs::Core::useLWP = 0; # TODO: geht irgendwie nicht anders
104 } elsif ($ARGV[0] eq '-h') {
105 shift;
106 $fixwhoishost = shift;
107 } elsif ($ARGV[0] eq '-L') {
108 shift;
109 $VGWhoIs::Core::useLWP = 0;
110 } elsif ($ARGV[0] eq '-m') {
111 shift;
112 $_ = shift;
113 s/://;
114 $VGWhoIs::Core::mirror{$_} = shift;
115 } elsif ($ARGV[0] eq '-e') {
116 shift;
117 $VGWhoIs::Core::antispam = 0;
118 } elsif ($ARGV[0] eq '-v') {
119 print "VGWhoIs - ViaThinkSoft Global WhoIs\n\n",
120 "program version: $version\n",
121 "pattern tables: ";
122 foreach my $patternfile (VGWhoIs::Core::getpatternfiles()) {
123 if (!open(PATTERN,"<$patternfile")) {
124 warn "Cannot open $patternfile. STOP.\n";
125 exit 1;
126 }
127
128 my $line = <PATTERN>;
129 close(PATTERN);
130
131 my $patternversion;
132 if (defined($line)) {
133 ($patternversion) = $line =~ /#:\s+version\s+(\S+)/;
134 $patternversion = 'unknown' if !defined($patternversion);
135 } else {
136 $patternversion = 'unknown';
137 }
138 print "$patternversion\t($patternfile)\n ";
139 }
140 print "\n";
141 exit 0;
142 } elsif ($ARGV[0] eq '--') {
143 shift;
144 last;
145 } else {
146 last;
147 }
148 }
149
150 if ($rawoutput) {
151 binmode(STDOUT, ":bytes");
152 binmode(STDERR, ":bytes");
153 } else {
154 binmode(STDOUT, ":utf8");
155 binmode(STDERR, ":utf8");
156 }
157
158 if (defined $ARGV[0]) {
159 $_ = join(' ', @ARGV);
160 } else {
161 # If no parameter is given, await an input from STDIN
162 $_ = <>;
163 chomp;
164 }
165
166 print "\x{FEFF}" if !$rawoutput; # BOM
167 exit main($_);
168
169 # -----------------------------------------------------------------------------------------
170
171 sub main {
172 my $query = shift;
173
174 $query = '' if !defined $query;
175
176 if (VGWhoIs::Utils::is_utf8($query)) {
177 $query = Encode::decode('utf8', $query);
178 }
179 $query = VGWhoIs::Utils::trim($query);
180
181 if ($query eq '') {
182 warn "Query is empty.\n";
183 exit 2;
184 }
185
186 my ($method,$host,$additional);
187
188 my $query_utf8 = VGWhoIs::Utils::enforce_utf8($query);
189 print "Process query: '$query_utf8'\n\n";
190
191 if ( $fixwhoishost ) {
192 # QUE: soll das immer gelten, oder nur, wenn ermittelt wurde, dass whois benötigt wird (nicht aber cgi, etc?)
193 ($method,$host,$additional) = ('whois',$fixwhoishost,'');
194 } else {
195 # if ($query !~ /[^0-9\.]/) { # ipv4
196 if ($query =~ /^[0-9\.]*$/) {
197 my ($a, $b, $c, $d, $e) = $query =~ /(\d+)\.(\d+)\.(\d+)\.(\d+)(.*)/;
198 $a = 256 if !defined $a;
199 $b = 256 if !defined $b;
200 $c = 256 if !defined $c;
201 $d = 256 if !defined $d;
202 $e = '' if !defined $e;
203 if ($a > 255 || $b > 255 || $c > 255 || $d > 255 || $e ne '') {
204 warn "'$query' is no valid IP address, ASN, OID or domain name.\n";
205 exit 2;
206 }
207 print "Query recognized as IPv4.\n";
208
209 ($method,$host,$additional) = VGWhoIs::IPv4::getmethodv4($a,$b,$c,$d);
210 # } elsif ( lc($query) !~ /[^0-9a-f:]/i ) { # ipv6
211 # } elsif ( $query !~ /[0-9a-f:]*/ ) {
212 } elsif (($query =~ /:/ ) && ( Net::IP::ip_expand_address($query, 6) =~ /^[0-9a-f:]*:[0-9a-f:]*$/ )) { # at least one ":" so that e.g. "ac" is recognized as TLD and not as IPv6
213 # check and correct v6 address
214 if ( $query =~ /[0-9a-f]{5}/ || $query =~ /:::/ ) {
215 warn "'$query' is an invalid IPv6 address.\n";
216 exit 2;
217 }
218
219 my $orig_query = $query;
220 #$query =~ s/:?$/::/ if ( $query !~ /(.*:){7}/ && $query !~ /::/ );
221 $query = Net::IP::ip_expand_address($query, 6);
222
223 print "Query recognized as IPv6.\n";
224 print "Address expanded to '$query'.\n" if $orig_query ne $query;
225
226 ($method,$host,$additional) = VGWhoIs::IPv6::getmethodv6($query);
227 } elsif ($query =~ /^(urn:){0,1}oid:/i ) { # OID
228 print "Query recognized as OID.\n";
229
230 # preliminarily remove urn: and oid: from query
231 # we need a dot so that we can use "oid:." in our patternfile too
232 $query = VGWhoIs::OID::normalize_oid($query);
233
234 my @arcs = split(/\./, $query); # TODO: warum geht split('.',$oid) nicht?
235
236 ($method,$host,$additional) = VGWhoIs::OID::getmethodoid(@arcs);
237
238 # Whois OID query syntax definition by ViaThinkSoft (TODO: Apply for RFC):
239 # - urn:oid:2.999 or oid:2.999
240 # - Case insensitive
241 # - Leading dot should be tolerated (urn:oid:.2.999)
242 # - Leading zeros should be tolerated (urn:oid:.002.00999)
243 # Idea: Should "oid:" be optional? Since 2.999 cannot be an IP ... But 1.2.3.4 could be one ...
244
245 # There are many possibilities. We choose "oid:.2.999"
246 $query = 'oid:' . VGWhoIs::OID::normalize_oid($query);
247 } else {
248 # Last resort: Query is probably a TLD, domain or handle, but we are not sure!
249 # print "Query recognized as domain.\n";
250
251 # Dot exists? Type? Punycode? Filtering?
252 # ------------------------------------------------
253 # Yes Domain Yes Yes
254 # No TLD Yes Yes
255 # No Handle No* Maybe
256 # ------------------------------------------------
257 # * = but it is unlikely that a handle contains non-latin characters
258
259 # Filtering
260 $query =~ y/[\x{00A0}-\x{FFFF}]a-zA-Z0-9:.,+_ -//cd;
261 $query =~ s/\.$//;
262 my $query_utf8_filtered = VGWhoIs::Utils::enforce_utf8($query);
263 if ( $query_utf8 ne $query_utf8_filtered ) {
264 # QUE: warn or print?
265 warn "Attention: Query was filtered to '$query_utf8_filtered'.\n\n";
266 }
267
268 # Punycode decoding
269 # my $ascii_query = Net::LibIDN::idn_to_ascii($query, 'utf-8')
270 # We separate between spaces, so that "tld <unicode>" can be processed
271 my @query_split = split(' ', $query);
272 @query_split = map { Net::LibIDN::idn_to_ascii($_, 'utf-8') || '' } @query_split;
273 my $ascii_query = join(' ', @query_split);
274
275 # Query valid?
276 if (!$ascii_query) { # e.g. $query = ".x"
277 warn "'$query_utf8' is an invalid domain name.\n";
278 return 2;
279 }
280
281 # Just information for the user
282 if (index($query, ".") != -1) {
283 print "Query recognized as domain.\n\n"; # TODO: aber wenn kein IDN?
284 } else {
285 print "Query is probably a handle or TLD.\n\n";
286 }
287
288 ($method,$host,$additional) = VGWhoIs::Core::getmethodother($ascii_query);
289 }
290 }
291
292 if ($method eq '') {
293 warn "I don't know where to query that.\n";
294 warn "If this is a valid domainname or handle, please file a bug report.\n";
295 return 1;
296 }
297
298 # Wird in getmethod*() bereits ausgeführt.
299 # Grund: Dann kann auch bei redirectwhois() dementsprechend in jedem Zwischenschritt gehandelt werden.
300 # $host = $VGWhoIs::Core::mirror{$method.$host} if defined $VGWhoIs::Core::mirror{$method.$host};
301
302 my ($result, $exitcode) = VGWhoIs::Core::doquery($query,$method,$host,$additional);
303 $result = '' if !defined $result; # should not happen!
304
305 my $antispam_replacements = 0;
306 if ($VGWhoIs::Core::antispam) {
307 # Protect email addresses (to allow e.g. "RIPE -B" for public services)
308 # Note: eMail addresses have a much more complex structure, see http://code.google.com/p/isemail/
309 # But this Regex should still prevent spammers from filtering eMail addresses,
310 # even if e.g. the "wrong" (e.g. escaped) "@" is protected.
311 $antispam_replacements = $result =~ s/(\S+)@(\S+)\.([^.\s]+)/$1 (at) $2 (dot) $3/g;
312 # Alternative solution:
313 # $antispam_replacements = $result =~ s/(\S+)@(\S+)\.([^.\s]+)/$1&$2.$3/g;
314 }
315
316 # We try to get $result to wide-string. Functions like LWP::Simple automatically convert UTF-8 into Unicode
317 # (even without BOM sent through the whois gopher channel!), while subprograms and other methods are providing
318 # raw UTF-8 data.
319 $result = Encode::decode('utf8', VGWhoIs::Utils::trim($result), Encode::FB_CROAK) if !$rawoutput && VGWhoIs::Utils::is_utf8($result);
320
321 # Don't allow DOS format
322 $result =~ s/(\012|\015\012?)/\n/g;
323
324 # Output everything
325 print VGWhoIs::Utils::trim($result), "\n\n";
326
327 if ($antispam_replacements > 0) {
328 print "Note: The output has been modified by VGWhoIs.\n";
329 print "$antispam_replacements eMail addresses have been anti-spam protected.\n";
330 print "(Disable protection with \"vgwhois -e\")\n";
331 print "\n";
332 }
333
334 # Footer
335 print "--\n To resolve one of the above handles:";
336
337 if ($method =~ /whois/) {
338 print "\n whois -h $host";
339 print ":$1" if ( $additional =~ /port=(\d+)/ );
340 print " -- HANDLE\n";
341 }
342 elsif ($method eq "cgipost") {
343 print "\n POST $host\n";
344 print " $additional\n";
345 }
346 elsif ($method eq "cgi") {
347 print "\n $host\n";
348 }
349 elsif ($method eq "program") {
350 print "\n $host HANDLE\n";
351 }
352 # elsif ($method eq "wwwgrep") {
353 else {
354 # todo: add cgipostcurl etc
355 print "\n hmm. not sure (method = $method).\n";
356 }
357
358 print " OTOH globally unique handles should be recognised directly by VGWhoIs.\n";
359 print " Please report errors or misfits via the Debian bug tracking system.\n";
360
361 return $exitcode;
362 }

Properties

Name Value
svn:executable *