Login | ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/vgwhois/trunk/main/lib/VGWhoIs/Core.pm
Revision: 12
Committed: Tue May 21 19:26:19 2019 UTC (16 months, 1 week ago) by daniel-marschall
Content type: text/x-perl
File size: 17322 byte(s)

File Contents

# User Rev Content
1 daniel-marschall 12 #
2     # VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
3     # Main program
4     #
5     # (c) 2010-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
6     # based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de>
7     # Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
8     #
9     # License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
10     #
11    
12     package VGWhoIs::Core;
13    
14     use warnings;
15     use strict;
16    
17     use LWP::Simple;
18    
19     use FindBin;
20     use lib "$FindBin::RealBin/../";
21     use VGWhoIs::Utils;
22    
23     use List::Util 'max';
24    
25     $VGWhoIs::Core::confdir = "$FindBin::RealBin/pattern/";
26    
27     # DM 11.09.2017: There is a weird bug: If I use TOR in combination with LWP on a Gopher protocol, I get error 500.
28     $VGWhoIs::Core::useLWP = 0;
29    
30     $VGWhoIs::Core::antispam = 1; # default: on
31     $VGWhoIs::Core::step = 1;
32    
33     # Wieso muss das nicht deklariert werden? (Fehlermeldung "useless use")
34     #%VGWhoIs::Core::mirror;
35    
36     # ($result, $exitcode) = VGWhoIs::Core::getsource($url)
37     sub VGWhoIs::Core::getsource {
38     my ($url) = @_;
39     my $text = $VGWhoIs::Core::useLWP ? LWP::Simple::get($url) : VGWhoIs::Utils::lynxsource($url);
40     my $exitcode = defined($text) ? 0 : 1; # TODO: a better way to detect an error
41     return ($text, $exitcode);
42     }
43    
44     # ($result, $exitcode) = VGWhoIs::Core::whoisaccess($host,$port,$query)
45     sub VGWhoIs::Core::whoisaccess {
46     my ($host,$port,$query) = @_;
47    
48     $query =~ s/ /%20/g;
49    
50     my ($result, $exitcode) = VGWhoIs::Core::getsource("gopher://$host:$port/0$query");
51     if ($exitcode) {
52     $result .= "Query to whois server failed.\n";
53     }
54     $result =~ s/\x0D//g; # remove CR from output
55    
56     return ($result, $exitcode);
57     }
58    
59     # ($result, $exitcode) = VGWhoIs::Core::inicwhoisaccess($host,$port,$query)
60     sub VGWhoIs::Core::inicwhoisaccess { # todo: mehr als 1 redirect möglich, z.b. bei rwhois??
61     #TODO: hier auch $mirror unterstützung?
62     my ($host,$port,$query) = @_;
63     my ($queryresult, $result);
64     my $exitcode;
65    
66     ($queryresult, $exitcode) = VGWhoIs::Core::whoisaccess($host,$port,"=$query");
67    
68     # Result von NSI-Registry auf relevanten Part absuchen
69     if ( $queryresult =~ /Name:\s+$query\s/mi ) {
70     $result = "-- From: $host:$port\n\n";
71     ($host) = $queryresult =~
72     /Name:\s+$query\s.*?Whois Server:\s+(.*?)\s/si;
73    
74     my $relresult;
75     # my ($relresult) = $queryresult =~
76     # /[\r\n]([^\r\n]+\S+\sName:\s+$query\s.*?Expiration Date:[^\r\n]+)[\r\n]/si;
77     # $relresult = "(Redirect to $host:$port)" if !defined $relresult;
78     $relresult = $queryresult;
79    
80     $result .= "$relresult\n\n-- End --\n\n";
81    
82     # $port = 43;
83     my ($host2, $port) = $host =~ /^(.*):(.*)$/;
84     $port = 43 if !defined $port;
85     $host = $host2 if defined $host2;
86    
87     # print $VGWhoIs::Core::step++,". Step: Querying $host:$port with whois.\n\n"; # todo "rwhois"?
88     $result .= ($VGWhoIs::Core::step++).". Step: Querying $host:$port with whois.\n\n"; # todo "rwhois"?
89    
90     $result .= "-- From: $host:$port\n\n";
91     # TODO: beim referal whois ist die query ist nicht trimmed. scheint aber nix auszumachen
92     my ($loc_text, $loc_exitcode) = VGWhoIs::Core::whoisaccess($host,$port,$query);
93    
94     $exitcode = max($exitcode, $loc_exitcode);
95     $result .= $loc_text;
96     } else {
97     $result = "-- From: $host:$port\n\n$queryresult-- End --\n";
98     }
99    
100     return ($result, $exitcode);
101     }
102    
103     # ($result, $exitcode) = VGWhoIs::Core::wwwsgrep($url,$match)
104     sub VGWhoIs::Core::wwwsgrep {
105     my ($url,$match) = @_;
106     my ($result, $line, $exitcode) = ('', '', 0);
107    
108     ($line, $exitcode) = VGWhoIs::Core::getsource($url);
109     if (!$exitcode) {
110     $line =~ s/\n/ /g;
111     if ($line =~ $match) {
112     ($result) = $line =~ /$match/s;
113     }
114     }
115     return ($result, $exitcode);
116     }
117    
118     # ($host, $additional) = VGWhoIs::Core::methodpatternregex($query,$host,$additional,$queryline);
119     sub VGWhoIs::Core::methodpatternregex {
120     my ($query,$host,$additional,$line) = @_;
121    
122     my ($namewotld,$tld) = $query =~ /^([^\.]*)\.(.*)$/;
123     # TODO: !defined
124     my ($p1,$p2,$p3,$p4,$p5,$p6,$p7,$p8,$p9) = $query =~ $line;
125     # TODO: !defined
126     my ($ucq) = uc($query);
127    
128     $host =~ s/~query~/$query/;
129     $host =~ s/~ucquery~/$ucq/;
130     $host =~ s/~namewotld~/$namewotld/;
131     $host =~ s/~tld~/$tld/;
132     $host =~ s/~1~/$p1/;
133     $host =~ s/~2~/$p2/;
134     $host =~ s/~2~/$p3/;
135     $host =~ s/~2~/$p4/;
136     $host =~ s/~2~/$p5/;
137     $host =~ s/~2~/$p6/;
138     $host =~ s/~2~/$p7/;
139     $host =~ s/~2~/$p8/;
140     $host =~ s/~2~/$p9/;
141    
142     $additional =~ s/~query~/$query/;
143     $additional =~ s/~ucquery~/$ucq/;
144     $additional =~ s/~namewotld~/$namewotld/;
145     $additional =~ s/~tld~/$tld/;
146     $additional =~ s/~1~/$p1/;
147     $additional =~ s/~2~/$p2/;
148    
149     return ($host,$additional);
150     }
151    
152     # @patternfiles = VGWhoIs::Core::getpatternfiles()
153     sub VGWhoIs::Core::getpatternfiles {
154     my (@files, @files_new);
155    
156     opendir(DIR, $VGWhoIs::Core::confdir);
157     @files_new = sort(readdir(DIR));
158     closedir(DIR);
159    
160     @files_new = grep {
161     ($_ !~ /^\./)
162     } @files_new;
163     @files_new = map { "$VGWhoIs::Core::confdir$_" } @files_new;
164    
165     @files = grep { -f } (@files_new);
166    
167     return (@files);
168     }
169    
170     # ($method, $host, $additional) = VGWhoIs::Core::getmethodother($query);
171     sub VGWhoIs::Core::getmethodother {
172     my ($query) = @_;
173     my $found = 0;
174     my ($line,$cline,$method,$host,$additional);
175     my ($rang_prefix, $rang_beginning, $rang_ending);
176     my ($rang_actual_prefix, $rang_number);
177    
178     # Process file until we found a match
179     foreach my $patternfile (VGWhoIs::Core::getpatternfiles()) {
180     open(PATTERN,"<$patternfile") || die "Cannot open $patternfile. STOP.\n";
181    
182     while ( defined($line = <PATTERN>) && (!$found) ) {
183     # chomp $line;
184     $line = VGWhoIs::Utils::trim($line);
185    
186     if ( $line =~ /^#/ ) { # comment
187     } elsif ( ($cline) = $line =~ /^:(.*)$/ ) { # method declaration
188     ($method,$host,$additional) = split(/\|/,$cline,3);
189     $method='' if !defined $method;
190     $host='' if !defined $host;
191     $additional='' if !defined $additional;
192    
193     } elsif ( $line =~ /^\*/ && (($rang_actual_prefix, $rang_number) = $query =~ /^([^0-9]+)([0-9]+)$/) ) {
194     # e.g. for parsing ASNs
195    
196     if (($rang_prefix, $rang_beginning) = $line =~ /^\*([^0-9]+):([0-9]+)$/) {
197     # Single number
198     $rang_ending = $rang_beginning
199     } else {
200     # Range
201     ($rang_prefix, $rang_beginning, $rang_ending) = $line =~ /^\*([^0-9]+):([0-9]+)-([0-9]+)$/;
202     next if !defined $rang_prefix;
203     next if !defined $rang_beginning;
204     next if !defined $rang_ending;
205     }
206    
207     if ((lc($rang_prefix) eq lc($rang_actual_prefix))
208     && ($rang_number >= $rang_beginning)
209     && ($rang_number <= $rang_ending)) {
210     $found = 1;
211     # ($host,$additional) = VGWhoIs::Core::methodpatternregex($query,$host,$additional,$line);
212     }
213     } elsif ( $line ne '' && $line =~ /^[^\*]/ && $query =~ /$line/i ) {
214     # Regex
215     $found = 1;
216     ($host,$additional) = VGWhoIs::Core::methodpatternregex($query,$host,$additional,$line);
217     }
218     }
219     }
220     if (!$found) {
221     return ('','','')
222     }
223    
224     $host = $VGWhoIs::Core::mirror{$method.$host} if defined $VGWhoIs::Core::mirror{$method.$host};
225     return ($method,$host,$additional);
226     }
227    
228     # ($resulttext, $exitcode) = VGWhoIs::Core::redirectwhois($query,$host,$port)
229     sub VGWhoIs::Core::redirectwhois {
230     my ($query,$host,$port) = @_; # todo: anstelle $port lieber ein $additional zulassen?
231     $port = 43 if !defined $port;
232    
233     # check for query modifier (if any)
234     my ($modmethod, $modhost, $modadditional) = VGWhoIs::Core::getmethodother("redirect:$host(:$port){0,1}");
235    
236     return VGWhoIs::Core::doquery($query,$modmethod,$modhost,$modadditional)
237     if ( $modmethod ne 'none');
238    
239     return VGWhoIs::Core::doquery($query, 'whois', "$host:$port");
240     }
241    
242     # ($resulttext, $exitcode) = VGWhoIs::Core::doquery($query,$method,$host,$additional);
243     sub VGWhoIs::Core::doquery {
244     my ($query,$method,$host,$additional,$inside_multiple) = @_;
245     my $result = '';
246     my $exitcode = 0;
247    
248     $query = '' if !defined $query;
249     $method = '' if !defined $method;
250     $host = '' if !defined $host;
251     $additional = '' if !defined $additional;
252     $inside_multiple = 0 if !defined $inside_multiple;
253    
254     if ($method eq 'multiple') {
255     my $triple;
256     # do not match "::::", e.g. used by notice
257     my @triple_split = split(/(?<!:):::(?!:)/, $additional);
258     my $count = 0;
259     foreach $triple (@triple_split) {
260     ($method,$host,$additional) = split(/::/, $triple);
261    
262     # We will not get the exact sequence of "prints" and "$result" outputs, but it is better than nothing.
263     # If we would print everything, we would get the warning "print wide char" at nic.es
264     # If we would save all output to $result without buffering the prints inside VGWhoIs::Core::doquery(), the prints would not be in front of their section.
265     my $output = '';
266     open TOOUTPUT, '>', \$output or die "Can't open TOOUTPUT: $!"; # TODO: exitcode
267     my $orig_select = select(TOOUTPUT);
268    
269     my ($loc_text, $loc_exitcode) = VGWhoIs::Core::doquery($query, $method, $host, $additional, 1);
270     $exitcode = max($exitcode, $loc_exitcode);
271    
272     $output .= VGWhoIs::Utils::trim($loc_text);
273     $output .= "\n\n------\n\n" if $count < $#triple_split;
274     select($orig_select);
275     $result .= $output;
276    
277     $count += 1;
278     }
279    
280     # done
281     $method = '';
282     }
283    
284     # TODO: usage of methods. delete unused ones!
285     # wwwgreplv -> removed
286     # whoisjp: not in pattern
287     # whoisarin: not in pattern
288     # inicwhois: in use
289    
290     elsif ($method eq 'wwwsgrep') {
291     my ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($host);
292    
293     print "Querying $hostname with $protocol.\n\n";
294    
295     my ($loc_text, $loc_exitcode) = VGWhoIs::Core::wwwsgrep($host,$additional);
296     $exitcode = max($exitcode, $loc_exitcode);
297     if ($loc_exitcode) {
298     $result .= "Query to web server failed.\n";
299     } else {
300     if ($loc_text ne '') {
301     $result = "Match found:\n$loc_text\n";
302     } else {
303     $result = "No match found. This probably means that this domain does not exist.\n";
304     }
305     }
306     }
307    
308     elsif ($method =~ /^whois(|jp|arin)$/) {
309     my ($parameter,$outquery,$prefix) = ('', '', '');
310    
311     my $port = 43;
312     my $noipprefix = '';
313     my $ipprefix = '';
314     my $trailer = '';
315     my $strip = '';
316    
317     $additional = '' if !defined $additional;
318    
319     foreach $parameter (split('\|', $additional)) {
320     $trailer = $1 if ( $parameter =~ /trailer=(.*)/ );
321     $strip = $1 if ( $parameter =~ /strip=(.*)/ );
322     $prefix = $1 if ( $parameter =~ /prefix=(.*)/ );
323     }
324    
325     $port = $1 if ( $host =~ /.+:(\d+)/ );
326     $host =~ s/:(\d+)//g;
327    
328     print "Querying $host:$port with whois.\n"; # todo "rwhois"?
329    
330     $outquery = $prefix . $query . $trailer . "\n";
331     $outquery =~ s/$strip//g if ( $strip ne '' );
332    
333     my $loc_exitcode;
334     ($result, $loc_exitcode) = VGWhoIs::Core::whoisaccess($host,$port,$outquery);
335     $exitcode = max($exitcode, $loc_exitcode);
336    
337     # TODO rwhois:// implementierung ok?
338     if ( $result =~ /ReferralServer: whois:\/\/(.*):43/mi || $result =~ /ReferralServer: whois:\/\/(.*)/mi ) {
339     ($result, $loc_exitcode) = VGWhoIs::Core::redirectwhois($query,$1);
340     $host = ''; #TODO???
341     $exitcode = max($exitcode, $loc_exitcode);
342     } elsif ( $result =~ /ReferralServer: r{0,1}whois:\/\/([^:]*):(\d+)/mi ) {
343     # ($result, $loc_exitcode) = VGWhoIs::Core::whoisaccess($1,$2,$query); # TODO rediretwhois ?
344     ($result, $loc_exitcode) = VGWhoIs::Core::redirectwhois($query,$1,$2);
345     $exitcode = max($exitcode, $loc_exitcode);
346     } elsif ( $result =~ /ReferralServer: rwhois:\/\/(.*)/mi ) {
347     # ($result, $loc_exitcode) = VGWhoIs::Core::whoisaccess($1,4321,$query); # TODO rediretwhois ?
348     ($result, $loc_exitcode) = VGWhoIs::Core::redirectwhois($query,$1,4321);
349     $exitcode = max($exitcode, $loc_exitcode);
350     } elsif ( $result =~ /(refer|whois server):\s+(.*)/m ) {
351     # "refer:" is sent by whois.iana.org (e.g. if you query test.de )
352     ($result, $loc_exitcode) = VGWhoIs::Core::redirectwhois($query,$2);
353     $host = ''; #TODO???
354     $exitcode = max($exitcode, $loc_exitcode);
355     }
356    
357     # TODO: http://tools.ietf.org/html/rfc1714#section-3.3.2
358     # %referral<SP><server>[:type]<SP>[authority area]
359    
360     print "\n";
361     }
362    
363     elsif ($method eq 'inicwhois' ) {
364     my $port = $additional || 43;
365     $result = ($VGWhoIs::Core::step++).". Step: Querying $host:$port with whois.\n\n"; #todo "rwhois"?
366     $query .= "\n"; # ???
367    
368     my ($loc_text, $loc_exitcode) = VGWhoIs::Core::inicwhoisaccess($host,$port,$query);
369     $result .= $loc_text;
370     $exitcode = max($exitcode, $loc_exitcode);
371     }
372    
373     elsif ($method eq 'cgi') {
374     my ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($host);
375    
376     print "Querying $hostname ($protocol) with cgi.\n\n";
377     #!!
378     # print "$host\n";
379    
380     # TODO: lynx seems to be better in some ways!
381     # For example, a website that outputs "text/plain" will be rendered correct in lynx!
382     # $result = `lynx -connect_timeout=10 -dump "$host" 2>&1`;
383     # $result .= "FAILED with exit code $?\n\n" if $?;
384    
385     # TODO: VGWhoIs::Core::getsource ok? war vorher IMMER lynx
386     my ($loc_text, $loc_exitcode) = VGWhoIs::Core::getsource($host);
387    
388     $exitcode = max($exitcode, $loc_exitcode);
389     if ($loc_exitcode) {
390     $result .= "Query to web server failed.\n";
391     } else {
392     $result = VGWhoIs::Utils::render_html($loc_text);
393     }
394     }
395    
396     elsif ($method eq 'cgipost') {
397     my ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($host);
398    
399     print "Querying $hostname ($protocol) with cgi.\n\n";
400     #!!
401     # print "echo -e '$additional\n---' | lynx -connect_timeout=10 -dump -post_data '$host'\n";
402    
403     # TODO: VGWhoIs::Utils::render_html() better? TODO: lynx source?
404     # [Ma 22.07.2013] "echo -e" does not work... "-e" will shown to the output... However "\n" will still work if I remove -e ... weird.
405     # $result = `echo -e "$additional\n---" | lynx -dump -post_data "$host" 2>&1`; # TODO escape
406     # $result = `echo "$additional\n---" | lynx -dump -post_data "$host" 2>&1`; # TODO escape
407     $result = `echo "$additional" | curl --silent -X POST --data-binary \@- "$host" | lynx -dump -stdin 2>&1`; # TODO escape
408     my $loc_exitcode = $?;
409     $exitcode = max($exitcode, $loc_exitcode);
410     $result .= "FAILED with exit code $loc_exitcode\n\n" if $loc_exitcode;
411     }
412    
413     elsif ($method eq 'cgipostcurl') {
414     my ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($host);
415    
416     print "Querying $hostname ($protocol) with cgi.\n\n";
417     # print "$additional\n"; #!!
418     # print "curl --max-time 10 --stderr /dev/null -e $host --data '$additional' $host | lynx -dump -stdin\n";
419    
420     # TODO: "set pipefail" doesn't work (insecure certificate will not cause the function to fail)
421     $result = `curl --max-time 10 --insecure --stderr /dev/null -e "$host" --data "$additional" "$host" | lynx -dump -stdin 2>&1`; # TODO escape
422    
423     my $loc_exitcode = $?;
424     $exitcode = max($exitcode, $loc_exitcode);
425     $result .= "FAILED with exit code $loc_exitcode\n\n" if $loc_exitcode;
426     }
427    
428     elsif ($method eq 'cgihttps') {
429     my ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($host);
430    
431     print "Querying $hostname ($protocol) with cgi.\n\n";
432     # print "$additional\n"; #!!
433     # print "curl --max-time 10 --stderr /dev/null $host | lynx -dump -stdin\n";
434     # $result = `curl --max-time 10 --insecure --stderr /dev/null "$host" | lynx -dump -stdin 2>&1`;
435     my $html = `curl --max-time 10 --insecure --stderr /dev/null "$host" 2>&1`; # TODO escape. why --insecure?
436     my $loc_exitcode = $?;
437     $exitcode = max($exitcode, $loc_exitcode);
438     $html .= "FAILED with exit code $loc_exitcode\n\n" if $loc_exitcode;
439     $result = VGWhoIs::Utils::render_html($html);
440     }
441    
442     elsif ($method eq 'notice') {
443     if ($inside_multiple) {
444     $result = "\n\nAdditional information for query '$query'.\n\n" . $additional . "\n\n";
445     } else {
446     $result = "\n\nNo lookup service available for your query '$query'.\n\nvgwhois remarks: " . $additional . "\n\n";
447     }
448     # $exitcode = 0;
449     }
450    
451     elsif ($method eq 'program') {
452     my ($program) = VGWhoIs::Utils::trim($host);
453     $program =~ s/\$vgwhois\$/$FindBin::RealBin/;
454     print "Querying script $program\n\n";
455     $result = `$program $additional "$query" 2>&1`;
456     my $loc_exitcode = $?;
457     $exitcode = max($exitcode, $loc_exitcode);
458     $result .= "FAILED with exit code $loc_exitcode\n\n" if $loc_exitcode;
459     }
460    
461     if ($host =~ /arin/) {
462     my $loc_exitcode;
463     if ($result =~ /Maintainer: RIPE/) {
464     ($result, $loc_exitcode) = VGWhoIs::Core::redirectwhois($query,'whois.ripe.net');
465     $exitcode = max($exitcode, $loc_exitcode);
466     } elsif ($result =~ /Maintainer: AP/) {
467     ($result, $loc_exitcode) = VGWhoIs::Core::redirectwhois($query,'whois.apnic.net');
468     $exitcode = max($exitcode, $loc_exitcode);
469     }
470     }
471     elsif ($host =~ /apnic/) {
472     my $loc_exitcode;
473     if ($result =~ /netname: AUNIC-AU/) {
474     ($result, $loc_exitcode) = VGWhoIs::Core::redirectwhois($query,'whois.aunic.net');
475     $exitcode = max($exitcode, $loc_exitcode);
476     } elsif ($result =~ /netname: JPNIC-JP/) {
477     ($result, $loc_exitcode) = VGWhoIs::Core::redirectwhois($query,'whois.nic.ad.jp');
478     $exitcode = max($exitcode, $loc_exitcode);
479     }
480     }
481     elsif ($host =~ /ripe/ && $result =~ /remarks:\s+whois -h (\S+)/) {
482     my $loc_exitcode;
483     ($result, $loc_exitcode) = VGWhoIs::Core::redirectwhois($query,$1);
484     $exitcode = max($exitcode, $loc_exitcode);
485     }
486     # TODO: internic gibts doch gar nicht mehr, oder?
487     elsif (($host =~ /internic/) && ($result =~ /No match for/) && ($query !~ /\.(arpa|com|edu|net|org)$/) ) {
488     my ($result1, $loc_exitcode) = VGWhoIs::Core::redirectwhois($query,'whois.ripe.net');
489     $result = $result1 if $result1 !~ /No entries found/;
490     $exitcode = max($exitcode, $loc_exitcode);
491     }
492    
493     return ($result, $exitcode);
494     }
495    
496     1;