Rev 17 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 17 | Rev 92 | ||
---|---|---|---|
1 | # |
1 | # |
2 | # VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois) |
2 | # VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois) |
3 | # Main program |
3 | # Main program |
4 | # |
4 | # |
5 | # (c) 2010-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de> |
5 | # (c) 2010-2022 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de> |
6 | # based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de> |
6 | # based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de> |
7 | # Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de> |
7 | # Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de> |
8 | # |
8 | # |
9 | # License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2) |
9 | # License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2) |
10 | # |
10 | # |
11 | 11 | ||
12 | package VGWhoIs::Utils; |
12 | package VGWhoIs::Utils; |
13 | 13 | ||
14 | use warnings; |
14 | use warnings; |
15 | use strict; |
15 | use strict; |
16 | 16 | ||
17 | use Encode; |
17 | use Encode; |
18 | 18 | ||
19 | # $result = VGWhoIs::Utils::lynxsource($url) |
19 | # $result = VGWhoIs::Utils::lynxsource($url) |
20 | sub VGWhoIs::Utils::lynxsource { |
20 | sub VGWhoIs::Utils::lynxsource { |
21 | my ($url) = @_; |
21 | my ($url) = @_; |
22 | $url = quotemeta($url); |
22 | $url = quotemeta($url); |
23 | # LYNX sometimes hangs in combination with TOR |
23 | # LYNX sometimes hangs in combination with TOR |
24 | # return qx{lynx -connect_timeout=10 -source $url}; |
24 | # return qx{lynx -connect_timeout=10 -source $url}; |
25 | return qx{curl --user-agent "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0" --silent --max-time 10 $url}; |
25 | return qx{curl --insecure --user-agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" --silent --max-time 10 $url}; |
- | 26 | } |
|
- | 27 | ||
- | 28 | sub VGWhoIs::Utils::lynxrender { |
|
- | 29 | my ($url) = @_; |
|
- | 30 | $url = quotemeta($url); |
|
- | 31 | ||
- | 32 | use File::Basename; |
|
- | 33 | my $script_dir = undef; |
|
- | 34 | if(-l __FILE__) { |
|
- | 35 | $script_dir = dirname(readlink(__FILE__)); |
|
- | 36 | } else { |
|
- | 37 | $script_dir = dirname(__FILE__); |
|
- | 38 | } |
|
- | 39 | $script_dir = quotemeta($script_dir); |
|
- | 40 | ||
- | 41 | my $result = qx{lynx -cfg $script_dir/../lynx.cfg -dump -connect_timeout=10 $url 2>&1}; |
|
- | 42 | $result .= "FAILED with exit code $?\n\n" if $?; |
|
- | 43 | return $result; |
|
26 | } |
44 | } |
27 | 45 | ||
28 | # $line = htmlpre($line); |
46 | # $line = htmlpre($line); |
29 | sub VGWhoIs::Utils::htmlpre { |
47 | sub VGWhoIs::Utils::htmlpre { |
30 | my ($line) = @_; |
48 | my ($line) = @_; |
31 | $line =~ s|\n|<br>|g; |
49 | $line =~ s|\n|<br>|g; |
32 | $line =~ s| | |g; |
50 | $line =~ s| | |g; |
33 | return $line; |
51 | return $line; |
34 | } |
52 | } |
35 | 53 | ||
36 | # $rendered = VGWhoIs::Utils::render_html($html); |
54 | # $rendered = VGWhoIs::Utils::render_html($html); |
37 | sub VGWhoIs::Utils::render_html { |
55 | sub VGWhoIs::Utils::render_html { |
38 | my ($html) = @_; |
56 | my ($html) = @_; |
39 | 57 | ||
40 | return '' if !defined $html; |
58 | return '' if !defined $html; |
41 | 59 | ||
42 | $html =~ s|<!--.*?-->||gsi; |
60 | $html =~ s|<!--.*?-->||gsi; |
43 | 61 | ||
44 | $html =~ s|<pre>(.*?)</pre>|VGWhoIs::Utils::htmlpre($1)|gsei; |
62 | $html =~ s|<pre>(.*?)</pre>|VGWhoIs::Utils::htmlpre($1)|gsei; |
45 | $html =~ s|<textarea>(.*?)</textarea>|VGWhoIs::Utils::htmlpre($1)|gsei; |
63 | $html =~ s|<textarea>(.*?)</textarea>|VGWhoIs::Utils::htmlpre($1)|gsei; |
46 | 64 | ||
47 | #TODO: big problem here: if the output is "content-type: text/plain", then we must not call render_html!!! |
65 | #TODO: big problem here: if the output is "content-type: text/plain", then we must not call render_html!!! |
48 | $html =~ s|\n| |g; |
66 | $html =~ s|\n| |g; |
49 | 67 | ||
50 | $html =~ s|<p\s*/{0,1}\s*>|\n|gsi; |
68 | $html =~ s|<p\s*/{0,1}\s*>|\n|gsi; |
51 | $html =~ s|<p\s.*?>|\n|gsi; |
69 | $html =~ s|<p\s.*?>|\n|gsi; |
52 | 70 | ||
53 | $html =~ s|<tr\s*/{0,1}\s*>|\n|gsi; |
71 | $html =~ s|<tr\s*/{0,1}\s*>|\n|gsi; |
54 | $html =~ s|<tr\s.*?>|\n|gsi; |
72 | $html =~ s|<tr\s.*?>|\n|gsi; |
55 | $html =~ s|<td>| |gsi; |
73 | $html =~ s|<td>| |gsi; |
56 | 74 | ||
57 | $html =~ s|<script.*?</script>||gsi; |
75 | $html =~ s|<script.*?</script>||gsi; |
58 | $html =~ s|<style.*?</style>||gsi; |
76 | $html =~ s|<style.*?</style>||gsi; |
59 | 77 | ||
60 | $html =~ s| \t| |gsi; |
78 | $html =~ s| \t| |gsi; |
61 | $html =~ s|\s*\n\s*\n|\n|gsi; |
79 | $html =~ s|\s*\n\s*\n|\n|gsi; |
62 | $html =~ s|^\s*||gm; |
80 | $html =~ s|^\s*||gm; |
63 | 81 | ||
64 | $html =~ s| | |gsi; |
82 | $html =~ s| | |gsi; |
65 | $html =~ s|<br\s*/{0,1}\s*>|\n|gsi; |
83 | $html =~ s|<br\s*/{0,1}\s*>|\n|gsi; |
66 | $html =~ s|<br\s.*?>|\n|gsi; |
84 | $html =~ s|<br\s.*?>|\n|gsi; |
67 | $html =~ s|\<.*?\>||gsi; |
85 | $html =~ s|\<.*?\>||gsi; |
68 | 86 | ||
69 | return($html); |
87 | return($html); |
70 | } |
88 | } |
71 | 89 | ||
72 | 90 | ||
73 | sub VGWhoIs::Utils::trim($) { |
91 | sub VGWhoIs::Utils::trim($) { |
74 | # Source: http://www.somacon.com/p114.php |
92 | # Source: http://www.somacon.com/p114.php |
75 | my $string = shift; |
93 | my $string = shift; |
76 | $string =~ s/^\s+//; |
94 | $string =~ s/^\s+//; |
77 | $string =~ s/\s+$//; |
95 | $string =~ s/\s+$//; |
78 | return $string; # TODO: ein push faende ich besser |
96 | return $string; # TODO: ein push faende ich besser |
79 | } |
97 | } |
80 | 98 | ||
81 | sub VGWhoIs::Utils::is_uc($) { |
99 | sub VGWhoIs::Utils::is_uc($) { |
82 | my $str = shift; |
100 | my $str = shift; |
83 | 101 | ||
84 | my $char; |
102 | my $char; |
85 | foreach $char (split //, $str) { |
103 | foreach $char (split //, $str) { |
86 | return 1 if (ord($char) > 255); |
104 | return 1 if (ord($char) > 255); |
87 | } |
105 | } |
88 | 106 | ||
89 | return 0; |
107 | return 0; |
90 | } |
108 | } |
91 | 109 | ||
92 | sub VGWhoIs::Utils::is_ascii($) { |
110 | sub VGWhoIs::Utils::is_ascii($) { |
93 | my $str = shift; |
111 | my $str = shift; |
94 | 112 | ||
95 | my $char; |
113 | my $char; |
96 | foreach $char (split //, $str) { |
114 | foreach $char (split //, $str) { |
97 | return 0 if (ord($char) >= 128); |
115 | return 0 if (ord($char) >= 128); |
98 | } |
116 | } |
99 | 117 | ||
100 | return 1; |
118 | return 1; |
101 | } |
119 | } |
102 | 120 | ||
103 | sub VGWhoIs::Utils::is_utf8($) { |
121 | sub VGWhoIs::Utils::is_utf8($) { |
104 | my $str = shift; |
122 | my $str = shift; |
105 | 123 | ||
106 | my $s = eval { Encode::decode('utf8', $str, Encode::FB_CROAK) }; |
124 | my $s = eval { Encode::decode('utf8', $str, Encode::FB_CROAK) }; |
107 | return defined($s); |
125 | return defined($s); |
108 | 126 | ||
109 | # This procedure does not work :-( VGWhoIs::Utils::is_utf8 and valid are true even if they should not... |
127 | # This procedure does not work :-( VGWhoIs::Utils::is_utf8 and valid are true even if they should not... |
110 | # return 1 if utf8::VGWhoIs::Utils::is_utf8($str); |
128 | # return 1 if utf8::VGWhoIs::Utils::is_utf8($str); |
111 | # return 0 if VGWhoIs::Utils::is_uc($str); |
129 | # return 0 if VGWhoIs::Utils::is_uc($str); |
112 | # return 1 if (Encode::Detect::Detector::detect($str) eq "UTF-8"); |
130 | # return 1 if (Encode::Detect::Detector::detect($str) eq "UTF-8"); |
113 | # return utf8::valid($str); |
131 | # return utf8::valid($str); |
114 | } |
132 | } |
115 | 133 | ||
116 | sub VGWhoIs::Utils::enforce_utf8($) { |
134 | sub VGWhoIs::Utils::enforce_utf8($) { |
117 | my $str = shift; |
135 | my $str = shift; |
118 | 136 | ||
119 | if (VGWhoIs::Utils::is_uc($str)) { |
137 | if (VGWhoIs::Utils::is_uc($str)) { |
120 | $str =~ s/^\x{FEFF}//; |
138 | $str =~ s/^\x{FEFF}//; |
121 | utf8::encode($str); |
139 | utf8::encode($str); |
122 | } |
140 | } |
123 | elsif (!VGWhoIs::Utils::is_utf8($str)) { |
141 | elsif (!VGWhoIs::Utils::is_utf8($str)) { |
124 | $str =~ s/^\xEF\xBB\xBF//; |
142 | $str =~ s/^\xEF\xBB\xBF//; |
125 | utf8::encode($str); |
143 | utf8::encode($str); |
126 | } |
144 | } |
127 | 145 | ||
128 | return $str; |
146 | return $str; |
129 | } |
147 | } |
130 | 148 | ||
131 | # ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($url) |
149 | # ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($url) |
132 | sub VGWhoIs::Utils::splitProtocolHost($) { |
150 | sub VGWhoIs::Utils::splitProtocolHost($) { |
133 | my $url = shift; |
151 | my $url = shift; |
134 | 152 | ||
135 | my ($protocol, $hostname) = $url =~ /(https{0,1}):\/\/([^\/]+)/; |
153 | my ($protocol, $hostname) = $url =~ /(https{0,1}):\/\/([^\/]+)/; |
136 | 154 | ||
137 | return ($protocol, $hostname); |
155 | return ($protocol, $hostname); |
138 | } |
156 | } |
139 | 157 | ||
140 | 1; |
158 | 1; |
141 | 159 | ||
142 | 160 |