Rev 17 | Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
12 | daniel-mar | 1 | # |
2 | # VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois) |
||
3 | # Main program |
||
4 | # |
||
5 | # (c) 2010-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de> |
||
6 | # based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de> |
||
7 | # Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de> |
||
8 | # |
||
9 | # License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2) |
||
10 | # |
||
11 | |||
12 | package VGWhoIs::Utils; |
||
13 | |||
14 | use warnings; |
||
15 | use strict; |
||
16 | |||
17 | use Encode; |
||
18 | |||
19 | # $result = VGWhoIs::Utils::lynxsource($url) |
||
20 | sub VGWhoIs::Utils::lynxsource { |
||
21 | my ($url) = @_; |
||
22 | $url = quotemeta($url); |
||
23 | # LYNX sometimes hangs in combination with TOR |
||
24 | # return qx{lynx -connect_timeout=10 -source $url}; |
||
25 | return qx{curl --user-agent "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0" --silent --max-time 10 $url}; |
||
26 | } |
||
27 | |||
28 | # $line = htmlpre($line); |
||
29 | sub VGWhoIs::Utils::htmlpre { |
||
30 | my ($line) = @_; |
||
31 | $line =~ s|\n|<br>|g; |
||
32 | $line =~ s| | |g; |
||
33 | return $line; |
||
34 | } |
||
35 | |||
36 | # $rendered = VGWhoIs::Utils::render_html($html); |
||
37 | sub VGWhoIs::Utils::render_html { |
||
38 | my ($html) = @_; |
||
39 | |||
40 | return '' if !defined $html; |
||
41 | |||
42 | $html =~ s|<!--.*?-->||gsi; |
||
43 | |||
44 | $html =~ s|<pre>(.*?)</pre>|VGWhoIs::Utils::htmlpre($1)|gsei; |
||
45 | $html =~ s|<textarea>(.*?)</textarea>|VGWhoIs::Utils::htmlpre($1)|gsei; |
||
46 | |||
47 | $html =~ s|\n| |g; |
||
48 | |||
49 | $html =~ s|<p\s*/{0,1}\s*>|\n|gsi; |
||
50 | $html =~ s|<p\s.*?>|\n|gsi; |
||
51 | |||
52 | $html =~ s|<tr\s*/{0,1}\s*>|\n|gsi; |
||
53 | $html =~ s|<tr\s.*?>|\n|gsi; |
||
54 | $html =~ s|<td>| |gsi; |
||
55 | |||
56 | $html =~ s|<script.*?</script>||gsi; |
||
57 | $html =~ s|<style.*?</style>||gsi; |
||
58 | |||
59 | $html =~ s| \t| |gsi; |
||
60 | $html =~ s|\s*\n\s*\n|\n|gsi; |
||
61 | $html =~ s|^\s*||gm; |
||
62 | |||
63 | $html =~ s| | |gsi; |
||
64 | $html =~ s|<br\s*/{0,1}\s*>|\n|gsi; |
||
65 | $html =~ s|<br\s.*?>|\n|gsi; |
||
66 | $html =~ s|\<.*?\>||gsi; |
||
67 | |||
68 | return($html); |
||
69 | } |
||
70 | |||
71 | |||
72 | sub VGWhoIs::Utils::trim($) { |
||
73 | # Source: http://www.somacon.com/p114.php |
||
74 | my $string = shift; |
||
75 | $string =~ s/^\s+//; |
||
76 | $string =~ s/\s+$//; |
||
77 | return $string; # TODO: ein push faende ich besser |
||
78 | } |
||
79 | |||
80 | sub VGWhoIs::Utils::is_uc($) { |
||
81 | my $str = shift; |
||
82 | |||
83 | my $char; |
||
84 | foreach $char (split //, $str) { |
||
85 | return 1 if (ord($char) > 255); |
||
86 | } |
||
87 | |||
88 | return 0; |
||
89 | } |
||
90 | |||
91 | sub VGWhoIs::Utils::is_ascii($) { |
||
92 | my $str = shift; |
||
93 | |||
94 | my $char; |
||
95 | foreach $char (split //, $str) { |
||
96 | return 0 if (ord($char) >= 128); |
||
97 | } |
||
98 | |||
99 | return 1; |
||
100 | } |
||
101 | |||
102 | sub VGWhoIs::Utils::is_utf8($) { |
||
103 | my $str = shift; |
||
104 | |||
105 | my $s = eval { Encode::decode('utf8', $str, Encode::FB_CROAK) }; |
||
106 | return defined($s); |
||
107 | |||
108 | # This procedure does not work :-( VGWhoIs::Utils::is_utf8 and valid are true even if they should not... |
||
109 | # return 1 if utf8::VGWhoIs::Utils::is_utf8($str); |
||
110 | # return 0 if VGWhoIs::Utils::is_uc($str); |
||
111 | # return 1 if (Encode::Detect::Detector::detect($str) eq "UTF-8"); |
||
112 | # return utf8::valid($str); |
||
113 | } |
||
114 | |||
115 | sub VGWhoIs::Utils::enforce_utf8($) { |
||
116 | my $str = shift; |
||
117 | |||
118 | if (VGWhoIs::Utils::is_uc($str)) { |
||
119 | $str =~ s/^\x{FEFF}//; |
||
120 | utf8::encode($str); |
||
121 | } |
||
122 | elsif (!VGWhoIs::Utils::is_utf8($str)) { |
||
123 | $str =~ s/^\xEF\xBB\xBF//; |
||
124 | utf8::encode($str); |
||
125 | } |
||
126 | |||
127 | return $str; |
||
128 | } |
||
129 | |||
130 | # ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($url) |
||
131 | sub VGWhoIs::Utils::splitProtocolHost($) { |
||
132 | my $url = shift; |
||
133 | |||
134 | my ($protocol, $hostname) = $url =~ /(https{0,1}):\/\/([^\/]+)/; |
||
135 | |||
136 | return ($protocol, $hostname); |
||
137 | } |
||
138 | |||
139 | 1; |
||
140 |