Subversion Repositories vgwhois

Rev

Rev 17 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
12 daniel-mar 1
#
2
#  VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
3
#  Main program
4
#
92 daniel-mar 5
#  (c) 2010-2022 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
12 daniel-mar 6
#  based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de>
7
#  Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
8
#
9
#  License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
10
#
11
 
12
package VGWhoIs::Utils;
13
 
14
use warnings;
15
use strict;
16
 
17
use Encode;
18
 
19
# $result = VGWhoIs::Utils::lynxsource($url)
20
sub VGWhoIs::Utils::lynxsource {
21
        my ($url) = @_;
22
        $url = quotemeta($url);
23
# LYNX sometimes hangs in combination with TOR
24
#       return qx{lynx -connect_timeout=10 -source $url};
92 daniel-mar 25
        return qx{curl --insecure --user-agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" --silent --max-time 10 $url};
12 daniel-mar 26
}
27
 
92 daniel-mar 28
sub VGWhoIs::Utils::lynxrender {
29
        my ($url) = @_;
30
        $url = quotemeta($url);
31
 
32
        use File::Basename;
33
        my $script_dir = undef;
34
        if(-l __FILE__) {
35
                $script_dir = dirname(readlink(__FILE__));
36
        } else {
37
                $script_dir = dirname(__FILE__);
38
        }
39
        $script_dir = quotemeta($script_dir);
40
 
41
        my $result = qx{lynx -cfg $script_dir/../lynx.cfg -dump -connect_timeout=10 $url 2>&1};
42
        $result .= "FAILED with exit code $?\n\n" if $?;
43
        return $result;
44
}
45
 
12 daniel-mar 46
# $line = htmlpre($line);
47
sub VGWhoIs::Utils::htmlpre {
48
        my ($line) = @_;
49
        $line =~ s|\n|<br>|g;
50
        $line =~ s| |&nbsp;|g;
51
        return $line;
52
}
53
 
54
# $rendered = VGWhoIs::Utils::render_html($html);
55
sub VGWhoIs::Utils::render_html {
56
        my ($html) = @_;
57
 
58
        return '' if !defined $html;
59
 
60
        $html =~ s|<!--.*?-->||gsi;
61
 
62
        $html =~ s|<pre>(.*?)</pre>|VGWhoIs::Utils::htmlpre($1)|gsei;
63
        $html =~ s|<textarea>(.*?)</textarea>|VGWhoIs::Utils::htmlpre($1)|gsei;
64
 
17 daniel-mar 65
        #TODO: big problem here: if the output is "content-type: text/plain", then we must not call render_html!!!
12 daniel-mar 66
        $html =~ s|\n| |g;
67
 
68
        $html =~ s|<p\s*/{0,1}\s*>|\n|gsi;
69
        $html =~ s|<p\s.*?>|\n|gsi;
70
 
71
        $html =~ s|<tr\s*/{0,1}\s*>|\n|gsi;
72
        $html =~ s|<tr\s.*?>|\n|gsi;
73
        $html =~ s|<td>| |gsi;
74
 
75
        $html =~ s|<script.*?</script>||gsi;
76
        $html =~ s|<style.*?</style>||gsi;
77
 
78
        $html =~ s| \t| |gsi;
79
        $html =~ s|\s*\n\s*\n|\n|gsi;
80
        $html =~ s|^\s*||gm;
81
 
82
        $html =~ s|&nbsp;| |gsi;
83
        $html =~ s|<br\s*/{0,1}\s*>|\n|gsi;
84
        $html =~ s|<br\s.*?>|\n|gsi;
85
        $html =~ s|\<.*?\>||gsi;
86
 
87
        return($html);
88
}
89
 
90
 
91
sub VGWhoIs::Utils::trim($) {
92
        # Source: http://www.somacon.com/p114.php
93
        my $string = shift;
94
        $string =~ s/^\s+//;
95
        $string =~ s/\s+$//;
96
        return $string; # TODO: ein push faende ich besser
97
}
98
 
99
sub VGWhoIs::Utils::is_uc($) {
100
        my $str = shift;
101
 
102
        my $char;
103
        foreach $char (split //, $str) {
104
                return 1 if (ord($char) > 255);
105
        }
106
 
107
        return 0;
108
}
109
 
110
sub VGWhoIs::Utils::is_ascii($) {
111
        my $str = shift;
112
 
113
        my $char;
114
        foreach $char (split //, $str) {
115
                return 0 if (ord($char) >= 128);
116
        }
117
 
118
        return 1;
119
}
120
 
121
sub VGWhoIs::Utils::is_utf8($) {
122
        my $str = shift;
123
 
124
        my $s = eval { Encode::decode('utf8', $str, Encode::FB_CROAK) };
125
        return defined($s);
126
 
127
        # This procedure does not work :-( VGWhoIs::Utils::is_utf8 and valid are true even if they should not...
128
        # return 1 if utf8::VGWhoIs::Utils::is_utf8($str);
129
        # return 0 if VGWhoIs::Utils::is_uc($str);
130
        # return 1 if (Encode::Detect::Detector::detect($str) eq "UTF-8");
131
        # return utf8::valid($str);
132
}
133
 
134
sub VGWhoIs::Utils::enforce_utf8($) {
135
        my $str = shift;
136
 
137
        if (VGWhoIs::Utils::is_uc($str)) {
138
                $str =~ s/^\x{FEFF}//;
139
                utf8::encode($str);
140
        }
141
        elsif (!VGWhoIs::Utils::is_utf8($str)) {
142
                $str =~ s/^\xEF\xBB\xBF//;
143
                utf8::encode($str);
144
        }
145
 
146
        return $str;
147
}
148
 
149
# ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($url)
150
sub VGWhoIs::Utils::splitProtocolHost($) {
151
        my $url = shift;
152
 
153
        my ($protocol, $hostname) = $url =~ /(https{0,1}):\/\/([^\/]+)/;
154
 
155
        return ($protocol, $hostname);
156
}
157
 
158
1;
159