Subversion Repositories vgwhois

Rev

Rev 17 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 17 Rev 92
1
#
1
#
2
#  VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
2
#  VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
3
#  Main program
3
#  Main program
4
#
4
#
5
#  (c) 2010-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
5
#  (c) 2010-2022 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
6
#  based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de>
6
#  based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de>
7
#  Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
7
#  Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
8
#
8
#
9
#  License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
9
#  License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
10
#
10
#
11
 
11
 
12
package VGWhoIs::Utils;
12
package VGWhoIs::Utils;
13
 
13
 
14
use warnings;
14
use warnings;
15
use strict;
15
use strict;
16
 
16
 
17
use Encode;
17
use Encode;
18
 
18
 
19
# $result = VGWhoIs::Utils::lynxsource($url)
19
# $result = VGWhoIs::Utils::lynxsource($url)
20
sub VGWhoIs::Utils::lynxsource {
20
sub VGWhoIs::Utils::lynxsource {
21
        my ($url) = @_;
21
        my ($url) = @_;
22
        $url = quotemeta($url);
22
        $url = quotemeta($url);
23
# LYNX sometimes hangs in combination with TOR
23
# LYNX sometimes hangs in combination with TOR
24
#       return qx{lynx -connect_timeout=10 -source $url};
24
#       return qx{lynx -connect_timeout=10 -source $url};
25
        return qx{curl --user-agent "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0" --silent --max-time 10 $url};
25
        return qx{curl --insecure --user-agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" --silent --max-time 10 $url};
-
 
26
}
-
 
27
 
-
 
28
sub VGWhoIs::Utils::lynxrender {
-
 
29
        my ($url) = @_;
-
 
30
        $url = quotemeta($url);
-
 
31
 
-
 
32
        use File::Basename;
-
 
33
        my $script_dir = undef;
-
 
34
        if(-l __FILE__) {
-
 
35
                $script_dir = dirname(readlink(__FILE__));
-
 
36
        } else {
-
 
37
                $script_dir = dirname(__FILE__);
-
 
38
        }
-
 
39
        $script_dir = quotemeta($script_dir);
-
 
40
 
-
 
41
        my $result = qx{lynx -cfg $script_dir/../lynx.cfg -dump -connect_timeout=10 $url 2>&1};
-
 
42
        $result .= "FAILED with exit code $?\n\n" if $?;
-
 
43
        return $result;
26
}
44
}
27
 
45
 
28
# $line = htmlpre($line);
46
# $line = htmlpre($line);
29
sub VGWhoIs::Utils::htmlpre {
47
sub VGWhoIs::Utils::htmlpre {
30
        my ($line) = @_;
48
        my ($line) = @_;
31
        $line =~ s|\n|<br>|g;
49
        $line =~ s|\n|<br>|g;
32
        $line =~ s| |&nbsp;|g;
50
        $line =~ s| |&nbsp;|g;
33
        return $line;
51
        return $line;
34
}
52
}
35
 
53
 
36
# $rendered = VGWhoIs::Utils::render_html($html);
54
# $rendered = VGWhoIs::Utils::render_html($html);
37
sub VGWhoIs::Utils::render_html {
55
sub VGWhoIs::Utils::render_html {
38
        my ($html) = @_;
56
        my ($html) = @_;
39
 
57
 
40
        return '' if !defined $html;
58
        return '' if !defined $html;
41
 
59
 
42
        $html =~ s|<!--.*?-->||gsi;
60
        $html =~ s|<!--.*?-->||gsi;
43
 
61
 
44
        $html =~ s|<pre>(.*?)</pre>|VGWhoIs::Utils::htmlpre($1)|gsei;
62
        $html =~ s|<pre>(.*?)</pre>|VGWhoIs::Utils::htmlpre($1)|gsei;
45
        $html =~ s|<textarea>(.*?)</textarea>|VGWhoIs::Utils::htmlpre($1)|gsei;
63
        $html =~ s|<textarea>(.*?)</textarea>|VGWhoIs::Utils::htmlpre($1)|gsei;
46
 
64
 
47
        #TODO: big problem here: if the output is "content-type: text/plain", then we must not call render_html!!!
65
        #TODO: big problem here: if the output is "content-type: text/plain", then we must not call render_html!!!
48
        $html =~ s|\n| |g;
66
        $html =~ s|\n| |g;
49
 
67
 
50
        $html =~ s|<p\s*/{0,1}\s*>|\n|gsi;
68
        $html =~ s|<p\s*/{0,1}\s*>|\n|gsi;
51
        $html =~ s|<p\s.*?>|\n|gsi;
69
        $html =~ s|<p\s.*?>|\n|gsi;
52
 
70
 
53
        $html =~ s|<tr\s*/{0,1}\s*>|\n|gsi;
71
        $html =~ s|<tr\s*/{0,1}\s*>|\n|gsi;
54
        $html =~ s|<tr\s.*?>|\n|gsi;
72
        $html =~ s|<tr\s.*?>|\n|gsi;
55
        $html =~ s|<td>| |gsi;
73
        $html =~ s|<td>| |gsi;
56
 
74
 
57
        $html =~ s|<script.*?</script>||gsi;
75
        $html =~ s|<script.*?</script>||gsi;
58
        $html =~ s|<style.*?</style>||gsi;
76
        $html =~ s|<style.*?</style>||gsi;
59
 
77
 
60
        $html =~ s| \t| |gsi;
78
        $html =~ s| \t| |gsi;
61
        $html =~ s|\s*\n\s*\n|\n|gsi;
79
        $html =~ s|\s*\n\s*\n|\n|gsi;
62
        $html =~ s|^\s*||gm;
80
        $html =~ s|^\s*||gm;
63
 
81
 
64
        $html =~ s|&nbsp;| |gsi;
82
        $html =~ s|&nbsp;| |gsi;
65
        $html =~ s|<br\s*/{0,1}\s*>|\n|gsi;
83
        $html =~ s|<br\s*/{0,1}\s*>|\n|gsi;
66
        $html =~ s|<br\s.*?>|\n|gsi;
84
        $html =~ s|<br\s.*?>|\n|gsi;
67
        $html =~ s|\<.*?\>||gsi;
85
        $html =~ s|\<.*?\>||gsi;
68
 
86
 
69
        return($html);
87
        return($html);
70
}
88
}
71
 
89
 
72
 
90
 
73
sub VGWhoIs::Utils::trim($) {
91
sub VGWhoIs::Utils::trim($) {
74
        # Source: http://www.somacon.com/p114.php
92
        # Source: http://www.somacon.com/p114.php
75
        my $string = shift;
93
        my $string = shift;
76
        $string =~ s/^\s+//;
94
        $string =~ s/^\s+//;
77
        $string =~ s/\s+$//;
95
        $string =~ s/\s+$//;
78
        return $string; # TODO: ein push faende ich besser
96
        return $string; # TODO: ein push faende ich besser
79
}
97
}
80
 
98
 
81
sub VGWhoIs::Utils::is_uc($) {
99
sub VGWhoIs::Utils::is_uc($) {
82
        my $str = shift;
100
        my $str = shift;
83
 
101
 
84
        my $char;
102
        my $char;
85
        foreach $char (split //, $str) {
103
        foreach $char (split //, $str) {
86
                return 1 if (ord($char) > 255);
104
                return 1 if (ord($char) > 255);
87
        }
105
        }
88
 
106
 
89
        return 0;
107
        return 0;
90
}
108
}
91
 
109
 
92
sub VGWhoIs::Utils::is_ascii($) {
110
sub VGWhoIs::Utils::is_ascii($) {
93
        my $str = shift;
111
        my $str = shift;
94
 
112
 
95
        my $char;
113
        my $char;
96
        foreach $char (split //, $str) {
114
        foreach $char (split //, $str) {
97
                return 0 if (ord($char) >= 128);
115
                return 0 if (ord($char) >= 128);
98
        }
116
        }
99
 
117
 
100
        return 1;
118
        return 1;
101
}
119
}
102
 
120
 
103
sub VGWhoIs::Utils::is_utf8($) {
121
sub VGWhoIs::Utils::is_utf8($) {
104
        my $str = shift;
122
        my $str = shift;
105
 
123
 
106
        my $s = eval { Encode::decode('utf8', $str, Encode::FB_CROAK) };
124
        my $s = eval { Encode::decode('utf8', $str, Encode::FB_CROAK) };
107
        return defined($s);
125
        return defined($s);
108
 
126
 
109
        # This procedure does not work :-( VGWhoIs::Utils::is_utf8 and valid are true even if they should not...
127
        # This procedure does not work :-( VGWhoIs::Utils::is_utf8 and valid are true even if they should not...
110
        # return 1 if utf8::VGWhoIs::Utils::is_utf8($str);
128
        # return 1 if utf8::VGWhoIs::Utils::is_utf8($str);
111
        # return 0 if VGWhoIs::Utils::is_uc($str);
129
        # return 0 if VGWhoIs::Utils::is_uc($str);
112
        # return 1 if (Encode::Detect::Detector::detect($str) eq "UTF-8");
130
        # return 1 if (Encode::Detect::Detector::detect($str) eq "UTF-8");
113
        # return utf8::valid($str);
131
        # return utf8::valid($str);
114
}
132
}
115
 
133
 
116
sub VGWhoIs::Utils::enforce_utf8($) {
134
sub VGWhoIs::Utils::enforce_utf8($) {
117
        my $str = shift;
135
        my $str = shift;
118
 
136
 
119
        if (VGWhoIs::Utils::is_uc($str)) {
137
        if (VGWhoIs::Utils::is_uc($str)) {
120
                $str =~ s/^\x{FEFF}//;
138
                $str =~ s/^\x{FEFF}//;
121
                utf8::encode($str);
139
                utf8::encode($str);
122
        }
140
        }
123
        elsif (!VGWhoIs::Utils::is_utf8($str)) {
141
        elsif (!VGWhoIs::Utils::is_utf8($str)) {
124
                $str =~ s/^\xEF\xBB\xBF//;
142
                $str =~ s/^\xEF\xBB\xBF//;
125
                utf8::encode($str);
143
                utf8::encode($str);
126
        }
144
        }
127
 
145
 
128
        return $str;
146
        return $str;
129
}
147
}
130
 
148
 
131
# ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($url)
149
# ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($url)
132
sub VGWhoIs::Utils::splitProtocolHost($) {
150
sub VGWhoIs::Utils::splitProtocolHost($) {
133
        my $url = shift;
151
        my $url = shift;
134
 
152
 
135
        my ($protocol, $hostname) = $url =~ /(https{0,1}):\/\/([^\/]+)/;
153
        my ($protocol, $hostname) = $url =~ /(https{0,1}):\/\/([^\/]+)/;
136
 
154
 
137
        return ($protocol, $hostname);
155
        return ($protocol, $hostname);
138
}
156
}
139
 
157
 
140
1;
158
1;
141
 
159
 
142
 
160