Subversion Repositories vgwhois

Rev

Rev 12 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
12 daniel-mar 1
#
2
#  VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
3
#  Main program
4
#
5
#  (c) 2010-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
6
#  based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de>
7
#  Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
8
#
9
#  License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
10
#
11
 
12
package VGWhoIs::Utils;
13
 
14
use warnings;
15
use strict;
16
 
17
use Encode;
18
 
19
# $result = VGWhoIs::Utils::lynxsource($url)
20
sub VGWhoIs::Utils::lynxsource {
21
        my ($url) = @_;
22
        $url = quotemeta($url);
23
# LYNX sometimes hangs in combination with TOR
24
#       return qx{lynx -connect_timeout=10 -source $url};
25
        return qx{curl --user-agent "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0" --silent --max-time 10 $url};
26
}
27
 
28
# $line = htmlpre($line);
29
sub VGWhoIs::Utils::htmlpre {
30
        my ($line) = @_;
31
        $line =~ s|\n|<br>|g;
32
        $line =~ s| |&nbsp;|g;
33
        return $line;
34
}
35
 
36
# $rendered = VGWhoIs::Utils::render_html($html);
37
sub VGWhoIs::Utils::render_html {
38
        my ($html) = @_;
39
 
40
        return '' if !defined $html;
41
 
42
        $html =~ s|<!--.*?-->||gsi;
43
 
44
        $html =~ s|<pre>(.*?)</pre>|VGWhoIs::Utils::htmlpre($1)|gsei;
45
        $html =~ s|<textarea>(.*?)</textarea>|VGWhoIs::Utils::htmlpre($1)|gsei;
46
 
17 daniel-mar 47
        #TODO: big problem here: if the output is "content-type: text/plain", then we must not call render_html!!!
12 daniel-mar 48
        $html =~ s|\n| |g;
49
 
50
        $html =~ s|<p\s*/{0,1}\s*>|\n|gsi;
51
        $html =~ s|<p\s.*?>|\n|gsi;
52
 
53
        $html =~ s|<tr\s*/{0,1}\s*>|\n|gsi;
54
        $html =~ s|<tr\s.*?>|\n|gsi;
55
        $html =~ s|<td>| |gsi;
56
 
57
        $html =~ s|<script.*?</script>||gsi;
58
        $html =~ s|<style.*?</style>||gsi;
59
 
60
        $html =~ s| \t| |gsi;
61
        $html =~ s|\s*\n\s*\n|\n|gsi;
62
        $html =~ s|^\s*||gm;
63
 
64
        $html =~ s|&nbsp;| |gsi;
65
        $html =~ s|<br\s*/{0,1}\s*>|\n|gsi;
66
        $html =~ s|<br\s.*?>|\n|gsi;
67
        $html =~ s|\<.*?\>||gsi;
68
 
69
        return($html);
70
}
71
 
72
 
73
sub VGWhoIs::Utils::trim($) {
74
        # Source: http://www.somacon.com/p114.php
75
        my $string = shift;
76
        $string =~ s/^\s+//;
77
        $string =~ s/\s+$//;
78
        return $string; # TODO: ein push faende ich besser
79
}
80
 
81
sub VGWhoIs::Utils::is_uc($) {
82
        my $str = shift;
83
 
84
        my $char;
85
        foreach $char (split //, $str) {
86
                return 1 if (ord($char) > 255);
87
        }
88
 
89
        return 0;
90
}
91
 
92
sub VGWhoIs::Utils::is_ascii($) {
93
        my $str = shift;
94
 
95
        my $char;
96
        foreach $char (split //, $str) {
97
                return 0 if (ord($char) >= 128);
98
        }
99
 
100
        return 1;
101
}
102
 
103
sub VGWhoIs::Utils::is_utf8($) {
104
        my $str = shift;
105
 
106
        my $s = eval { Encode::decode('utf8', $str, Encode::FB_CROAK) };
107
        return defined($s);
108
 
109
        # This procedure does not work :-( VGWhoIs::Utils::is_utf8 and valid are true even if they should not...
110
        # return 1 if utf8::VGWhoIs::Utils::is_utf8($str);
111
        # return 0 if VGWhoIs::Utils::is_uc($str);
112
        # return 1 if (Encode::Detect::Detector::detect($str) eq "UTF-8");
113
        # return utf8::valid($str);
114
}
115
 
116
sub VGWhoIs::Utils::enforce_utf8($) {
117
        my $str = shift;
118
 
119
        if (VGWhoIs::Utils::is_uc($str)) {
120
                $str =~ s/^\x{FEFF}//;
121
                utf8::encode($str);
122
        }
123
        elsif (!VGWhoIs::Utils::is_utf8($str)) {
124
                $str =~ s/^\xEF\xBB\xBF//;
125
                utf8::encode($str);
126
        }
127
 
128
        return $str;
129
}
130
 
131
# ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($url)
132
sub VGWhoIs::Utils::splitProtocolHost($) {
133
        my $url = shift;
134
 
135
        my ($protocol, $hostname) = $url =~ /(https{0,1}):\/\/([^\/]+)/;
136
 
137
        return ($protocol, $hostname);
138
}
139
 
140
1;
141