Subversion Repositories vgwhois

Rev

Rev 17 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
12 daniel-mar 1
#
2
#  VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
3
#  Main program
4
#
5
#  (c) 2010-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
6
#  based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de>
7
#  Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
8
#
9
#  License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
10
#
11
 
12
package VGWhoIs::Utils;
13
 
14
use warnings;
15
use strict;
16
 
17
use Encode;
18
 
19
# $result = VGWhoIs::Utils::lynxsource($url)
20
sub VGWhoIs::Utils::lynxsource {
21
        my ($url) = @_;
22
        $url = quotemeta($url);
23
# LYNX sometimes hangs in combination with TOR
24
#       return qx{lynx -connect_timeout=10 -source $url};
25
        return qx{curl --user-agent "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0" --silent --max-time 10 $url};
26
}
27
 
28
# $line = htmlpre($line);
29
sub VGWhoIs::Utils::htmlpre {
30
        my ($line) = @_;
31
        $line =~ s|\n|<br>|g;
32
        $line =~ s| |&nbsp;|g;
33
        return $line;
34
}
35
 
36
# $rendered = VGWhoIs::Utils::render_html($html);
37
sub VGWhoIs::Utils::render_html {
38
        my ($html) = @_;
39
 
40
        return '' if !defined $html;
41
 
42
        $html =~ s|<!--.*?-->||gsi;
43
 
44
        $html =~ s|<pre>(.*?)</pre>|VGWhoIs::Utils::htmlpre($1)|gsei;
45
        $html =~ s|<textarea>(.*?)</textarea>|VGWhoIs::Utils::htmlpre($1)|gsei;
46
 
47
        $html =~ s|\n| |g;
48
 
49
        $html =~ s|<p\s*/{0,1}\s*>|\n|gsi;
50
        $html =~ s|<p\s.*?>|\n|gsi;
51
 
52
        $html =~ s|<tr\s*/{0,1}\s*>|\n|gsi;
53
        $html =~ s|<tr\s.*?>|\n|gsi;
54
        $html =~ s|<td>| |gsi;
55
 
56
        $html =~ s|<script.*?</script>||gsi;
57
        $html =~ s|<style.*?</style>||gsi;
58
 
59
        $html =~ s| \t| |gsi;
60
        $html =~ s|\s*\n\s*\n|\n|gsi;
61
        $html =~ s|^\s*||gm;
62
 
63
        $html =~ s|&nbsp;| |gsi;
64
        $html =~ s|<br\s*/{0,1}\s*>|\n|gsi;
65
        $html =~ s|<br\s.*?>|\n|gsi;
66
        $html =~ s|\<.*?\>||gsi;
67
 
68
        return($html);
69
}
70
 
71
 
72
sub VGWhoIs::Utils::trim($) {
73
        # Source: http://www.somacon.com/p114.php
74
        my $string = shift;
75
        $string =~ s/^\s+//;
76
        $string =~ s/\s+$//;
77
        return $string; # TODO: ein push faende ich besser
78
}
79
 
80
sub VGWhoIs::Utils::is_uc($) {
81
        my $str = shift;
82
 
83
        my $char;
84
        foreach $char (split //, $str) {
85
                return 1 if (ord($char) > 255);
86
        }
87
 
88
        return 0;
89
}
90
 
91
sub VGWhoIs::Utils::is_ascii($) {
92
        my $str = shift;
93
 
94
        my $char;
95
        foreach $char (split //, $str) {
96
                return 0 if (ord($char) >= 128);
97
        }
98
 
99
        return 1;
100
}
101
 
102
sub VGWhoIs::Utils::is_utf8($) {
103
        my $str = shift;
104
 
105
        my $s = eval { Encode::decode('utf8', $str, Encode::FB_CROAK) };
106
        return defined($s);
107
 
108
        # This procedure does not work :-( VGWhoIs::Utils::is_utf8 and valid are true even if they should not...
109
        # return 1 if utf8::VGWhoIs::Utils::is_utf8($str);
110
        # return 0 if VGWhoIs::Utils::is_uc($str);
111
        # return 1 if (Encode::Detect::Detector::detect($str) eq "UTF-8");
112
        # return utf8::valid($str);
113
}
114
 
115
sub VGWhoIs::Utils::enforce_utf8($) {
116
        my $str = shift;
117
 
118
        if (VGWhoIs::Utils::is_uc($str)) {
119
                $str =~ s/^\x{FEFF}//;
120
                utf8::encode($str);
121
        }
122
        elsif (!VGWhoIs::Utils::is_utf8($str)) {
123
                $str =~ s/^\xEF\xBB\xBF//;
124
                utf8::encode($str);
125
        }
126
 
127
        return $str;
128
}
129
 
130
# ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($url)
131
sub VGWhoIs::Utils::splitProtocolHost($) {
132
        my $url = shift;
133
 
134
        my ($protocol, $hostname) = $url =~ /(https{0,1}):\/\/([^\/]+)/;
135
 
136
        return ($protocol, $hostname);
137
}
138
 
139
1;
140