Subversion Repositories vgwhois

Rev

Rev 17 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. #
  2. #  VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
  3. #  Main program
  4. #
  5. #  (c) 2010-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
  6. #  based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de>
  7. #  Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
  8. #
  9. #  License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
  10. #
  11.  
  12. package VGWhoIs::Utils;
  13.  
  14. use warnings;
  15. use strict;
  16.  
  17. use Encode;
  18.  
  19. # $result = VGWhoIs::Utils::lynxsource($url)
  20. sub VGWhoIs::Utils::lynxsource {
  21.         my ($url) = @_;
  22.         $url = quotemeta($url);
  23. # LYNX sometimes hangs in combination with TOR
  24. #       return qx{lynx -connect_timeout=10 -source $url};
  25.         return qx{curl --user-agent "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0" --silent --max-time 10 $url};
  26. }
  27.  
  28. # $line = htmlpre($line);
  29. sub VGWhoIs::Utils::htmlpre {
  30.         my ($line) = @_;
  31.         $line =~ s|\n|<br>|g;
  32.         $line =~ s| |&nbsp;|g;
  33.         return $line;
  34. }
  35.  
  36. # $rendered = VGWhoIs::Utils::render_html($html);
  37. sub VGWhoIs::Utils::render_html {
  38.         my ($html) = @_;
  39.  
  40.         return '' if !defined $html;
  41.  
  42.         $html =~ s|<!--.*?-->||gsi;
  43.  
  44.         $html =~ s|<pre>(.*?)</pre>|VGWhoIs::Utils::htmlpre($1)|gsei;
  45.         $html =~ s|<textarea>(.*?)</textarea>|VGWhoIs::Utils::htmlpre($1)|gsei;
  46.  
  47.         $html =~ s|\n| |g;
  48.  
  49.         $html =~ s|<p\s*/{0,1}\s*>|\n|gsi;
  50.         $html =~ s|<p\s.*?>|\n|gsi;
  51.  
  52.         $html =~ s|<tr\s*/{0,1}\s*>|\n|gsi;
  53.         $html =~ s|<tr\s.*?>|\n|gsi;
  54.         $html =~ s|<td>| |gsi;
  55.  
  56.         $html =~ s|<script.*?</script>||gsi;
  57.         $html =~ s|<style.*?</style>||gsi;
  58.  
  59.         $html =~ s| \t| |gsi;
  60.         $html =~ s|\s*\n\s*\n|\n|gsi;
  61.         $html =~ s|^\s*||gm;
  62.  
  63.         $html =~ s|&nbsp;| |gsi;
  64.         $html =~ s|<br\s*/{0,1}\s*>|\n|gsi;
  65.         $html =~ s|<br\s.*?>|\n|gsi;
  66.         $html =~ s|\<.*?\>||gsi;
  67.  
  68.         return($html);
  69. }
  70.  
  71.  
  72. sub VGWhoIs::Utils::trim($) {
  73.         # Source: http://www.somacon.com/p114.php
  74.         my $string = shift;
  75.         $string =~ s/^\s+//;
  76.         $string =~ s/\s+$//;
  77.         return $string; # TODO: ein push faende ich besser
  78. }
  79.  
  80. sub VGWhoIs::Utils::is_uc($) {
  81.         my $str = shift;
  82.  
  83.         my $char;
  84.         foreach $char (split //, $str) {
  85.                 return 1 if (ord($char) > 255);
  86.         }
  87.  
  88.         return 0;
  89. }
  90.  
  91. sub VGWhoIs::Utils::is_ascii($) {
  92.         my $str = shift;
  93.  
  94.         my $char;
  95.         foreach $char (split //, $str) {
  96.                 return 0 if (ord($char) >= 128);
  97.         }
  98.  
  99.         return 1;
  100. }
  101.  
  102. sub VGWhoIs::Utils::is_utf8($) {
  103.         my $str = shift;
  104.  
  105.         my $s = eval { Encode::decode('utf8', $str, Encode::FB_CROAK) };
  106.         return defined($s);
  107.  
  108.         # This procedure does not work :-( VGWhoIs::Utils::is_utf8 and valid are true even if they should not...
  109.         # return 1 if utf8::VGWhoIs::Utils::is_utf8($str);
  110.         # return 0 if VGWhoIs::Utils::is_uc($str);
  111.         # return 1 if (Encode::Detect::Detector::detect($str) eq "UTF-8");
  112.         # return utf8::valid($str);
  113. }
  114.  
  115. sub VGWhoIs::Utils::enforce_utf8($) {
  116.         my $str = shift;
  117.  
  118.         if (VGWhoIs::Utils::is_uc($str)) {
  119.                 $str =~ s/^\x{FEFF}//;
  120.                 utf8::encode($str);
  121.         }
  122.         elsif (!VGWhoIs::Utils::is_utf8($str)) {
  123.                 $str =~ s/^\xEF\xBB\xBF//;
  124.                 utf8::encode($str);
  125.         }
  126.  
  127.         return $str;
  128. }
  129.  
  130. # ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($url)
  131. sub VGWhoIs::Utils::splitProtocolHost($) {
  132.         my $url = shift;
  133.  
  134.         my ($protocol, $hostname) = $url =~ /(https{0,1}):\/\/([^\/]+)/;
  135.  
  136.         return ($protocol, $hostname);
  137. }
  138.  
  139. 1;
  140.  
  141.