Subversion Repositories vgwhois

Rev

Rev 17 | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. #
  2. #  VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
  3. #  Main program
  4. #
  5. #  (c) 2010-2022 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
  6. #  based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de>
  7. #  Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
  8. #
  9. #  License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
  10. #
  11.  
  12. package VGWhoIs::Utils;
  13.  
  14. use warnings;
  15. use strict;
  16.  
  17. use Encode;
  18.  
  19. # $result = VGWhoIs::Utils::lynxsource($url)
  20. sub VGWhoIs::Utils::lynxsource {
  21.         my ($url) = @_;
  22.         $url = quotemeta($url);
  23. # LYNX sometimes hangs in combination with TOR
  24. #       return qx{lynx -connect_timeout=10 -source $url};
  25.         return qx{curl --insecure --user-agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" --silent --max-time 10 $url};
  26. }
  27.  
  28. sub VGWhoIs::Utils::lynxrender {
  29.         my ($url) = @_;
  30.         $url = quotemeta($url);
  31.  
  32.         use File::Basename;
  33.         my $script_dir = undef;
  34.         if(-l __FILE__) {
  35.                 $script_dir = dirname(readlink(__FILE__));
  36.         } else {
  37.                 $script_dir = dirname(__FILE__);
  38.         }
  39.         $script_dir = quotemeta($script_dir);
  40.  
  41.         my $result = qx{lynx -cfg $script_dir/../lynx.cfg -dump -connect_timeout=10 $url 2>&1};
  42.         $result .= "FAILED with exit code $?\n\n" if $?;
  43.         return $result;
  44. }
  45.  
  46. # $line = htmlpre($line);
  47. sub VGWhoIs::Utils::htmlpre {
  48.         my ($line) = @_;
  49.         $line =~ s|\n|<br>|g;
  50.         $line =~ s| |&nbsp;|g;
  51.         return $line;
  52. }
  53.  
  54. # $rendered = VGWhoIs::Utils::render_html($html);
  55. sub VGWhoIs::Utils::render_html {
  56.         my ($html) = @_;
  57.  
  58.         return '' if !defined $html;
  59.  
  60.         $html =~ s|<!--.*?-->||gsi;
  61.  
  62.         $html =~ s|<pre>(.*?)</pre>|VGWhoIs::Utils::htmlpre($1)|gsei;
  63.         $html =~ s|<textarea>(.*?)</textarea>|VGWhoIs::Utils::htmlpre($1)|gsei;
  64.  
  65.         #TODO: big problem here: if the output is "content-type: text/plain", then we must not call render_html!!!
  66.         $html =~ s|\n| |g;
  67.  
  68.         $html =~ s|<p\s*/{0,1}\s*>|\n|gsi;
  69.         $html =~ s|<p\s.*?>|\n|gsi;
  70.  
  71.         $html =~ s|<tr\s*/{0,1}\s*>|\n|gsi;
  72.         $html =~ s|<tr\s.*?>|\n|gsi;
  73.         $html =~ s|<td>| |gsi;
  74.  
  75.         $html =~ s|<script.*?</script>||gsi;
  76.         $html =~ s|<style.*?</style>||gsi;
  77.  
  78.         $html =~ s| \t| |gsi;
  79.         $html =~ s|\s*\n\s*\n|\n|gsi;
  80.         $html =~ s|^\s*||gm;
  81.  
  82.         $html =~ s|&nbsp;| |gsi;
  83.         $html =~ s|<br\s*/{0,1}\s*>|\n|gsi;
  84.         $html =~ s|<br\s.*?>|\n|gsi;
  85.         $html =~ s|\<.*?\>||gsi;
  86.  
  87.         return($html);
  88. }
  89.  
  90.  
  91. sub VGWhoIs::Utils::trim($) {
  92.         # Source: http://www.somacon.com/p114.php
  93.         my $string = shift;
  94.         $string =~ s/^\s+//;
  95.         $string =~ s/\s+$//;
  96.         return $string; # TODO: ein push faende ich besser
  97. }
  98.  
  99. sub VGWhoIs::Utils::is_uc($) {
  100.         my $str = shift;
  101.  
  102.         my $char;
  103.         foreach $char (split //, $str) {
  104.                 return 1 if (ord($char) > 255);
  105.         }
  106.  
  107.         return 0;
  108. }
  109.  
  110. sub VGWhoIs::Utils::is_ascii($) {
  111.         my $str = shift;
  112.  
  113.         my $char;
  114.         foreach $char (split //, $str) {
  115.                 return 0 if (ord($char) >= 128);
  116.         }
  117.  
  118.         return 1;
  119. }
  120.  
  121. sub VGWhoIs::Utils::is_utf8($) {
  122.         my $str = shift;
  123.  
  124.         my $s = eval { Encode::decode('utf8', $str, Encode::FB_CROAK) };
  125.         return defined($s);
  126.  
  127.         # This procedure does not work :-( VGWhoIs::Utils::is_utf8 and valid are true even if they should not...
  128.         # return 1 if utf8::VGWhoIs::Utils::is_utf8($str);
  129.         # return 0 if VGWhoIs::Utils::is_uc($str);
  130.         # return 1 if (Encode::Detect::Detector::detect($str) eq "UTF-8");
  131.         # return utf8::valid($str);
  132. }
  133.  
  134. sub VGWhoIs::Utils::enforce_utf8($) {
  135.         my $str = shift;
  136.  
  137.         if (VGWhoIs::Utils::is_uc($str)) {
  138.                 $str =~ s/^\x{FEFF}//;
  139.                 utf8::encode($str);
  140.         }
  141.         elsif (!VGWhoIs::Utils::is_utf8($str)) {
  142.                 $str =~ s/^\xEF\xBB\xBF//;
  143.                 utf8::encode($str);
  144.         }
  145.  
  146.         return $str;
  147. }
  148.  
  149. # ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($url)
  150. sub VGWhoIs::Utils::splitProtocolHost($) {
  151.         my $url = shift;
  152.  
  153.         my ($protocol, $hostname) = $url =~ /(https{0,1}):\/\/([^\/]+)/;
  154.  
  155.         return ($protocol, $hostname);
  156. }
  157.  
  158. 1;
  159.  
  160.