Subversion Repositories vgwhois

Rev

Rev 12 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. #
  2. #  VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
  3. #  Main program
  4. #
  5. #  (c) 2010-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
  6. #  based on the code (c) 1998-2010 by Juliane Holzt <debian@kju.de>
  7. #  Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
  8. #
  9. #  License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
  10. #
  11.  
  12. package VGWhoIs::Utils;
  13.  
  14. use warnings;
  15. use strict;
  16.  
  17. use Encode;
  18.  
  19. # $result = VGWhoIs::Utils::lynxsource($url)
  20. sub VGWhoIs::Utils::lynxsource {
  21.         my ($url) = @_;
  22.         $url = quotemeta($url);
  23. # LYNX sometimes hangs in combination with TOR
  24. #       return qx{lynx -connect_timeout=10 -source $url};
  25.         return qx{curl --user-agent "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0" --silent --max-time 10 $url};
  26. }
  27.  
  28. # $line = htmlpre($line);
  29. sub VGWhoIs::Utils::htmlpre {
  30.         my ($line) = @_;
  31.         $line =~ s|\n|<br>|g;
  32.         $line =~ s| |&nbsp;|g;
  33.         return $line;
  34. }
  35.  
  36. # $rendered = VGWhoIs::Utils::render_html($html);
  37. sub VGWhoIs::Utils::render_html {
  38.         my ($html) = @_;
  39.  
  40.         return '' if !defined $html;
  41.  
  42.         $html =~ s|<!--.*?-->||gsi;
  43.  
  44.         $html =~ s|<pre>(.*?)</pre>|VGWhoIs::Utils::htmlpre($1)|gsei;
  45.         $html =~ s|<textarea>(.*?)</textarea>|VGWhoIs::Utils::htmlpre($1)|gsei;
  46.  
  47.         #TODO: big problem here: if the output is "content-type: text/plain", then we must not call render_html!!!
  48.         $html =~ s|\n| |g;
  49.  
  50.         $html =~ s|<p\s*/{0,1}\s*>|\n|gsi;
  51.         $html =~ s|<p\s.*?>|\n|gsi;
  52.  
  53.         $html =~ s|<tr\s*/{0,1}\s*>|\n|gsi;
  54.         $html =~ s|<tr\s.*?>|\n|gsi;
  55.         $html =~ s|<td>| |gsi;
  56.  
  57.         $html =~ s|<script.*?</script>||gsi;
  58.         $html =~ s|<style.*?</style>||gsi;
  59.  
  60.         $html =~ s| \t| |gsi;
  61.         $html =~ s|\s*\n\s*\n|\n|gsi;
  62.         $html =~ s|^\s*||gm;
  63.  
  64.         $html =~ s|&nbsp;| |gsi;
  65.         $html =~ s|<br\s*/{0,1}\s*>|\n|gsi;
  66.         $html =~ s|<br\s.*?>|\n|gsi;
  67.         $html =~ s|\<.*?\>||gsi;
  68.  
  69.         return($html);
  70. }
  71.  
  72.  
  73. sub VGWhoIs::Utils::trim($) {
  74.         # Source: http://www.somacon.com/p114.php
  75.         my $string = shift;
  76.         $string =~ s/^\s+//;
  77.         $string =~ s/\s+$//;
  78.         return $string; # TODO: ein push faende ich besser
  79. }
  80.  
  81. sub VGWhoIs::Utils::is_uc($) {
  82.         my $str = shift;
  83.  
  84.         my $char;
  85.         foreach $char (split //, $str) {
  86.                 return 1 if (ord($char) > 255);
  87.         }
  88.  
  89.         return 0;
  90. }
  91.  
  92. sub VGWhoIs::Utils::is_ascii($) {
  93.         my $str = shift;
  94.  
  95.         my $char;
  96.         foreach $char (split //, $str) {
  97.                 return 0 if (ord($char) >= 128);
  98.         }
  99.  
  100.         return 1;
  101. }
  102.  
  103. sub VGWhoIs::Utils::is_utf8($) {
  104.         my $str = shift;
  105.  
  106.         my $s = eval { Encode::decode('utf8', $str, Encode::FB_CROAK) };
  107.         return defined($s);
  108.  
  109.         # This procedure does not work :-( VGWhoIs::Utils::is_utf8 and valid are true even if they should not...
  110.         # return 1 if utf8::VGWhoIs::Utils::is_utf8($str);
  111.         # return 0 if VGWhoIs::Utils::is_uc($str);
  112.         # return 1 if (Encode::Detect::Detector::detect($str) eq "UTF-8");
  113.         # return utf8::valid($str);
  114. }
  115.  
  116. sub VGWhoIs::Utils::enforce_utf8($) {
  117.         my $str = shift;
  118.  
  119.         if (VGWhoIs::Utils::is_uc($str)) {
  120.                 $str =~ s/^\x{FEFF}//;
  121.                 utf8::encode($str);
  122.         }
  123.         elsif (!VGWhoIs::Utils::is_utf8($str)) {
  124.                 $str =~ s/^\xEF\xBB\xBF//;
  125.                 utf8::encode($str);
  126.         }
  127.  
  128.         return $str;
  129. }
  130.  
  131. # ($protocol, $hostname) = VGWhoIs::Utils::splitProtocolHost($url)
  132. sub VGWhoIs::Utils::splitProtocolHost($) {
  133.         my $url = shift;
  134.  
  135.         my ($protocol, $hostname) = $url =~ /(https{0,1}):\/\/([^\/]+)/;
  136.  
  137.         return ($protocol, $hostname);
  138. }
  139.  
  140. 1;
  141.  
  142.