Rev 5 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 5 | Rev 11 | ||
---|---|---|---|
1 | #!/usr/bin/php |
1 | #!/usr/bin/php |
2 | <?php |
2 | <?php |
3 | 3 | ||
4 | # |
4 | # |
5 | # VWhois (ViaThinkSoft WHOIS, a fork of generic Whois / gwhois) |
5 | # VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois) |
6 | # Subprogram: mq, gf and gp TLD whois |
6 | # Subprogram: mq, gf and gp TLD whois |
7 | # |
7 | # |
8 | # (c) 2012 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de> |
8 | # (c) 2012 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de> |
9 | # |
9 | # |
10 | # License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2) |
10 | # License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2) |
11 | # |
11 | # |
12 | 12 | ||
13 | # TODO: For many domains, the format is completely different! Do we have a good enough parser? |
13 | # TODO: For many domains, the format is completely different! Do we have a good enough parser? |
14 | 14 | ||
15 | require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.php'; |
15 | require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.php'; |
16 | 16 | ||
17 | $domain = isset($argv[1]) ? $argv[1] : ''; |
17 | $domain = isset($argv[1]) ? $argv[1] : ''; |
18 | 18 | ||
19 | $url = "https://www.dom-enic.com/whois.html"; |
19 | $url = "https://www.dom-enic.com/whois.html"; |
20 | 20 | ||
21 | $res = "% Parsing via regex from '$url'\n\n"; |
21 | $res = "% Parsing via regex from '$url'\n\n"; |
22 | 22 | ||
23 | // Split up "naked" domain name and TLD |
23 | // Split up "naked" domain name and TLD |
24 | if (!preg_match('@^(.*)(\.(mq|gf|gp))$@', $domain, $m)) { |
24 | if (!preg_match('@^(.*)(\.(mq|gf|gp))$@', $domain, $m)) { |
25 | echo "Error: Can only handle .mq, .gf and .gp TLDs.\n"; |
25 | echo "Error: Can only handle .mq, .gf and .gp TLDs.\n"; |
26 | exit(1); |
26 | exit(1); |
27 | } |
27 | } |
28 | $domain = $m[1]; |
28 | $domain = $m[1]; |
29 | $ext = $m[2]; |
29 | $ext = $m[2]; |
30 | 30 | ||
31 | $x = file_get_contents2($url, 'domain='.urlencode($domain).'&' . |
31 | $x = file_get_contents2($url, 'domain='.urlencode($domain).'&' . |
32 | 'extension='.urlencode($ext).'&' . |
32 | 'extension='.urlencode($ext).'&' . |
33 | 'Submit=Soumettre'); |
33 | 'Submit=Soumettre'); |
34 | 34 | ||
35 | if (strpos($x, /* $domain. */ ' est disponible.') !== false) { |
35 | if (strpos($x, /* $domain. */ ' est disponible.') !== false) { |
36 | define('BEGIN', '<div align="center" class="texte1"><p>'); |
36 | define('BEGIN', '<div align="center" class="texte1"><p>'); |
37 | define('END', '</p></div>'); |
37 | define('END', '</p></div>'); |
38 | } else { |
38 | } else { |
39 | // For some domains it is <p> and not <h1> |
39 | // For some domains it is <p> and not <h1> |
40 | $x = str_replace('<p class="titre1">WHOIS result</p>', '<h1 class="titre1">WHOIS result</h1>', $x); |
40 | $x = str_replace('<p class="titre1">WHOIS result</p>', '<h1 class="titre1">WHOIS result</h1>', $x); |
41 | define('BEGIN', '<h1 class="titre1">WHOIS result</h1>'); |
41 | define('BEGIN', '<h1 class="titre1">WHOIS result</h1>'); |
42 | // define('END', '</div>'); |
42 | // define('END', '</div>'); |
43 | // In comparison to </div>, this includes the disclaimer at the bottom: |
43 | // In comparison to </div>, this includes the disclaimer at the bottom: |
44 | define('END', '</td>'); |
44 | define('END', '</td>'); |
45 | } |
45 | } |
46 | 46 | ||
47 | preg_match_all('@'.preg_quote(BEGIN, '@').'(.*)'.preg_quote(END, '@').'@ismU', $x, $m); |
47 | preg_match_all('@'.preg_quote(BEGIN, '@').'(.*)'.preg_quote(END, '@').'@ismU', $x, $m); |
48 | 48 | ||
49 | if (!isset($m[1][0])) { |
49 | if (!isset($m[1][0])) { |
50 | echo "Error while parsing the web content. Could not find limitations.\n"; |
50 | echo "Error while parsing the web content. Could not find limitations.\n"; |
51 | exit(1); |
51 | exit(1); |
52 | } |
52 | } |
53 | $x = $m[1][0]; |
53 | $x = $m[1][0]; |
54 | 54 | ||
55 | $x = preg_replace('@<br />(?!\n)@', "\n", $x); |
55 | $x = preg_replace('@<br />(?!\n)@', "\n", $x); |
56 | $x = strip_tags($x); |
56 | $x = strip_tags($x); |
57 | 57 | ||
58 | $x = html_entity_decode($x); |
58 | $x = html_entity_decode($x); |
59 | 59 | ||
60 | // é -> É @ strtoupper() |
60 | // é -> É @ strtoupper() |
61 | /* |
61 | /* |
62 | $locals = array('es_ES@euro', 'es_ES', 'es'); |
62 | $locals = array('es_ES@euro', 'es_ES', 'es'); |
63 | reset($locals); |
63 | reset($locals); |
64 | while (list(, $locale) = each ($locals)) { |
64 | while (list(, $locale) = each ($locals)) { |
65 | if ( setlocale(LC_CTYPE, $locale) == $locale ) { |
65 | if ( setlocale(LC_CTYPE, $locale) == $locale ) { |
66 | break; // Exit when we were successful |
66 | break; // Exit when we were successful |
67 | } |
67 | } |
68 | } |
68 | } |
69 | */ |
69 | */ |
70 | 70 | ||
71 | $x = preg_replace("@\n\s+\n@", "\n\n", $x); |
71 | $x = preg_replace("@\n\s+\n@", "\n\n", $x); |
72 | while (strpos($x, "\n\n\n") !== false) $x = str_replace("\n\n\n", "\n\n", $x); |
72 | while (strpos($x, "\n\n\n") !== false) $x = str_replace("\n\n\n", "\n\n", $x); |
73 | 73 | ||
74 | $special_words = array( |
74 | $special_words = array( |
75 | 'Registrant:', |
75 | 'Registrant:', |
76 | 'Administrative Contact:', |
76 | 'Administrative Contact:', |
77 | 'Technical Contact:', |
77 | 'Technical Contact:', |
78 | 'Billing Contact:' |
78 | 'Billing Contact:' |
79 | ); |
79 | ); |
80 | 80 | ||
81 | foreach ($special_words as $s) { |
81 | foreach ($special_words as $s) { |
82 | $x = str_replace($s, "\n".uc_latin1($s)."\n", $x); |
82 | $x = str_replace($s, "\n".uc_latin1($s)."\n", $x); |
83 | } |
83 | } |
84 | 84 | ||
85 | $x = trim($x); |
85 | $x = trim($x); |
86 | 86 | ||
87 | echo $res.trim_each_line($x)."\n"; |
87 | echo $res.trim_each_line($x)."\n"; |