Rev 5 | Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
#!/usr/bin/php
<?php

#
#  generic Whois - Subprogram "mq_gf_gp"
#
#  (c) 2012 by Daniel Marschall, ViaThinkSoft <www.viathinksoft.de>
#
#  Distribution, usage etc. pp. regulated by the current version of GPL.
#
#
#  History:
#  2012-11-19  mar   Initial release
#
#  Purpose:
#  Takes a domain name as first command line argument, submits it to the
#  web form at $url and prints the plain text that is extracted from the
#  returned HTML page. Exits with status 1 and an "Error" line on stdout
#  if the TLD is not handled, the page could not be fetched, or the
#  expected HTML markers were not found.
#

# TODO: for many domains, the format is completely different! do we have a good enough parser?

// Presumably provides file_get_contents2(), uc_latin1() and trim_each_line(),
// which are used below but not defined in this file - verify against the include.
require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.php';

// A missing argument becomes '' and is rejected by the TLD check below.
$domain = isset($argv[1]) ? $argv[1] : '';

$url = "https://www.dom-enic.com/whois.html";

// Header line that is printed in front of the extracted text.
$res = "% Parsing via regex from '$url'\n\n";

// Split up "naked" domain name and TLD
if (!preg_match('@^(.*)(\.(mq|gf|gp))$@', $domain, $m)) {
    echo "Error: Can only handle .mq, .gf and .gp TLDs.\n";
    exit(1);
}
$domain = $m[1]; // everything in front of the TLD
$ext    = $m[2]; // the TLD including its leading dot, e.g. ".gp"

// The second argument is a form-encoded field list; presumably
// file_get_contents2() sends it as the request body - confirm in the helper.
$x = file_get_contents2($url, 'domain='.urlencode($domain).'&' .
                              'extension='.urlencode($ext).'&' .
                              'Submit=Soumettre');

// The failure value of file_get_contents2() is not visible from this file, so
// everything that is not a non-empty string is treated as a failed fetch.
// Without this check a failed request would be reported as a parsing error.
if (!is_string($x) || $x === '') {
    echo "Error: Could not retrieve '$url'.\n";
    exit(1);
}

// Choose the markers that enclose the interesting part of the page.
if (strpos($x, /* $domain. */ ' est disponible.') !== false) {
    // The page contains the " est disponible." notice: extract that paragraph.
    define('BEGIN', '<div align="center" class="texte1"><p>');
    define('END',   '</p></div>');
} else {
    // For some domains it is <p> and not <h1>
    $x = str_replace('<p class="titre1">WHOIS result</p>', '<h1 class="titre1">WHOIS result</h1>', $x);
    define('BEGIN', '<h1 class="titre1">WHOIS result</h1>');
    // define('END',   '</div>');
    // In comparison to </div>, this includes the disclaimer at the bottom:
    define('END',   '</td>');
}

// Flags: i = case-insensitive, s = dot matches newlines, m = multiline,
// U = ungreedy, so the capture stops at the first END marker after BEGIN.
preg_match_all('@'.preg_quote(BEGIN, '@').'(.*)'.preg_quote(END, '@').'@ismU', $x, $m);

if (!isset($m[1][0])) {
    echo "Error while parsing the web content. Could not find limitations.\n";
    exit(1);
}
// Only the first BEGIN...END section is used.
$x = $m[1][0];

// Turn "<br />" into a line break, unless a line break follows anyway
// (those remaining tags are removed by strip_tags() below).
$x = preg_replace('@<br />(?!\n)@', "\n", $x);
$x = strip_tags($x);

$x = html_entity_decode($x);

// Disabled: selecting a locale so that strtoupper() also converts
// accented letters (é -> É).
/*
$locals = array('es_ES@euro', 'es_ES', 'es');
reset($locals);
while (list(, $locale) = each ($locals)) {
    if ( setlocale(LC_CTYPE, $locale) == $locale ) {
        break; // Exit when we were successfull
    }
}
*/

// Lines that only contain whitespace become empty lines ...
$x = preg_replace("@\n\s+\n@", "\n\n", $x);
// ... and runs of empty lines are reduced to a single empty line.
while (strpos($x, "\n\n\n") !== false) $x = str_replace("\n\n\n", "\n\n", $x);

// Section headings which are put on a line of their own.
$special_words = array(
    'Registrant:',
    'Administrative Contact:',
    'Technical Contact:',
    'Billing Contact:'
);

foreach ($special_words as $s) {
    // uc_latin1() presumably upper-cases the heading - confirm in the helper.
    $x = str_replace($s, "\n".uc_latin1($s)."\n", $x);
}

$x = trim($x);

echo $res.trim_each_line($x)."\n";
50 | |||
51 | preg_match_all('@'.preg_quote(BEGIN, '@').'(.*)'.preg_quote(END, '@').'@ismU', $x, $m); |
||
52 | |||
53 | if (!isset($m[1][0])) { |
||
54 | echo "Error while parsing the web content. Could not find limitations.\n"; |
||
55 | exit(1); |
||
56 | } |
||
57 | $x = $m[1][0]; |
||
58 | |||
59 | $x = preg_replace('@<br />(?!\n)@', "\n", $x); |
||
60 | $x = strip_tags($x); |
||
61 | |||
62 | $x = html_entity_decode($x); |
||
63 | |||
64 | // é -> É @ strtoupper() |
||
65 | /* |
||
66 | $locals = array('es_ES@euro', 'es_ES', 'es'); |
||
67 | reset($locals); |
||
68 | while (list(, $locale) = each ($locals)) { |
||
69 | if ( setlocale(LC_CTYPE, $locale) == $locale ) { |
||
70 | break; // Exit when we were successfull |
||
71 | } |
||
72 | } |
||
73 | */ |
||
74 | |||
75 | $x = preg_replace("@\n\s+\n@", "\n\n", $x); |
||
76 | while (strpos($x, "\n\n\n") !== false) $x = str_replace("\n\n\n", "\n\n", $x); |
||
77 | |||
78 | $special_words = array( |
||
79 | 'Registrant:', |
||
80 | 'Administrative Contact:', |
||
81 | 'Technical Contact:', |
||
82 | 'Billing Contact:' |
||
83 | ); |
||
84 | |||
85 | foreach ($special_words as $s) { |
||
86 | $x = str_replace($s, "\n".uc_latin1($s)."\n", $x); |
||
87 | } |
||
88 | |||
89 | $x = trim($x); |
||
90 | |||
91 | echo $res.trim_each_line($x)."\n"; |