Rev 11 | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 11 | Rev 149 | ||
---|---|---|---|
Line 12... | Line 12... | ||
12 | 12 | ||
13 | # TODO: for many domains, the format is completely different! do we have a good enough parser? |
13 | # TODO: for many domains, the format is completely different! do we have a good enough parser? |
14 | 14 | ||
15 | require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.php'; |
15 | require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.php'; |
16 | 16 | ||
- | 17 | ini_set('default_charset', 'UTF-8'); |
|
- | 18 | ||
17 | $domain = isset($argv[1]) ? $argv[1] : ''; |
19 | $domain = isset($argv[1]) ? $argv[1] : ''; |
18 | 20 | ||
19 | $url = "https://www.dom-enic.com/whois.html"; |
21 | $url = "https://www.dom-enic.com/whois.html"; |
20 | 22 | ||
21 | $res = "% Parsing via regex from '$url'\n\n"; |
23 | $res = "% Parsing via regex from '$url'\n\n"; |
Line 45... | Line 47... | ||
45 | } |
47 | } |
46 | 48 | ||
47 | preg_match_all('@'.preg_quote(BEGIN, '@').'(.*)'.preg_quote(END, '@').'@ismU', $x, $m); |
49 | preg_match_all('@'.preg_quote(BEGIN, '@').'(.*)'.preg_quote(END, '@').'@ismU', $x, $m); |
48 | 50 | ||
49 | if (!isset($m[1][0])) { |
51 | if (!isset($m[1][0])) { |
50 | echo "Error while parsing the web content. Could not find limitations.\n"; |
52 | echo "Error while parsing the web content (RegEx failed).\n"; |
51 | exit(1); |
53 | exit(1); |
52 | } |
54 | } |
53 | $x = $m[1][0]; |
55 | $x = $m[1][0]; |
54 | 56 | ||
55 | $x = preg_replace('@<br />(?!\n)@', "\n", $x); |
57 | $x = preg_replace('@<br />(?!\n)@', "\n", $x); |
56 | $x = strip_tags($x); |
58 | $x = strip_tags($x); |
57 | 59 | ||
58 | $x = html_entity_decode($x); |
60 | $x = html_entity_decode($x); |
59 | 61 | ||
60 | // é -> É @ strtoupper() |
- | |
61 | /* |
- | |
62 | $locals = array('es_ES@euro', 'es_ES', 'es'); |
- | |
63 | reset($locals); |
- | |
64 | while (list(, $locale) = each ($locals)) { |
- | |
65 | if ( setlocale(LC_CTYPE, $locale) == $locale ) { |
- | |
66 | break; // Exit when we were successful |
- | |
67 | } |
- | |
68 | } |
- | |
69 | */ |
- | |
70 | - | ||
71 | $x = preg_replace("@\n\s+\n@", "\n\n", $x); |
62 | $x = preg_replace("@\n\s+\n@", "\n\n", $x); |
72 | while (strpos($x, "\n\n\n") !== false) $x = str_replace("\n\n\n", "\n\n", $x); |
63 | while (strpos($x, "\n\n\n") !== false) $x = str_replace("\n\n\n", "\n\n", $x); |
73 | 64 | ||
74 | $special_words = array( |
65 | $special_words = array( |
75 | 'Registrant:', |
66 | 'Registrant:', |
Line 77... | Line 68... | ||
77 | 'Technical Contact:', |
68 | 'Technical Contact:', |
78 | 'Billing Contact:' |
69 | 'Billing Contact:' |
79 | ); |
70 | ); |
80 | 71 | ||
81 | foreach ($special_words as $s) { |
72 | foreach ($special_words as $s) { |
82 | $x = str_replace($s, "\n".uc_latin1($s)."\n", $x); |
73 | $x = str_replace($s, "\n".mb_strtoupper($s)."\n", $x); |
83 | } |
74 | } |
84 | 75 | ||
85 | $x = trim($x); |
76 | $x = trim($x); |
86 | 77 | ||
87 | echo $res.trim_each_line($x)."\n"; |
78 | echo $res.trim_each_line($x)."\n"; |