Subversion Repositories vgwhois

Rev

Rev 11 | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 11 Rev 149
Line 12... Line 12...
12
 
12
 
13
# TODO: for many domains, the format is completely different! do we have a good enough parser?
13
# TODO: for many domains, the format is completely different! do we have a good enough parser?
14
 
14
 
15
require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.php';
15
require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.php';
16
 
16
 
-
 
17
ini_set('default_charset', 'UTF-8');
-
 
18
 
17
$domain = isset($argv[1]) ? $argv[1] : '';
19
$domain = isset($argv[1]) ? $argv[1] : '';
18
 
20
 
19
$url = "https://www.dom-enic.com/whois.html";
21
$url = "https://www.dom-enic.com/whois.html";
20
 
22
 
21
$res  = "% Parsing via regex from '$url'\n\n";
23
$res  = "% Parsing via regex from '$url'\n\n";
Line 45... Line 47...
45
}
47
}
46
 
48
 
47
preg_match_all('@'.preg_quote(BEGIN, '@').'(.*)'.preg_quote(END, '@').'@ismU', $x, $m);
49
preg_match_all('@'.preg_quote(BEGIN, '@').'(.*)'.preg_quote(END, '@').'@ismU', $x, $m);
48
 
50
 
49
if (!isset($m[1][0])) {
51
if (!isset($m[1][0])) {
50
	echo "Error while parsing the web content. Could not find limitations.\n";
52
	echo "Error while parsing the web content (RegEx failed).\n";
51
	exit(1);
53
	exit(1);
52
}
54
}
53
$x = $m[1][0];
55
$x = $m[1][0];
54
 
56
 
55
$x = preg_replace('@<br />(?!\n)@', "\n", $x);
57
$x = preg_replace('@<br />(?!\n)@', "\n", $x);
56
$x = strip_tags($x);
58
$x = strip_tags($x);
57
 
59
 
58
$x = html_entity_decode($x);
60
$x = html_entity_decode($x);
59
 
61
 
60
// é -> É @ strtoupper()
-
 
61
/*
-
 
62
$locals = array('es_ES@euro', 'es_ES', 'es');
-
 
63
reset($locals);
-
 
64
while (list(, $locale) = each ($locals)) {
-
 
65
	if ( setlocale(LC_CTYPE, $locale) == $locale ) {                
-
 
66
		break; // Exit when we were successfull
-
 
67
	}
-
 
68
}
-
 
69
*/
-
 
70
 
-
 
71
$x = preg_replace("@\n\s+\n@", "\n\n", $x);
62
$x = preg_replace("@\n\s+\n@", "\n\n", $x);
72
while (strpos($x, "\n\n\n") !== false) $x = str_replace("\n\n\n", "\n\n", $x);
63
while (strpos($x, "\n\n\n") !== false) $x = str_replace("\n\n\n", "\n\n", $x);
73
 
64
 
74
$special_words = array(
65
$special_words = array(
75
	'Registrant:',
66
	'Registrant:',
Line 77... Line 68...
77
	'Technical Contact:',
68
	'Technical Contact:',
78
	'Billing Contact:'
69
	'Billing Contact:'
79
);
70
);
80
 
71
 
81
foreach ($special_words as $s) {
72
foreach ($special_words as $s) {
82
	$x = str_replace($s, "\n".uc_latin1($s)."\n", $x);
73
	$x = str_replace($s, "\n".mb_strtoupper($s)."\n", $x);
83
}
74
}
84
 
75
 
85
$x = trim($x);
76
$x = trim($x);
86
 
77
 
87
echo $res.trim_each_line($x)."\n";
78
echo $res.trim_each_line($x)."\n";