#!/usr/bin/php
<?php
#
# VGWhoIs (ViaThinkSoft Global WhoIs, a fork of generic Whois / gwhois)
# Maintenance / Developer utilities
#
# (c) 2012-2019 by Daniel Marschall, ViaThinkSoft <info@daniel-marschall.de>
#
# License: https://www.gnu.org/licenses/gpl-2.0.html (GPL version 2)
#
# TODO: Automatically convert ":notice||Whois server unknown (2014-03-15)" entries in pattern_newgtld into ":whois|" entries as soon as IANA has a whois server on record.
error_reporting(E_ALL | E_NOTICE | E_STRICT | E_DEPRECATED);
require_once __DIR__ . '/--/config.inc.phps';
require_once __DIR__ . '/../../shared/php_includes/common_functions.inc.phps';
require_once __DIR__ . '/../../shared/php_includes/idna_convert.class.php';
# ---
// List of TLDs published by IANA. Stays null until one of the steps below loads it.
$iana_tld_data = null;

// Step 1:
// Check if in the meantime TLDs without an official whois server were updated to have one
// Attention/TODO: A change of the whois name still needs manual intervention!
$newgtld_cont_original = file_get_contents(NEWGTLD_PATTERN_FILE);
if ($newgtld_cont_original !== false) {
	$newgtld_cont_new = preg_replace_callback(
		// Matches an automatically generated "whois server unknown" entry.
		//   Group 1: date of the last check
		//   Group 2: start of the pattern line. Step 2 writes "\.", so the backslash has to be
		//            matched as well; a bare "." is still accepted for hand-edited entries.
		//   Group 3: the TLD
		'@# TODO: Entry generated automatically\. Needs manual check\.\n:notice\|\|Whois server unknown \((.*)\)\n(\\\\?\.)(.*)\$\n@imU',
		function ($treffer) {
			$in_all    = $treffer[0];
			$in_ts     = $treffer[1];
			$in_prefix = $treffer[2];
			$in_tld    = $treffer[3];

			$days_passed = (time() - strtotime($in_ts)) / (60*60*24);
			if ($days_passed < NEWGTLD_RECHECK_MISSING_WHOIS_SERVERS) {
				return $in_all; // leave everything unchanged
			}

			$whois_serv = find_rootzone_whois_server($in_tld);
			if (!$whois_serv) {
				// Nothing found. Just update last check date.
				return str_replace($in_ts, date('Y-m-d'), $in_all);
			}

			// Update the entry: drop the TODO/notice lines and keep the pattern line as it was
			return ":whois|$whois_serv\n" . $in_prefix . $in_tld . "\$\n";
		},
		$newgtld_cont_original
	);

	// preg_replace_callback() returns NULL on a PCRE error. Writing that back would
	// empty the pattern file, so only write when we really got a changed string.
	if (is_string($newgtld_cont_new) && ($newgtld_cont_original !== $newgtld_cont_new)) {
		file_put_contents(NEWGTLD_PATTERN_FILE, $newgtld_cont_new);
		gwi_update_newgtld_patternfile();
	}
}
// Step 2:
// Search for new gTLDs which are not in our pattern file
$newgtld_data = gwi_newgtld_get_all_delegated_strings();
foreach ($newgtld_data as $data) {
	// Each row is array($date, $string, $tld, $tld_uc, $explanation); only three fields are needed here
	$date        = $data[0];
	$tld         = $data[2];
	$explanation = $data[4];

	// A table row that could not be parsed yields an empty TLD. It must not reach the
	// IANA check below: the list split by "\n" can contain an empty element (e.g. from a
	// trailing newline), which would let a bogus "\.$" entry into the pattern file.
	if ((string)$tld === '') {
		continue;
	}

	// Is it already in our pattern file?
	if (does_exist($tld)) {
		# echo "Info: $tld is already in pattern.\n";
		continue;
	}

	// Only add the TLD to our pattern file if it has also been published by IANA.
	// The reason is that the newGTLD page had temporary typos many times (e.g. calogne instead of cologne)
	if (!isset($iana_tld_data)) {
		$iana_tld_data = explode("\n", cached_file(IANA_TLD_REGISTRY, CACHE_FILE_DIR));
		$iana_tld_data = array_map('trim', $iana_tld_data);
	}
	if (!in_array(strtoupper($tld), $iana_tld_data, true)) {
		echo "Info: Will not add $tld, since it is not yet added in IANA's registry.\n";
		continue;
	}

	// Build the new pattern file entry
	$to_append  = "\n";
	$to_append .= "# Delegated on $date\n";
	if ($explanation) {
		$to_append .= "# $explanation\n";
	}

	$whois_serv = find_rootzone_whois_server($tld);
	if ($whois_serv) {
		$to_append .= ":whois|$whois_serv\n";
	} else {
		// This marker and the date are what Step 1 looks for on later runs
		$to_append .= "# TODO: Entry generated automatically. Needs manual check.\n";
		$to_append .= ":notice||Whois server unknown (".date('Y-m-d').")\n";
	}
	$to_append .= "\\.$tld$\n";

	// Write immediately, so that does_exist() already sees this entry for the following rows
	file_put_contents(NEWGTLD_PATTERN_FILE, $to_append, FILE_APPEND);
	gwi_update_newgtld_patternfile();
}
// TODO:
// Step 3: Check if there are IANA TLDs which are not in our pattern files yet
if (!isset($iana_tld_data)) {
	// Load the IANA list (one TLD per line) if Step 2 did not need it
	$iana_tld_data = array_map('trim', explode("\n", cached_file(IANA_TLD_REGISTRY, CACHE_FILE_DIR)));
}
foreach ($iana_tld_data as $tld) {
	$already_known = does_exist($tld); # TODO: look in all pattern files
	if (!$already_known) {
		# echo "Does not exist: $tld\n";

		// The classification below is not implemented yet, so it is skipped on purpose.
		continue;

		$newgtld_res = count_newgtld_applications($tld);
		if ($newgtld_res === false) {
			// TODO: ignore?
		} else if ($newgtld_res > 0) {
			// TODO: add to newgtld pattern file
		} else {
			// TODO: add to normal pattern file
		}
	}
}
# ------------------------------------------------------
/**
 * Stamps the New gTLD pattern file (NEWGTLD_PATTERN_FILE) with today's date.
 *
 * The first "#: version ..." line gets the current date (Ymd). If the file has no
 * such line, a complete header is put in front of the existing content.
 * The file is only written when its content actually changed.
 *
 * @return void
 */
function gwi_update_newgtld_patternfile() {
	$now = date('Ymd');

	$pcont_original = file_get_contents(NEWGTLD_PATTERN_FILE);
	$pcont = $pcont_original;

	// Replace only the first version line; $count tells us whether there was one
	$count = 0;
	$pcont = preg_replace("@#: version (\\S+)@i", "#: version $now", $pcont, 1, $count);

	if ($count == 0) {
		// Add header
		$pcont = "#: version $now\n".
		         "# New gTLD\n".
		         "# see: http://newgtlds.icann.org/en/program-status/delegated-strings\n".
		         "# This file can be updated by running \"vgwhois-pattern-update\", but it does only ADD new \"New gTLDs\"\n".
		         "# --------------------------------------------------------------------\n".
		         "\n".$pcont;
	}

	if ($pcont != $pcont_original) {
		file_put_contents(NEWGTLD_PATTERN_FILE, $pcont);
	}
}
/**
 * Determines the whois server of a TLD.
 *
 * First asks iana_get_rootzone_whois_server(). If that yields nothing and
 * TRY_FINDING_HIDDEN_WHOIS_SERVERS is enabled, a few conventional host names
 * are probed on port 43 and the first one that answers is used.
 *
 * @param string $tld
 * @return string|false Host name of the whois server, or the (falsy) IANA result if none was found
 */
function find_rootzone_whois_server($tld) {
	$whois_serv = iana_get_rootzone_whois_server($tld);

	// No guessing wanted, or IANA already gave us a server
	if (!TRY_FINDING_HIDDEN_WHOIS_SERVERS || $whois_serv) {
		return $whois_serv;
	}

	// Try to find "secret whois servers"
	// TODO: also try out to use the URL of the homepage (in IANAs root DB)
	$candidates = array("whois.nic.$tld", "whois.$tld", "$tld");
	foreach ($candidates as $check_server) {
		if (gwitc_is_port_open($check_server, 43)) {
			return $check_server;
		}
	}

	return $whois_serv;
}
/**
 * Queries whois.iana.org for a TLD and extracts the value of the "whois:" field.
 *
 * @param string $tld TLD to look up (lower-cased before the query)
 * @return string|false The first non-whitespace token after "whois:", or false if there is none
 */
function iana_get_rootzone_whois_server($tld) {
	$response = QueryWhoisServer('whois.iana.org', strtolower($tld));

	$found = preg_match('@whois:\\s*(\\S+)@i', $response, $match);

	return $found ? $match[1] : false;
}
/**
 * Checks whether the New gTLD pattern file (NEWGTLD_PATTERN_FILE) has a pattern line for a TLD.
 *
 * The comparison is case-insensitive. The line "\.<tld>$" must be surrounded by
 * line feeds, i.e. it is found neither on the very first line nor on an
 * unterminated last line.
 *
 * @param string $tld
 * @return bool
 */
function does_exist($tld) {
	$haystack = strtolower(file_get_contents(NEWGTLD_PATTERN_FILE));
	$needle   = "\n\\." . strtolower($tld) . "\$\n";

	return strpos($haystack, $needle) !== false;
}
/**
 * Downloads ICANN's "delegated strings" page and extracts its table rows.
 *
 * Every two-column table row after the "STRING" column header becomes one entry.
 * The rows are returned in reverse page order.
 *
 * @return array List of array($date, $string, $tld, $tld_uc, $explanation):
 *               $date        = text of the first column
 *               $string      = cleaned-up text of the second column
 *               $tld         = the TLD (taken from the parentheses if the column has several parts)
 *               $tld_uc      = first part of the column (Unicode TLD), or $tld if there is only one part
 *               $explanation = remaining text of the column, or ''
 *               An empty list is returned if the page could not be loaded or has no such table.
 */
function gwi_newgtld_get_all_delegated_strings() {
	$cont = file_get_contents('http://newgtlds.icann.org/en/program-status/delegated-strings');
	if ($cont === false) {
		// Download failed, nothing to parse
		return array();
	}

	// Convert Unicode stuff
	$cont = str_replace('xn'.unichr(0x2013), 'xn--', $cont);
	$cont = str_replace('xn'.unichr(0x2015), 'xn--', $cont); // used in Samsung TLD
	$cont = str_replace(unichr(0x2013), '-', $cont); // used in most explanations
	$cont = str_replace(unichr(0x2015), '-', $cont);
	$cont = str_replace(unichr(0x00fc), 'ue', $cont); // German umlaut ue (used in .koeln)
	$cont = utf8_decode($cont);

	// Do some minor corrections
	$cont = str_replace('game (s)', 'game(s)', $cont);

	// Only the part after the "STRING" column header is of interest
	$parts = explode('STRING</th>', $cont, 2);
	if (count($parts) < 2) {
		// Page layout changed or an error page was delivered
		return array();
	}
	$cont = $parts[1];

	preg_match_all('@<tr>\s*<td[^>]*>(.*)</td>\s*<td[^>]*>(.*)</td>\s*</tr>@ismU', $cont, $m, PREG_SET_ORDER);
	$m = array_reverse($m);

	$out = array();
	foreach ($m as $data) {
		$date   = html_entity_decode(strip_tags($data[1]));
		$string = html_entity_decode(strip_tags($data[2]));

		// Make sure "(" starts a new word, then collapse runs of spaces into a single space
		$string = str_replace('(', ' (', $string);
		$string = preg_replace('@ {2,}@', ' ', $string);

		# Fixing some misplaced white spaces
		$string = preg_replace('@\.\s+@m', '.', $string);
		$string = preg_replace('@\(\s+@m', '(', $string);
		$string = preg_replace('@\s+\)@m', ')', $string);

		$ary = explode(' ', $string, 3);
		if (count($ary) > 1) {
			$tld_uc = trim($ary[0]); // Unicode TLD
			$tld = trim($ary[1]); // Punycode TLD
			$tld = substr($tld, 1, strlen($tld)-2); // remove the surrounding "(" and ")"

			// The explanation is optional
			$explanation = isset($ary[2]) ? trim($ary[2]) : '';
			$explanation = substr($explanation, 1); // remove "-"
			$explanation = trim($explanation);
		} else {
			$tld = trim(strtolower($string));
			$tld_uc = $tld;
			$explanation = '';
		}

		$out[] = array($date, $string, $tld, $tld_uc, $explanation);
	}

	return $out;
}
/**
 * Return unicode char by its code
 *
 * Uses mb_chr() where available. The fallback decodes a numeric HTML entity with
 * html_entity_decode(), so that the "HTML-ENTITIES" encoding of mbstring
 * (deprecated as of PHP 8.2) is not needed.
 *
 * @param int $u Unicode code point
 * @return string The character, encoded as UTF-8
 * @source http://www.php.net/manual/de/function.chr.php#88611
 */
function unichr($u) {
	$codepoint = intval($u);

	if (function_exists('mb_chr')) {
		$char = mb_chr($codepoint, 'UTF-8');
		if ($char !== false) {
			return $char;
		}
	}

	// ENT_QUOTES, so that the code points of the quote characters are decoded, too
	return html_entity_decode('&#' . $codepoint . ';', ENT_QUOTES, 'UTF-8');
}
/**
 * Counts the application entries that ICANN's gTLD application status search lists for a string.
 *
 * The lookup consists of three curl calls (run via exec) against gtldresult.icann.org:
 *   1. Load the status page to get a JSESSIONID cookie and the hidden "t:formdata" value.
 *   2. Submit the search form for $string using that session.
 *   3. Load the status page again with the same session and count the result links
 *      whose link text is exactly $string.
 *
 * Note: The result is also printed via echo (marked as DEBUG below).
 *
 * @param string $string The string (TLD) to search for. It is lower-cased and passed through
 *                       idna_convert::decode(); the decoded value is used if decode() does not
 *                       return false (presumably Punycode to Unicode - TODO confirm).
 * @return int|false Number of matching links, or false if a curl call exits with a non-zero
 *                   code or if the session cookie / form data cannot be found.
 */
function count_newgtld_applications($string) {
#global $punycoder;
#if (is_null($punycoder))
$punycoder = new idna_convert();
// The same browser-like User-Agent is sent with all three requests
$ua = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0';
if (($x = $punycoder->decode(strtolower($string))) !== false) $string = $x;
// Request 1: fetch the page including the response headers (-i), which carry the session cookie
$out = array();
exec('curl -i -s https://gtldresult.icann.org/application-result/applicationstatus/viewstatus -H "Host: gtldresult.icann.org" -H '.escapeshellarg('User-Agent: '.$ua), $out, $code);
if ($code != 0) return false;
$html = implode("\n", $out);
if (!preg_match('@JSESSIONID=(.+);@ismU', $html, $m)) return false;
$jsessionid = $m[1];
if (!preg_match('@<input value="([^"]+)" name="t:formdata"@ismU', $html, $m)) return false;
$formdata = $m[1];
// Percent-encode "+" and "/" so that they survive in the form-encoded request body
$formdata = str_replace('+', '%2B', $formdata);
$formdata = str_replace('/', '%2F', $formdata);
// Request 2: submit the search form. The output of this request is not evaluated.
// NOTE(review): $string is put into the request body without URL-encoding - confirm that this is fine for non-ASCII strings.
$out = array();
exec('curl -s "https://gtldresult.icann.org/application-result/applicationstatus/viewstatus.applicationstatusform" -H "Host: gtldresult.icann.org" -H '.escapeshellarg('User-Agent: '.$ua).' -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" -H "Accept-Language: de,en-US;q=0.7,en;q=0.3" -H "Referer: https://gtldresult.icann.org/application-result/applicationstatus/viewstatus" -H '.escapeshellarg('Cookie: JSESSIONID='.$jsessionid).' -H "Connection: keep-alive" --data '.escapeshellarg('t%3Aformdata='.$formdata.'&t%3Asubmit=%5B%22searchButton%22%2C%22searchButton%22%5D&searchField='.$string.'&searchButton=Search&status=&updates=&objections=&gacew=&similar=&pic='), $out, $code);
if ($code != 0) return false;
// Request 3: load the status page again with the same session; its HTML is searched for result links
$out = array();
exec('curl -s "https://gtldresult.icann.org/application-result/applicationstatus/viewstatus" -H "Host: gtldresult.icann.org" -H '.escapeshellarg('User-Agent: '.$ua).' -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" -H "Accept-Language: de,en-US;q=0.7,en;q=0.3" -H "Referer: https://gtldresult.icann.org/application-result/applicationstatus/viewstatus" -H '.escapeshellarg('Cookie: JSESSIONID='.$jsessionid).' -H "DNT: 1" -H "Connection: keep-alive"', $out, $code);
if ($code != 0) return false;
$html = implode("\n", $out);
// Count the "view application details" links whose link text is exactly $string
preg_match_all('@href="/application-result/applicationstatus/viewstatus:viewapplicationdetails/(\d+)">'.preg_quote($string, '@').'</a>@ismU', $html, $m);
# DEBUG
echo "$string = ".count($m[1])."\n";
# return (count($m[1]) > 0);
return count($m[1]);
}
#assert(count_newgtld_applications('shopping') == 2);