Rev 4 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 4 | Rev 5 | ||
---|---|---|---|
1 | #!/usr/bin/php |
1 | #!/usr/bin/php |
2 | <?php |
2 | <?php |
3 | 3 | ||
4 | /* |
4 | /* |
5 | Copyright 2020 Daniel Marschall, ViaThinkSoft |
5 | Copyright 2020 Daniel Marschall, ViaThinkSoft |
6 | 6 | ||
7 | Licensed under the Apache License, Version 2.0 (the "License"); |
7 | Licensed under the Apache License, Version 2.0 (the "License"); |
8 | you may not use this file except in compliance with the License. |
8 | you may not use this file except in compliance with the License. |
9 | You may obtain a copy of the License at |
9 | You may obtain a copy of the License at |
10 | 10 | ||
11 | http://www.apache.org/licenses/LICENSE-2.0 |
11 | http://www.apache.org/licenses/LICENSE-2.0 |
12 | 12 | ||
13 | Unless required by applicable law or agreed to in writing, software |
13 | Unless required by applicable law or agreed to in writing, software |
14 | distributed under the License is distributed on an "AS IS" BASIS, |
14 | distributed under the License is distributed on an "AS IS" BASIS, |
15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | See the License for the specific language governing permissions and |
16 | See the License for the specific language governing permissions and |
17 | limitations under the License. |
17 | limitations under the License. |
18 | */ |
18 | */ |
19 | 19 | ||
20 | // TODO: make use of STDERR and return different exit codes |
20 | // TODO: make use of STDERR and return different exit codes |
- | 21 | // TODO: On Windows file systems, accept file names case insensitively |
|
- | 22 | ||
/**
 * Brings a UTF-8 string into Unicode normalization form C.
 *
 * This makes differently composed representations of the same character
 * (e.g. German umlauts as one code point vs. base letter + combining mark)
 * compare equal.
 *
 * Requires php-intl. If the Normalizer class is not available, or if the
 * input cannot be normalized (e.g. it is not valid UTF-8), the input is
 * returned unchanged.
 *
 * @param string $str String to normalize.
 * @return string Normalized string, or $str itself as a fallback.
 */
function utf8_normalize($str) {
	if (!class_exists('Normalizer')) return $str;

	// Normalizer::normalize() returns false on failure; never hand that
	// to callers which compare the result against file names.
	$normalized = Normalizer::normalize($str);
	return ($normalized === false) ? $str : $normalized;
}
- | 29 | ||
/**
 * Converts a string of unknown encoding to UTF-8.
 *
 * Strings that already are valid UTF-8 (this includes pure ASCII) are
 * returned unchanged. Anything else is detected strictly against the
 * configured mbstring detect order; if that yields nothing usable, the
 * string is treated as ISO-8859-1 ("ANSI").
 *
 * @param string $str Input bytes.
 * @return string The input as UTF-8.
 */
function convertToUTF8($str) {
	if (mb_check_encoding($str, 'UTF-8')) return $str;

	// Strict mode is required: non-strict detection may report an encoding
	// in which the byte sequence is not actually valid.
	$enc = mb_detect_encoding($str, mb_detect_order(), true);
	if (!$enc || $enc === 'UTF-8') {
		// The default detect order only knows ASCII and UTF-8, so legacy
		// single-byte text ends up here.
		$enc = 'ISO-8859-1';
	}

	return mb_convert_encoding($str, 'UTF-8', $enc);
}
21 | 38 | ||
/**
 * Verifies all entries of one SFV checksum file and prints the findings.
 *
 * Every non-empty line that does not start with ';' is expected to consist
 * of a file name followed by an 8 digit hexadecimal CRC32 checksum. File
 * names are resolved relative to the directory of the SFV file.
 *
 * After all entries are verified, the directory of the SFV file is scanned
 * and every regular file that is not listed in the SFV file is reported
 * (sub directories, Thumbs.db, *.md5 and *.sfv are ignored).
 *
 * @param string $file Path of the SFV file.
 * @return void
 */
function testsfv($file) {
	// TODO: warn if an entry is multiple times (with different checksums) in a single file
	global $show_verbose;

	if (!file_exists($file)) {
		echo "ERROR: File $file does not exist.\n";
		return;
	}

	$lines = @file($file);
	if ($lines === false) {
		echo "ERROR: Cannot read file $file\n";
		return;
	}

	$directory = dirname($file);

	// Set of normalized base names which are listed in the SFV file
	// (name => true), so the later lookup is a hash access.
	$files_checked = array();

	$is_first_line = true;
	$force_utf8 = false;
	foreach ($lines as $line) {
		if ($is_first_line) {
			// A UTF-8 BOM tells us that the whole file is UTF-8
			$tmp = 0;
			$line = str_replace("\xEF\xBB\xBF",'',$line,$tmp);
			if ($tmp > 0) $force_utf8 = true;
			$is_first_line = false;
		}
		if (!$force_utf8) $line = convertToUTF8($line);

		if (substr(trim($line),0,1) == ';') continue; // comment line

		$line = rtrim($line);
		if ($line == '') continue;

		$checksum = substr($line,-8);
		$origname = rtrim(substr($line,0,strlen($line)-8));
		if ($origname === '' || !preg_match('/^[0-9a-fA-F]{8}$/', $checksum)) {
			// Without this check, a broken line would be reported as a
			// vanished or non-regular file with a nonsense name.
			echo "WARNING: Malformed line in $file: $line\n";
			continue;
		}

		$origname = $directory . '/' . $origname;
		if (!file_exists($origname)) {
			echo "WARNING: File vanished : $origname\n";
		} else if (is_file($origname)) {
			$checksum2 = crc32_file($origname);
			if (strtolower($checksum) != strtolower($checksum2)) {
				echo "CHECKSUM FAIL: $origname (expected $checksum, but is $checksum2)\n";
			} else {
				if ($show_verbose) echo "OK: $origname\n";
			}
		} else {
			// For some reason, some files on a NTFS volume are "FIFO" pipe files?!
			echo "Warning: $origname is not a regular file!\n";
		}

		// Only the base name is remembered; both sides of the comparison
		// below are normalized the same way.
		$name = basename($origname);
		$key = utf8_normalize($name);
		if ($key === false) $key = $name;
		$files_checked[$key] = true;
	}

	// Now check for files which exist in the directory but are not indexed
	$sd = @scandir($directory);
	if ($sd === false) {
		echo "Error: Cannot scan directory $directory\n";
		return;
	}

	foreach ($sd as $entry) {
		if ($entry === '.' || $entry === '..') continue;

		$lower = strtolower($entry);
		if ($lower === 'thumbs.db') continue;
		$ext = substr($lower, -4);
		if ($ext === '.md5' || $ext === '.sfv') continue;

		$fullpath = $directory . '/' . $entry;
		if (is_dir($fullpath)) continue;

		$key = utf8_normalize($entry);
		if ($key === false) $key = $entry;
		if (!isset($files_checked[$key])) {
			echo "Warning: File not in SFV checksum file: $fullpath\n";
		}
	}
}
61 | 110 | ||
/**
 * Reverses the byte order of a hexadecimal string.
 *
 * The string is cut into groups of two characters (one byte each) and the
 * groups are put together again in reverse order.
 *
 * @param string $hex Hexadecimal string.
 * @return string The same bytes in reverse order.
 */
function swapEndianness($hex) {
	$swapped = '';
	foreach (str_split($hex, 2) as $byte) {
		// Prepending each group reverses the overall order
		$swapped = $byte . $swapped;
	}
	return $swapped;
}
65 | 114 | ||
/**
 * Calculates the CRC32 ("crc32b") checksum of a file.
 *
 * @param string $filename  Path of the file to hash.
 * @param bool   $rawOutput false (default): return 8 lowercase hex digits;
 *                          true: return the checksum as 4 raw bytes.
 * @return string The checksum. If the file cannot be hashed, the result is
 *                an empty string.
 */
function crc32_file($filename, $rawOutput = false) {
	$raw = hash_file('crc32b', $filename, true);
	if ($raw === false) $raw = ''; // unreadable file: yields an empty checksum

	$out = bin2hex($raw);
	if (hash('crc32b', 'TEST') === 'b893eaee') {
		// hash_file() in PHP 5.2 has the wrong Endianess!
		// https://bugs.php.net/bug.php?id=47467
		$out = swapEndianness($out);
	}

	// pack('H*') instead of hex2bin() keeps this working on old PHP versions
	return $rawOutput ? pack('H*', $out) : $out;
}
75 | 124 | ||
/**
 * Checks all SFV files of a directory and then descends into its
 * sub directories.
 *
 * The directory is listed with scandir() instead of glob(), because glob()
 * interprets characters like "[" and "]" in the directory name as pattern
 * syntax and would therefore miss the SFV files of such directories.
 * SFV files are recognized by their ".sfv" extension (case insensitive);
 * names beginning with a dot are skipped, as glob("*.sfv") would do.
 *
 * @param string $directory Directory to check; trailing slashes are removed.
 * @return void
 */
function _rec($directory) {
	global $show_verbose;

	$directory = rtrim($directory, '/\\');

	if (!is_dir($directory)) {
		echo "Invalid directory path $directory\n";
		exit(1); // exit("message") would end the script with status 0
	}

	$sd = @scandir($directory);
	if ($sd === false) {
		echo "Error: Cannot scan directory $directory\n";
		return;
	}

	// Split the listing into SFV files and sub directories
	$sfvfiles = array();
	$subdirs = array();
	foreach ($sd as $entry) {
		if ($entry === '.' || $entry === '..') continue;

		$path = $directory . '/' . $entry;
		if (is_dir($path)) {
			$subdirs[] = $path;
		} else if (substr($entry, 0, 1) !== '.' && strtolower(substr($entry, -4)) === '.sfv') {
			$sfvfiles[] = $path;
		}
	}

	if (count($sfvfiles) == 0) {
		if ($show_verbose) echo "Directory $directory has no SFV file. Skipping.\n";
	} else {
		if ($show_verbose) echo "Check directory $directory\n";
		foreach ($sfvfiles as $sfvfile) {
			testsfv($sfvfile);
		}
	}

	foreach ($subdirs as $subdir) {
		_rec($subdir);
	}
}
112 | 161 | ||
113 | 162 | ||
114 | # --- |
163 | # --- |
115 | 164 | ||
116 | $show_verbose = false; |
// Command line handling: "-v" enables verbose output, every other argument
// is taken as the directory to check (the last one wins).
$show_verbose = false;
$dir = '';

for ($i=1; $i<$argc; $i++) {
	if ($argv[$i] === '-v') {
		$show_verbose = true;
	} else {
		$dir = $argv[$i];
	}
}

// Do not use empty() here: it would also reject a directory named "0"
if ($dir === '') {
	echo "Syntax: $argv[0] [-v] <directory>\n";
	exit(2);
}

if (!is_dir($dir)) {
	echo "Directory not found\n";
	exit(1);
}

_rec($dir);

if ($show_verbose) echo "Done.\n";