Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1 | daniel-mar | 1 | #include "stdafx.h" |
2 | #pragma hdrstop |
||
3 | //#include <stdio.h> |
||
4 | #include "ZipDflt.h" |
||
5 | //#include "Zip.h" |
||
6 | //#include "ZipErr.h" |
||
7 | #include "dz_errs.h" |
||
8 | |||
9 | #undef _DZ_FILE_ |
||
10 | #define _DZ_FILE_ DZ_ZDEFLATE_CPP |
||
11 | |||
12 | /* Deflate.c |
||
13 | * Copyright (C) 1990-1996 Mark Adler, Richard B. Wales, Jean-loup Gailly, |
||
14 | * Kai Uwe Rommel, Onno van der Linden and Igor Mandrichenko. |
||
15 | * This version modified by Chris Vleghert and Eric Engler for BCB/Delphi Zip. |
||
16 | ** distributed under LGPL license ** see license.txt for details |
||
17 | */ |
||
18 | /* PURPOSE |
||
19 | * Identify new text as repetitions of old text within a fixed- |
||
20 | * length sliding window trailing behind the new text. |
||
21 | ** DISCUSSION |
||
22 | * The "deflation" process depends on being able to identify portions |
||
23 | * of the input text which are identical to earlier input (within a |
||
24 | * sliding window trailing behind the input currently being processed). |
||
25 | * |
||
26 | * The most straightforward technique turns out to be the fastest for |
||
27 | * most input files: try all possible matches and select the longest. |
||
28 | * The key feature of this algorithm is that insertions into the string |
||
29 | * dictionary are very simple and thus fast, and deletions are avoided |
||
30 | * completely. Insertions are performed at each input character, whereas |
||
31 | * string matches are performed only when the previous match ends. So it |
||
32 | * is preferable to spend more time in matches to allow very fast string |
||
33 | * insertions and avoid deletions. The matching algorithm for small |
||
34 | * strings is inspired from that of Rabin & Karp. A brute force approach |
||
35 | * is used to find longer strings when a small match has been found. |
||
36 | * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze |
||
37 | * (by Leonid Broukhis). |
||
38 | * A previous version of this file used a more sophisticated algorithm |
||
39 | * (by Fiala and Greene) which is guaranteed to run in linear amortized |
||
40 | * time, but has a larger average cost, uses more memory and is patented. |
||
41 | * However the F&G algorithm may be faster for some highly redundant |
||
42 | * files if the parameter max_chain_length (described below) is too large. |
||
43 | * |
||
44 | * ACKNOWLEDGEMENTS |
||
45 | * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and |
||
46 | * I found it in 'freeze' written by Leonid Broukhis. |
||
47 | * Thanks to many info-zippers for bug reports and testing. |
||
48 | * |
||
49 | * REFERENCES |
||
50 | * APPNOTE.TXT documentation file in PKZIP 1.93a distribution. |
||
51 | * |
||
52 | * A description of the Rabin and Karp algorithm is given in the book |
||
53 | * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. |
||
54 | * |
||
55 | * Fiala,E.R., and Greene,D.H. |
||
56 | * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 |
||
57 | * |
||
58 | * INTERFACE |
||
59 | * void lm_init (int pack_level, ush *flags) |
||
60 | * Initialize the "longest match" routines for a new file |
||
61 | * |
||
62 | * ulg deflate (void) |
||
63 | * Processes a new input file and return its compressed length. Sets |
||
64 | * the compressed length, crc, deflate flags and internal file |
||
65 | * attributes. |
||
66 | |||
67 | Copyright (c) 1990-2007 Info-ZIP. All rights reserved. |
||
68 | |||
69 | See the accompanying file LICENSE, version 2007-Mar-4 or later |
||
70 | (the contents of which are also included in zip.h) for terms of use. |
||
71 | If, for some reason, all these files are missing, the Info-ZIP license |
||
72 | also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html |
||
73 | |||
74 | parts Copyright (C) 1997 Mike White, Eric W. Engler |
||
75 | ************************************************************************ |
||
76 | Copyright (C) 2009, 2010 by Russell J. Peters, Roger Aelbrecht |
||
77 | |||
78 | This file is part of TZipMaster Version 1.9. |
||
79 | |||
80 | TZipMaster is free software: you can redistribute it and/or modify |
||
81 | it under the terms of the GNU Lesser General Public License as published by |
||
82 | the Free Software Foundation, either version 3 of the License, or |
||
83 | (at your option) any later version. |
||
84 | |||
85 | TZipMaster is distributed in the hope that it will be useful, |
||
86 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
87 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
88 | GNU Lesser General Public License for more details. |
||
89 | |||
90 | You should have received a copy of the GNU Lesser General Public License |
||
91 | along with TZipMaster. If not, see <http://www.gnu.org/licenses/>. |
||
92 | |||
93 | contact: problems@delphizip.org (include ZipMaster in the subject). |
||
94 | updates: http://www.delphizip.org |
||
95 | DelphiZip maillist subscribe at http://www.freelists.org/list/delphizip |
||
96 | ************************************************************************/ |
||
97 | |||
98 | // Configuration parameters |
||
99 | // Compile with HASH_BITS = 14 to reduce the memory requirements or with |
||
100 | // HASH_BITS = 13 to use as little memory as possible. Otherwise if the |
||
101 | // entire input file can be held in memory (not possible on 16 bit systems). |
||
102 | // Warning: defining this symbol affects the compression ratio. The |
||
103 | // compressed output is still correct, and might even be smaller in some |
||
104 | // cases. For portability to 16 bit machines, do not use values above 15. |
||
105 | // #ifndef HASH_BITS |
||
106 | // #define HASH_BITS 15 // Number of bits used to hash strings |
||
107 | // #endif |
||
108 | |||
109 | //#define HASH_SIZE (unsigned)(1 << HASH_BITS) |
||
110 | //#define HASH_MASK (HASH_SIZE - 1) |
||
111 | //#define WMASK (ZWSIZE - 1) |
||
112 | // HASH_SIZE and ZWSIZE must be powers of two |
||
113 | #define NIL 0 |
||
114 | |||
115 | // Tail of hash chains |
||
116 | #define FAST 4 |
||
117 | #define SLOW 2 |
||
118 | |||
119 | // speed options for the general purpose bit flag |
||
120 | // #ifndef TOO_FAR |
||
121 | #define TOO_FAR 4096 |
||
122 | // #endif |
||
123 | |||
124 | // Matches of length 3 are discarded if their distance exceeds TOO_FAR |
||
125 | // Local data used by the "longest match" routines. |
||
126 | //typedef ush Pos; |
||
127 | typedef unsigned IPos; |
||
128 | |||
129 | // A Pos is an index in the character window. We use short instead of int to |
||
130 | // save space in the various tables. unsigned is used only for parameter passing. |
||
131 | #define H_SHIFT ((HASH_BITS + MIN_MATCH - 1) / MIN_MATCH) |
||
132 | |||
133 | // Number of bits by which ins_h and del_h must be shifted at each input |
||
134 | // step. It must be such that after MIN_MATCH steps, the oldest byte no |
||
135 | // longer takes part in the hash key, that is: H_SHIFT * MIN_MATCH >= |
||
136 | // HASH_BITS |
||
137 | #define max_insert_length fmax_lazy_match |
||
138 | |||
139 | // Insert new strings in the hash table only if the match length is not |
||
140 | // greater than this length. This saves time but degrades compression. |
||
141 | // max_insert_length is used only for compression levels <= 3. |
||
142 | // Values for max_lazy_match, good_match and max_chain_length, depending on |
||
143 | // the desired pack level (0..9). The values given below have been tuned to |
||
144 | // exclude worst case performance for pathological files. Better values may |
||
145 | // be found for specific files. |
||
146 | |||
147 | typedef struct config |
||
148 | { |
||
149 | ush good_length; // reduce lazy search above this match length |
||
150 | ush max_lazy; // do not perform lazy search above this match length |
||
151 | ush nice_length; // quit search above this match length |
||
152 | ush max_chain; |
||
153 | } |
||
154 | |||
155 | config; |
||
156 | |||
157 | #ifdef FULL_SEARCH |
||
158 | #define nice_match MAX_MATCH |
||
159 | #endif |
||
160 | static config configuration_table[10] = |
||
161 | { |
||
162 | // good lazy nice chain |
||
163 | // 0 |
||
164 | { 0, 0, 0, 0 }, |
||
165 | // store only |
||
166 | // 1 |
||
167 | { 4, 4, 8, 4 }, |
||
168 | // maximum speed, no lazy matches |
||
169 | // 2 |
||
170 | { 4, 5, 16, 8 }, |
||
171 | // 3 |
||
172 | { 4, 6, 32, 32 }, |
||
173 | // 4 |
||
174 | { 4, 4, 16, 16 }, |
||
175 | // lazy matches |
||
176 | // 5 |
||
177 | { 8, 16, 32, 32 }, |
||
178 | // 6 |
||
179 | { 8, 16, 128, 128 }, |
||
180 | // 7 |
||
181 | { 8, 32, 128, 256 }, |
||
182 | // 8 |
||
183 | { 32, 128, 258, 1024 }, |
||
184 | // 9 |
||
185 | { 32, 258, 258, 4096 } |
||
186 | } |
||
187 | ; // maximum compression |
||
188 | |||
189 | // Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= |
||
190 | // 4 For deflate_fast() (levels <= 3) good is ignored and lazy has a |
||
191 | // different meaning. |
||
192 | #define EQUAL 0 |
||
193 | |||
194 | // result of memcmp for equal strings |
||
195 | // Prototypes for local functions. |
||
196 | //static void fill_window(ZGlobals * pG); |
||
197 | //static ZInt64 deflate_fast(ZGlobals * pG); |
||
198 | |||
199 | //extern int __fastcall longest_match(ZGlobals *pG, unsigned cur_match); |
||
200 | |||
201 | //#ifdef DEBUG |
||
202 | //static void check_match(ZGlobals *pG, unsigned start, unsigned match, int length); |
||
203 | //#endif |
||
204 | |||
205 | // Update a hash value with the given input byte IN assertion: all calls to |
||
206 | // to UPDATE_HASH are made with consecutive input characters, so that a |
||
207 | // running hash key can be computed from the previous key instead of complete |
||
208 | // recalculation each time. |
||
209 | #define UPDATE_HASH(h, c) (h = (((h) << H_SHIFT) ^ (c)) & HASH_MASK) |
||
210 | |||
211 | // Insert string s in the dictionary and set match_head to the previous head |
||
212 | // of the hash chain (the most recent string with same hash key). Return the |
||
213 | // previous length of the hash chain. IN assertion: all calls to to |
||
214 | // INSERT_STRING are made with consecutive input characters and the first |
||
215 | // MIN_MATCH bytes of s are valid (except for the last MIN_MATCH-1 bytes of |
||
216 | // the input file). |
||
217 | #define INSERT_STRING(s, match_head) \ |
||
218 | ( \ |
||
219 | UPDATE_HASH(fins_h, fwindow[(s) + (MIN_MATCH - 1)]), fprev[ \ |
||
220 | (s) & WMASK] = (ush) (match_head = fhead[fins_h]), fhead[fins_h] = \ |
||
221 | (ush) (s) \ |
||
222 | ) |
||
223 | |||
224 | // RCV Added 2x (ush) |
||
225 | // Initialize the "longest match" routines for a new file IN assertion: |
||
226 | // window_size is > 0 if the input file is already read or mmap'ed in the |
||
227 | // window[] array, 0 otherwise. In the first case, window_size is sufficient |
||
228 | // to contain the whole input file plus MIN_LOOKAHEAD bytes (to avoid |
||
229 | // referencing memory beyond the end of window[] when looking for matches |
||
230 | // towards the end). pack_level :: 0: store, 1: best speed, 9: best |
||
231 | // compression. flags :: General purpose bit flag. |
||
232 | void ZipDflt::lm_init(int pack_level, ush * flags) |
||
233 | { |
||
234 | register unsigned j; |
||
235 | |||
236 | if (pack_level < 1 || pack_level > 9) |
||
237 | throw DZException(DZ_ERM_LOGIC_ERROR); |
||
238 | |||
239 | // FatalError(ZEN_LOGIC03); |
||
240 | |||
241 | // Do not slide the window if the whole input is already in memory |
||
242 | // (window_size > 0) |
||
243 | fsliding = 0; |
||
244 | |||
245 | if (fwindow_size == 0L) |
||
246 | { |
||
247 | fsliding = 1; |
||
248 | fwindow_size = (ulg)2L * ZWSIZE; |
||
249 | } |
||
250 | |||
251 | // Initialize the hash table (avoiding 64K overflow for 16 bit systems). |
||
252 | // prev[] will be initialized on the fly. |
||
253 | fhead[HASH_SIZE - 1] = NIL; |
||
254 | memset((char *) fhead, NIL, (unsigned)(HASH_SIZE - 1) * sizeof(* fhead)); |
||
255 | |||
256 | // Set the default configuration parameters: |
||
257 | fmax_lazy_match = configuration_table[pack_level].max_lazy; |
||
258 | fgood_match = configuration_table[pack_level].good_length; |
||
259 | |||
260 | #ifndef FULL_SEARCH |
||
261 | fnice_match = configuration_table[pack_level].nice_length; |
||
262 | #endif |
||
263 | |||
264 | fmax_chain_length = configuration_table[pack_level].max_chain; |
||
265 | |||
266 | if (pack_level <= 2) |
||
267 | { |
||
268 | * flags |= FAST; |
||
269 | } |
||
270 | else |
||
271 | if (pack_level >= 8) |
||
272 | { |
||
273 | * flags |= SLOW; |
||
274 | } |
||
275 | |||
276 | // ??? reduce max_chain_length for binary files |
||
277 | fstrstart = 0; |
||
278 | |||
279 | fblock_start = 0L; |
||
280 | |||
281 | j = ZWSIZE; |
||
282 | |||
283 | j <<= 1; // We Can read 64K in one step. |
||
284 | |||
285 | // flookahead = (* fread_buf)((uch *) fwindow, j); |
||
286 | flookahead = read_buf((uch *) fwindow, j); |
||
287 | |||
288 | if (flookahead == 0 || flookahead == (unsigned)EOF) |
||
289 | { |
||
290 | feofile = 1, flookahead = 0; |
||
291 | return; |
||
292 | } |
||
293 | |||
294 | feofile = 0; |
||
295 | |||
296 | // Make sure that we always have enough lookahead. This is important if |
||
297 | // input comes from a device such as a tty. |
||
298 | |||
299 | if (flookahead < MIN_LOOKAHEAD) |
||
300 | fill_window(); |
||
301 | |||
302 | fins_h = 0; |
||
303 | |||
304 | for (j = 0; j < MIN_MATCH - 1; j++) |
||
305 | UPDATE_HASH(fins_h, fwindow[j]); |
||
306 | |||
307 | // If lookahead < MIN_MATCH, ins_h is garbage, but this is not important |
||
308 | // since only literal bytes will be emitted. |
||
309 | } |
||
310 | |||
311 | |||
312 | #ifdef _DEBUG |
||
313 | // Check that the match at match_start is indeed a match. |
||
314 | void ZipDflt::check_match(unsigned start, unsigned match, int length) |
||
315 | { |
||
316 | // check that the match is indeed a match |
||
317 | if (memcmp((char *)fwindow + match, (char *)fwindow + start, length) != EQUAL) |
||
318 | Notify(DZ_ERM_LOGIC_ERROR, _T("invalid match - start %d, match %d, length %d") |
||
319 | , start, match, length); |
||
320 | } |
||
321 | |||
322 | #else |
||
323 | #define check_match(start, match, length) |
||
324 | #endif |
||
325 | |||
326 | // Fill the window when the lookahead becomes insufficient. Updates strstart |
||
327 | // and lookahead, and sets eofile if end of input file. IN assertion: |
||
328 | // lookahead < MIN_LOOKAHEAD & |
||
329 | // & |
||
330 | // strstart + lookahead > 0 OUT assertions: strstart <= |
||
331 | // window_size-MIN_LOOKAHEAD At least one byte has been read, or eofile is |
||
332 | // set; file reads are performed for at least two bytes (required for the |
||
333 | // translate_eol option). |
||
334 | void ZipDflt::fill_window(void) |
||
335 | { |
||
336 | register unsigned n, m; |
||
337 | unsigned more; // Amount of free space at the end of the window. |
||
338 | |||
339 | do |
||
340 | { |
||
341 | more = (unsigned)(fwindow_size - (ulg)flookahead - (ulg)fstrstart); |
||
342 | |||
343 | // If the window is almost full and there is insufficient lookahead, |
||
344 | // move the upper half to the lower one to make room in the upper half. |
||
345 | |||
346 | if (more == (unsigned)EOF) |
||
347 | { |
||
348 | // Very unlikely, but possible on 16 bit machine if strstart == 0 and |
||
349 | // lookahead == 1 (input done one byte at time) |
||
350 | more--; |
||
351 | |||
352 | // For MMAP or BIG_MEM, the whole input file is already in memory so |
||
353 | // we must not perform sliding. We must however call file_read in order |
||
354 | // to compute the crc, update lookahead and possibly set eofile. |
||
355 | } |
||
356 | else |
||
357 | if (fstrstart >= ZWSIZE + MAX_DIST && fsliding) |
||
358 | { |
||
359 | // By the IN assertion, the window is not empty so we can't confuse |
||
360 | // more == 0 with more == 64K on a 16 bit machine. |
||
361 | memcpy((char *) fwindow, (char *) fwindow + ZWSIZE, (unsigned)ZWSIZE); |
||
362 | fmatch_start -= ZWSIZE; |
||
363 | fstrstart -= ZWSIZE; // we now have strstart |
||
364 | ///* >= MAX_DIST: |
||
365 | fblock_start -= (long)ZWSIZE; |
||
366 | |||
367 | for (n = 0; n < HASH_SIZE; n++) |
||
368 | { |
||
369 | m = fhead[n]; |
||
370 | fhead[n] = (Pos)(m >= ZWSIZE ? m - ZWSIZE : NIL); |
||
371 | } |
||
372 | |||
373 | for (n = 0; n < ZWSIZE; n++) |
||
374 | { |
||
375 | m = fprev[n]; |
||
376 | fprev[n] = (Pos)(m >= ZWSIZE ? m - ZWSIZE : NIL); |
||
377 | |||
378 | // If n is not on any hash chain, prev[n] is garbage but its value |
||
379 | // will never be used. |
||
380 | } |
||
381 | |||
382 | more += ZWSIZE; |
||
383 | |||
384 | if (Abort_Flag) |
||
385 | return; // UPDATE: the ziperr() func in dllzip.c closes file. |
||
386 | } |
||
387 | |||
388 | if (feofile) |
||
389 | return; |
||
390 | |||
391 | // If there was no sliding: strstart <= WSIZE+MAX_DIST - 1 & |
||
392 | // & |
||
393 | // lookahead <= MIN_LOOKAHEAD - 1 & |
||
394 | // & |
||
395 | // more == window_size - lookahead - strstart => more >= window_size - |
||
396 | // (MIN_LOOKAHEAD - 1 + WSIZE + MAX_DIST - 1) => more >= window_size - 2 |
||
397 | // * WSIZE + 2 In the BIG_MEM or MMAP case (not yet supported in gzip), |
||
398 | // window_size == input_size + MIN_LOOKAHEAD & |
||
399 | // & |
||
400 | // strstart + lookahead <= input_size => more >= MIN_LOOKAHEAD. |
||
401 | // Otherwise, window_size == 2 * ZWSIZE so more >= 2. If there was |
||
402 | // sliding, more >= ZWSIZE. So in all cases, more >= 2. |
||
403 | Assert(more >= 2, _T("more < 2")); |
||
404 | |||
405 | // n = (* fread_buf)((uch *) fwindow + fstrstart + flookahead, more); |
||
406 | n = read_buf((uch *)fwindow + fstrstart + flookahead, more); |
||
407 | |||
408 | if (n == 0 || n == (unsigned)EOF) |
||
409 | { |
||
410 | feofile = 1; |
||
411 | } |
||
412 | else |
||
413 | { |
||
414 | flookahead += n; |
||
415 | } |
||
416 | } |
||
417 | while (flookahead < MIN_LOOKAHEAD && !feofile); |
||
418 | } |
||
419 | |||
420 | /* |
||
421 | // Flush the current block, with given end-of-file flag. IN assertion: |
||
422 | // strstart is set to the end of the current match. |
||
423 | #define FLUSH_BLOCK(eof) \ |
||
424 | flush_block(fblock_start >= 0L ? (uch *) &fwindow[(unsigned)fblock_start] : \ |
||
425 | (uch *)NULL, (ulg)fstrstart - (ulg)fblock_start, (eof)) |
||
426 | */ |
||
427 | |||
428 | // Processes a new input file and return its compressed length. This |
||
429 | // function does not perform lazy evaluationof matches and inserts new |
||
430 | // strings in the dictionary only for unmatched strings or for short matches. |
||
431 | // It is used only for the fast compression options. |
||
432 | ZInt64 ZipDflt::deflate_fast(void) |
||
433 | { |
||
434 | unsigned hash_head; // head of the hash chain |
||
435 | int flush; // set if current block must be flushed |
||
436 | unsigned match_length = 0; // length of best match |
||
437 | fprev_length = MIN_MATCH - 1; |
||
438 | |||
439 | while (flookahead != 0) |
||
440 | { |
||
441 | // Insert the string window[strstart .. strstart+2] in the dictionary, |
||
442 | // and set hash_head to the head of the hash chain: |
||
443 | INSERT_STRING(fstrstart, hash_head); |
||
444 | |||
445 | // Find the longest match, discarding those <= prev_length. At this |
||
446 | // point we have always match_length < MIN_MATCH |
||
447 | |||
448 | if (hash_head != NIL && fstrstart - hash_head <= MAX_DIST) |
||
449 | { |
||
450 | // To simplify the code, we prevent matches with the string of window |
||
451 | // index 0 (in particular we have to avoid a match of the string with |
||
452 | // itself at the start of the input file). |
||
453 | #ifndef HUFFMAN_ONLY |
||
454 | match_length = longest_match(hash_head); |
||
455 | #endif |
||
456 | // longest_match() sets match_start |
||
457 | |||
458 | if (match_length > flookahead) |
||
459 | match_length = flookahead; |
||
460 | } |
||
461 | |||
462 | if (match_length >= MIN_MATCH) |
||
463 | { |
||
464 | #ifdef _DEBUG |
||
465 | check_match(fstrstart, fmatch_start, match_length); |
||
466 | #endif |
||
467 | flush = ct_tally(fstrstart - fmatch_start, match_length - MIN_MATCH); |
||
468 | |||
469 | flookahead -= match_length; |
||
470 | |||
471 | // Insert new strings in the hash table only if the match length is |
||
472 | // not too large. This saves time but degrades compression. |
||
473 | |||
474 | if (match_length <= max_insert_length) |
||
475 | { |
||
476 | match_length--; // string at strstart already in hash table |
||
477 | |||
478 | do |
||
479 | { |
||
480 | fstrstart++; |
||
481 | INSERT_STRING(fstrstart, hash_head); |
||
482 | |||
483 | // strstart never exceeds ZWSIZE-MAX_MATCH, so there are always |
||
484 | // MIN_MATCH bytes ahead. If lookahead < MIN_MATCH these bytes are |
||
485 | // garbage, but it does not matter since the next lookahead bytes |
||
486 | // will be emitted as literals. |
||
487 | } |
||
488 | while (--match_length != 0); |
||
489 | |||
490 | fstrstart++; |
||
491 | } |
||
492 | else |
||
493 | { |
||
494 | fstrstart += match_length; |
||
495 | match_length = 0; |
||
496 | fins_h = fwindow[fstrstart]; |
||
497 | UPDATE_HASH(fins_h, fwindow[fstrstart + 1]); |
||
498 | #if MIN_MATCH != 3 |
||
499 | Call UPDATE_HASH() MIN_MATCH - 3 more times |
||
500 | #endif |
||
501 | } |
||
502 | } |
||
503 | else |
||
504 | { |
||
505 | // No match, output a literal byte |
||
506 | // Tracevv(("%c"), (fwindow[fstrstart])); |
||
507 | flush = ct_tally(0, fwindow[fstrstart]); |
||
508 | flookahead--; |
||
509 | fstrstart++; |
||
510 | } |
||
511 | |||
512 | if (flush) |
||
513 | { |
||
514 | FlushBlock(0); |
||
515 | // FLUSH_BLOCK(0); |
||
516 | fblock_start = fstrstart; |
||
517 | } |
||
518 | |||
519 | // Make sure that we always have enough lookahead, except at the end of |
||
520 | // the input file. We need MAX_MATCH bytes for the next match, plus |
||
521 | // MIN_MATCH bytes to insert the string following the next match. |
||
522 | if (flookahead < MIN_LOOKAHEAD) |
||
523 | fill_window(); |
||
524 | |||
525 | if (Abort_Flag) |
||
526 | Fatal(DZ_ERM_ABORT, 0); |
||
527 | // throw DZAbort(DZ_ERM_ABORT); |
||
528 | |||
529 | // { |
||
530 | // the ziperr() func in dllzip.c closes file. |
||
531 | //return ZEN_ABORT; |
||
532 | // } |
||
533 | } |
||
534 | |||
535 | // return FLUSH_BLOCK(1); // eof |
||
536 | return FlushBlock(1); // eof |
||
537 | } |
||
538 | |||
539 | // Same as above, but achieves better compression. We use a lazy evaluation |
||
540 | // for matches: a match is finally adopted only if there is no better match |
||
541 | // at the next window position. |
||
542 | ZInt64 ZipDflt::deflate(void) |
||
543 | { |
||
544 | unsigned hash_head; // head of hash chain |
||
545 | unsigned prev_match; // previous match |
||
546 | int flush; // set if current block |
||
547 | ///* must be flushed |
||
548 | int match_available = 0; // set if previous match |
||
549 | ///* exists |
||
550 | register unsigned match_length = MIN_MATCH - 1; // length of best match |
||
551 | |||
552 | if (flevel <= 3) |
||
553 | return deflate_fast(); // optimized for speed |
||
554 | |||
555 | // Process the input block. |
||
556 | while (flookahead != 0) |
||
557 | { |
||
558 | // if (fout_offset > fout_size || fout_size > sizeof(ffile_outbuf)) |
||
559 | // { |
||
560 | // Notify(IWARNING, _T("Write error in flush_outbuf - out_size %d [%d]"), |
||
561 | // fout_offset, fout_size); |
||
562 | // throw DZException(DZ_ERM_ERROR_WRITE); |
||
563 | // } |
||
564 | // Insert the string window[strstart .. strstart+2] in the dictionary, |
||
565 | // and set hash_head to the head of the hash chain: |
||
566 | INSERT_STRING(fstrstart, hash_head); |
||
567 | // if (fout_offset > fout_size || fout_size > sizeof(ffile_outbuf)) |
||
568 | // { |
||
569 | // Notify(IWARNING, _T("Write error in flush_outbuf - out_size %d [%d]"), |
||
570 | // fout_offset, fout_size); |
||
571 | // throw DZException(DZ_ERM_ERROR_WRITE); |
||
572 | // } |
||
573 | |||
574 | // Find the longest match, discarding those <= prev_length. |
||
575 | fprev_length = match_length, prev_match = fmatch_start; |
||
576 | match_length = MIN_MATCH - 1; |
||
577 | |||
578 | if (hash_head != NIL && fprev_length < fmax_lazy_match |
||
579 | && fstrstart - hash_head <= MAX_DIST) |
||
580 | { |
||
581 | // To simplify the code, we prevent matches with the string of window |
||
582 | // index 0 (in particular we have to avoid a match of the string with |
||
583 | // itself at the start of the input file). |
||
584 | #ifndef HUFFMAN_ONLY |
||
585 | match_length = longest_match(hash_head); |
||
586 | #endif |
||
587 | // longest_match() sets match_start |
||
588 | |||
589 | if (match_length > flookahead) |
||
590 | match_length = flookahead; |
||
591 | |||
592 | /* |
||
593 | #ifdef FILTERED |
||
594 | // Ignore matches of length <= 5 |
||
595 | if (match_length <= 5) |
||
596 | { |
||
597 | #else */ |
||
598 | // Ignore a length 3 match if it is too distant: |
||
599 | if ( |
||
600 | #ifdef FILTERED |
||
601 | match_length <= 5 && |
||
602 | #endif |
||
603 | match_length == MIN_MATCH && fstrstart - fmatch_start > TOO_FAR) |
||
604 | { |
||
605 | //#endif |
||
606 | // If prev_match is also MIN_MATCH, match_start is garbage but we |
||
607 | // will ignore the current match anyway. |
||
608 | match_length = MIN_MATCH - 1; |
||
609 | } |
||
610 | } |
||
611 | |||
612 | // If there was a match at the previous step and the current match is |
||
613 | // not better, output the previous match: |
||
614 | if (fprev_length >= MIN_MATCH && match_length <= fprev_length) |
||
615 | { |
||
616 | #ifdef _DEBUG |
||
617 | check_match(fstrstart - 1, prev_match, fprev_length); |
||
618 | #endif |
||
619 | flush = ct_tally(fstrstart - 1 - prev_match, fprev_length - MIN_MATCH); |
||
620 | |||
621 | // Insert in hash table all strings up to the end of the match. |
||
622 | // strstart-1 and strstart are already inserted. |
||
623 | flookahead -= fprev_length - 1; |
||
624 | fprev_length -= 2; |
||
625 | |||
626 | do |
||
627 | { |
||
628 | fstrstart++; |
||
629 | INSERT_STRING(fstrstart, hash_head); |
||
630 | |||
631 | // strstart never exceeds ZWSIZE-MAX_MATCH, so there are always |
||
632 | // MIN_MATCH bytes ahead. If lookahead < MIN_MATCH these bytes are |
||
633 | // garbage, but it does not matter since the next lookahead bytes |
||
634 | // will always be emitted as literals. |
||
635 | } |
||
636 | while (--fprev_length != 0); |
||
637 | |||
638 | match_available = 0; |
||
639 | match_length = MIN_MATCH - 1; |
||
640 | fstrstart++; |
||
641 | |||
642 | if (flush) |
||
643 | { |
||
644 | // FLUSH_BLOCK(0); |
||
645 | FlushBlock(0); |
||
646 | fblock_start = fstrstart; |
||
647 | } |
||
648 | //if (fout_offset > fout_size || fout_size > sizeof(ffile_outbuf)) |
||
649 | //{ |
||
650 | // Notify(IWARNING, _T("Write error in flush_outbuf - out_size %d [%d]"), |
||
651 | // fout_offset, fout_size); |
||
652 | // throw DZException(DZ_ERM_ERROR_WRITE); |
||
653 | //} |
||
654 | |||
655 | } |
||
656 | else |
||
657 | if (match_available) |
||
658 | { |
||
659 | //if (fout_offset > fout_size || fout_size > sizeof(ffile_outbuf)) |
||
660 | //{ |
||
661 | // Notify(IWARNING, _T("Write error in flush_outbuf - out_size %d [%d]"), |
||
662 | // fout_offset, fout_size); |
||
663 | // throw DZException(DZ_ERM_ERROR_WRITE); |
||
664 | //} |
||
665 | |||
666 | // If there was no match at the previous position, output a single |
||
667 | // literal. If there was a match but the current match is longer, |
||
668 | // truncate the previous match to a single literal. |
||
669 | // Tracevv(("%c"), (fwindow[fstrstart - 1])); |
||
670 | if (ct_tally(0, fwindow[fstrstart - 1])) |
||
671 | { |
||
672 | // FLUSH_BLOCK(0); |
||
673 | FlushBlock(0); |
||
674 | fblock_start = fstrstart; |
||
675 | } |
||
676 | fstrstart++; |
||
677 | flookahead--; |
||
678 | } |
||
679 | else |
||
680 | { |
||
681 | // There is no previous match to compare with, wait for the next |
||
682 | // step to decide. |
||
683 | match_available = 1; |
||
684 | fstrstart++; |
||
685 | flookahead--; |
||
686 | } |
||
687 | |||
688 | Assert(fstrstart <= fisize && flookahead <= fisize, _T("a bit too far")); |
||
689 | |||
690 | #ifdef _DEBUG |
||
691 | if (!(fstrstart <= fisize && flookahead <= fisize)) |
||
692 | Notify(DZ_ERM_LOGIC_ERROR, _T("a bit too far")); |
||
693 | #endif |
||
694 | // Make sure that we always have enough lookahead, except at the end |
||
695 | // of the input file. We need MAX_MATCH bytes for the next match, plus |
||
696 | // MIN_MATCH bytes to insert the string following the next match. |
||
697 | if (flookahead < MIN_LOOKAHEAD) |
||
698 | fill_window(); |
||
699 | |||
700 | if (Abort_Flag) |
||
701 | Fatal(DZ_ERM_ABORT, 0); |
||
702 | // throw DZAbort(DZ_ERM_ABORT); |
||
703 | // { |
||
704 | // UPDATE: the ziperr() func in dllzip.c closes file. |
||
705 | // return -ZEN_ABORT; |
||
706 | //} |
||
707 | } |
||
708 | |||
709 | if (match_available) |
||
710 | ct_tally(0, fwindow[fstrstart - 1]); |
||
711 | |||
712 | // return FLUSH_BLOCK(1); // eof |
||
713 | return FlushBlock(1); //eof |
||
714 | } |
||
715 | |||
716 | /* 30/1/07 */ |
||
717 | |||
718 | |||
719 | |||
720 |