WebSVN – autosfx – Blame – /zmstr1900102/DLL/dz1900087src/Source/lngmtch.cpp

Rev	Author	Line No.	Line
1	daniel-mar	1	#include "stdafx.h"
		2	#pragma hdrstop
		3	#if defined(_USE_ASM_) && (__BORLANDC__ < 0x0570)
		4	#pragma inline
		5	#endif
		6	#include "ZipDflt.h"
		7	#include "dz_errs.h"
		8
		9	#undef _DZ_FILE_
		10	#define _DZ_FILE_ DZ_LNGMTCH_CPP
		11
		12	/*
		13	Lngmtch.cpp - zip 'longmatch' function
		14	* part of Deflate.c
		15	* Copyright (C) 1990-1996 Mark Adler, Richard B. Wales, Jean-loup Gailly,
		16	* Kai Uwe Rommel, Onno van der Linden and Igor Mandrichenko.
		17	* This version modified by Chris Vleghert and Eric Engler for BCB/Delphi Zip.
		18	distributed under LGPL license see license.txt for details
		19
		20	Copyright (c) 1990-2007 Info-ZIP. All rights reserved.
		21
		22	See the accompanying file LICENSE, version 2007-Mar-4 or later
		23	(the contents of which are also included in zip.h) for terms of use.
		24	If, for some reason, all these files are missing, the Info-ZIP license
		25	also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
		26
		27	parts Copyright (C) 1997 Mike White, Eric W. Engler
		28	************************************************************************
		29	Copyright (C) 2009, 2010 by Russell J. Peters, Roger Aelbrecht
		30
		31	This file is part of TZipMaster Version 1.9.
		32
		33	TZipMaster is free software: you can redistribute it and/or modify
		34	it under the terms of the GNU Lesser General Public License as published by
		35	the Free Software Foundation, either version 3 of the License, or
		36	(at your option) any later version.
		37
		38	TZipMaster is distributed in the hope that it will be useful,
		39	but WITHOUT ANY WARRANTY; without even the implied warranty of
		40	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		41	GNU Lesser General Public License for more details.
		42
		43	You should have received a copy of the GNU Lesser General Public License
		44	along with TZipMaster. If not, see <http://www.gnu.org/licenses/>.
		45
		46	contact: problems@delphizip.org (include ZipMaster in the subject).
		47	updates: http://www.delphizip.org
		48	DelphiZip maillist subscribe at http://www.freelists.org/list/delphizip
		49	************************************************************************
		50
		51	/* PURPOSE
		52	* Identify new text as repetitions of old text within a fixed-
		53	* length sliding window trailing behind the new text.
		54	*
		55	* DISCUSSION
		56	* The "deflation" process depends on being able to identify portions
		57	* of the input text which are identical to earlier input (within a
		58	* sliding window trailing behind the input currently being processed).
		59	*
		60	* The most straightforward technique turns out to be the fastest for
		61	* most input files: try all possible matches and select the longest.
		62	* The key feature of this algorithm is that insertions into the string
		63	* dictionary are very simple and thus fast, and deletions are avoided
		64	* completely. Insertions are performed at each input character, whereas
		65	* string matches are performed only when the previous match ends. So it
		66	* is preferable to spend more time in matches to allow very fast string
		67	* insertions and avoid deletions. The matching algorithm for small
		68	* strings is inspired from that of Rabin & Karp. A brute force approach
		69	* is used to find longer strings when a small match has been found.
		70	* A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
		71	* (by Leonid Broukhis).
		72	* A previous version of this file used a more sophisticated algorithm
		73	* (by Fiala and Greene) which is guaranteed to run in linear amortized
		74	* time, but has a larger average cost, uses more memory and is patented.
		75	* However the F&G algorithm may be faster for some highly redundant
		76	* files if the parameter max_chain_length (described below) is too large.
		77	*
		78	* ACKNOWLEDGEMENTS
		79	* The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
		80	* I found it in 'freeze' written by Leonid Broukhis.
		81	* Thanks to many info-zippers for bug reports and testing.
		82	*
		83	* REFERENCES
		84	* APPNOTE.TXT documentation file in PKZIP 1.93a distribution.
		85	*
		86	* A description of the Rabin and Karp algorithm is given in the book
		87	* "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
		88	*
		89	* Fiala,E.R., and Greene,D.H.
		90	* Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
		91	*
		92	* INTERFACE
		93	* void lm_init (int pack_level, ush *flags)
		94	* Initialize the "longest match" routines for a new file
		95	*
		96	* ulg deflate (void)
		97	* Processes a new input file and return its compressed length. Sets
		98	* the compressed length, crc, deflate flags and internal file
		99	* attributes.
		100	*/
		101
		102
		103	#define NIL 0
		104
		105	// Tail of hash chains
		106	#define FAST 4
		107	#define SLOW 2
		108
		109	// speed options for the general purpose bit flag
		110	#define TOO_FAR 4096
		111
		112	// Matches of length 3 are discarded if their distance exceeds TOO_FAR
		113	// Local data used by the "longest match" routines.
		114	typedef unsigned IPos;
		115
		116	// Set match_start to the longest match starting at the given string and
		117	// return its length. Matches shorter or equal to prev_length are discarded,
		118	// in which case the result is equal to prev_length and match_start is
		119	// garbage. IN assertions: cur_match is the head of the hash chain for the
		120	// current string (strstart) and its distance is <= MAX_DIST, and prev_length
		121	// >= 1
		122	#ifndef _USE_ASM_
		123	int __fastcall ZipDflt::longest_match(unsigned cur_match)
		124	{
		125	unsigned chain_length = fmax_chain_length; // max hash chain length
		126	register uch *scan = fwindow + fstrstart; // current string
		127	register uch *match; // matched string
		128	register int len; // length of current match
		129	int best_len = fprev_length; // best match length so far
		130	unsigned limit = fstrstart > (unsigned) MAX_DIST ? fstrstart -
		131	(unsigned) MAX_DIST : NIL;
		132
		133	// Stop when cur_match becomes <= limit. To simplify the code, we prevent
		134	// matches with the string of window index 0.
		135	// The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of
		136	// 16. It is easy to get rid of this optimization if necessary.
		137	#if HASH_BITS < 8 \|\| MAX_MATCH != 258
		138	error :
		139	Code too clever
		140	#endif
		141	register uch * strend = fwindow + fstrstart + MAX_MATCH;
		142
		143	register uch scan_end1 = scan[best_len - 1];
		144	register uch scan_end = scan[best_len];
		145
		146	// Do not waste too much time if we already have a good match:
		147
		148	if (fprev_length >= fgood_match)
		149	chain_length >>= 2;
		150
		151	Assert(fstrstart <= fwindow_size - MIN_LOOKAHEAD, _T("insufficient lookahead"));
		152
		153	do
		154	{
		155	Assert(cur_match < fstrstart, _T("no future"));
		156	match = fwindow + cur_match;
		157
		158	// Skip to next match if the match length cannot increase or if the
		159	// match length is less than 2:
		160
		161	if (match[best_len] != scan_end
		162	\|\| match[best_len - 1] != scan_end1
		163	\|\| match != scan
		164	\|\| *++match != scan[1])
		165	continue;
		166
		167	// The check at best_len-1 can be removed because it will be made again
		168	// later. (This heuristic is not always a win.) It is not necessary to
		169	// compare scan[2] and match[2] since they are always equal when the
		170	// other bytes match, given that the hash keys are equal and that
		171	// HASH_BITS >= 8.
		172	scan += 2, match++;
		173
		174	// We check for insufficient lookahead only every 8th comparison; the
		175	// 256th check will be made at strstart+258.
		176	do
		177	{}
		178	while
		179	(
		180	++scan == ++match
		181	&& ++scan == ++match
		182	&& ++scan == ++match
		183	&& ++scan == ++match
		184	&& ++scan == ++match
		185	&& ++scan == ++match
		186	&& ++scan == ++match
		187	&& ++scan == ++match
		188	&& scan < strend
		189	);
		190
		191	Assert(scan <= fwindow + (unsigned)(fwindow_size - 1), _T("wild scan"));
		192
		193	len = MAX_MATCH - (int)(strend - scan);
		194
		195	scan = strend - MAX_MATCH;
		196
		197	if (len > best_len)
		198	{
		199	fmatch_start = cur_match;
		200	best_len = len;
		201
		202	if (len >= fnice_match)
		203	break;
		204
		205	scan_end1 = scan[best_len - 1];
		206
		207	scan_end = scan[best_len];
		208	}
		209	}
		210	while ((cur_match = fprev[cur_match & WMASK]) > limit && --chain_length != 0);
		211
		212	return best_len;
		213	}
		214
		215	#else
		216
		217	// The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
		218	// It is easy to get rid of this optimization if necessary.
		219	#if HASH_BITS < 8 \|\| MAX_MATCH != 258
		220
		221	error :
		222	Code too clever
		223	#endif
		224	// esp cur_match (D) +4 chain_length (D) +8 limit (D) +12 scan_end (W)
		225	int __fastcall ZipDflt::longest_match(unsigned cur_match)
		226	{
		227	#pragma warn - rvl
		228	#pragma argsused
		229	asm
		230	{
		231	add esp, -16
		232	mov dword ptr[esp], edx // cur_match
		233	mov edx, eax // pG
		234
		235	// unsigned chain_length = pG->max_chain_length; /* max hash chain length
		236	// ; EDX = pG
		237	mov eax, dword ptr[edx + .fmax_chain_length];
		238	mov dword ptr[esp + 4], eax
		239
		240	// register uch scan = pG->window + pG->strstart; / current string
		241	// register uch match; / matched string
		242	// register int len; /* length of current match
		243	mov eax, dword ptr[edx +.fstrstart];
		244	mov ecx, eax
		245	add ecx, edx
		246	add ecx, .fwindow;
		247
		248	// int best_len = pG->prev_length; /* best match length so far
		249	// ; ECX = scan, EDX = pG, EAX = @temp11
		250	mov edi, dword ptr[edx +.fprev_length];
		251
		252	// unsigned limit =
		253	// pG->strstart > (unsigned) MAX_DIST ? pG->strstart - (unsigned) MAX_DIST : NIL;
		254	// Stop when cur_match becomes <= limit. To simplify the code, we
		255	// prevent matches with the string of window index 0.
		256	// ; ECX = scan, EDX = pG, EDI = best_len, EAX = @temp11
		257	cmp eax, MAX_DIST;
		258	jbe short L2
		259	mov eax, dword ptr[edx +.fstrstart];
		260	sub eax, MAX_DIST;
		261	jmp short L3
		262	L2:
		263	xor eax, eax
		264	L3:
		265	mov dword ptr[esp + 8], eax
		266
		267	// register uch *strend = pG->window + pG->strstart + MAX_MATCH;
		268	// ; ECX = scan, EDX = pG, EDI = best_len
		269	// register uch scan_end1 = scan[best_len - 1];
		270	// register uch scan_end = scan[best_len];
		271	// ; ECX = scan, EDX = pG, EDI = best_len, EAX = strendXX
		272	mov bx, word ptr[ecx + edi - 1]
		273	mov word ptr[esp + 12], bx
		274
		275	// ; ECX = scan, EDX = pG, EDI = best_len, EAX =
		276	// Do not waste too much time if we already have a good match:
		277	// if (pG->prev_length >= pG->good_match)
		278	// chain_length >>= 2;
		279	mov ebx, dword ptr[edx +.fprev_length];
		280	cmp ebx, dword ptr[edx +.fgood_match];
		281	jb short L4
		282	shr dword ptr[esp + 4], 2 // chain_length
		283	L4:
		284	L5:
		285	// match = pG->window + cur_match;
		286	mov esi, dword ptr[esp] // cur_match
		287	add esi, edx
		288	add esi, .fwindow;
		289
		290	// Skip to next match if the match length cannot increase or if the
		291	// match length is less than 2:
		292	// if (match[best_len] != scan_end \|\|
		293	// match[best_len - 1] != scan_end1 \|\|
		294	// match != scan \|\| *++match != scan[1])
		295	// continue;
		296	// ; ECX = scan, ESI = match, EDX = pG, EDI = best_len, EAX = ??
		297	mov bx, word ptr[esi + edi - 1]
		298	cmp bx, word ptr[esp + 12] // scan_end
		299	jne L8
		300	mov bx, word ptr[esi]
		301	cmp bx, word ptr[ecx]
		302	jne short L8
		303
		304	// The check at best_len-1 can be removed because it will be made again
		305	// later. (This heuristic is not always a win.) It is not necessary to
		306	// compare scan[2] and match[2] since they are always equal when the
		307	// other bytes match, given that the hash keys are equal and that
		308	// HASH_BITS >= 8.
		309	// scan += 2, match++;
		310	// ; ECX = scan, EDX = pG, EDI = best_len, EAX = len
		311	mov eax, 2 // start 3rd byte
		312
		313	// We check for insufficient lookahead only every 8th comparison; the
		314	// 256th check will be made at strstart+258.
		315	// ; ECX = scan, ESI = match, EDX = pG, EDI = best_len, EAX = len
		316	L9:
		317	inc eax
		318	mov ebx, [ecx + eax]
		319	xor ebx, [esi + eax]
		320	test ebx, 0xFF // byte 0
		321	jnz short L10
		322	inc eax
		323	test ebx, 0xFF00 // byte 1
		324	jnz short L10
		325	inc eax
		326	test ebx, 0xFF0000 // byte 2
		327	jnz short L10
		328	inc eax
		329	test ebx, ebx // byte 3
		330	jnz short L10
		331	inc eax
		332	mov ebx, [ecx + eax]
		333	xor ebx, [esi + eax]
		334	test ebx, 0xFF // byte 0
		335	jnz short L10
		336	inc eax
		337	test ebx, 0xFF00 // byte 1
		338	jnz short L10
		339	inc eax
		340	test ebx, 0xFF0000 // byte 2
		341	jnz short L10
		342	inc eax
		343	test ebx, ebx // byte 3
		344	jnz short L10
		345
		346	cmp eax, MAX_MATCH
		347	jb short L9
		348
		349	L10:
		350	// ; ECX = scan, ESI = match, EDX = pG, EDI = best_len, EAX = len
		351	// free EBX , ESI = match
		352	// if (len > best_len)
		353	cmp eax, edi
		354	jl short L11
		355
		356	// pG->match_start = cur_match;
		357	// ; ECX = scan, EDX = pG, EAX = strend, EBX = len
		358	mov esi, dword ptr[esp]
		359	mov dword ptr[edx +.fmatch_start], esi
		360
		361	// best_len = len;
		362	mov edi, eax
		363
		364	// if (len >= pG->nice_match)
		365	// break;
		366	// ; ECX = scan, EDX = pG, EDI = best_len, EAX = len, EBX = ??
		367	cmp eax, dword ptr[edx +.fnice_match];
		368	jge short L14 // break
		369
		370	// scan_end1 = scan[best_len - 1];
		371	// ; EDI = best_len
		372	mov bx, word ptr[ecx + edi - 1]
		373	mov word ptr[esp + 12], bx // scan_end
		374
		375	L11:
		376	// while ((cur_match = pG->prev[cur_match & WMASK]) > limit && --chain_length != 0);
		377	L8:
		378	mov ebx, dword ptr[esp]
		379	and ebx, WMASK;
		380	movzx ebx, word ptr[edx + 2 * ebx +.fprev];
		381	mov dword ptr[esp], ebx // cur_match
		382	cmp ebx, dword ptr[esp + 8]
		383	jbe short L14
		384	dec dword ptr[esp + 4] // chain_length
		385	jne L5
		386	// ; EDI = best_len
		387
		388	L14:
		389	// return best_len;
		390	mov eax, edi
		391	add esp, 16
		392	};
		393	};
		394
		395	#endif
		396
		397

Subversion Repositories autosfx

autosfx/zmstr1900102/DLL/dz1900087src/Source/lngmtch.cpp – Rev 1