Subversion Repositories autosfx

Rev

Blame | Last modification | View Log | RSS feed

  1. #include "stdafx.h"
  2. #pragma hdrstop
  3. #if defined(_USE_ASM_) && (__BORLANDC__ < 0x0570)
  4. #pragma inline
  5. #endif
  6. #include "ZipDflt.h"
  7. #include "dz_errs.h"
  8.  
  9. #undef _DZ_FILE_
  10. #define _DZ_FILE_ DZ_LNGMTCH_CPP
  11.  
  12. /*
  13.   Lngmtch.cpp - zip 'longmatch' function
  14. * part of Deflate.c
  15.  * Copyright (C) 1990-1996 Mark Adler, Richard B. Wales, Jean-loup Gailly,
  16.  * Kai Uwe Rommel, Onno van der Linden and Igor Mandrichenko.
  17.  * This version modified by Chris Vleghert and Eric Engler for BCB/Delphi Zip.
  18.   ** distributed under LGPL license ** see license.txt for details
  19.  
  20.   Copyright (c) 1990-2007 Info-ZIP.  All rights reserved.
  21.  
  22.   See the accompanying file LICENSE, version 2007-Mar-4 or later
  23.   (the contents of which are also included in zip.h) for terms of use.
  24.   If, for some reason, all these files are missing, the Info-ZIP license
  25.   also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
  26.  
  27.   parts Copyright (C) 1997 Mike White, Eric W. Engler
  28. ************************************************************************
  29.  Copyright (C) 2009, 2010  by Russell J. Peters, Roger Aelbrecht
  30.  
  31.    This file is part of TZipMaster Version 1.9.
  32.  
  33.     TZipMaster is free software: you can redistribute it and/or modify
  34.     it under the terms of the GNU Lesser General Public License as published by
  35.     the Free Software Foundation, either version 3 of the License, or
  36.     (at your option) any later version.
  37.  
  38.     TZipMaster is distributed in the hope that it will be useful,
  39.     but WITHOUT ANY WARRANTY; without even the implied warranty of
  40.     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  41.     GNU Lesser General Public License for more details.
  42.  
  43.     You should have received a copy of the GNU Lesser General Public License
  44.     along with TZipMaster.  If not, see <http://www.gnu.org/licenses/>.
  45.  
  46.     contact: problems@delphizip.org (include ZipMaster in the subject).
  47.     updates: http://www.delphizip.org
  48.     DelphiZip maillist subscribe at http://www.freelists.org/list/delphizip
  49. ************************************************************************
  50.  
  51. /*  PURPOSE
  52.  *      Identify new text as repetitions of old text within a fixed-
  53.  *      length sliding window trailing behind the new text.
  54.  *
  55.  *  DISCUSSION
  56.  *      The "deflation" process depends on being able to identify portions
  57.  *      of the input text which are identical to earlier input (within a
  58.  *      sliding window trailing behind the input currently being processed).
  59.  *
  60.  *      The most straightforward technique turns out to be the fastest for
  61.  *      most input files: try all possible matches and select the longest.
  62.  *      The key feature of this algorithm is that insertions into the string
  63.  *      dictionary are very simple and thus fast, and deletions are avoided
  64.  *      completely. Insertions are performed at each input character, whereas
  65.  *      string matches are performed only when the previous match ends. So it
  66.  *      is preferable to spend more time in matches to allow very fast string
  67.  *      insertions and avoid deletions. The matching algorithm for small
  68.  *      strings is inspired from that of Rabin & Karp. A brute force approach
  69.  *      is used to find longer strings when a small match has been found.
  70.  *      A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
  71.  *      (by Leonid Broukhis).
  72.  *         A previous version of this file used a more sophisticated algorithm
  73.  *      (by Fiala and Greene) which is guaranteed to run in linear amortized
  74.  *      time, but has a larger average cost, uses more memory and is patented.
  75.  *      However the F&G algorithm may be faster for some highly redundant
  76.  *      files if the parameter max_chain_length (described below) is too large.
  77.  *
  78.  *  ACKNOWLEDGEMENTS
  79.  *      The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
  80.  *      I found it in 'freeze' written by Leonid Broukhis.
  81.  *      Thanks to many info-zippers for bug reports and testing.
  82.  *
  83.  *  REFERENCES
  84.  *      APPNOTE.TXT documentation file in PKZIP 1.93a distribution.
  85.  *
  86.  *      A description of the Rabin and Karp algorithm is given in the book
  87.  *         "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
  88.  *
  89.  *      Fiala,E.R., and Greene,D.H.
  90.  *         Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
  91.  *
  92.  *  INTERFACE
  93.  *      void lm_init (int pack_level, ush *flags)
  94.  *          Initialize the "longest match" routines for a new file
  95.  *
  96.  *      ulg deflate (void)
  97.  *          Processes a new input file and return its compressed length. Sets
  98.  *          the compressed length, crc, deflate flags and internal file
  99.  *          attributes.
  100.  */
  101.  
  102.  
  103. #define NIL 0
  104.  
  105. // Tail of hash chains
  106. #define FAST  4
  107. #define SLOW  2
  108.  
  109. // speed options for the general purpose bit flag
  110. #define TOO_FAR 4096
  111.  
  112. // Matches of length 3 are discarded if their distance exceeds TOO_FAR
  113. // Local data used by the "longest match" routines.
  114. typedef unsigned  IPos;
  115.  
  116. // Set match_start to the longest match starting at the given string and
  117. //   return its length. Matches shorter or equal to prev_length are discarded,
  118. //   in which case the result is equal to prev_length and match_start is
  119. //   garbage. IN assertions: cur_match is the head of the hash chain for the
  120. //   current string (strstart) and its distance is <= MAX_DIST, and prev_length
  121. //   >= 1
  122. #ifndef _USE_ASM_
  123. int __fastcall ZipDflt::longest_match(unsigned cur_match)
  124. {
  125.     unsigned      chain_length = fmax_chain_length;  // max hash chain length
  126.     register uch  *scan = fwindow + fstrstart;    // current string
  127.     register uch  *match;                     // matched string
  128.     register int  len;                        // length of current match
  129.     int           best_len = fprev_length; // best match length so far
  130.     unsigned          limit = fstrstart > (unsigned) MAX_DIST ? fstrstart -
  131.                               (unsigned) MAX_DIST : NIL;
  132.  
  133.     // Stop when cur_match becomes <= limit. To simplify the code, we prevent
  134.     //   matches with the string of window index 0.
  135.     // The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of
  136.     //   16. It is easy to get rid of this optimization if necessary.
  137. #if HASH_BITS < 8 || MAX_MATCH != 258
  138. error :
  139.     Code too clever
  140. #endif
  141.     register uch * strend = fwindow + fstrstart + MAX_MATCH;
  142.  
  143.     register uch  scan_end1 = scan[best_len - 1];
  144.     register uch  scan_end = scan[best_len];
  145.  
  146.     // Do not waste too much time if we already have a good match:
  147.  
  148.     if (fprev_length >= fgood_match)
  149.         chain_length >>= 2;
  150.  
  151.     Assert(fstrstart <= fwindow_size - MIN_LOOKAHEAD, _T("insufficient lookahead"));
  152.  
  153.     do
  154.     {
  155.         Assert(cur_match < fstrstart, _T("no future"));
  156.         match = fwindow + cur_match;
  157.  
  158.         // Skip to next match if the match length cannot increase or if the
  159.         //   match length is less than 2:
  160.  
  161.         if (match[best_len] != scan_end
  162.                 || match[best_len - 1] != scan_end1
  163.                 || *match != *scan
  164.                 || *++match != scan[1])
  165.             continue;
  166.  
  167.         // The check at best_len-1 can be removed because it will be made again
  168.         //   later. (This heuristic is not always a win.) It is not necessary to
  169.         //   compare scan[2] and match[2] since they are always equal when the
  170.         //   other bytes match, given that the hash keys are equal and that
  171.         //   HASH_BITS >= 8.
  172.         scan += 2, match++;
  173.  
  174.         // We check for insufficient lookahead only every 8th comparison; the
  175.         //   256th check will be made at strstart+258.
  176.         do
  177.             {}
  178.         while
  179.         (
  180.             *++scan == * ++match
  181.             && *++scan == * ++match
  182.             && *++scan == * ++match
  183.             && *++scan == * ++match
  184.             && *++scan == * ++match
  185.             && *++scan == * ++match
  186.             && *++scan == * ++match
  187.             && *++scan == * ++match
  188.             && scan < strend
  189.         );
  190.  
  191.         Assert(scan <= fwindow + (unsigned)(fwindow_size - 1), _T("wild scan"));
  192.  
  193.         len = MAX_MATCH - (int)(strend - scan);
  194.  
  195.         scan = strend - MAX_MATCH;
  196.  
  197.         if (len > best_len)
  198.         {
  199.             fmatch_start = cur_match;
  200.             best_len = len;
  201.  
  202.             if (len >= fnice_match)
  203.                 break;
  204.  
  205.             scan_end1 = scan[best_len - 1];
  206.  
  207.             scan_end = scan[best_len];
  208.         }
  209.     }
  210.     while ((cur_match = fprev[cur_match & WMASK]) > limit && --chain_length != 0);
  211.  
  212.     return best_len;
  213. }
  214.  
  215. #else
  216.  
  217. // The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
  218. //   It is easy to get rid of this optimization if necessary.
  219. #if HASH_BITS < 8 || MAX_MATCH != 258
  220.  
  221. error :
  222. Code too clever
  223. #endif
  224. // esp cur_match (D) +4 chain_length (D) +8 limit (D) +12 scan_end (W)
  225. int __fastcall ZipDflt::longest_match(unsigned cur_match)
  226. {
  227. #pragma warn - rvl
  228. #pragma argsused
  229.     asm
  230.     {
  231.                 add esp, -16
  232.         mov dword ptr[esp], edx // cur_match
  233.         mov edx, eax  // pG
  234.  
  235.         // unsigned chain_length = pG->max_chain_length; /* max hash chain length
  236.         // ; EDX = pG
  237.         mov eax, dword ptr[edx + .fmax_chain_length];
  238.         mov dword         ptr[esp + 4], eax
  239.  
  240.         // register uch *scan = pG->window + pG->strstart; /* current string
  241.         // register uch *match; /* matched string
  242.         // register int len; /* length of current match
  243.         mov eax, dword ptr[edx +.fstrstart];
  244.         mov ecx, eax
  245.         add ecx, edx
  246.         add ecx, .fwindow;
  247.  
  248.         // int best_len = pG->prev_length; /* best match length so far
  249.         // ; ECX = scan, EDX = pG, EAX = @temp11
  250.         mov edi, dword ptr[edx +.fprev_length];
  251.  
  252.         // unsigned limit =
  253.         // pG->strstart > (unsigned) MAX_DIST ? pG->strstart - (unsigned) MAX_DIST : NIL;
  254.         // Stop when cur_match becomes <= limit. To simplify the code, we
  255.         //   prevent matches with the string of window index 0.
  256.         // ; ECX = scan, EDX = pG, EDI = best_len, EAX = @temp11
  257.         cmp eax, MAX_DIST;
  258.         jbe short L2
  259.         mov  eax, dword ptr[edx +.fstrstart];
  260.         sub  eax, MAX_DIST;
  261.                 jmp short L3
  262. L2:
  263.                 xor eax, eax
  264. L3:
  265.         mov dword ptr[esp + 8], eax
  266.  
  267.         // register uch *strend = pG->window + pG->strstart + MAX_MATCH;
  268.         // ; ECX = scan, EDX = pG, EDI = best_len
  269.         // register uch scan_end1 = scan[best_len - 1];
  270.         // register uch scan_end = scan[best_len];
  271.         // ; ECX = scan, EDX = pG, EDI = best_len, EAX = strendXX
  272.         mov bx, word ptr[ecx + edi - 1]
  273.         mov word ptr[esp + 12], bx
  274.  
  275.         // ; ECX = scan, EDX = pG, EDI = best_len, EAX =
  276.         // Do not waste too much time if we already have a good match:
  277.         // if (pG->prev_length >= pG->good_match)
  278.         // chain_length >>= 2;
  279.         mov ebx, dword ptr[edx +.fprev_length];
  280.         cmp ebx, dword ptr[edx +.fgood_match];
  281.         jb short L4
  282.                 shr dword ptr[esp + 4], 2   // chain_length
  283. L4:
  284. L5:
  285.         // match = pG->window + cur_match;
  286.         mov esi, dword ptr[esp]                 // cur_match
  287.         add esi, edx
  288.         add esi, .fwindow;
  289.  
  290.         // Skip to next match if the match length cannot increase or if the
  291.         //   match length is less than 2:
  292.         // if (match[best_len] != scan_end ||
  293.         // match[best_len - 1] != scan_end1 ||
  294.         // *match != *scan || *++match != scan[1])
  295.         // continue;
  296.         // ; ECX = scan, ESI = match, EDX = pG, EDI = best_len, EAX = ??
  297.         mov  bx, word ptr[esi + edi - 1]
  298.         cmp bx, word ptr[esp + 12]  // scan_end
  299.         jne L8
  300.         mov bx, word ptr[esi]
  301.         cmp bx, word ptr[ecx]
  302.         jne short L8
  303.  
  304.         // The check at best_len-1 can be removed because it will be made again
  305.         //   later. (This heuristic is not always a win.) It is not necessary to
  306.         //   compare scan[2] and match[2] since they are always equal when the
  307.         //   other bytes match, given that the hash keys are equal and that
  308.         //   HASH_BITS >= 8.
  309.         // scan += 2, match++;
  310.         // ; ECX = scan, EDX = pG, EDI = best_len, EAX = len
  311.         mov eax, 2  // start 3rd byte
  312.  
  313.         // We check for insufficient lookahead only every 8th comparison; the
  314.         //   256th check will be made at strstart+258.
  315.                 // ; ECX = scan, ESI = match, EDX = pG, EDI = best_len, EAX = len
  316. L9:
  317.         inc eax
  318.         mov ebx, [ecx + eax]
  319.         xor ebx, [esi + eax]
  320.         test ebx, 0xFF // byte 0
  321.         jnz short L10
  322.         inc eax
  323.         test ebx, 0xFF00    // byte 1
  324.         jnz short L10
  325.         inc eax
  326.         test ebx, 0xFF0000  // byte 2
  327.         jnz short L10
  328.         inc eax
  329.         test ebx, ebx       // byte 3
  330.         jnz short L10
  331.         inc eax
  332.         mov ebx, [ecx + eax]
  333.         xor ebx, [esi + eax]
  334.         test ebx, 0xFF  // byte 0
  335.         jnz short L10
  336.         inc eax
  337.         test ebx, 0xFF00    // byte 1
  338.         jnz short L10
  339.         inc eax
  340.         test ebx, 0xFF0000  // byte 2
  341.         jnz short L10
  342.         inc eax
  343.         test ebx, ebx       // byte 3
  344.                 jnz short L10
  345.  
  346.         cmp eax, MAX_MATCH
  347.         jb short L9
  348.  
  349. L10:
  350.         // ; ECX = scan, ESI = match, EDX = pG, EDI = best_len, EAX = len
  351.         // free EBX , ESI = match
  352.         // if (len > best_len)
  353.         cmp eax, edi
  354.         jl short L11
  355.  
  356.         // pG->match_start = cur_match;
  357.         // ; ECX = scan, EDX = pG, EAX = strend, EBX = len
  358.         mov esi, dword ptr[esp]
  359.         mov dword ptr[edx +.fmatch_start], esi
  360.  
  361.         // best_len = len;
  362.         mov edi, eax
  363.  
  364.         // if (len >= pG->nice_match)
  365.         // break;
  366.         // ; ECX = scan, EDX = pG, EDI = best_len, EAX = len, EBX = ??
  367.         cmp eax, dword ptr[edx +.fnice_match];
  368.         jge short L14 // break
  369.  
  370.         // scan_end1 = scan[best_len - 1];
  371.         // ; EDI = best_len
  372.         mov bx, word ptr[ecx + edi - 1]
  373.         mov word ptr[esp + 12], bx  // scan_end
  374.  
  375. L11:
  376.                 // while ((cur_match = pG->prev[cur_match & WMASK]) > limit && --chain_length != 0);
  377. L8:
  378.         mov ebx, dword ptr[esp]
  379.         and ebx, WMASK;
  380.         movzx ebx, word ptr[edx + 2 * ebx +.fprev];
  381.         mov dword ptr[esp], ebx // cur_match
  382.         cmp ebx, dword ptr[esp + 8]
  383.         jbe short L14
  384.         dec dword ptr[esp + 4]  // chain_length
  385.                 jne L5
  386.         // ; EDI = best_len
  387.  
  388. L14:
  389.         // return best_len;
  390.         mov eax, edi
  391.         add esp, 16
  392.     };
  393. };
  394.  
  395. #endif
  396.  
  397.  
  398.