Subversion Repositories autosfx

Rev

Blame | Last modification | View Log | RSS feed

  1. unit ZMMatch19;
  2.  
  3. (*
  4.   ZMMatch.pas - Wild filename matching
  5.     Copyright (C) 2009, 2010  by Russell J. Peters, Roger Aelbrecht,
  6.       Eric W. Engler and Chris Vleghert.
  7.  
  8.         This file is part of TZipMaster Version 1.9.
  9.  
  10.     TZipMaster is free software: you can redistribute it and/or modify
  11.     it under the terms of the GNU Lesser General Public License as published by
  12.     the Free Software Foundation, either version 3 of the License, or
  13.     (at your option) any later version.
  14.  
  15.     TZipMaster is distributed in the hope that it will be useful,
  16.     but WITHOUT ANY WARRANTY; without even the implied warranty of
  17.     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18.     GNU Lesser General Public License for more details.
  19.  
  20.     You should have received a copy of the GNU Lesser General Public License
  21.     along with TZipMaster.  If not, see <http://www.gnu.org/licenses/>.
  22.  
  23.     contact: problems@delphizip.org (include ZipMaster in the subject).
  24.     updates: http://www.delphizip.org
  25.     DelphiZip maillist subscribe at http://www.freelists.org/list/delphizip
  26.  
  27.   modified 2010-08-22
  28. ---------------------------------------------------------------------------*)
  29.  
  30. interface
  31.  
  32. uses
  33.   SysUtils, ZipMstr19;
  34.  
  35. //type
  36. //{$IFDEF UNICODE}
  37. //  TZMString = UnicodeString;
  38. //  TZMWideString = UnicodeString;
  39. //  TZMRawBytes = RawByteString;
  40. //{$ELSE}
  41. //  TZMString = AnsiString;
  42. //  TZMWideString = WideString;
  43. //  TZMRawBytes = AnsiString;
  44. //{$ENDIF}
  45.  
  46. {$IFDEF UNICODE}
  47. function FileNameMatch(const UPattern, USpec: TZMString): Boolean;
  48. function FileNameComp(const s1, s2: TZMString): Integer; overload;
  49. function FileNameComp(const s1, s2: TZMRawBytes): Integer; overload;
  50. {$ELSE}
  51. function FileNameMatch(const UPattern, USpec: TZMString; IsUTF8: Boolean): Boolean;
  52. function FileNameComp(const s1, s2: TZMString; IsUTF8: Boolean): Integer;
  53. {$ENDIF}
  54.  
  55.  
  56. implementation
  57.  
  58. uses
  59.   Windows, ZMUTF819;
  60.  
  61. type
  62.   TBounds = record
  63.     Start:  PWideChar;
  64.     Finish: PWideChar;
  65.   end;
  66.  
  67.   TParts = record
  68.     Main: TBounds;
  69.     Extn: TBounds;
  70.     MainLen: Integer;
  71.     ExtnLen: Integer;
  72.   end;
  73.  
  74. // return <0 _ match to *, 0 _ match to end, >0 _ no match
  75. function Wild(var Bp, Bs: TBounds): Integer;
  76. var
  77.   cp: Widechar;
  78.   cs: Widechar;
  79. begin
  80.   Result := -1;// matches so far
  81.   // handle matching characters before wild
  82.   while (Bs.Start <= Bs.Finish) and (Bp.Start <= Bp.Finish) do
  83.   begin
  84.     cp := Bp.Start^;
  85.     cs := Bs.Start^;
  86.     if cp <> cs then
  87.     begin
  88.       if cp = '*' then
  89.         break;     // matched to *
  90.       // would match anything except path sep
  91.       if (cp <> '?') or (cs = '\') then
  92.       begin
  93.         Result := 1;  // no match
  94.         Exit;
  95.       end;
  96.     end;
  97.     // they match
  98.     Inc(Bp.Start);
  99.     Inc(Bs.Start);
  100.   end;
  101.   // we have * or eos
  102.   if Bp.Start > Bp.Finish then
  103.   begin
  104.     if Bs.Start > Bs.Finish then
  105.       Result := 0;   // matched to end
  106.   end;
  107.   if Result < 0 then
  108.   begin
  109.     // handle matching characters from wild to end
  110.     while Bs.Start <= Bs.Finish do
  111.     begin
  112.       cp := Bp.Finish^;
  113.       cs := Bs.Finish^;
  114.       if cp <> cs then
  115.       begin
  116.         if cp = '*' then
  117.           break;
  118.           // must not match path sep
  119.         if (cp <> '?') or (cs = '\') then
  120.         begin
  121.           Result := 1;  // no match
  122.           break;
  123.         end;
  124.       end;
  125.       // they match
  126.       Dec(Bp.Finish);
  127.       Dec(Bs.Finish);
  128.     end;
  129.   end;
  130. end;
  131. function WildCmp(Bp, Bs: TBounds): Integer;
  132. var
  133.   bpt: TBounds;
  134.   bst: TBounds;
  135.   sm:  Integer;
  136.   pidx: PWideChar;
  137.   sidx: PWideChar;
  138. begin
  139.   // quick check for '*'
  140.   if (Bp.Start = Bp.Finish) and (Bp.Start <> nil) and (Bp.Start^ = '*') then
  141.   begin
  142.     Result := 0;  // matches any/none
  143.     exit;
  144.   end;
  145.   // no more Spec?
  146.   if Bs.Finish < Bs.Start then
  147.   begin
  148.     if Bp.Finish < Bp.Start then
  149.       Result := 0   // empty matches empty
  150.     else
  151.       Result := 3;  // no match
  152.     exit;
  153.   end;
  154.   // handle matching characters before wild
  155.   Result := Wild(Bp, Bs);      
  156.   if Result < 0 then
  157.   begin
  158.     pidx := Bp.Start;
  159.     sidx := Bs.Start;
  160.     if Bp.Start > Bp.Finish then
  161.     begin
  162.       if sidx <= Bs.Finish then
  163.         Result := 123
  164.       else
  165.         Result := 0;
  166.       exit;
  167.     end;
  168.     // handle wild
  169.     if (sidx <= Bs.Finish) and (pidx^ = '*') then
  170.     begin
  171.       // skip multiple *
  172.       while (pidx < Bp.Finish) and ((pidx + 1)^ = '*') and (pidx^ = '*') do
  173.         Inc(pidx);
  174.       // end of Pattern?
  175.       if pidx = Bp.Finish then
  176.         Result := 0  // match
  177.       else
  178.       begin
  179.         Inc(pidx);
  180.         bpt.Start  := pidx;
  181.         bpt.Finish := Bp.Finish;
  182.         bst.Start  := sidx;
  183.         bst.Finish := Bs.Finish;
  184.         while (bst.Start <= bst.Finish) do
  185.         begin
  186.           // recursively compare sub patterns
  187.           sm := WildCmp(bpt, bst);
  188.           if sm = 0 then
  189.           begin
  190.             Result := 0;  // match
  191.             break;
  192.           end;
  193.           Inc(bst.Start);
  194.         end;
  195.         if Result <> 0 then
  196.           Result := 1;  // no match
  197.       end;
  198.     end;
  199.     // end of Spec - Pattern must only have *
  200.     if Result < 0 then
  201.     begin
  202.       while (pidx <= Bp.Finish) and (pidx^ = '*') do
  203.         Inc(pidx);
  204.       if pidx > Bp.Finish then
  205.         Result := 0;  // matched
  206.     end;
  207.   end;
  208. end;
  209.  
  210.  
  211. // returned bit values
  212. const
  213.   MAIN = $01; // not empty
  214.   MAIN_WILDALL = $02; // is *
  215.   MAIN_HASWILD = $04;
  216.   EXTN         = $10;
  217.   EXTN_WILDALL = $20;
  218.   EXTN_HASWILD = $40;
  219.   HAD_DOT      = $08;
  220.  
  221. function Decompose(var idx: PWideChar; var parts: TParts): Integer;
  222. var
  223.   c: Widechar;
  224.   ExtnFinish: pwideChar;
  225.   ExtnStart: pwideChar;
  226.   MainFinish: pwideChar;
  227.   MainStart: pwideChar;
  228.   mwildall: Integer;
  229.   tmp: PWideChar;
  230.   xwildall: Integer;
  231. begin
  232.   Result := 0;
  233.   mwildall := 0;
  234.   xwildall := 0;
  235. //  parts.MainLen := 0;
  236.   parts.ExtnLen := 0;
  237.   ExtnStart := nil;
  238.   ExtnFinish := nil;
  239.   // at start of text or spec
  240.   MainStart := idx;    
  241.   MainFinish := nil; // keep compiler happy
  242.   while True do
  243.   begin
  244.     c := idx^;
  245.     case c of
  246.       '.':
  247.       if idx > MainStart then
  248.       begin
  249.         // we probably have extn
  250.         if ExtnStart <> nil then
  251.           Inc(mwildall, xwildall); // count all * in main
  252.         ExtnStart := idx+ 1;
  253.         xwildall := 0;
  254.       end;
  255.       '\', '/', ':':
  256.       begin            
  257.         if c = '/' then
  258.           idx^ := '\'; // normalise path seps
  259.         if ExtnStart <> nil then
  260.         begin
  261.           // was false start of extn
  262.           ExtnStart := nil;
  263.           Inc(mwildall, xwildall); // count all * in main
  264.           xwildall := 0;
  265.         end;
  266.       end;
  267.       ' ':
  268.       begin
  269.         // space can be embedded but cannot trail
  270.         tmp := idx;                                
  271.         Inc(idx);
  272.         while idx^ = ' ' do
  273.           Inc(idx);
  274.         if idx^ < ' ' then
  275.         begin
  276.           // terminate
  277.           MainFinish := tmp - 1;
  278.           Break;
  279.         end;
  280.         if idx^ = '|' then
  281.         begin
  282.           // terminate
  283.           MainFinish := tmp - 1;
  284.           Inc(idx);
  285.           Break;
  286.         end;
  287.         Continue;
  288.       end;
  289.       #0..#31:
  290.       begin
  291.         // control terminates
  292.         MainFinish := idx - 1;
  293.         Break;
  294.       end;
  295.       '|':
  296.       begin
  297.         // at the end
  298.         MainFinish := idx - 1;
  299.         Inc(idx);
  300.         break;
  301.       end;
  302.       '*':    
  303.       begin
  304.         if ExtnStart <> nil then
  305.           Inc(xwildall)
  306.         else
  307.           Inc(mwildall);
  308.       end;
  309.     end;
  310.     Inc(idx);
  311.   end;
  312.   // was there an extension?
  313.   if ExtnStart <> nil then
  314.   begin
  315.     Result := Result or HAD_DOT;
  316.     if ExtnStart <= MainFinish then
  317.     begin
  318.       // we have extn
  319.       ExtnFinish := MainFinish;
  320.       MainFinish := ExtnStart - 2;    
  321.       parts.Extnlen := 1 + (ExtnFinish - ExtnStart);
  322.       Result := Result or EXTN;
  323.       if  xwildall <> 0 then
  324.       begin
  325.         if xwildall = parts.Extnlen then
  326.           Result := Result or EXTN_WILDALL;
  327.         Result := Result or EXTN_HASWILD;
  328.       end;
  329.     end
  330.     else
  331.     begin
  332.       // dot but no extn
  333.       ExtnStart := nil;
  334.       Dec(MainFinish); // before dot
  335.     end;
  336.   end;
  337.  
  338.   parts.Mainlen := 1 + (MainFinish - MainStart);
  339.   if parts.Mainlen > 0 then
  340.   begin
  341.     Result := Result or MAIN;
  342.     if  mwildall <> 0 then
  343.     begin
  344.       if mwildall = parts.Mainlen then
  345.         Result := Result or MAIN_WILDALL;
  346.       Result := Result or MAIN_HASWILD;
  347.     end;
  348.   end;
  349.   // set resulting pointers
  350.   parts.Main.Start := MainStart;
  351.   parts.Main.Finish := MainFinish;
  352.   parts.Extn.Start := ExtnStart;
  353.   parts.Extn.Finish := ExtnFinish;
  354. end;
  355.  
  356. // only gets called to compare same length names
  357. function FileRCmp(var Bp, Bs: TBounds): Integer;
  358. var
  359.   cp: Widechar;
  360.   cs: Widechar;
  361. begin
  362.   Result := 1;  // no match
  363.   if (Bs.Start > Bs.Finish) then
  364.     exit;
  365.   if (Bp.Start^ <> Bs.Start^) and ((Bp.Start^ = '\') or (Bp.Start^ <> '?')) then
  366.     exit; // cannot match
  367.   Inc(Bs.Start);
  368.   Inc(Bp.Start);
  369.   while (Bs.Start <= Bs.Finish) and (Bp.Start <= Bp.Finish) do
  370.   begin
  371.     cp := Bp.Finish^;
  372.     cs := Bs.Finish^;
  373.     Dec(Bp.Finish);
  374.     Dec(Bs.Finish);
  375.     if cp <> cs then
  376.     begin
  377.       // must not match path sep
  378.       if (cp <> '?') or (cs = '\') then
  379.         Exit;   // no match
  380.     end;
  381.   end;
  382.   Result := 0;  // match
  383. end;
  384.  
  385. procedure ToUpperCase(var fspec: TZMWideString);
  386. {$IFNDEF UNICODE}
  387. var
  388.   pw: PWideChar;
  389.   wc: WideChar;
  390. {$ENDIF}
  391. begin
  392. {$IFDEF UNICODE}
  393.     CharUpperW(PWideChar(fspec));
  394. {$ELSE}
  395.   pw := PWideChar(fspec);
  396.   if Win32MajorVersion > 4 then
  397.     CharUpperW(pw)   // not implemented for earlier versions
  398.   else
  399.   begin
  400.     wc := pw^;
  401.     while wc <> #0 do
  402.     begin
  403.       if (wc <= 'z') and (wc >= 'a') then
  404.         pw^ := WideChar(Ord(wc) and $DF);
  405.       Inc(pw);
  406.       wc := pw^;
  407.     end;
  408.   end;
  409. {$ENDIF}
  410. end;
  411. (*
  412. function PUTF8ToWideStr(const raw: PAnsiChar; len: integer): TZMWideString;
  413. const
  414.   MB_ERR_INVALID_CHARS = $00000008; // error for invalid chars
  415. var
  416.   len: Integer;
  417.   wcnt: Integer;
  418.   flg: Cardinal;
  419. begin
  420.   Result := '';
  421.   len := Length(ustr);
  422.   if len = 0 then
  423.     exit;
  424. {$IFDEF UNICODE}
  425.     flg := MB_ERR_INVALID_CHARS;
  426. {$ELSE}
  427.   if Win32MajorVersion > 4 then
  428.     flg := MB_ERR_INVALID_CHARS
  429.   else
  430.     flg := 0;
  431. {$ENDIF}
  432.   SetLength(Result, len * 2); // plenty of room
  433.   wcnt := MultiByteToWideChar(CP_UTF8, flg, PAnsiChar(ustr), -1,
  434.             PWideChar(Result), len * 2);
  435.   if wcnt = 0 then    // try again assuming Ansi
  436.     wcnt := MultiByteToWideChar(0, flg, PAnsiChar(ustr), -1,
  437.             PWideChar(Result), len * 2);
  438.   if wcnt > 0 then
  439.     dec(wcnt);  // don't want end null
  440.   SetLength(Result, wcnt);
  441. end;
  442. *)
  443.  
  444. function UpperFileNameMatch(const Pattern, Spec: TZMWideString): Boolean;
  445. const
  446.   FULL_WILD = MAIN_WILDALL or EXTN_WILDALL;
  447. var
  448.   ch: WideChar;
  449.   pFlag: Integer;
  450.   pidx: PWideChar;
  451.   ptn: TParts;
  452.   sFlag: Integer;
  453.   sidx: PWideChar;
  454.   spc: TParts;
  455.   spc1: TParts;
  456.   SpecStt: PWideChar;
  457.   xres: Integer;
  458. begin
  459.   Result := False;
  460.   // check the spec if has extension
  461.   SpecStt := PWideChar(Spec);
  462.   sidx  := SpecStt;
  463.   while sidx^ <= ' ' do
  464.   begin
  465.     if sidx^ = #0 then
  466.       exit;
  467.     Inc(sidx);
  468.   end;
  469.   sFlag := Decompose(sidx, spc);
  470.   // now start processing each pattern
  471.   pidx := PWideChar(Pattern);
  472.   repeat
  473.     ch := pidx^;
  474.     // skip garbage or separator
  475.     while (ch <= ' ') or (ch = '|') do
  476.     begin
  477.       if ch = #0 then
  478.         exit;
  479.       Inc(pidx);
  480.       ch := pidx^;
  481.     end;    
  482.     pFlag := Decompose(pidx, ptn);
  483.     // work out what we must test
  484.     if ((pFlag and FULL_WILD) = FULL_WILD) or
  485.       ((pFlag and (FULL_WILD or EXTN or HAD_DOT)) = MAIN_WILDALL) then
  486.     begin
  487.       Result := True;
  488.       Break;
  489.     end;
  490.     if ((pFlag and (EXTN_HASWILD or EXTN)) = EXTN) and (spc.ExtnLen <> ptn.ExtnLen) then
  491.         Continue;   // cannot match
  492.     if ((pFlag and MAIN_HASWILD) = 0) and (spc.MainLen <> ptn.MainLen) then
  493.         Continue;   // cannot match
  494.     xres := -1;   // not tried to match
  495.     // make copy of spc
  496.     Move(spc, spc1, SizeOf(TParts));
  497.     if (pFlag and EXTN_WILDALL) <> 0 then
  498.       xres := 0   // ignore extn as matched
  499.     else
  500.     begin
  501.       // if pattern has extn, we must 'split' spec
  502.       if (pFlag and HAD_DOT) <> 0 then
  503.       begin
  504.         // check special cases
  505.         if (pFlag and EXTN) = 0 then
  506.         begin
  507.           // pattern ended in dot - spec must not have extn
  508.           if (sFlag and EXTN) <> 0 then
  509.              Continue;    // spec has extn - cannot match
  510.           xres := 0;  // no extn to check
  511.         end
  512.         else
  513.         begin
  514.           // spec must have extn
  515.           if (sFlag and EXTN) = 0 then
  516.             Continue;   // no spec extn - cannot match
  517.         end;
  518.       end
  519.       else
  520.       begin
  521.         // no Pattern dot _ test full spec
  522.         if ((sFlag and EXTN) <> 0) then
  523.           spc1.Main.Finish := spc.Extn.Finish; // full spec
  524.         xres := 0;  // only test spec
  525.       end;
  526.  
  527.       // test extn first (if required)
  528.       if xres < 0 then
  529.         xres := WildCmp(ptn.Extn, spc1.Extn);
  530.     end;
  531.     // if extn matched test main part
  532.     if xres = 0 then
  533.     begin
  534.       if (pFlag and MAIN_WILDALL) = 0 then
  535.         begin
  536.           if (pFlag and MAIN_HASWILD) <> 0 then
  537.             xres := WildCmp(ptn.Main, spc1.Main)
  538.           else
  539.             xres := FileRCmp(ptn.Main, spc1.Main);
  540.         end;
  541.     end;
  542.     // equate
  543.     Result := xres = 0;
  544.     // at next pattern
  545.   until Result;
  546. end;
  547.  
  548. {$IFDEF UNICODE}
  549. function FileNameMatch(const UPattern, USpec: TZMString): Boolean;
  550. var
  551.   Pattern: TZMWideString;
  552.   Spec: TZMWideString;
  553. begin
  554.   Result := False;
  555.   if (UPattern = '') <> (USpec = '') then
  556.     exit;
  557.   Pattern := AnsiUpperCase(UPattern);
  558.   Spec := AnsiUpperCase(USpec);
  559.   Result := UpperFileNameMatch(Pattern, Spec);
  560. end;
  561. {$ELSE}
  562. function FileNameMatch(const UPattern, USpec: TZMString; IsUTF8: Boolean): Boolean;
  563. var
  564.   Pattern: TZMWideString;
  565.   Spec: TZMWideString;
  566. begin
  567.   Result := False;
  568.   if (UPattern = '') <> (USpec = '') then
  569.     exit;
  570.   // convert to wide
  571.   if  IsUTF8 then
  572.   begin
  573.     Pattern := PUTF8ToWideStr(PAnsiChar(UPattern), Length(UPattern));
  574.     Spec := PUTF8ToWideStr(PAnsiChar(USpec), Length(USpec));
  575.   end
  576.   else
  577.   begin
  578.     Pattern := UPattern;
  579.     Spec := USpec;
  580.   end;
  581.   ToUpperCase(Pattern);
  582.   ToUpperCase(Spec);
  583.   Result := UpperFileNameMatch(Pattern, Spec);
  584. end;
  585. {$ENDIF}
  586.  
  587. function UpperFileNameComp(const ws1, ws2: TZMWideString): Integer;
  588. var
  589.   idx: Integer;
  590.   len: Integer;
  591.   len1: integer;
  592.   len2: integer;
  593.   wc1: WideChar;
  594.   wc2: WideChar;
  595. begin
  596.   Result := 0;
  597.   len1 := Length(ws1);
  598.   len2 := Length(ws2);
  599.   len := len1;
  600.   if Len2 < Len then
  601.     len1 := len2;
  602.   idx := 1;
  603.   // handle matching characters while they do
  604.   while idx <= Len do
  605.   begin
  606.     wc1 := ws1[idx];
  607.     if wc1 = '/' then
  608.       wc1 := '\';
  609.     wc2 := ws2[idx];
  610.     if wc2 = '/' then
  611.       wc2 := '\';
  612.     Result := Ord(wc1) - Ord(wc2);
  613.     if Result <> 0 then
  614.       Break;
  615.     // they match
  616.     Inc(idx);
  617.   end;
  618.   if Result = 0 then
  619.     Result := Len1 - Len2;
  620. end;
  621.  
  622.  
  623. {$IFDEF UNICODE}
  624. function FileNameComp(const s1, s2: TZMString): Integer;
  625. var
  626.   ws1: TZMWideString;
  627.   ws2: TZMWideString;
  628. begin
  629.   ws1 := AnsiUpperCase(s1);
  630.   ws2 := AnsiUpperCase(s2);
  631.   Result := UpperFileNameComp(ws1, ws2);
  632. end;
  633.  
  634. function FileNameComp(const s1, s2: TZMRawBytes): Integer; overload;
  635. var
  636.   ws1: TZMWideString;
  637.   ws2: TZMWideString;
  638. begin
  639.   ws1 := PUTF8ToWideStr(PAnsiChar(s1), Length(s1));
  640.   ws2 := PUTF8ToWideStr(PAnsiChar(s2), Length(s2));
  641.   ToUpperCase(ws1);
  642.   ToUpperCase(ws2);
  643.   Result := UpperFileNameComp(ws1, ws2);
  644. end;
  645. {$ELSE}
  646. function FileNameComp(const s1, s2: TZMString; IsUTF8: Boolean): Integer;
  647. var
  648.   ws1: TZMWideString;
  649.   ws2: TZMWideString;
  650. begin
  651.   // convert to wide
  652.   if  IsUTF8 then
  653.   begin
  654.     ws1 := PUTF8ToWideStr(PAnsiChar(s1), Length(s1));
  655.     ws2 := PUTF8ToWideStr(PAnsiChar(s2), Length(s2));
  656.   end
  657.   else
  658.   begin
  659.     ws1 := s1;
  660.     ws2 := s2;
  661.   end;
  662.   ToUpperCase(ws1);
  663.   ToUpperCase(ws2);
  664.   Result := UpperFileNameComp(ws1, ws2);
  665. end;
  666. {$ENDIF}
  667.  
  668. end.
  669.