Subversion Repositories autosfx

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1 daniel-mar 1
unit ZMMatch19;
2
 
3
(*
4
  ZMMatch.pas - Wild filename matching
5
    Copyright (C) 2009, 2010  by Russell J. Peters, Roger Aelbrecht,
6
      Eric W. Engler and Chris Vleghert.
7
 
8
        This file is part of TZipMaster Version 1.9.
9
 
10
    TZipMaster is free software: you can redistribute it and/or modify
11
    it under the terms of the GNU Lesser General Public License as published by
12
    the Free Software Foundation, either version 3 of the License, or
13
    (at your option) any later version.
14
 
15
    TZipMaster is distributed in the hope that it will be useful,
16
    but WITHOUT ANY WARRANTY; without even the implied warranty of
17
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
    GNU Lesser General Public License for more details.
19
 
20
    You should have received a copy of the GNU Lesser General Public License
21
    along with TZipMaster.  If not, see <http://www.gnu.org/licenses/>.
22
 
23
    contact: problems@delphizip.org (include ZipMaster in the subject).
24
    updates: http://www.delphizip.org
25
    DelphiZip maillist subscribe at http://www.freelists.org/list/delphizip
26
 
27
  modified 2010-08-22
28
---------------------------------------------------------------------------*)
29
 
30
interface
31
 
32
uses
33
  SysUtils, ZipMstr19;
34
 
35
//type
36
//{$IFDEF UNICODE}
37
//  TZMString = UnicodeString;
38
//  TZMWideString = UnicodeString;
39
//  TZMRawBytes = RawByteString;
40
//{$ELSE}
41
//  TZMString = AnsiString;
42
//  TZMWideString = WideString;
43
//  TZMRawBytes = AnsiString;
44
//{$ENDIF}
45
 
46
{$IFDEF UNICODE}
47
function FileNameMatch(const UPattern, USpec: TZMString): Boolean;
48
function FileNameComp(const s1, s2: TZMString): Integer; overload;
49
function FileNameComp(const s1, s2: TZMRawBytes): Integer; overload;
50
{$ELSE}
51
function FileNameMatch(const UPattern, USpec: TZMString; IsUTF8: Boolean): Boolean;
52
function FileNameComp(const s1, s2: TZMString; IsUTF8: Boolean): Integer;
53
{$ENDIF}
54
 
55
 
56
implementation
57
 
58
uses
59
  Windows, ZMUTF819;
60
 
61
type
62
  TBounds = record
63
    Start:  PWideChar;
64
    Finish: PWideChar;
65
  end;
66
 
67
  TParts = record
68
    Main: TBounds;
69
    Extn: TBounds;
70
    MainLen: Integer;
71
    ExtnLen: Integer;
72
  end;
73
 
74
// return <0 _ match to *, 0 _ match to end, >0 _ no match
75
function Wild(var Bp, Bs: TBounds): Integer;
76
var
77
  cp: Widechar;
78
  cs: Widechar;
79
begin
80
  Result := -1;// matches so far
81
  // handle matching characters before wild
82
  while (Bs.Start <= Bs.Finish) and (Bp.Start <= Bp.Finish) do
83
  begin
84
    cp := Bp.Start^;
85
    cs := Bs.Start^;
86
    if cp <> cs then
87
    begin
88
      if cp = '*' then
89
        break;     // matched to *
90
      // would match anything except path sep
91
      if (cp <> '?') or (cs = '\') then
92
      begin
93
        Result := 1;  // no match
94
        Exit;
95
      end;
96
    end;
97
    // they match
98
    Inc(Bp.Start);
99
    Inc(Bs.Start);
100
  end;
101
  // we have * or eos
102
  if Bp.Start > Bp.Finish then
103
  begin
104
    if Bs.Start > Bs.Finish then
105
      Result := 0;   // matched to end
106
  end;
107
  if Result < 0 then
108
  begin
109
    // handle matching characters from wild to end
110
    while Bs.Start <= Bs.Finish do
111
    begin
112
      cp := Bp.Finish^;
113
      cs := Bs.Finish^;
114
      if cp <> cs then
115
      begin
116
        if cp = '*' then
117
          break;
118
          // must not match path sep
119
        if (cp <> '?') or (cs = '\') then
120
        begin
121
          Result := 1;  // no match
122
          break;
123
        end;
124
      end;
125
      // they match
126
      Dec(Bp.Finish);
127
      Dec(Bs.Finish);
128
    end;
129
  end;
130
end;
131
function WildCmp(Bp, Bs: TBounds): Integer;
132
var
133
  bpt: TBounds;
134
  bst: TBounds;
135
  sm:  Integer;
136
  pidx: PWideChar;
137
  sidx: PWideChar;
138
begin
139
  // quick check for '*'
140
  if (Bp.Start = Bp.Finish) and (Bp.Start <> nil) and (Bp.Start^ = '*') then
141
  begin
142
    Result := 0;  // matches any/none
143
    exit;
144
  end;
145
  // no more Spec?
146
  if Bs.Finish < Bs.Start then
147
  begin
148
    if Bp.Finish < Bp.Start then
149
      Result := 0   // empty matches empty
150
    else
151
      Result := 3;  // no match
152
    exit;
153
  end;
154
  // handle matching characters before wild
155
  Result := Wild(Bp, Bs);      
156
  if Result < 0 then
157
  begin
158
    pidx := Bp.Start;
159
    sidx := Bs.Start;
160
    if Bp.Start > Bp.Finish then
161
    begin
162
      if sidx <= Bs.Finish then
163
        Result := 123
164
      else
165
        Result := 0;
166
      exit;
167
    end;
168
    // handle wild
169
    if (sidx <= Bs.Finish) and (pidx^ = '*') then
170
    begin
171
      // skip multiple *
172
      while (pidx < Bp.Finish) and ((pidx + 1)^ = '*') and (pidx^ = '*') do
173
        Inc(pidx);
174
      // end of Pattern?
175
      if pidx = Bp.Finish then
176
        Result := 0  // match
177
      else
178
      begin
179
        Inc(pidx);
180
        bpt.Start  := pidx;
181
        bpt.Finish := Bp.Finish;
182
        bst.Start  := sidx;
183
        bst.Finish := Bs.Finish;
184
        while (bst.Start <= bst.Finish) do
185
        begin
186
          // recursively compare sub patterns
187
          sm := WildCmp(bpt, bst);
188
          if sm = 0 then
189
          begin
190
            Result := 0;  // match
191
            break;
192
          end;
193
          Inc(bst.Start);
194
        end;
195
        if Result <> 0 then
196
          Result := 1;  // no match
197
      end;
198
    end;
199
    // end of Spec - Pattern must only have *
200
    if Result < 0 then
201
    begin
202
      while (pidx <= Bp.Finish) and (pidx^ = '*') do
203
        Inc(pidx);
204
      if pidx > Bp.Finish then
205
        Result := 0;  // matched
206
    end;
207
  end;
208
end;
209
 
210
 
211
// returned bit values
212
const
213
  MAIN = $01; // not empty
214
  MAIN_WILDALL = $02; // is *
215
  MAIN_HASWILD = $04;
216
  EXTN         = $10;
217
  EXTN_WILDALL = $20;
218
  EXTN_HASWILD = $40;
219
  HAD_DOT      = $08;
220
 
221
function Decompose(var idx: PWideChar; var parts: TParts): Integer;
222
var
223
  c: Widechar;
224
  ExtnFinish: pwideChar;
225
  ExtnStart: pwideChar;
226
  MainFinish: pwideChar;
227
  MainStart: pwideChar;
228
  mwildall: Integer;
229
  tmp: PWideChar;
230
  xwildall: Integer;
231
begin
232
  Result := 0;
233
  mwildall := 0;
234
  xwildall := 0;
235
//  parts.MainLen := 0;
236
  parts.ExtnLen := 0;
237
  ExtnStart := nil;
238
  ExtnFinish := nil;
239
  // at start of text or spec
240
  MainStart := idx;    
241
  MainFinish := nil; // keep compiler happy
242
  while True do
243
  begin
244
    c := idx^;
245
    case c of
246
      '.':
247
      if idx > MainStart then
248
      begin
249
        // we probably have extn
250
        if ExtnStart <> nil then
251
          Inc(mwildall, xwildall); // count all * in main
252
        ExtnStart := idx+ 1;
253
        xwildall := 0;
254
      end;
255
      '\', '/', ':':
256
      begin            
257
        if c = '/' then
258
          idx^ := '\'; // normalise path seps
259
        if ExtnStart <> nil then
260
        begin
261
          // was false start of extn
262
          ExtnStart := nil;
263
          Inc(mwildall, xwildall); // count all * in main
264
          xwildall := 0;
265
        end;
266
      end;
267
      ' ':
268
      begin
269
        // space can be embedded but cannot trail
270
        tmp := idx;                                
271
        Inc(idx);
272
        while idx^ = ' ' do
273
          Inc(idx);
274
        if idx^ < ' ' then
275
        begin
276
          // terminate
277
          MainFinish := tmp - 1;
278
          Break;
279
        end;
280
        if idx^ = '|' then
281
        begin
282
          // terminate
283
          MainFinish := tmp - 1;
284
          Inc(idx);
285
          Break;
286
        end;
287
        Continue;
288
      end;
289
      #0..#31:
290
      begin
291
        // control terminates
292
        MainFinish := idx - 1;
293
        Break;
294
      end;
295
      '|':
296
      begin
297
        // at the end
298
        MainFinish := idx - 1;
299
        Inc(idx);
300
        break;
301
      end;
302
      '*':    
303
      begin
304
        if ExtnStart <> nil then
305
          Inc(xwildall)
306
        else
307
          Inc(mwildall);
308
      end;
309
    end;
310
    Inc(idx);
311
  end;
312
  // was there an extension?
313
  if ExtnStart <> nil then
314
  begin
315
    Result := Result or HAD_DOT;
316
    if ExtnStart <= MainFinish then
317
    begin
318
      // we have extn
319
      ExtnFinish := MainFinish;
320
      MainFinish := ExtnStart - 2;    
321
      parts.Extnlen := 1 + (ExtnFinish - ExtnStart);
322
      Result := Result or EXTN;
323
      if  xwildall <> 0 then
324
      begin
325
        if xwildall = parts.Extnlen then
326
          Result := Result or EXTN_WILDALL;
327
        Result := Result or EXTN_HASWILD;
328
      end;
329
    end
330
    else
331
    begin
332
      // dot but no extn
333
      ExtnStart := nil;
334
      Dec(MainFinish); // before dot
335
    end;
336
  end;
337
 
338
  parts.Mainlen := 1 + (MainFinish - MainStart);
339
  if parts.Mainlen > 0 then
340
  begin
341
    Result := Result or MAIN;
342
    if  mwildall <> 0 then
343
    begin
344
      if mwildall = parts.Mainlen then
345
        Result := Result or MAIN_WILDALL;
346
      Result := Result or MAIN_HASWILD;
347
    end;
348
  end;
349
  // set resulting pointers
350
  parts.Main.Start := MainStart;
351
  parts.Main.Finish := MainFinish;
352
  parts.Extn.Start := ExtnStart;
353
  parts.Extn.Finish := ExtnFinish;
354
end;
355
 
356
// only gets called to compare same length names
357
function FileRCmp(var Bp, Bs: TBounds): Integer;
358
var
359
  cp: Widechar;
360
  cs: Widechar;
361
begin
362
  Result := 1;  // no match
363
  if (Bs.Start > Bs.Finish) then
364
    exit;
365
  if (Bp.Start^ <> Bs.Start^) and ((Bp.Start^ = '\') or (Bp.Start^ <> '?')) then
366
    exit; // cannot match
367
  Inc(Bs.Start);
368
  Inc(Bp.Start);
369
  while (Bs.Start <= Bs.Finish) and (Bp.Start <= Bp.Finish) do
370
  begin
371
    cp := Bp.Finish^;
372
    cs := Bs.Finish^;
373
    Dec(Bp.Finish);
374
    Dec(Bs.Finish);
375
    if cp <> cs then
376
    begin
377
      // must not match path sep
378
      if (cp <> '?') or (cs = '\') then
379
        Exit;   // no match
380
    end;
381
  end;
382
  Result := 0;  // match
383
end;
384
 
385
procedure ToUpperCase(var fspec: TZMWideString);
386
{$IFNDEF UNICODE}
387
var
388
  pw: PWideChar;
389
  wc: WideChar;
390
{$ENDIF}
391
begin
392
{$IFDEF UNICODE}
393
    CharUpperW(PWideChar(fspec));
394
{$ELSE}
395
  pw := PWideChar(fspec);
396
  if Win32MajorVersion > 4 then
397
    CharUpperW(pw)   // not implemented for earlier versions
398
  else
399
  begin
400
    wc := pw^;
401
    while wc <> #0 do
402
    begin
403
      if (wc <= 'z') and (wc >= 'a') then
404
        pw^ := WideChar(Ord(wc) and $DF);
405
      Inc(pw);
406
      wc := pw^;
407
    end;
408
  end;
409
{$ENDIF}
410
end;
411
(*
412
function PUTF8ToWideStr(const raw: PAnsiChar; len: integer): TZMWideString;
413
const
414
  MB_ERR_INVALID_CHARS = $00000008; // error for invalid chars
415
var
416
  len: Integer;
417
  wcnt: Integer;
418
  flg: Cardinal;
419
begin
420
  Result := '';
421
  len := Length(ustr);
422
  if len = 0 then
423
    exit;
424
{$IFDEF UNICODE}
425
    flg := MB_ERR_INVALID_CHARS;
426
{$ELSE}
427
  if Win32MajorVersion > 4 then
428
    flg := MB_ERR_INVALID_CHARS
429
  else
430
    flg := 0;
431
{$ENDIF}
432
  SetLength(Result, len * 2); // plenty of room
433
  wcnt := MultiByteToWideChar(CP_UTF8, flg, PAnsiChar(ustr), -1,
434
            PWideChar(Result), len * 2);
435
  if wcnt = 0 then    // try again assuming Ansi
436
    wcnt := MultiByteToWideChar(0, flg, PAnsiChar(ustr), -1,
437
            PWideChar(Result), len * 2);
438
  if wcnt > 0 then
439
    dec(wcnt);  // don't want end null
440
  SetLength(Result, wcnt);
441
end;
442
*)
443
 
444
function UpperFileNameMatch(const Pattern, Spec: TZMWideString): Boolean;
445
const
446
  FULL_WILD = MAIN_WILDALL or EXTN_WILDALL;
447
var
448
  ch: WideChar;
449
  pFlag: Integer;
450
  pidx: PWideChar;
451
  ptn: TParts;
452
  sFlag: Integer;
453
  sidx: PWideChar;
454
  spc: TParts;
455
  spc1: TParts;
456
  SpecStt: PWideChar;
457
  xres: Integer;
458
begin
459
  Result := False;
460
  // check the spec if has extension
461
  SpecStt := PWideChar(Spec);
462
  sidx  := SpecStt;
463
  while sidx^ <= ' ' do
464
  begin
465
    if sidx^ = #0 then
466
      exit;
467
    Inc(sidx);
468
  end;
469
  sFlag := Decompose(sidx, spc);
470
  // now start processing each pattern
471
  pidx := PWideChar(Pattern);
472
  repeat
473
    ch := pidx^;
474
    // skip garbage or separator
475
    while (ch <= ' ') or (ch = '|') do
476
    begin
477
      if ch = #0 then
478
        exit;
479
      Inc(pidx);
480
      ch := pidx^;
481
    end;    
482
    pFlag := Decompose(pidx, ptn);
483
    // work out what we must test
484
    if ((pFlag and FULL_WILD) = FULL_WILD) or
485
      ((pFlag and (FULL_WILD or EXTN or HAD_DOT)) = MAIN_WILDALL) then
486
    begin
487
      Result := True;
488
      Break;
489
    end;
490
    if ((pFlag and (EXTN_HASWILD or EXTN)) = EXTN) and (spc.ExtnLen <> ptn.ExtnLen) then
491
        Continue;   // cannot match
492
    if ((pFlag and MAIN_HASWILD) = 0) and (spc.MainLen <> ptn.MainLen) then
493
        Continue;   // cannot match
494
    xres := -1;   // not tried to match
495
    // make copy of spc
496
    Move(spc, spc1, SizeOf(TParts));
497
    if (pFlag and EXTN_WILDALL) <> 0 then
498
      xres := 0   // ignore extn as matched
499
    else
500
    begin
501
      // if pattern has extn, we must 'split' spec
502
      if (pFlag and HAD_DOT) <> 0 then
503
      begin
504
        // check special cases
505
        if (pFlag and EXTN) = 0 then
506
        begin
507
          // pattern ended in dot - spec must not have extn
508
          if (sFlag and EXTN) <> 0 then
509
             Continue;    // spec has extn - cannot match
510
          xres := 0;  // no extn to check
511
        end
512
        else
513
        begin
514
          // spec must have extn
515
          if (sFlag and EXTN) = 0 then
516
            Continue;   // no spec extn - cannot match
517
        end;
518
      end
519
      else
520
      begin
521
        // no Pattern dot _ test full spec
522
        if ((sFlag and EXTN) <> 0) then
523
          spc1.Main.Finish := spc.Extn.Finish; // full spec
524
        xres := 0;  // only test spec
525
      end;
526
 
527
      // test extn first (if required)
528
      if xres < 0 then
529
        xres := WildCmp(ptn.Extn, spc1.Extn);
530
    end;
531
    // if extn matched test main part
532
    if xres = 0 then
533
    begin
534
      if (pFlag and MAIN_WILDALL) = 0 then
535
        begin
536
          if (pFlag and MAIN_HASWILD) <> 0 then
537
            xres := WildCmp(ptn.Main, spc1.Main)
538
          else
539
            xres := FileRCmp(ptn.Main, spc1.Main);
540
        end;
541
    end;
542
    // equate
543
    Result := xres = 0;
544
    // at next pattern
545
  until Result;
546
end;
547
 
548
{$IFDEF UNICODE}
549
function FileNameMatch(const UPattern, USpec: TZMString): Boolean;
550
var
551
  Pattern: TZMWideString;
552
  Spec: TZMWideString;
553
begin
554
  Result := False;
555
  if (UPattern = '') <> (USpec = '') then
556
    exit;
557
  Pattern := AnsiUpperCase(UPattern);
558
  Spec := AnsiUpperCase(USpec);
559
  Result := UpperFileNameMatch(Pattern, Spec);
560
end;
561
{$ELSE}
562
function FileNameMatch(const UPattern, USpec: TZMString; IsUTF8: Boolean): Boolean;
563
var
564
  Pattern: TZMWideString;
565
  Spec: TZMWideString;
566
begin
567
  Result := False;
568
  if (UPattern = '') <> (USpec = '') then
569
    exit;
570
  // convert to wide
571
  if  IsUTF8 then
572
  begin
573
    Pattern := PUTF8ToWideStr(PAnsiChar(UPattern), Length(UPattern));
574
    Spec := PUTF8ToWideStr(PAnsiChar(USpec), Length(USpec));
575
  end
576
  else
577
  begin
578
    Pattern := UPattern;
579
    Spec := USpec;
580
  end;
581
  ToUpperCase(Pattern);
582
  ToUpperCase(Spec);
583
  Result := UpperFileNameMatch(Pattern, Spec);
584
end;
585
{$ENDIF}
586
 
587
function UpperFileNameComp(const ws1, ws2: TZMWideString): Integer;
588
var
589
  idx: Integer;
590
  len: Integer;
591
  len1: integer;
592
  len2: integer;
593
  wc1: WideChar;
594
  wc2: WideChar;
595
begin
596
  Result := 0;
597
  len1 := Length(ws1);
598
  len2 := Length(ws2);
599
  len := len1;
600
  if Len2 < Len then
601
    len1 := len2;
602
  idx := 1;
603
  // handle matching characters while they do
604
  while idx <= Len do
605
  begin
606
    wc1 := ws1[idx];
607
    if wc1 = '/' then
608
      wc1 := '\';
609
    wc2 := ws2[idx];
610
    if wc2 = '/' then
611
      wc2 := '\';
612
    Result := Ord(wc1) - Ord(wc2);
613
    if Result <> 0 then
614
      Break;
615
    // they match
616
    Inc(idx);
617
  end;
618
  if Result = 0 then
619
    Result := Len1 - Len2;
620
end;
621
 
622
 
623
{$IFDEF UNICODE}
624
function FileNameComp(const s1, s2: TZMString): Integer;
625
var
626
  ws1: TZMWideString;
627
  ws2: TZMWideString;
628
begin
629
  ws1 := AnsiUpperCase(s1);
630
  ws2 := AnsiUpperCase(s2);
631
  Result := UpperFileNameComp(ws1, ws2);
632
end;
633
 
634
function FileNameComp(const s1, s2: TZMRawBytes): Integer; overload;
635
var
636
  ws1: TZMWideString;
637
  ws2: TZMWideString;
638
begin
639
  ws1 := PUTF8ToWideStr(PAnsiChar(s1), Length(s1));
640
  ws2 := PUTF8ToWideStr(PAnsiChar(s2), Length(s2));
641
  ToUpperCase(ws1);
642
  ToUpperCase(ws2);
643
  Result := UpperFileNameComp(ws1, ws2);
644
end;
645
{$ELSE}
646
function FileNameComp(const s1, s2: TZMString; IsUTF8: Boolean): Integer;
647
var
648
  ws1: TZMWideString;
649
  ws2: TZMWideString;
650
begin
651
  // convert to wide
652
  if  IsUTF8 then
653
  begin
654
    ws1 := PUTF8ToWideStr(PAnsiChar(s1), Length(s1));
655
    ws2 := PUTF8ToWideStr(PAnsiChar(s2), Length(s2));
656
  end
657
  else
658
  begin
659
    ws1 := s1;
660
    ws2 := s2;
661
  end;
662
  ToUpperCase(ws1);
663
  ToUpperCase(ws2);
664
  Result := UpperFileNameComp(ws1, ws2);
665
end;
666
{$ENDIF}
667
 
668
end.