443 lines
16 KiB
C
443 lines
16 KiB
C
|
/*
  Copyright (c) 1990-2005 Info-ZIP. All rights reserved.

  See the accompanying file LICENSE, version 2000-Apr-09 or later
  (the contents of which are also included in unzip.h) for terms of use.
  If, for some reason, all these files are missing, the Info-ZIP license
  also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
*/
/*---------------------------------------------------------------------------

  match.c

  The match() routine recursively compares a string to a "pattern" (regular
  expression), returning TRUE if a match is found or FALSE if not. This
  version is specifically for use with unzip.c: as did the previous match()
  routines from SEA and J. Kercheval, it leaves the case (upper, lower, or
  mixed) of the string alone, but converts any uppercase characters in the
  pattern to lowercase if indicated by the global var pInfo->lcflag (which
  is to say, string is assumed to have been converted to lowercase already,
  if such was necessary).

  GRR: reversed order of text, pattern in matche() (now same as match());
  added ignore_case/ic flags, Case() macro.

  PaulK: replaced matche() with recmatch() from Zip, modified to have an
  ignore_case argument; replaced test frame with simpler one.

  ---------------------------------------------------------------------------

  Copyright on recmatch() from Zip's util.c (although recmatch() was almost
  certainly written by Mark Adler...ask me how I can tell :-) ):

  Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
  Kai Uwe Rommel and Igor Mandrichenko.

  Permission is granted to any individual or institution to use, copy,
  or redistribute this software so long as all of the original files are
  included unmodified, that it is not sold for profit, and that this copy-
  right notice is retained.

  ---------------------------------------------------------------------------

  Match the pattern (wildcard) against the string (fixed):

  match(string, pattern, ignore_case, sepc);

  returns TRUE if string matches pattern, FALSE otherwise. In the pattern:

  `*' matches any sequence of characters (zero or more)
  `?' matches any single character
  [SET] matches any character in the specified set,
  [!SET] or [^SET] matches any character not in the specified set.

  A set is composed of characters or ranges; a range looks like ``character
  hyphen character'' (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of
  characters allowed in the [..] pattern construct. Other characters are
  allowed (i.e., 8-bit characters) if your system will support them.

  To suppress the special syntactic significance of any of ``[]*?!^-\'', in-
  side or outside a [..] construct, and match the character exactly, precede
  it with a ``\'' (backslash).

  Note that "*.*" and "*." are treated specially under MS-DOS if DOSWILD is
  defined. See the DOSWILD section below for an explanation. Note also
  that with VMSWILD defined, '%' is used instead of '?', and sets (ranges)
  are delimited by () instead of [].

  ---------------------------------------------------------------------------*/
|
||
|
|
||
|
|
||
|
#define __MATCH_C /* identifies this source module */
|
||
|
|
||
|
/* define ToLower() in here (for Unix, define ToLower to be macro (using
|
||
|
* isupper()); otherwise just use tolower() */
|
||
|
#define UNZIP_INTERNAL
|
||
|
#include "unzip.h"
|
||
|
|
||
|
#ifndef THEOS /* the Theos port defines its own variant of match() */
|
||
|
|
||
|
#if 0 /* this is not useful until it matches Amiga names insensitively */
|
||
|
#ifdef AMIGA /* some other platforms might also want to use this */
|
||
|
# define ANSI_CHARSET /* MOVE INTO UNZIP.H EVENTUALLY */
|
||
|
#endif
|
||
|
#endif /* 0 */
|
||
|
|
||
|
#ifdef ANSI_CHARSET
|
||
|
# ifdef ToLower
|
||
|
# undef ToLower
|
||
|
# endif
|
||
|
/* uppercase letters are values 41 thru 5A, C0 thru D6, and D8 thru DE */
|
||
|
# define IsUpper(c) (c>=0xC0 ? c<=0xDE && c!=0xD7 : c>=0x41 && c<=0x5A)
|
||
|
# define ToLower(c) (IsUpper((uch) c) ? (unsigned) c | 0x20 : (unsigned) c)
|
||
|
#endif
|
||
|
#define Case(x) (ic? ToLower(x) : (x))
|
||
|
|
||
|
#ifdef VMSWILD
|
||
|
# define WILDCHAR '%'
|
||
|
# define BEG_RANGE '('
|
||
|
# define END_RANGE ')'
|
||
|
#else
|
||
|
# define WILDCHAR '?'
|
||
|
# define BEG_RANGE '['
|
||
|
# define END_RANGE ']'
|
||
|
#endif
|
||
|
|
||
|
#if 0 /* GRR: add this to unzip.h someday... */
|
||
|
#if !(defined(MSDOS) && defined(DOSWILD))
|
||
|
#ifdef WILD_STOP_AT_DIR
|
||
|
#define match(s,p,ic,sc) (recmatch((ZCONST uch *)p,(ZCONST uch *)s,ic,sc) == 1)
|
||
|
#else
|
||
|
#define match(s,p,ic) (recmatch((ZCONST uch *)p,(ZCONST uch *)s,ic) == 1)
|
||
|
#endif
|
||
|
int recmatch OF((ZCONST uch *pattern, ZCONST uch *string,
|
||
|
int ignore_case __WDLPRO));
|
||
|
#endif
|
||
|
#endif /* 0 */
|
||
|
static int recmatch OF((ZCONST uch *pattern, ZCONST uch *string,
|
||
|
int ignore_case __WDLPRO));
|
||
|
static char *isshexp OF((ZCONST char *p));
|
||
|
static int namecmp OF((ZCONST char *s1, ZCONST char *s2));
|
||
|
|
||
|
|
||
|
/* match() is a shell to recmatch() to return only Boolean values. */
|
||
|
|
||
|
int match(string, pattern, ignore_case __WDL)
|
||
|
ZCONST char *string, *pattern;
|
||
|
int ignore_case;
|
||
|
__WDLDEF
|
||
|
{
|
||
|
#if (defined(MSDOS) && defined(DOSWILD))
|
||
|
char *dospattern;
|
||
|
int j = strlen(pattern);
|
||
|
|
||
|
/*---------------------------------------------------------------------------
|
||
|
Optional MS-DOS preprocessing section: compare last three chars of the
|
||
|
wildcard to "*.*" and translate to "*" if found; else compare the last
|
||
|
two characters to "*." and, if found, scan the non-wild string for dots.
|
||
|
If in the latter case a dot is found, return failure; else translate the
|
||
|
"*." to "*". In either case, continue with the normal (Unix-like) match
|
||
|
procedure after translation. (If not enough memory, default to normal
|
||
|
match.) This causes "a*.*" and "a*." to behave as MS-DOS users expect.
|
||
|
---------------------------------------------------------------------------*/
|
||
|
|
||
|
if ((dospattern = (char *)malloc(j+1)) != NULL) {
|
||
|
strcpy(dospattern, pattern);
|
||
|
if (!strcmp(dospattern+j-3, "*.*")) {
|
||
|
dospattern[j-2] = '\0'; /* nuke the ".*" */
|
||
|
} else if (!strcmp(dospattern+j-2, "*.")) {
|
||
|
char *p = MBSCHR(string, '.');
|
||
|
|
||
|
if (p) { /* found a dot: match fails */
|
||
|
free(dospattern);
|
||
|
return 0;
|
||
|
}
|
||
|
dospattern[j-1] = '\0'; /* nuke the end "." */
|
||
|
}
|
||
|
j = recmatch((uch *)dospattern, (uch *)string, ignore_case __WDL);
|
||
|
free(dospattern);
|
||
|
return j == 1;
|
||
|
} else
|
||
|
#endif /* MSDOS && DOSWILD */
|
||
|
return recmatch((uch *)pattern, (uch *)string, ignore_case __WDL) == 1;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
static int recmatch(p, s, ic __WDL)
|
||
|
ZCONST uch *p; /* sh pattern to match */
|
||
|
ZCONST uch *s; /* string to which to match it */
|
||
|
int ic; /* true for case insensitivity */
|
||
|
__WDLDEF /* directory sepchar for WildStopAtDir mode, or 0 */
|
||
|
/* Recursively compare the sh pattern p with the string s and return 1 if
|
||
|
* they match, and 0 or 2 if they don't or if there is a syntax error in the
|
||
|
* pattern. This routine recurses on itself no more deeply than the number
|
||
|
* of characters in the pattern. */
|
||
|
{
|
||
|
unsigned int c; /* pattern char or start of range in [-] loop */
|
||
|
|
||
|
/* Get first character, the pattern for new recmatch calls follows */
|
||
|
c = *p; INCSTR(p);
|
||
|
|
||
|
/* If that was the end of the pattern, match if string empty too */
|
||
|
if (c == 0)
|
||
|
return *s == 0;
|
||
|
|
||
|
/* '?' (or '%') matches any character (but not an empty string). */
|
||
|
if (c == WILDCHAR)
|
||
|
#ifdef WILD_STOP_AT_DIR
|
||
|
/* If uO.W_flag is non-zero, it won't match '/' */
|
||
|
return (*s && (!sepc || *s != (uch)sepc))
|
||
|
? recmatch(p, s + CLEN(s), ic, sepc) : 0;
|
||
|
#else
|
||
|
return *s ? recmatch(p, s + CLEN(s), ic) : 0;
|
||
|
#endif
|
||
|
|
||
|
/* '*' matches any number of characters, including zero */
|
||
|
#ifdef AMIGA
|
||
|
if (c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */
|
||
|
c = '*', p++;
|
||
|
#endif /* AMIGA */
|
||
|
if (c == '*') {
|
||
|
#ifdef WILD_STOP_AT_DIR
|
||
|
if (sepc) {
|
||
|
/* check for single "*" or double "**" */
|
||
|
# ifdef AMIGA
|
||
|
if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
|
||
|
c = '*', p++;
|
||
|
if (c != '*') {
|
||
|
# else /* !AMIGA */
|
||
|
if (*p != '*') {
|
||
|
# endif /* ?AMIGA */
|
||
|
/* single "*": this doesn't match the dirsep character */
|
||
|
for (; *s && *s != (uch)sepc; INCSTR(s))
|
||
|
if ((c = recmatch(p, s, ic, sepc)) != 0)
|
||
|
return (int)c;
|
||
|
/* end of pattern: matched if at end of string, else continue */
|
||
|
if (*p == '\0')
|
||
|
return (*s == 0);
|
||
|
/* continue to match if at sepc in pattern, else give up */
|
||
|
return (*p == (uch)sepc || (*p == '\\' && p[1] == (uch)sepc))
|
||
|
? recmatch(p, s, ic, sepc) : 2;
|
||
|
}
|
||
|
/* "**": this matches slashes */
|
||
|
++p; /* move p behind the second '*' */
|
||
|
/* and continue with the non-W_flag code variant */
|
||
|
}
|
||
|
#endif /* WILD_STOP_AT_DIR */
|
||
|
if (*p == 0)
|
||
|
return 1;
|
||
|
if (isshexp((ZCONST char *)p) == NULL) {
|
||
|
/* Optimization for rest of pattern being a literal string:
|
||
|
* If there are no other shell expression chars in the rest
|
||
|
* of the pattern behind the multi-char wildcard, then just
|
||
|
* compare the literal string tail.
|
||
|
*/
|
||
|
ZCONST uch *srest;
|
||
|
|
||
|
srest = s + (strlen((ZCONST char *)s) - strlen((ZCONST char *)p));
|
||
|
if (srest - s < 0)
|
||
|
/* remaining literal string from pattern is longer than rest
|
||
|
* of test string, there can't be a match
|
||
|
*/
|
||
|
return 0;
|
||
|
else
|
||
|
/* compare the remaining literal pattern string with the last
|
||
|
* bytes of the test string to check for a match
|
||
|
*/
|
||
|
#ifdef _MBCS
|
||
|
{
|
||
|
ZCONST uch *q = s;
|
||
|
|
||
|
/* MBCS-aware code must not scan backwards into a string from
|
||
|
* the end.
|
||
|
* So, we have to move forward by character from our well-known
|
||
|
* character position s in the test string until we have
|
||
|
* advanced to the srest position.
|
||
|
*/
|
||
|
while (q < srest)
|
||
|
INCSTR(q);
|
||
|
/* In case the byte *srest is a trailing byte of a multibyte
|
||
|
* character in the test string s, we have actually advanced
|
||
|
* past the position (srest).
|
||
|
* For this case, the match has failed!
|
||
|
*/
|
||
|
if (q != srest)
|
||
|
return 0;
|
||
|
return ((ic
|
||
|
? namecmp((ZCONST char *)p, (ZCONST char *)q)
|
||
|
: strcmp((ZCONST char *)p, (ZCONST char *)q)
|
||
|
) == 0);
|
||
|
}
|
||
|
#else /* !_MBCS */
|
||
|
return ((ic
|
||
|
? namecmp((ZCONST char *)p, (ZCONST char *)srest)
|
||
|
: strcmp((ZCONST char *)p, (ZCONST char *)srest)
|
||
|
) == 0);
|
||
|
#endif /* ?_MBCS */
|
||
|
} else {
|
||
|
/* pattern contains more wildcards, continue with recursion... */
|
||
|
for (; *s; INCSTR(s))
|
||
|
if ((c = recmatch(p, s, ic __WDL)) != 0)
|
||
|
return (int)c;
|
||
|
return 2; /* 2 means give up--match will return false */
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Parse and process the list of characters and ranges in brackets */
|
||
|
if (c == BEG_RANGE) {
|
||
|
int e; /* flag true if next char to be taken literally */
|
||
|
ZCONST uch *q; /* pointer to end of [-] group */
|
||
|
int r; /* flag true to match anything but the range */
|
||
|
|
||
|
if (*s == 0) /* need a character to match */
|
||
|
return 0;
|
||
|
p += (r = (*p == '!' || *p == '^')); /* see if reverse */
|
||
|
for (q = p, e = 0; *q; INCSTR(q)) /* find closing bracket */
|
||
|
if (e)
|
||
|
e = 0;
|
||
|
else
|
||
|
if (*q == '\\') /* GRR: change to ^ for MS-DOS, OS/2? */
|
||
|
e = 1;
|
||
|
else if (*q == END_RANGE)
|
||
|
break;
|
||
|
if (*q != END_RANGE) /* nothing matches if bad syntax */
|
||
|
return 0;
|
||
|
for (c = 0, e = (*p == '-'); p < q; INCSTR(p)) {
|
||
|
/* go through the list */
|
||
|
if (!e && *p == '\\') /* set escape flag if \ */
|
||
|
e = 1;
|
||
|
else if (!e && *p == '-') /* set start of range if - */
|
||
|
c = *(p-1);
|
||
|
else {
|
||
|
unsigned int cc = Case(*s);
|
||
|
|
||
|
if (*(p+1) != '-')
|
||
|
for (c = c ? c : *p; c <= *p; c++) /* compare range */
|
||
|
if ((unsigned)Case(c) == cc) /* typecast for MSC bug */
|
||
|
return r ? 0 : recmatch(q + 1, s + 1, ic __WDL);
|
||
|
c = e = 0; /* clear range, escape flags */
|
||
|
}
|
||
|
}
|
||
|
return r ? recmatch(q + CLEN(q), s + CLEN(s), ic __WDL) : 0;
|
||
|
/* bracket match failed */
|
||
|
}
|
||
|
|
||
|
/* if escape ('\\'), just compare next character */
|
||
|
if (c == '\\' && (c = *p++) == 0) /* if \ at end, then syntax error */
|
||
|
return 0;
|
||
|
|
||
|
/* just a character--compare it */
|
||
|
#ifdef QDOS
|
||
|
return QMatch(Case((uch)c), Case(*s)) ?
|
||
|
recmatch(p, s + CLEN(s), ic __WDL) : 0;
|
||
|
#else
|
||
|
return Case((uch)c) == Case(*s) ?
|
||
|
recmatch(p, s + CLEN(s), ic __WDL) : 0;
|
||
|
#endif
|
||
|
|
||
|
} /* end function recmatch() */
|
||
|
|
||
|
|
||
|
|
||
|
static char *isshexp(p)
|
||
|
ZCONST char *p;
|
||
|
/* If p is a sh expression, a pointer to the first special character is
|
||
|
returned. Otherwise, NULL is returned. */
|
||
|
{
|
||
|
for (; *p; INCSTR(p))
|
||
|
if (*p == '\\' && *(p+1))
|
||
|
p++;
|
||
|
else if (*p == WILDCHAR || *p == '*' || *p == BEG_RANGE)
|
||
|
return (char *)p;
|
||
|
return NULL;
|
||
|
} /* end function isshexp() */
|
||
|
|
||
|
|
||
|
|
||
|
static int namecmp(s1, s2)
|
||
|
ZCONST char *s1, *s2;
|
||
|
{
|
||
|
int d;
|
||
|
|
||
|
for (;;) {
|
||
|
d = (int)ToLower((uch)*s1)
|
||
|
- (int)ToLower((uch)*s2);
|
||
|
|
||
|
if (d || *s1 == 0 || *s2 == 0)
|
||
|
return d;
|
||
|
|
||
|
s1++;
|
||
|
s2++;
|
||
|
}
|
||
|
} /* end function namecmp() */
|
||
|
|
||
|
#endif /* !THEOS */
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
int iswild(p) /* originally only used for stat()-bug workaround in */
|
||
|
ZCONST char *p; /* VAX C, Turbo/Borland C, Watcom C, Atari MiNT libs; */
|
||
|
{ /* now used in process_zipfiles() as well */
|
||
|
for (; *p; INCSTR(p))
|
||
|
if (*p == '\\' && *(p+1))
|
||
|
++p;
|
||
|
#ifdef THEOS
|
||
|
else if (*p == '?' || *p == '*' || *p=='#'|| *p == '@')
|
||
|
#else /* !THEOS */
|
||
|
#ifdef VMS
|
||
|
else if (*p == '%' || *p == '*')
|
||
|
#else /* !VMS */
|
||
|
#ifdef AMIGA
|
||
|
else if (*p == '?' || *p == '*' || (*p=='#' && p[1]=='?') || *p == '[')
|
||
|
#else /* !AMIGA */
|
||
|
else if (*p == '?' || *p == '*' || *p == '[')
|
||
|
#endif /* ?AMIGA */
|
||
|
#endif /* ?VMS */
|
||
|
#endif /* ?THEOS */
|
||
|
#ifdef QDOS
|
||
|
return (int)p;
|
||
|
#else
|
||
|
return TRUE;
|
||
|
#endif
|
||
|
|
||
|
return FALSE;
|
||
|
|
||
|
} /* end function iswild() */
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
#ifdef TEST_MATCH
|
||
|
|
||
|
#define put(s) {fputs(s,stdout); fflush(stdout);}
|
||
|
#ifdef main
|
||
|
# undef main
|
||
|
#endif
|
||
|
|
||
|
int main(int argc, char **argv)
|
||
|
{
|
||
|
char pat[256], str[256];
|
||
|
|
||
|
for (;;) {
|
||
|
put("Pattern (return to exit): ");
|
||
|
gets(pat);
|
||
|
if (!pat[0])
|
||
|
break;
|
||
|
for (;;) {
|
||
|
put("String (return for new pattern): ");
|
||
|
gets(str);
|
||
|
if (!str[0])
|
||
|
break;
|
||
|
printf("Case sensitive: %s insensitive: %s\n",
|
||
|
match(str, pat, 0) ? "YES" : "NO",
|
||
|
match(str, pat, 1) ? "YES" : "NO");
|
||
|
}
|
||
|
}
|
||
|
EXIT(0);
|
||
|
}
|
||
|
|
||
|
#endif /* TEST_MATCH */
|