mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-25 01:02:24 +00:00
Implemented a simple wildcard matching engine, and used it to
restore remote wildcard capability in sftp-style PSCP. [originally from svn r1209]
This commit is contained in:
parent
ff9a038cdd
commit
0da98d052d
2
Makefile
2
Makefile
@ -88,7 +88,7 @@ TOBJS = be_nossh.$(OBJ)
|
|||||||
##-- objects plink
|
##-- objects plink
|
||||||
PLOBJS = plink.$(OBJ)
|
PLOBJS = plink.$(OBJ)
|
||||||
##-- objects pscp
|
##-- objects pscp
|
||||||
SOBJS = scp.$(OBJ) winnet.$(OBJ) be_none.$(OBJ)
|
SOBJS = scp.$(OBJ) winnet.$(OBJ) be_none.$(OBJ) wildcard.$(OBJ)
|
||||||
##-- objects psftp
|
##-- objects psftp
|
||||||
FOBJS = psftp.$(OBJ) winnet.$(OBJ) be_none.$(OBJ)
|
FOBJS = psftp.$(OBJ) winnet.$(OBJ) be_none.$(OBJ)
|
||||||
##-- objects pscp psftp
|
##-- objects pscp psftp
|
||||||
|
6
putty.h
6
putty.h
@ -555,5 +555,11 @@ void crypto_wrapup();
|
|||||||
void agent_query(void *in, int inlen, void **out, int *outlen);
|
void agent_query(void *in, int inlen, void **out, int *outlen);
|
||||||
int agent_exists(void);
|
int agent_exists(void);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Exports from wildcard.c
|
||||||
|
*/
|
||||||
|
const char *wc_error(int value);
|
||||||
|
int wc_match(const char *wildcard, const char *target);
|
||||||
|
int wc_unescape(char *output, const char *wildcard);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
126
scp.c
126
scp.c
@ -685,17 +685,19 @@ static char *colon(char *str)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Return a pointer to the portion of str that comes after the last
|
* Return a pointer to the portion of str that comes after the last
|
||||||
* slash or backslash.
|
* slash (or backslash, if `local' is TRUE).
|
||||||
*/
|
*/
|
||||||
static char *stripslashes(char *str)
|
static char *stripslashes(char *str, int local)
|
||||||
{
|
{
|
||||||
char *p;
|
char *p;
|
||||||
|
|
||||||
p = strrchr(str, '/');
|
p = strrchr(str, '/');
|
||||||
if (p) str = p+1;
|
if (p) str = p+1;
|
||||||
|
|
||||||
|
if (local) {
|
||||||
p = strrchr(str, '\\');
|
p = strrchr(str, '\\');
|
||||||
if (p) str = p+1;
|
if (p) str = p+1;
|
||||||
|
}
|
||||||
|
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
@ -833,8 +835,10 @@ static struct scp_sftp_dirstack {
|
|||||||
struct fxp_name *names;
|
struct fxp_name *names;
|
||||||
int namepos, namelen;
|
int namepos, namelen;
|
||||||
char *dirpath;
|
char *dirpath;
|
||||||
|
char *wildcard;
|
||||||
} *scp_sftp_dirstack_head;
|
} *scp_sftp_dirstack_head;
|
||||||
static char *scp_sftp_remotepath, *scp_sftp_currentname;
|
static char *scp_sftp_remotepath, *scp_sftp_currentname;
|
||||||
|
static char *scp_sftp_wildcard;
|
||||||
static int scp_sftp_targetisdir, scp_sftp_donethistarget;
|
static int scp_sftp_targetisdir, scp_sftp_donethistarget;
|
||||||
static int scp_sftp_preserve, scp_sftp_recursive;
|
static int scp_sftp_preserve, scp_sftp_recursive;
|
||||||
static unsigned long scp_sftp_mtime, scp_sftp_atime;
|
static unsigned long scp_sftp_mtime, scp_sftp_atime;
|
||||||
@ -1058,15 +1062,71 @@ int scp_send_enddir(void)
|
|||||||
* right at the start, whereas scp_sink_init is called to
|
* right at the start, whereas scp_sink_init is called to
|
||||||
* initialise every level of recursion in the protocol.
|
* initialise every level of recursion in the protocol.
|
||||||
*/
|
*/
|
||||||
void scp_sink_setup(char *source, int preserve, int recursive)
|
int scp_sink_setup(char *source, int preserve, int recursive)
|
||||||
{
|
{
|
||||||
if (using_sftp) {
|
if (using_sftp) {
|
||||||
scp_sftp_remotepath = dupstr(source);
|
char *newsource;
|
||||||
|
/*
|
||||||
|
* It's possible that the source string we've been given
|
||||||
|
* contains a wildcard. If so, we must split the directory
|
||||||
|
* away from the wildcard itself (throwing an error if any
|
||||||
|
* wildcardness comes before the final slash) and arrange
|
||||||
|
* things so that a dirstack entry will be set up.
|
||||||
|
*/
|
||||||
|
newsource = smalloc(1+strlen(source));
|
||||||
|
if (!wc_unescape(newsource, source)) {
|
||||||
|
/* Yes, here we go; it's a wildcard. Bah. */
|
||||||
|
char *dupsource, *lastpart, *dirpart, *wildcard;
|
||||||
|
dupsource = dupstr(source);
|
||||||
|
lastpart = stripslashes(dupsource, 0);
|
||||||
|
wildcard = dupstr(lastpart);
|
||||||
|
*lastpart = '\0';
|
||||||
|
if (*dupsource && dupsource[1]) {
|
||||||
|
/*
|
||||||
|
* The remains of dupsource are at least two
|
||||||
|
* characters long, meaning the pathname wasn't
|
||||||
|
* empty or just `/'. Hence, we remove the trailing
|
||||||
|
* slash.
|
||||||
|
*/
|
||||||
|
lastpart[-1] = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now we have separated our string into dupsource (the
|
||||||
|
* directory part) and wildcard. Both of these will
|
||||||
|
* need freeing at some point. Next step is to remove
|
||||||
|
* wildcard escapes from the directory part, throwing
|
||||||
|
* an error if it contains a real wildcard.
|
||||||
|
*/
|
||||||
|
dirpart = smalloc(1+strlen(dupsource));
|
||||||
|
if (!wc_unescape(dirpart, dupsource)) {
|
||||||
|
tell_user(stderr, "%s: multiple-level wildcards unsupported",
|
||||||
|
source);
|
||||||
|
errs++;
|
||||||
|
sfree(dirpart);
|
||||||
|
sfree(wildcard);
|
||||||
|
sfree(dupsource);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now we have dirpart (unescaped, ie a valid remote
|
||||||
|
* path), and wildcard (a wildcard). This will be
|
||||||
|
* sufficient to arrange a dirstack entry.
|
||||||
|
*/
|
||||||
|
scp_sftp_remotepath = dirpart;
|
||||||
|
scp_sftp_wildcard = wildcard;
|
||||||
|
sfree(dupsource);
|
||||||
|
} else {
|
||||||
|
scp_sftp_remotepath = newsource;
|
||||||
|
scp_sftp_wildcard = NULL;
|
||||||
|
}
|
||||||
scp_sftp_preserve = preserve;
|
scp_sftp_preserve = preserve;
|
||||||
scp_sftp_recursive = recursive;
|
scp_sftp_recursive = recursive;
|
||||||
scp_sftp_donethistarget = 0;
|
scp_sftp_donethistarget = 0;
|
||||||
scp_sftp_dirstack_head = NULL;
|
scp_sftp_dirstack_head = NULL;
|
||||||
}
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int scp_sink_init(void)
|
int scp_sink_init(void)
|
||||||
@ -1080,6 +1140,7 @@ int scp_sink_init(void)
|
|||||||
#define SCP_SINK_FILE 1
|
#define SCP_SINK_FILE 1
|
||||||
#define SCP_SINK_DIR 2
|
#define SCP_SINK_DIR 2
|
||||||
#define SCP_SINK_ENDDIR 3
|
#define SCP_SINK_ENDDIR 3
|
||||||
|
#define SCP_SINK_RETRY 4 /* not an action; just try again */
|
||||||
struct scp_sink_action {
|
struct scp_sink_action {
|
||||||
int action; /* FILE, DIR, ENDDIR */
|
int action; /* FILE, DIR, ENDDIR */
|
||||||
char *buf; /* will need freeing after use */
|
char *buf; /* will need freeing after use */
|
||||||
@ -1121,7 +1182,10 @@ int scp_get_sink_action(struct scp_sink_action *act)
|
|||||||
*/
|
*/
|
||||||
struct scp_sftp_dirstack *head = scp_sftp_dirstack_head;
|
struct scp_sftp_dirstack *head = scp_sftp_dirstack_head;
|
||||||
while (head->namepos < head->namelen &&
|
while (head->namepos < head->namelen &&
|
||||||
is_dots(head->names[head->namepos].filename))
|
(is_dots(head->names[head->namepos].filename) ||
|
||||||
|
(head->wildcard &&
|
||||||
|
!wc_match(head->wildcard,
|
||||||
|
head->names[head->namepos].filename))))
|
||||||
head->namepos++; /* skip . and .. */
|
head->namepos++; /* skip . and .. */
|
||||||
if (head->namepos < head->namelen) {
|
if (head->namepos < head->namelen) {
|
||||||
fname = dupcat(head->dirpath, "/",
|
fname = dupcat(head->dirpath, "/",
|
||||||
@ -1131,15 +1195,21 @@ int scp_get_sink_action(struct scp_sink_action *act)
|
|||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* We've come to the end of the list; pop it off
|
* We've come to the end of the list; pop it off
|
||||||
* the stack and return an ENDDIR action.
|
* the stack and return an ENDDIR action (or RETRY
|
||||||
|
* if this was a wildcard match).
|
||||||
*/
|
*/
|
||||||
|
if (head->wildcard) {
|
||||||
|
act->action = SCP_SINK_RETRY;
|
||||||
|
sfree(head->wildcard);
|
||||||
|
} else {
|
||||||
|
act->action = SCP_SINK_ENDDIR;
|
||||||
|
}
|
||||||
|
|
||||||
sfree(head->dirpath);
|
sfree(head->dirpath);
|
||||||
sfree(head->names);
|
sfree(head->names);
|
||||||
scp_sftp_dirstack_head = head->next;
|
scp_sftp_dirstack_head = head->next;
|
||||||
sfree(head);
|
sfree(head);
|
||||||
|
|
||||||
act->action = SCP_SINK_ENDDIR;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1164,10 +1234,11 @@ int scp_get_sink_action(struct scp_sink_action *act)
|
|||||||
struct fxp_names *names;
|
struct fxp_names *names;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* It's a directory. If we're not in recursive
|
* It's a directory. If we're not in recursive mode and
|
||||||
* mode, this just merits a complaint.
|
* we haven't been passed a wildcard from
|
||||||
|
* scp_sink_setup, this just merits a complaint.
|
||||||
*/
|
*/
|
||||||
if (!scp_sftp_recursive) {
|
if (!scp_sftp_recursive && !scp_sftp_wildcard) {
|
||||||
tell_user(stderr, "pscp: %s: is a directory", fname);
|
tell_user(stderr, "pscp: %s: is a directory", fname);
|
||||||
errs++;
|
errs++;
|
||||||
if (must_free_fname) sfree(fname);
|
if (must_free_fname) sfree(fname);
|
||||||
@ -1177,9 +1248,10 @@ int scp_get_sink_action(struct scp_sink_action *act)
|
|||||||
/*
|
/*
|
||||||
* Otherwise, the fun begins. We must fxp_opendir() the
|
* Otherwise, the fun begins. We must fxp_opendir() the
|
||||||
* directory, slurp the filenames into memory, return
|
* directory, slurp the filenames into memory, return
|
||||||
* SCP_SINK_DIR, and set targetisdir. The next time
|
* SCP_SINK_DIR (unless this is a wildcard match), and
|
||||||
* we're called, we will run through the list of
|
* set targetisdir. The next time we're called, we will
|
||||||
* filenames one by one.
|
* run through the list of filenames one by one,
|
||||||
|
* matching them against a wildcard if present.
|
||||||
*
|
*
|
||||||
* If targetisdir is _already_ set (meaning we're
|
* If targetisdir is _already_ set (meaning we're
|
||||||
* already in the middle of going through another such
|
* already in the middle of going through another such
|
||||||
@ -1235,10 +1307,19 @@ int scp_get_sink_action(struct scp_sink_action *act)
|
|||||||
newitem->dirpath = fname;
|
newitem->dirpath = fname;
|
||||||
else
|
else
|
||||||
newitem->dirpath = dupstr(fname);
|
newitem->dirpath = dupstr(fname);
|
||||||
|
if (scp_sftp_wildcard) {
|
||||||
|
newitem->wildcard = scp_sftp_wildcard;
|
||||||
|
scp_sftp_wildcard = NULL;
|
||||||
|
} else {
|
||||||
|
newitem->wildcard = NULL;
|
||||||
|
}
|
||||||
scp_sftp_dirstack_head = newitem;
|
scp_sftp_dirstack_head = newitem;
|
||||||
|
|
||||||
|
if (newitem->wildcard) {
|
||||||
|
act->action = SCP_SINK_RETRY;
|
||||||
|
} else {
|
||||||
act->action = SCP_SINK_DIR;
|
act->action = SCP_SINK_DIR;
|
||||||
act->buf = dupstr(stripslashes(fname));
|
act->buf = dupstr(stripslashes(fname, 0));
|
||||||
act->name = act->buf;
|
act->name = act->buf;
|
||||||
act->size = 0; /* duhh, it's a directory */
|
act->size = 0; /* duhh, it's a directory */
|
||||||
act->mode = 07777 & attrs.permissions;
|
act->mode = 07777 & attrs.permissions;
|
||||||
@ -1249,6 +1330,7 @@ int scp_get_sink_action(struct scp_sink_action *act)
|
|||||||
act->settime = 1;
|
act->settime = 1;
|
||||||
} else
|
} else
|
||||||
act->settime = 0;
|
act->settime = 0;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@ -1256,7 +1338,7 @@ int scp_get_sink_action(struct scp_sink_action *act)
|
|||||||
* It's a file. Return SCP_SINK_FILE.
|
* It's a file. Return SCP_SINK_FILE.
|
||||||
*/
|
*/
|
||||||
act->action = SCP_SINK_FILE;
|
act->action = SCP_SINK_FILE;
|
||||||
act->buf = dupstr(stripslashes(fname));
|
act->buf = dupstr(stripslashes(fname, 0));
|
||||||
act->name = act->buf;
|
act->name = act->buf;
|
||||||
if (attrs.flags & SSH_FILEXFER_ATTR_SIZE) {
|
if (attrs.flags & SSH_FILEXFER_ATTR_SIZE) {
|
||||||
if (uint64_compare(attrs.size,
|
if (uint64_compare(attrs.size,
|
||||||
@ -1614,6 +1696,9 @@ static void sink(char *targ, char *src)
|
|||||||
if (act.action == SCP_SINK_ENDDIR)
|
if (act.action == SCP_SINK_ENDDIR)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (act.action == SCP_SINK_RETRY)
|
||||||
|
continue;
|
||||||
|
|
||||||
if (targisdir) {
|
if (targisdir) {
|
||||||
/*
|
/*
|
||||||
* Prevent the remote side from maliciously writing to
|
* Prevent the remote side from maliciously writing to
|
||||||
@ -1644,7 +1729,7 @@ static void sink(char *targ, char *src)
|
|||||||
*/
|
*/
|
||||||
char *striptarget, *stripsrc;
|
char *striptarget, *stripsrc;
|
||||||
|
|
||||||
striptarget = stripslashes(act.name);
|
striptarget = stripslashes(act.name, 1);
|
||||||
if (striptarget != act.name) {
|
if (striptarget != act.name) {
|
||||||
tell_user(stderr, "warning: remote host sent a compound"
|
tell_user(stderr, "warning: remote host sent a compound"
|
||||||
" pathname - possibly malicious! (ignored)");
|
" pathname - possibly malicious! (ignored)");
|
||||||
@ -1661,7 +1746,7 @@ static void sink(char *targ, char *src)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (src) {
|
if (src) {
|
||||||
stripsrc = stripslashes(src);
|
stripsrc = stripslashes(src, 1);
|
||||||
if (!stripsrc[strcspn(stripsrc, "*?[]")] &&
|
if (!stripsrc[strcspn(stripsrc, "*?[]")] &&
|
||||||
strcmp(striptarget, stripsrc)) {
|
strcmp(striptarget, stripsrc)) {
|
||||||
tell_user(stderr, "warning: remote host attempted to"
|
tell_user(stderr, "warning: remote host attempted to"
|
||||||
@ -1715,7 +1800,7 @@ static void sink(char *targ, char *src)
|
|||||||
stat_bytes = 0;
|
stat_bytes = 0;
|
||||||
stat_starttime = time(NULL);
|
stat_starttime = time(NULL);
|
||||||
stat_lasttime = 0;
|
stat_lasttime = 0;
|
||||||
stat_name = stripslashes(destfname);
|
stat_name = stripslashes(destfname, 1);
|
||||||
|
|
||||||
received = 0;
|
received = 0;
|
||||||
while (received < act.size) {
|
while (received < act.size) {
|
||||||
@ -1844,7 +1929,7 @@ static void toremote(int argc, char *argv[])
|
|||||||
* filenames returned from Find{First,Next}File.
|
* filenames returned from Find{First,Next}File.
|
||||||
*/
|
*/
|
||||||
srcpath = dupstr(src);
|
srcpath = dupstr(src);
|
||||||
last = stripslashes(srcpath);
|
last = stripslashes(srcpath, 1);
|
||||||
if (last == srcpath) {
|
if (last == srcpath) {
|
||||||
last = strchr(srcpath, ':');
|
last = strchr(srcpath, ':');
|
||||||
if (last)
|
if (last)
|
||||||
@ -1937,7 +2022,8 @@ static void tolocal(int argc, char *argv[])
|
|||||||
do_cmd(host, user, cmd);
|
do_cmd(host, user, cmd);
|
||||||
sfree(cmd);
|
sfree(cmd);
|
||||||
|
|
||||||
scp_sink_setup(src, preserve, recursive);
|
if (scp_sink_setup(src, preserve, recursive))
|
||||||
|
return;
|
||||||
|
|
||||||
sink(targ, src);
|
sink(targ, src);
|
||||||
}
|
}
|
||||||
|
459
wildcard.c
Normal file
459
wildcard.c
Normal file
@ -0,0 +1,459 @@
|
|||||||
|
/*
|
||||||
|
* Wildcard matching engine for use with SFTP-based file transfer
|
||||||
|
* programs (PSFTP, new-look PSCP): since SFTP has no notion of
|
||||||
|
* getting the remote side to do globbing (and rightly so) we have
|
||||||
|
* to do it locally, by retrieving all the filenames in a directory
|
||||||
|
* and checking each against the wildcard pattern.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Definition of wildcard syntax:
|
||||||
|
*
|
||||||
|
* - * matches any sequence of characters, including zero.
|
||||||
|
* - ? matches exactly one character which can be anything.
|
||||||
|
* - [abc] matches exactly one character which is a, b or c.
|
||||||
|
* - [a-f] matches anything from a through f.
|
||||||
|
* - [^a-f] matches anything _except_ a through f.
|
||||||
|
* - [-_] matches - or _; [^-_] matches anything else. (The - is
|
||||||
|
* non-special if it occurs immediately after the opening
|
||||||
|
* bracket or ^.)
|
||||||
|
* - [a^] matches an a or a ^. (The ^ is non-special if it does
|
||||||
|
* _not_ occur immediately after the opening bracket.)
|
||||||
|
* - \*, \?, \[, \], \\ match the single characters *, ?, [, ], \.
|
||||||
|
* - All other characters are non-special and match themselves.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The wildcard matching technique we use is very simple and
|
||||||
|
* potentially O(N^2) in running time, but I don't anticipate it
|
||||||
|
* being that bad in reality (particularly since N will be the size
|
||||||
|
* of a filename, which isn't all that much). Perhaps one day, once
|
||||||
|
* PuTTY has grown a regexp matcher for some other reason, I might
|
||||||
|
* come back and reimplement wildcards by translating them into
|
||||||
|
* regexps or directly into NFAs; but for the moment, in the
|
||||||
|
* absence of any other need for the NFA->DFA translation engine,
|
||||||
|
* anything more than the simplest possible wildcard matcher is
|
||||||
|
* vast code-size overkill.
|
||||||
|
*
|
||||||
|
* Essentially, these wildcards are much simpler than regexps in
|
||||||
|
* that they consist of a sequence of rigid fragments (? and [...]
|
||||||
|
* can never match more or less than one character) separated by
|
||||||
|
* asterisks. It is therefore extremely simple to look at a rigid
|
||||||
|
* fragment and determine whether or not it begins at a particular
|
||||||
|
* point in the test string; so we can search along the string
|
||||||
|
* until we find each fragment, then search for the next. As long
|
||||||
|
* as we find each fragment in the _first_ place it occurs, there
|
||||||
|
* will never be a danger of having to backpedal and try to find it
|
||||||
|
* again somewhere else.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Wildcard syntax error codes. The matching routines report a syntax
 * error by returning the _negation_ of one of these values, so that
 * 1 and 0 remain available to mean `matched' and `did not match'.
 */
enum {
    WC_TRAILINGBACKSLASH = 1,
    WC_UNCLOSEDCLASS,
    WC_INVALIDRANGE
};

/*
 * Translate an error value returned by one of the wildcard routines
 * into a human-readable message.
 *
 * `value' may be passed either as the negative number the matching
 * routines return or as the positive enum constant; only its
 * magnitude is examined. An unrecognised value yields an `INTERNAL
 * ERROR' message rather than NULL, so the result can always be
 * printed directly. The returned string is a static literal and must
 * not be freed or modified.
 */
const char *wc_error(int value)
{
    value = abs(value);
    switch (value) {
      case WC_TRAILINGBACKSLASH:
        /*
         * The backslash must itself be escaped here: a lone \' inside
         * a string literal is just an escaped quote, which would make
         * the message read '' instead of '\'.
         */
        return "'\\' occurred at end of string (expected another character)";
      case WC_UNCLOSEDCLASS:
        return "expected ']' to close character class";
      case WC_INVALIDRANGE:
        return "character range was not terminated (']' just after '-')";
    }
    return "INTERNAL ERROR: unrecognised wildcard error number";
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is the routine that tests a target string to see if an
|
||||||
|
* initial substring of it matches a fragment. If successful, it
|
||||||
|
* returns 1, and advances both `fragment' and `target' past the
|
||||||
|
* fragment and matching substring respectively. If unsuccessful it
|
||||||
|
* returns zero. If the wildcard fragment suffers a syntax error,
|
||||||
|
* it returns <0 and the precise value indexes into wc_error.
|
||||||
|
*/
|
||||||
|
static int wc_match_fragment(const char **fragment, const char **target)
|
||||||
|
{
|
||||||
|
const char *f, *t;
|
||||||
|
|
||||||
|
f = *fragment;
|
||||||
|
t = *target;
|
||||||
|
/*
|
||||||
|
* The fragment terminates at either the end of the string, or
|
||||||
|
* the first (unescaped) *.
|
||||||
|
*/
|
||||||
|
while (*f && *f != '*' && *t) {
|
||||||
|
/*
|
||||||
|
* Extract one character from t, and one character's worth
|
||||||
|
* of pattern from f, and step along both. Return 0 if they
|
||||||
|
* fail to match.
|
||||||
|
*/
|
||||||
|
if (*f == '\\') {
|
||||||
|
/*
|
||||||
|
* Backslash, which means f[1] is to be treated as a
|
||||||
|
* literal character no matter what it is. It may not
|
||||||
|
* be the end of the string.
|
||||||
|
*/
|
||||||
|
if (!f[1])
|
||||||
|
return -WC_TRAILINGBACKSLASH; /* error */
|
||||||
|
if (f[1] != *t)
|
||||||
|
return 0; /* failed to match */
|
||||||
|
f += 2;
|
||||||
|
} else if (*f == '?') {
|
||||||
|
/*
|
||||||
|
* Question mark matches anything.
|
||||||
|
*/
|
||||||
|
f++;
|
||||||
|
} else if (*f == '[') {
|
||||||
|
int invert = 0;
|
||||||
|
int matched = 0;
|
||||||
|
/*
|
||||||
|
* Open bracket introduces a character class.
|
||||||
|
*/
|
||||||
|
f++;
|
||||||
|
if (*f == '^') {
|
||||||
|
invert = 1;
|
||||||
|
f++;
|
||||||
|
}
|
||||||
|
while (*f != ']') {
|
||||||
|
if (*f == '\\')
|
||||||
|
f++; /* backslashes still work */
|
||||||
|
if (!*f)
|
||||||
|
return -WC_UNCLOSEDCLASS; /* error again */
|
||||||
|
if (f[1] == '-') {
|
||||||
|
int lower, upper, ourchr;
|
||||||
|
lower = (unsigned char) *f++;
|
||||||
|
f++; /* eat the minus */
|
||||||
|
if (*f == ']')
|
||||||
|
return -WC_INVALIDRANGE; /* different error! */
|
||||||
|
if (*f == '\\')
|
||||||
|
f++; /* backslashes _still_ work */
|
||||||
|
if (!*f)
|
||||||
|
return -WC_UNCLOSEDCLASS; /* error again */
|
||||||
|
upper = (unsigned char) *f++;
|
||||||
|
ourchr = (unsigned char) *t;
|
||||||
|
if (lower > upper) {
|
||||||
|
int t = lower; lower = upper; upper = t;
|
||||||
|
}
|
||||||
|
if (ourchr >= lower && ourchr <= upper)
|
||||||
|
matched = 1;
|
||||||
|
} else {
|
||||||
|
matched |= (*t == *f++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (invert == matched)
|
||||||
|
return 0; /* failed to match character class */
|
||||||
|
f++; /* eat the ] */
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* Non-special character matches itself.
|
||||||
|
*/
|
||||||
|
if (*f != *t)
|
||||||
|
return 0;
|
||||||
|
f++;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Now we've done that, increment t past the character we
|
||||||
|
* matched.
|
||||||
|
*/
|
||||||
|
t++;
|
||||||
|
}
|
||||||
|
if (!*f || *f == '*') {
|
||||||
|
/*
|
||||||
|
* We have reached the end of f without finding a mismatch;
|
||||||
|
* so we're done. Update the caller pointers and return 1.
|
||||||
|
*/
|
||||||
|
*fragment = f;
|
||||||
|
*target = t;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Otherwise, we must have reached the end of t before we
|
||||||
|
* reached the end of f; so we've failed. Return 0.
|
||||||
|
*/
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Match a complete target string against a complete wildcard.
 *
 * Returns 1 if the whole of `target' matches the whole of
 * `wildcard', 0 if it does not, and a negative value (see wc_error)
 * if the wildcard contains a syntax error.
 */
int wc_match(const char *wildcard, const char *target)
{
    int status;

    /*
     * A fragment at the very start of the wildcard is anchored to the
     * start of the target: try it exactly once, and hand any failure
     * or error straight back.
     */
    if (*wildcard != '*') {
        status = wc_match_fragment(&wildcard, &target);
        if (status <= 0)
            return status;
    }

    /*
     * From here on the wildcard is a sequence of "*fragment" units.
     * For each one, slide along the target until the fragment
     * matches somewhere.
     */
    while (*wildcard != '\0') {
        int found = 0;

        assert(*wildcard == '*');
        do {
            wildcard++;
        } while (*wildcard == '*');

        /* A trailing star swallows whatever is left of the target. */
        if (*wildcard == '\0')
            return 1;

        for (; *target != '\0'; target++) {
            const char *frag_start = wildcard;
            const char *scan_start = target;

            status = wc_match_fragment(&wildcard, &target);
            if (status < 0)
                return status;         /* syntax error */
            if (status == 0)
                continue;              /* no match here; try one further on */

            if (*wildcard != '\0' || *target == '\0') {
                /*
                 * Either more wildcard follows, or the fragment
                 * finished exactly at the end of the target.
                 */
                found = 1;
                break;
            }

            /*
             * The _terminal_ fragment has matched too early (e.g.
             * "*a" against "parka" hits the first a). It consumed a
             * known number of target characters, so retry it at
             * exactly that distance from the end of the target, and
             * let that attempt decide the result.
             */
            target = scan_start + strlen(scan_start) - (target - scan_start);
            wildcard = frag_start;
            return wc_match_fragment(&wildcard, &target);
        }

        /* The target ran out without the fragment matching anywhere. */
        if (!found)
            return 0;
    }

    /*
     * The wildcard is exhausted after a successful fragment match, so
     * the overall match succeeds only if the target is exhausted too.
     */
    return *target == '\0';
}
|
||||||
|
|
||||||
|
/*
 * Translate a non-wildcard string into its raw equivalent by
 * removing any escaping backslashes.
 *
 * `output' must have room for strlen(wildcard)+1 bytes, since the
 * result is never longer than the input. `output' may also be NULL,
 * in which case nothing is written and the call merely reports
 * whether `wildcard' contains any real (unescaped) wildcard
 * characters.
 *
 * Returns 1 if the string was clean, in which case `output' (if
 * given) holds the zero-terminated unescaped text. Returns 0 as soon
 * as an unescaped *, ?, [ or ] is seen; in that case `output' may
 * not be zero-terminated and must not be used for anything.
 */
int wc_unescape(char *output, const char *wildcard)
{
    while (*wildcard) {
        if (*wildcard == '\\') {
            wildcard++;
            /* We are lenient about trailing backslashes in non-wildcards. */
            if (*wildcard) {
                if (output)
                    *output++ = *wildcard;
                wildcard++;
            }
        } else if (*wildcard == '*' || *wildcard == '?' ||
                   *wildcard == '[' || *wildcard == ']') {
            return 0;                  /* it's a wildcard! */
        } else {
            if (output)
                *output++ = *wildcard;
            wildcard++;
        }
    }

    /*
     * Terminate the result only if the caller supplied a buffer; a
     * NULL `output' is the documented "just tell me" mode.
     */
    if (output)
        *output = '\0';
    return 1;                          /* it's clean */
}
|
||||||
|
|
||||||
|
#ifdef TESTMODE

/*
 * Self-test harness, built only when TESTMODE is defined. Each entry
 * pairs a wildcard (or fragment) with a target string and the value
 * the routine under test is expected to return for them.
 */
struct test {
    const char *wildcard;
    const char *target;
    int expected_result;
};

static const struct test fragment_tests[] = {
    /*
     * We exhaustively unit-test the fragment matching routine
     * itself, which should save us the need to test all its
     * intricacies during the full wildcard tests.
     */
    {"abc", "abc", 1},
    {"abc", "abd", 0},
    {"abc", "abcd", 1},
    {"abcd", "abc", 0},
    {"ab[cd]", "abc", 1},
    {"ab[cd]", "abd", 1},
    {"ab[cd]", "abe", 0},
    {"ab[^cd]", "abc", 0},
    {"ab[^cd]", "abd", 0},
    {"ab[^cd]", "abe", 1},
    {"ab\\", "abc", -WC_TRAILINGBACKSLASH},
    {"ab\\*", "ab*", 1},
    {"ab\\?", "ab*", 0},
    {"ab?", "abc", 1},
    {"ab?", "ab", 0},
    {"ab[", "abc", -WC_UNCLOSEDCLASS},
    {"ab[c-", "abb", -WC_UNCLOSEDCLASS},
    {"ab[c-]", "abb", -WC_INVALIDRANGE},
    {"ab[c-e]", "abb", 0},
    {"ab[c-e]", "abc", 1},
    {"ab[c-e]", "abd", 1},
    {"ab[c-e]", "abe", 1},
    {"ab[c-e]", "abf", 0},
    {"ab[e-c]", "abb", 0},
    {"ab[e-c]", "abc", 1},
    {"ab[e-c]", "abd", 1},
    {"ab[e-c]", "abe", 1},
    {"ab[e-c]", "abf", 0},
    {"ab[^c-e]", "abb", 1},
    {"ab[^c-e]", "abc", 0},
    {"ab[^c-e]", "abd", 0},
    {"ab[^c-e]", "abe", 0},
    {"ab[^c-e]", "abf", 1},
    {"ab[^e-c]", "abb", 1},
    {"ab[^e-c]", "abc", 0},
    {"ab[^e-c]", "abd", 0},
    {"ab[^e-c]", "abe", 0},
    {"ab[^e-c]", "abf", 1},
    {"ab[a^]", "aba", 1},
    {"ab[a^]", "ab^", 1},
    {"ab[a^]", "abb", 0},
    {"ab[^a^]", "aba", 0},
    {"ab[^a^]", "ab^", 0},
    {"ab[^a^]", "abb", 1},
    {"ab[-c]", "ab-", 1},
    {"ab[-c]", "abc", 1},
    {"ab[-c]", "abd", 0},
    {"ab[^-c]", "ab-", 0},
    {"ab[^-c]", "abc", 0},
    {"ab[^-c]", "abd", 1},
    {"ab[\\[-\\]]", "abZ", 0},
    {"ab[\\[-\\]]", "ab[", 1},
    {"ab[\\[-\\]]", "ab\\", 1},
    {"ab[\\[-\\]]", "ab]", 1},
    {"ab[\\[-\\]]", "ab^", 0},
    {"ab[^\\[-\\]]", "abZ", 1},
    {"ab[^\\[-\\]]", "ab[", 0},
    {"ab[^\\[-\\]]", "ab\\", 0},
    {"ab[^\\[-\\]]", "ab]", 0},
    {"ab[^\\[-\\]]", "ab^", 1},
    {"ab[a-fA-F]", "aba", 1},
    {"ab[a-fA-F]", "abF", 1},
    {"ab[a-fA-F]", "abZ", 0},
};

static const struct test full_tests[] = {
    {"a", "argh", 0},
    {"a", "ba", 0},
    {"a", "a", 1},
    {"a*", "aardvark", 1},
    {"a*", "badger", 0},
    {"*a", "park", 0},
    {"*a", "pArka", 1},
    {"*a", "parka", 1},
    {"*a*", "park", 1},
    {"*a*", "perk", 0},
    {"?b*r?", "abracadabra", 1},
    {"?b*r?", "abracadabr", 0},
    {"?b*r?", "abracadabzr", 0},
};

/*
 * Compare one actual result against the table entry's expectation,
 * printing a diagnostic on mismatch. Returns 1 if the test passed
 * and 0 if it failed.
 */
static int check_result(const struct test *tc, int actual)
{
    if (actual == tc->expected_result)
        return 1;
    printf("failed test: /%s/ against /%s/ returned %d not %d\n",
           tc->wildcard, tc->target, actual, tc->expected_result);
    return 0;
}

/*
 * Run every fragment test through wc_match_fragment and every full
 * test through wc_match, print a pass/fail summary, and exit with a
 * failure status if anything failed so that scripts can detect it.
 */
int main(void)
{
    size_t i;
    int fails = 0, passes = 0;

    for (i = 0; i < sizeof(fragment_tests) / sizeof(*fragment_tests); i++) {
        /* wc_match_fragment advances its arguments, so pass copies. */
        const char *f = fragment_tests[i].wildcard;
        const char *t = fragment_tests[i].target;

        if (check_result(&fragment_tests[i], wc_match_fragment(&f, &t)))
            passes++;
        else
            fails++;
    }

    for (i = 0; i < sizeof(full_tests) / sizeof(*full_tests); i++) {
        if (check_result(&full_tests[i],
                         wc_match(full_tests[i].wildcard,
                                  full_tests[i].target)))
            passes++;
        else
            fails++;
    }

    printf("passed %d, failed %d\n", passes, fails);

    return fails ? EXIT_FAILURE : EXIT_SUCCESS;
}

#endif
|
Loading…
Reference in New Issue
Block a user