diff options
-rw-r--r-- | src/regex/glob.c | 217 |
1 files changed, 112 insertions, 105 deletions
diff --git a/src/regex/glob.c b/src/regex/glob.c index 3e1b034e..751b6966 100644 --- a/src/regex/glob.c +++ b/src/regex/glob.c @@ -1,3 +1,4 @@ +#define _BSD_SOURCE #include <glob.h> #include <fnmatch.h> #include <sys/stat.h> @@ -14,27 +15,6 @@ struct match char name[]; }; -static int is_literal(const char *p, int useesc) -{ - int bracket = 0; - for (; *p; p++) { - switch (*p) { - case '\\': - if (!useesc) break; - case '?': - case '*': - return 0; - case '[': - bracket = 1; - break; - case ']': - if (bracket) return 0; - break; - } - } - return 1; -} - static int append(struct match **tail, const char *name, size_t len, int mark) { struct match *new = malloc(sizeof(struct match) + len + 2); @@ -42,7 +22,7 @@ static int append(struct match **tail, const char *name, size_t len, int mark) (*tail)->next = new; new->next = NULL; memcpy(new->name, name, len+1); - if (mark) { + if (mark && len && name[len-1]!='/') { new->name[len] = '/'; new->name[len+1] = 0; } @@ -50,96 +30,125 @@ static int append(struct match **tail, const char *name, size_t len, int mark) return 0; } -static int match_in_dir(const char *d, const char *p, int flags, int (*errfunc)(const char *path, int err), struct match **tail) +static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*errfunc)(const char *path, int err), struct match **tail) { - DIR *dir; - struct dirent de_buf, *de; - char pat[strlen(p)+1]; - char *p2; - size_t l = strlen(d); - int literal; - int fnm_flags= ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0) - | ((!(flags & GLOB_PERIOD)) ? FNM_PERIOD : 0); - int error; - - if ((p2 = strchr(p, '/'))) { - strcpy(pat, p); - pat[p2-p] = 0; - for (; *p2 == '/'; p2++); - p = pat; + /* If GLOB_MARK is unused, we don't care about type. */ + if (!type && !(flags & GLOB_MARK)) type = DT_REG; + + /* Special-case the remaining pattern being all slashes, in + * which case we can use caller-passed type if it's a dir. */ + if (*pat && type!=DT_DIR) type = 0; + while (pos+1 < PATH_MAX && *pat=='/') buf[pos++] = *pat++; + + /* Consume maximal [escaped-]literal prefix of pattern, copying + * and un-escaping it to the running buffer as we go. */ + ptrdiff_t i=0, j=0; + int in_bracket = 0, overflow = 0; + for (; pat[i]!='*' && pat[i]!='?' && (!in_bracket || pat[i]!=']'); i++) { + if (!pat[i]) { + if (overflow) return 0; + pat += i; + pos += j; + i = j = 0; + break; + } else if (pat[i] == '[') { + in_bracket = 1; + } else if (pat[i] == '/') { + if (overflow) return 0; + in_bracket = 0; + pat += i+1; + i = -1; + pos += j+1; + j = -1; + } else if (pat[i] == '\\' && !(flags & GLOB_NOESCAPE)) { + /* Backslashes inside a bracket are (at least by + * our interpretation) non-special, so if next + * char is ']' we have a complete expression. */ + if (in_bracket && pat[i+1]==']') break; + /* Unpaired final backslash never matches. */ + if (!pat[i+1] || pat[i+1]=='/') return 0; + i++; + } + /* Only store a character if it fits in the buffer, but if + * a potential bracket expression is open, the overflow + * must be remembered and handled later only if the bracket + * is unterminated (and thereby a literal), so as not to + * disallow long bracket expressions with short matches. */ + if (pos+(j+1) < PATH_MAX) { + buf[pos+j++] = pat[i]; + } else if (in_bracket) { + overflow = 1; + } else { + return 0; + } + /* If we consume any new components, the caller-passed type + * or dummy type from above is no longer valid. */ + type = 0; } - literal = is_literal(p, !(flags & GLOB_NOESCAPE)); - if (*d == '/' && !*(d+1)) l = 0; - - /* rely on opendir failing for nondirectory objects */ - dir = opendir(*d ? d : "."); - error = errno; - if (!dir) { - /* this is not an error -- we let opendir call stat for us */ - if (error == ENOTDIR) return 0; - if (error == EACCES && !*p) { - struct stat st; - if (!stat(d, &st) && S_ISDIR(st.st_mode)) { - if (append(tail, d, l, l)) - return GLOB_NOSPACE; - return 0; - } + buf[pos] = 0; + if (!*pat) { + /* If we consumed any components above, or if GLOB_MARK is + * requested and we don't yet know if the match is a dir, + * we must call stat to confirm the file exists and/or + * determine its type. */ + struct stat st; + if ((flags & GLOB_MARK) && type==DT_LNK) type = 0; + if (!type && stat(buf, &st)) { + if (errno!=ENOENT && (errfunc(buf, errno) || (flags & GLOB_ERR))) + return GLOB_ABORTED; + return 0; } - if (errfunc(d, error) || (flags & GLOB_ERR)) - return GLOB_ABORTED; + if (!type && S_ISDIR(st.st_mode)) type = DT_DIR; + if (append(tail, buf, pos, (flags & GLOB_MARK) && type==DT_DIR)) + return GLOB_NOSPACE; return 0; } - if (!*p) { - error = append(tail, d, l, l) ? GLOB_NOSPACE : 0; - closedir(dir); - return error; + char *p2 = strchr(pat, '/'); + DIR *dir = opendir(pos ? buf : "."); + if (!dir) { + if (errfunc(buf, errno) || (flags & GLOB_ERR)) + return GLOB_ABORTED; + return 0; } - while (!(error = readdir_r(dir, &de_buf, &de)) && de) { - char namebuf[l+de->d_reclen+2], *name = namebuf; - if (!literal && fnmatch(p, de->d_name, fnm_flags)) + int old_errno = errno; + struct dirent *de; + while (errno=0, de=readdir(dir)) { + /* Quickly skip non-directories when there's pattern left. */ + if (p2 && de->d_type && de->d_type!=DT_DIR && de->d_type!=DT_LNK) continue; - if (literal && strcmp(p, de->d_name)) - continue; - if (p2 && de->d_type && !S_ISDIR(de->d_type<<12) && !S_ISLNK(de->d_type<<12)) + + size_t l = strlen(de->d_name); + if (l >= PATH_MAX-pos) continue; + + if (p2) *p2 = 0; + + int fnm_flags= ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0) + | ((!(flags & GLOB_PERIOD)) ? FNM_PERIOD : 0); + + if (fnmatch(pat, de->d_name, fnm_flags)) continue; + /* With GLOB_PERIOD, don't allow matching . or .. unless * fnmatch would match them with FNM_PERIOD rules in effect. */ if (p2 && (flags & GLOB_PERIOD) && de->d_name[0]=='.' && (!de->d_name[1] || de->d_name[1]=='.' && !de->d_name[2]) - && fnmatch(p, de->d_name, fnm_flags | FNM_PERIOD)) + && fnmatch(pat, de->d_name, fnm_flags | FNM_PERIOD)) continue; - if (*d) { - memcpy(name, d, l); - name[l] = '/'; - strcpy(name+l+1, de->d_name); - } else { - name = de->d_name; - } - if (p2) { - if ((error = match_in_dir(name, p2, flags, errfunc, tail))) { - closedir(dir); - return error; - } - } else { - int mark = 0; - if (flags & GLOB_MARK) { - if (de->d_type && !S_ISLNK(de->d_type<<12)) - mark = S_ISDIR(de->d_type<<12); - else { - struct stat st; - stat(name, &st); - mark = S_ISDIR(st.st_mode); - } - } - if (append(tail, name, l+de->d_reclen+1, mark)) { - closedir(dir); - return GLOB_NOSPACE; - } + + memcpy(buf+pos, de->d_name, l+1); + if (p2) *p2 = '/'; + int r = do_glob(buf, pos+l, de->d_type, p2 ? p2 : "", flags, errfunc, tail); + if (r) { + closedir(dir); + return r; } } + int readerr = errno; + if (p2) *p2 = '/'; closedir(dir); - if (error && (errfunc(d, error) || (flags & GLOB_ERR))) + if (readerr && (errfunc(buf, errno) || (flags & GLOB_ERR))) return GLOB_ABORTED; + errno = old_errno; return 0; } @@ -164,19 +173,12 @@ static int sort(const void *a, const void *b) int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, int err), glob_t *restrict g) { - const char *p=pat, *d; struct match head = { .next = NULL }, *tail = &head; size_t cnt, i; size_t offs = (flags & GLOB_DOOFFS) ? g->gl_offs : 0; int error = 0; + char buf[PATH_MAX]; - if (*p == '/') { - for (; *p == '/'; p++); - d = "/"; - } else { - d = ""; - } - if (!errfunc) errfunc = ignore_err; if (!(flags & GLOB_APPEND)) { @@ -185,9 +187,14 @@ int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, i g->gl_pathv = NULL; } - if (strnlen(p, PATH_MAX+1) > PATH_MAX) return GLOB_NOSPACE; + if (*pat) { + char *p = strdup(pat); + if (!p) return GLOB_NOSPACE; + buf[0] = 0; + error = do_glob(buf, 0, 0, p, flags, errfunc, &tail); + free(p); + } - if (*pat) error = match_in_dir(d, p, flags, errfunc, &tail); if (error == GLOB_NOSPACE) { freelist(&head); return error; |