Kaz Kylheku
2023-09-12 03:49:16 UTC
Hi all,
I'm experimenting with a wrapper function that is a drop-in replacement
for POSIX glob, but gives it the /**/ superpower.
The /**/ pattern matches zero or more path components.
I have a prototype here which works like this.
If the /**/ sub-pattern does not occur in pattern, then
it just calls glob, passing it all its parameters.
If the /**/ sub-pattern occurs in the pattern, then
it iterates on it, successively replacing it with
/, /*/, /*/*/, /*/*/*/, ... and calling itself recursively.
After the first recursive call, it adds GLOB_APPEND
to the flags.
There are issues to do with termination (when do we stop?)
and performance.
In the prototype, I have the recursion generate a maximum of 48 /*/ star
wildcards across the entire path, and each /**/ pattern can individually
expand to no more than 10.
Multiple occurrences of /**/ drag down the performance of the prototype
badly. Up to three is what I would call practical.
The real function should handle patterns starting with "**/" and also
ending in "/**", as well as when "**" is the entire pattern.
Plus there are issues of sorting. We might want to collect results with
GLOB_NOSORT and sort the paths ourselves.
I'm already thinking forward to a different algorithm, but
here is the prototype.
#include <glob.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
// Recursive worker behind super_glob.
//
// If pattern contains no "/**/" it is handed straight to glob(). Otherwise
// the first "/**/" is replaced in turn by "/", "/*/", "/*/*/", ... and the
// function recurses on each candidate, so any later "/**/" is expanded by
// the nested call.
//
// pattern, flags, errfunc, pglob: forwarded to glob(). GLOB_APPEND is added
//   to flags for every candidate after the first so results accumulate.
// star_limit: remaining budget of "*" components; a single "/**/" tries at
//   most 10 candidates (0 to 9 "*" components), and the stars it used are
//   subtracted from the budget passed to the nested call.
//
// Returns 0 if at least one candidate matched, GLOB_NOMATCH if none did,
// GLOB_NOSPACE if a candidate pattern could not be allocated, or the first
// other non-zero value reported by glob().
static int super_glob_rec(const char *pattern, int flags,
                          int (*errfunc) (const char *epath, int eerrno),
                          glob_t *pglob, size_t star_limit)
{
    const size_t max_expansions = 10;
    const char *dblstar = strstr(pattern, "/**/");

    if (dblstar == NULL) {
        return glob(pattern, flags, errfunc, pglob);
    }

    size_t base_len = strlen(pattern);
    // Offset just past the leading '/' of "/**/"; the prefix is kept as is.
    size_t ds_off = (size_t) (dblstar - pattern) + 1;
    // Offset just past the trailing '/' of "/**/".
    size_t tail_off = ds_off + 3;
    size_t limit = star_limit > max_expansions ? max_expansions : star_limit;
    int matched = 0;

    for (size_t i = 0; i < limit; i++) {
        // "**/" (3 chars) is dropped and i copies of "*/" are inserted.
        size_t space = base_len - 3 + i * 2;
        char *pat_copy = malloc(space + 1);

        if (pat_copy == NULL) {
            // Nothing has been stored in pglob yet on this path: leave it
            // empty rather than untouched before reporting the failure.
            if ((flags & GLOB_APPEND) == 0) {
                pglob->gl_pathc = 0;
                pglob->gl_pathv = NULL;
            }
            return GLOB_NOSPACE;
        }

        char *out = pat_copy + ds_off;

        memcpy(pat_copy, pattern, ds_off);
        for (size_t j = 0; j < i; j++) {
            *out++ = '*';
            *out++ = '/';
        }
        strcpy(out, pattern + tail_off);

        if (i > 0) {
            flags |= GLOB_APPEND;
        }

        int res = super_glob_rec(pat_copy, flags, errfunc, pglob,
                                 star_limit - i);

        free(pat_copy);

        if (res == 0) {
            matched = 1;
        } else if (res != GLOB_NOMATCH) {
            return res;
        }
    }

    // Mirror glob(): report GLOB_NOMATCH when no candidate produced a path.
    return matched ? 0 : GLOB_NOMATCH;
}
// Entry point of the wrapper: takes the arguments that are ultimately
// forwarded to glob() and starts the recursive expansion with the full
// budget of "*" components.
static int super_glob(const char *pattern, int flags,
                      int (*errfunc) (const char *epath, int eerrno),
                      glob_t *pglob)
{
    // Total number of "*" components the recursion may generate.
    const size_t star_budget = 48;

    return super_glob_rec(pattern, flags, errfunc, pglob, star_budget);
}
// Command-line driver: expands the single pattern argument with super_glob
// and prints each resulting path on its own line.
//
// Returns EXIT_SUCCESS when the expansion completed (including the case of
// no matches), EXIT_FAILURE on a glob error or when the argument count is
// not exactly one pattern.
int main(int argc, char **argv)
{
    int status = EXIT_FAILURE;
    const char *progname = argc > 0 ? argv[0] : "super_glob";

    if (argc == 2) {
        // Zero-initialized so globfree() sees an empty result even if
        // super_glob fails before glob() ever fills it in.
        glob_t glb = { 0 };
        int res = super_glob(argv[1], 0, NULL, &glb);

        if (res != 0 && res != GLOB_NOMATCH) {
            fprintf(stderr, "%s: glob failed with %d\n", progname, res);
        } else {
            for (size_t i = 0; i < glb.gl_pathc; i++) {
                puts(glb.gl_pathv[i]);
            }
            status = EXIT_SUCCESS;
        }
        globfree(&glb);
    } else {
        // Any argument count other than one pattern is a usage error.
        fprintf(stderr, "%s: specify one glob pattern argument\n", progname);
    }
    return status;
}
I'm experimenting with a wrapper function that is a drop-in replacement
for POSIX glob, but gives it the /**/ superpower.
The /**/ pattern matches zero or more path components.
I have a prototype here which works like this.
If the /**/ sub-pattern does not occur in pattern, then
it just calls glob, passing it all its parameters.
If the /**/ sub-pattern occurs in the pattern, then
it iterates on it, successively replacing it with
/, /*/, /*/*/, /*/*/*/, ... and calling itself recursively.
After the first recursive call, it adds GLOB_APPEND
to the flags.
There are issues to do with termination (when do we stop?)
and performance.
In the prototype, I have the recursion generate a maximum of 48 /*/ star
wildcards across the entire path, and each /**/ pattern can individually
expand to no more than 10.
Multiple occurrences of /**/ drag down the performance of the prototype
badly. Up to three is what I would call practical.
The real function should handle patterns starting with "**/" and also
ending in "/**", as well as when "**" is the entire pattern.
Plus there are issues of sorting. We might want to collect results with
GLOB_NOSORT and sort the paths ourselves.
I'm already thinking forward to a different algorithm, but
here is the prototype.
#include <glob.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
// Recursive worker behind super_glob.
//
// If pattern contains no "/**/" it is handed straight to glob(). Otherwise
// the first "/**/" is replaced in turn by "/", "/*/", "/*/*/", ... and the
// function recurses on each candidate, so any later "/**/" is expanded by
// the nested call.
//
// pattern, flags, errfunc, pglob: forwarded to glob(). GLOB_APPEND is added
//   to flags for every candidate after the first so results accumulate.
// star_limit: remaining budget of "*" components; a single "/**/" tries at
//   most 10 candidates (0 to 9 "*" components), and the stars it used are
//   subtracted from the budget passed to the nested call.
//
// Returns 0 if at least one candidate matched, GLOB_NOMATCH if none did,
// GLOB_NOSPACE if a candidate pattern could not be allocated, or the first
// other non-zero value reported by glob().
static int super_glob_rec(const char *pattern, int flags,
                          int (*errfunc) (const char *epath, int eerrno),
                          glob_t *pglob, size_t star_limit)
{
    const size_t max_expansions = 10;
    const char *dblstar = strstr(pattern, "/**/");

    if (dblstar == NULL) {
        return glob(pattern, flags, errfunc, pglob);
    }

    size_t base_len = strlen(pattern);
    // Offset just past the leading '/' of "/**/"; the prefix is kept as is.
    size_t ds_off = (size_t) (dblstar - pattern) + 1;
    // Offset just past the trailing '/' of "/**/".
    size_t tail_off = ds_off + 3;
    size_t limit = star_limit > max_expansions ? max_expansions : star_limit;
    int matched = 0;

    for (size_t i = 0; i < limit; i++) {
        // "**/" (3 chars) is dropped and i copies of "*/" are inserted.
        size_t space = base_len - 3 + i * 2;
        char *pat_copy = malloc(space + 1);

        if (pat_copy == NULL) {
            // Nothing has been stored in pglob yet on this path: leave it
            // empty rather than untouched before reporting the failure.
            if ((flags & GLOB_APPEND) == 0) {
                pglob->gl_pathc = 0;
                pglob->gl_pathv = NULL;
            }
            return GLOB_NOSPACE;
        }

        char *out = pat_copy + ds_off;

        memcpy(pat_copy, pattern, ds_off);
        for (size_t j = 0; j < i; j++) {
            *out++ = '*';
            *out++ = '/';
        }
        strcpy(out, pattern + tail_off);

        if (i > 0) {
            flags |= GLOB_APPEND;
        }

        int res = super_glob_rec(pat_copy, flags, errfunc, pglob,
                                 star_limit - i);

        free(pat_copy);

        if (res == 0) {
            matched = 1;
        } else if (res != GLOB_NOMATCH) {
            return res;
        }
    }

    // Mirror glob(): report GLOB_NOMATCH when no candidate produced a path.
    return matched ? 0 : GLOB_NOMATCH;
}
// Entry point of the wrapper: takes the arguments that are ultimately
// forwarded to glob() and starts the recursive expansion with the full
// budget of "*" components.
static int super_glob(const char *pattern, int flags,
                      int (*errfunc) (const char *epath, int eerrno),
                      glob_t *pglob)
{
    // Total number of "*" components the recursion may generate.
    const size_t star_budget = 48;

    return super_glob_rec(pattern, flags, errfunc, pglob, star_budget);
}
// Command-line driver: expands the single pattern argument with super_glob
// and prints each resulting path on its own line.
//
// Returns EXIT_SUCCESS when the expansion completed (including the case of
// no matches), EXIT_FAILURE on a glob error or when the argument count is
// not exactly one pattern.
int main(int argc, char **argv)
{
    int status = EXIT_FAILURE;
    const char *progname = argc > 0 ? argv[0] : "super_glob";

    if (argc == 2) {
        // Zero-initialized so globfree() sees an empty result even if
        // super_glob fails before glob() ever fills it in.
        glob_t glb = { 0 };
        int res = super_glob(argv[1], 0, NULL, &glb);

        if (res != 0 && res != GLOB_NOMATCH) {
            fprintf(stderr, "%s: glob failed with %d\n", progname, res);
        } else {
            for (size_t i = 0; i < glb.gl_pathc; i++) {
                puts(glb.gl_pathv[i]);
            }
            status = EXIT_SUCCESS;
        }
        globfree(&glb);
    } else {
        // Any argument count other than one pattern is a usage error.
        fprintf(stderr, "%s: specify one glob pattern argument\n", progname);
    }
    return status;
}
--
TXR Programming Language: http://nongnu.org/txr
Cygnal: Cygwin Native Application Library: http://kylheku.com/cygnal
Mastodon: @***@mstdn.ca
NOTE: If you use Google Groups, I don't see you, unless you're whitelisted.
TXR Programming Language: http://nongnu.org/txr
Cygnal: Cygwin Native Application Library: http://kylheku.com/cygnal
Mastodon: @***@mstdn.ca
NOTE: If you use Google Groups, I don't see you, unless you're whitelisted.