Skip to content

Instantly share code, notes, and snippets.

@vurtun
Last active December 22, 2018 17:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vurtun/a4a06e7b5f3cc82cd76fef58428942f1 to your computer and use it in GitHub Desktop.
Save vurtun/a4a06e7b5f3cc82cd76fef58428942f1 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
/* ---------------------------------------------------------------------------
* ARG
* --------------------------------------------------------------------------- */
/* Program name (the original argv[0]); set by ARG_BEGIN. */
static char *argv0;
/* Closes the two loop bodies opened by ARG_BEGIN. */
#define ARG_END }}
/* The option character currently being handled inside ARG_BEGIN. */
#define ARGC() argc_
/*
 * Option-parsing loop header. Expects `argc` and `argv` to be in scope and
 * modifies both. It stores argv[0] in argv0, then visits every following
 * argument that starts with '-' and has at least one more character. A bare
 * "--" is consumed and ends the parsing. For every character after the '-'
 * the macro ends in `switch (argc_)`, so the user supplies the `{ case ... }`
 * body right after ARG_BEGIN and terminates everything with ARG_END.
 * The inner loop stops early when a case handler changed `argv` or when
 * ARG_EF set brk_ because the rest of the argument was consumed as a value.
 */
#define ARG_BEGIN \
for (argv0 = *argv, argv++, argc--; argv[0] && argv[0][1] && argv[0][0] == '-'; argc--, argv++) {\
char argc_, **argv_; int brk_;\
if (argv[0][1] == '-' && argv[0][2] == '\0')\
{argv++; argc--; break;}\
for (brk_ = 0, argv[0]++, argv_ = argv; argv[0][0] && !brk_; argv[0]++) {\
if (argv_ != argv) break;\
argc_ = argv[0][0];\
switch (argc_)
/*
 * Fetch the value of the current option. If characters follow the option
 * letter in the same argument ("-ofile") they are the value, otherwise the
 * next argv entry is consumed ("-o file"). When neither exists, `x` is
 * evaluated and abort() is called.
 */
#define ARG_EF(x) \
((argv[0][1] == '\0' && argv[1] == 0)?\
((x), abort(), (char *)0) :\
(brk_ = 1, (argv[0][1] != '\0')?\
(&argv[0][1]) :\
(argc--, argv++, argv[0])))
/* ---------------------------------------------------------------------------
* UTIL
* --------------------------------------------------------------------------- */
/* Small general purpose helpers. All macros may evaluate their arguments
 * more than once, so do not pass expressions with side effects. */
#define cast(t,p) ((t)(p))
/* sizeof() as int */
#define szof(a) ((int)sizeof(a))
#define min(a,b) ((a)<(b)?(a):(b))
#define max(a,b) ((a)>(b)?(a):(b))
/* number of elements of a real array (not a pointer) */
#define cntof(a) ((int)(sizeof(a)/sizeof((a)[0])))
/* byte offset of member `m` inside struct type `st`, as int */
#define offsetof(st,m) ((int)((uintptr_t)&(((st*)0)->m)))
#define streq(a,b) (strcmp(a,b) == 0)
#define strneq(a,b,n) (strncmp(a,b,n) == 0)
/* alignment requirement of type `t`, as int */
#define alignof(t) ((int)((char*)(&((struct {char c; t _h;}*)0)->_h) - (char*)0))
/* The alignment arguments below must be powers of two. They are fully
 * parenthesized so that expressions such as `1<<3` expand correctly. */
/* non-zero if address/value `x` is a multiple of `a` */
#define aligned(x,a) (!((uintptr_t)(x) & ((uintptr_t)((a)-1))))
/* round pointer `x` up to the next multiple of `mask` (an alignment, despite the name) */
#define align(x,mask) ((void*)(((intptr_t)((const char*)(x)+((mask)-1))&~((mask)-1))))
#define align_mask(a) ((a)-1)
#define align_down_masked(n,m) ((n) & ~(m))
#define align_down(n,a) align_down_masked(n, align_mask(a))
#define align_up(n,a) align_down((n) + align_mask(a), (a))
#define align_down_ptr(p,a) ((void*)align_down((uintptr_t)(p),(a)))
#define align_up_ptr(p,a) ((void*)align_up((uintptr_t)(p),(a)))
/*
 * Report a fatal error and terminate.
 * Flushes stdout, writes the printf-style message built from `fmt` and the
 * variadic arguments to stderr and exits with EXIT_FAILURE. Never returns.
 */
static void
die(const char *fmt, ...)
{
    va_list ap;

    fflush(stdout);

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);

    exit(EXIT_FAILURE);
}
/*
 * realloc() wrapper that never returns NULL.
 * Resizes `p` to `size` bytes; if realloc() yields NULL an error is reported
 * through perror() and the process exits with status 1.
 */
static void*
xrealloc(void *p, int size)
{
    void *grown = realloc(p, (size_t)size);
    if (grown)
        return grown;
    perror("xrealloc failed!\n");
    exit(1);
}
/*
 * Allocate `size` zero-initialized bytes (the memory comes from calloc()).
 * On failure an error is reported through perror() and the process exits
 * with status 1, so callers never see NULL.
 */
static void*
xmalloc(int size)
{
    void *mem = calloc(1, (size_t)size);
    if (mem == 0) {
        perror("xmalloc failed!\n");
        exit(1);
    }
    return mem;
}
/* ---------------------------------------------------------------------------
* STRING
* --------------------------------------------------------------------------- */
/* Code unit of the wide strings produced/consumed by wstr_utf8/utf8_wstr
 * (those functions read and write UTF-16 surrogate pairs). */
typedef unsigned short wchr;
/* Forward declarations of the mutually recursive pattern matcher below.
 * Note the argument order: strmatch takes (text, regexp) while the two
 * helpers take the pattern first. */
static int strmatch_here(const char *regexp, const char *text);
static int strmatch_star(int c, const char *regexp, const char *text);
static int strmatch(const char *text, const char *regexp);
/*
 * Convert the NUL-terminated UTF-8 string `ostr` to UTF-16.
 *
 * buf   destination array
 * ostr  UTF-8 input
 * n     capacity of `buf` in code units, including the terminating 0
 *
 * Returns `buf` on success. Returns 0 if the output does not fit or the
 * input is not valid UTF-8 (overlong forms, encoded surrogates, values
 * above U+10FFFF, stray or missing continuation bytes). On failure the
 * contents of `buf` are unspecified.
 */
static wchr*
wstr_utf8(wchr *buf, const char *ostr, int n)
{
    const unsigned char *s = (const unsigned char*)ostr;
    unsigned long c;
    int i = 0;
    /* without room for the terminator nothing can be stored safely */
    if (n <= 0)
        return 0;
    --n; /* reserve one unit for the terminating 0 */
    while (*s) {
        if (i >= n)
            return 0;
        if (!(*s & 0x80)) {
            /* 1 byte: plain ASCII */
            buf[i++] = *s++;
        } else if ((*s & 0xe0) == 0xc0) {
            /* 2 bytes; lead bytes 0xc0/0xc1 would be overlong encodings */
            if (*s < 0xc2) return 0;
            c = (unsigned long)((*s++ & 0x1f) << 6);
            if ((*s & 0xc0) != 0x80) return 0;
            buf[i++] = (wchr)(c + (*s++ & 0x3f));
        } else if ((*s & 0xf0) == 0xe0) {
            /* 3 bytes; reject overlong forms (e0 80..9f) and surrogates (ed a0..bf) */
            if (*s == 0xe0 && (s[1] < 0xa0 || s[1] > 0xbf)) return 0;
            if (*s == 0xed && s[1] > 0x9f) return 0;
            c = (unsigned long)((*s++ & 0x0f) << 12);
            if ((*s & 0xc0) != 0x80) return 0;
            c += (unsigned long)((*s++ & 0x3f) << 6);
            if ((*s & 0xc0) != 0x80) return 0;
            buf[i++] = (wchr)(c + (*s++ & 0x3f));
        } else if ((*s & 0xf8) == 0xf0) {
            /* 4 bytes; the checks on the lead and second byte restrict the
             * decoded value to U+10000..U+10FFFF, so it always needs a
             * surrogate pair */
            if (*s > 0xf4) return 0;
            if (*s == 0xf0 && (s[1] < 0x90 || s[1] > 0xbf)) return 0;
            if (*s == 0xf4 && s[1] > 0x8f) return 0;
            c = (unsigned long)((*s++ & 0x07) << 18);
            if ((*s & 0xc0) != 0x80) return 0;
            c += (unsigned long)((*s++ & 0x3f) << 12);
            if ((*s & 0xc0) != 0x80) return 0;
            c += (unsigned long)((*s++ & 0x3f) << 6);
            if ((*s & 0xc0) != 0x80) return 0;
            c += (unsigned long)((*s++ & 0x3f));
            c -= 0x10000;
            if (i + 2 > n) return 0;
            buf[i++] = (wchr)(0xD800 | (0x3ff & (c >> 10)));
            buf[i++] = (wchr)(0xDC00 | (0x3ff & (c      )));
        } else return 0; /* stray continuation byte or invalid lead byte */
    }
    buf[i] = 0;
    return buf;
}
/*
 * Convert the 0-terminated UTF-16 string `str` to UTF-8.
 *
 * buf  destination buffer
 * str  UTF-16 input
 * n    capacity of `buf` in bytes, including the terminating NUL
 *
 * Returns `buf` on success. Returns 0 if the output does not fit or the
 * input contains an unpaired surrogate. On failure the contents of `buf`
 * are unspecified.
 */
static char*
utf8_wstr(char *buf, const wchr *str, int n)
{
    int i = 0;
    /* without room for the terminator nothing can be stored safely */
    if (n <= 0)
        return 0;
    --n; /* reserve one byte for the terminating NUL */
    while (*str) {
        if (*str < 0x80) {
            if (i+1 > n) return 0;
            buf[i++] = (char) *str++;
        } else if (*str < 0x800) {
            if (i+2 > n) return 0;
            buf[i++] = (char)(0xc0 + (*str >> 6));
            buf[i++] = (char)(0x80 + (*str & 0x3f));
            str += 1;
        } else if (*str >= 0xd800 && *str < 0xdc00) {
            unsigned long c;
            /* A high surrogate must be followed by a low surrogate. This
             * also catches a high surrogate at the very end of the string,
             * where consuming two units would skip the terminator. */
            if (str[1] < 0xdc00 || str[1] >= 0xe000) return 0;
            if (i+4 > n) return 0;
            c = (unsigned long)(((str[0] - 0xd800) << 10) + ((str[1]) - 0xdc00) + 0x10000);
            buf[i++] = (char)(0xf0 + (c >> 18));
            buf[i++] = (char)(0x80 + ((c >> 12) & 0x3f));
            buf[i++] = (char)(0x80 + ((c >> 6) & 0x3f));
            buf[i++] = (char)(0x80 + ((c     ) & 0x3f));
            str += 2;
        } else if (*str >= 0xdc00 && *str < 0xe000) {
            /* low surrogate without a preceding high surrogate */
            return 0;
        } else {
            if (i+3 > n) return 0;
            buf[i++] = (char)(0xe0 + (*str >> 12));
            buf[i++] = (char)(0x80 + ((*str >> 6) & 0x3f));
            buf[i++] = (char)(0x80 + ((*str     ) & 0x3f));
            str += 1;
        }
    }
    buf[i] = 0;
    return buf;
}
/*
 * Convenience wrapper around wstr_utf8() that converts into a function-local
 * static buffer of 4096 code units. The result is overwritten by the next
 * call. Returns 0 when the conversion fails.
 */
static wchr*
wstr___utf8(char *str)
{
    static wchr buf[4*1024];
    /* wstr_utf8 expects the capacity in code units, not in bytes */
    return wstr_utf8(buf, str, (int)(sizeof(buf)/sizeof(buf[0])));
}
/*
 * Convenience wrapper around utf8_wstr() that converts into a function-local
 * static buffer of 4096 bytes. The result is overwritten by the next call.
 * Returns whatever utf8_wstr() returns (0 on failure).
 */
static char*
utf8__wstr(const wchr *str)
{
    static char out[4*1024];
    const int cap = szof(out);
    return utf8_wstr(out, str, cap);
}
/*
 * Bounded string copy.
 * Copies `s` including its terminator into `d`, which holds `n` bytes.
 * Returns the number of bytes written (length + 1). If `s` does not fit,
 * nothing is copied, `d` is set to the empty string when n > 0, and 0 is
 * returned.
 */
static int
strscpy(char *d, const char *s, int n)
{
    const int len = (int)strlen(s);
    if (len < n) {
        memcpy(d, s, (size_t)len + 1);
        return len + 1;
    }
    if (n > 0)
        d[0] = 0;
    return 0;
}
/*
 * Match `regexp` against the beginning of `text`.
 * Consumes one pattern character per iteration: `x*` is delegated to
 * strmatch_star(), a trailing `$` matches only at the end of the text and
 * `?` matches any single character. Returns 1 on a match, 0 otherwise.
 */
static int
strmatch_here(const char *regexp, const char *text)
{
    for (;;) {
        if (regexp[0] == '\0')
            return 1;
        if (regexp[1] == '*')
            return strmatch_star(regexp[0], regexp+2, text);
        if (regexp[0] == '$' && regexp[1] == '\0')
            return *text == '\0';
        /* a literal (or '?') needs one more text character to consume */
        if (*text == '\0' || (regexp[0] != '?' && regexp[0] != *text))
            return 0;
        regexp++;
        text++;
    }
}
/*
 * Match `c*` followed by `regexp` at the beginning of `text`.
 * Tries the remaining pattern after zero, one, two, ... occurrences of `c`
 * (any character when c is '?') and returns 1 as soon as one attempt
 * succeeds, 0 otherwise.
 */
static int
strmatch_star(int c, const char *regexp, const char *text)
{
    for (;;) {
        if (strmatch_here(regexp, text))
            return 1;
        if (*text == '\0')
            return 0;
        /* the next character has to be another occurrence of `c` */
        if (*text != c && c != '?')
            return 0;
        text++;
    }
}
/*
 * Search `text` for a match of `regexp`.
 * A leading '^' anchors the pattern to the start of the text; otherwise the
 * pattern is tried at every position, including the position of the
 * terminator. Returns 1 on a match, 0 otherwise.
 */
static int
strmatch(const char *text, const char *regexp)
{
    const char *pos;
    if (regexp[0] == '^')
        return strmatch_here(regexp+1, text);
    for (pos = text; ; ++pos) {
        if (strmatch_here(regexp, pos))
            return 1;
        if (*pos == '\0')
            return 0;
    }
}
/* Returns 1 if `ch` is one of the pattern meta characters ? ^ $ *, else 0. */
static int
is_wildcard(int ch)
{
    switch (ch) {
    case '?':
    case '^':
    case '$':
    case '*':
        return 1;
    default:
        return 0;
    }
}
/*
 * Returns 1 if `str` contains at least one pattern meta character
 * (see is_wildcard), 0 if it contains none or if `str` is a null pointer.
 */
static int
str_is_regex(const char *str)
{
    const char *p;
    if (!str)
        return 0;
    for (p = str; *p; ++p) {
        if (is_wildcard(*p))
            return 1;
    }
    return 0;
}
/* ---------------------------------------------------------------------------
* Buffer
* --------------------------------------------------------------------------- */
/*
 * Growable array ("stretchy buffer"). The user holds a plain element pointer
 * `b`; the bookkeeping header lives directly in front of the elements.
 * A null pointer is a valid empty buffer.
 */
struct buf_hdr {
    int n, cap;  /* used element count; capacity, where cap <= 0 marks storage buf_free must not free() */
    char buf[1]; /* first byte of the element storage */
};
/* header belonging to element pointer `b` (b must not be null) */
#define buf__hdr(b) ((struct buf_hdr*)(void*)((char*)(b) - offsetof(struct buf_hdr, buf)))
/* true if `n` more elements fit without growing */
#define buf__fits(b,n) (buf_cnt(b) + (n) <= buf_cap(b))
/* bytes needed for the header plus `n` elements of type `t` plus 8 spare bytes */
#define buf_space(t,n) (sizeof(struct buf_hdr)-1+sizeof(t)*(n)+8)
/* number of used elements / capacity in elements */
#define buf_cnt(b) ((b) ? buf__hdr(b)->n: 0)
#define buf_cap(b) ((b) ? abs(buf__hdr(b)->cap): 0)
#define buf_begin(b) ((b) + 0)
#define buf_end(b) ((b) + buf_cnt(b))
/* make room for `n` more elements; may reassign `b` */
#define buf_fit(b,n) (buf__fits((b), (n)) ? 0: ((b) = buf__grow((b),buf_cnt(b)+(n), sizeof(*(b)))))
/* append `x`; may reassign `b` */
#define buf_push(b, x) (buf_fit((b),1), (b)[buf__hdr(b)->n++] = x)
/* release the storage (only free()d when cap > 0) and set `b` to null */
#define buf_free(b) ((!(b)) ? 0: (buf__hdr(b)->cap <= 0) ? (b) = 0: (free(buf__hdr(b)), (b) = 0))
/* drop all elements but keep the storage */
#define buf_clear(b) ((b) ? (buf__hdr(b)->n = 0) : 0)
/*
 * Grow the buffer behind element pointer `buf` so that it can hold at least
 * `new_len` elements of `elem_size` bytes. The capacity at least doubles.
 *
 * - null `buf`:        a fresh header is allocated
 * - negative capacity: the storage is not owned by the heap (see buf_fixed),
 *                      so the elements are copied into a new allocation
 * - otherwise:         the existing allocation is resized
 *
 * Returns the (possibly moved) element pointer. Exits the process when
 * memory cannot be obtained.
 */
static void*
buf__grow(void *buf, int new_len, int elem_size)
{
    struct buf_hdr *hdr = 0;
    int cap = buf_cap(buf);
    int new_cap = max(2*cap + 1, new_len);
    int new_size = offsetof(struct buf_hdr, buf) + new_cap*elem_size;
    assert(new_len <= new_cap);
    if (!buf) {
        hdr = xmalloc(new_size);
        hdr->n = 0;
    } else if (buf__hdr(buf)->cap < 0) {
        hdr = xmalloc(new_size);
        hdr->n = buf_cnt(buf);
        memcpy(hdr->buf, buf__hdr(buf)->buf, (size_t)(cap*elem_size));
    } else {
        hdr = realloc(buf__hdr(buf), (size_t)new_size);
        /* fail the same way the xmalloc paths do instead of dereferencing NULL */
        if (!hdr) {
            perror("buf__grow failed!");
            exit(1);
        }
    }
    hdr->cap = new_cap;
    return hdr->buf;
}
/*
 * Set up a buffer header inside caller-provided memory.
 * `buf` is rounded up to an 8 byte boundary, the header is placed there with
 * an element count of 0 and a capacity of -n (the negative sign tells
 * buf__grow and buf_free that the storage must not be free()d).
 * Returns the element pointer that follows the header.
 */
static void*
buf_fixed(void *buf, int n)
{
    struct buf_hdr *hdr = align(buf,8);
    hdr->n = 0;
    hdr->cap = -n;
    return hdr->buf;
}
/* ---------------------------------------------------------------------------
* Arena
* --------------------------------------------------------------------------- */
/* every pointer handed out by arena_push is rounded up to this alignment */
#define ARENA_ALIGNMENT 8
/* minimum size in bytes of a block requested from the heap */
#define ARENA_BLOCK_SIZE (512*1024)
/* Bump allocator. A zero-initialized struct is a valid empty arena. */
struct arena {
/* next free byte inside the current block */
char *ptr;
/* one past the last byte of the current block */
char *end;
/* stretchy buffer (see buf_push) of all blocks allocated so far */
char **blks;
};
/*
 * Start a new arena block of at least `min_size` bytes (rounded up to
 * ARENA_ALIGNMENT). The block becomes the current one and is recorded in
 * a->blks. Whatever was left of the previous block is abandoned.
 */
static void
arena__grow(struct arena *a, int min_size)
{
    const int blk_size = align_up(min_size, ARENA_ALIGNMENT);
    char *blk = xmalloc(blk_size);
    a->ptr = blk;
    a->end = blk + blk_size;
    buf_push(a->blks, blk);
}
/*
 * Allocate `size` bytes from arena `a`.
 * When the current block cannot satisfy the request a new block of
 * max(size, ARENA_BLOCK_SIZE) bytes is started. The returned pointer stays
 * valid until arena_free(); individual allocations cannot be released.
 */
static void*
arena_push(struct arena *a, int size)
{
    char *p = 0;
    if (size > (a->end - a->ptr)) {
        int min_size = max(size, ARENA_BLOCK_SIZE);
        arena__grow(a, min_size);
    }
    p = a->ptr;
    a->ptr = align(p + size, ARENA_ALIGNMENT);
    /* An allocation may use up the block completely, in which case ptr ends
     * up equal to end. That is a valid state (the next push starts a new
     * block), so only ptr > end is an error.
     * NOTE(review): relies on block starts being at least 8-byte aligned,
     * which is presumably true for the allocator behind xmalloc. */
    assert(a->ptr <= a->end);
    return p;
}
/*
 * printf into freshly allocated arena memory.
 * The formatted length is measured with a first vsnprintf() pass, the string
 * (including its terminator) is then allocated from `a` and formatted into
 * place. Returns the arena-owned string.
 */
static char*
arena_printf(struct arena *a, const char *fmt, ...)
{
    va_list args, measure;
    char *out = 0;
    int len = 0;

    va_start(args, fmt);
    /* first pass only measures, so it works on a copy of the argument list */
    va_copy(measure, args);
    len = 1 + vsnprintf(0, 0, fmt, measure);
    va_end(measure);

    out = arena_push(a, len);
    vsnprintf(out, (size_t)len, fmt, args);
    va_end(args);
    return out;
}
/*
 * free() every block recorded in a->blks.
 * NOTE(review): the block list itself and a->ptr / a->end are left
 * untouched, so the arena must not be used (or freed again) afterwards
 * without being reset by the caller.
 */
static void
arena_free(struct arena *a)
{
    int i = 0;
    while (i < buf_cnt(a->blks)) {
        free(a->blks[i]);
        i++;
    }
}
/* ---------------------------------------------------------------------------
* DIRECTORY
* --------------------------------------------------------------------------- */
#ifdef _MSC_VER
#include <io.h>
#else
#include <unistd.h>
#include <dirent.h>
#include <sys/stat.h>
#endif
/* Bit flags selecting which kinds of directory entries lsdir() returns. */
enum lsdir_flags {
    LS_FILES = 1 << 0, /* non-directory entries */
    LS_DIRS  = 1 << 1  /* directory entries */
};
/* Ready-made flag combinations for the `flags` argument of lsdir(). */
enum lsdir_option {
    LS_FILES_ONLY = LS_FILES,
    LS_DIRS_ONLY  = LS_DIRS,
    LS_FULL       = LS_FILES_ONLY | LS_DIRS_ONLY
};
/* Normalize a path in place by turning every backslash into a forward slash. */
static void
fixpath(char *path)
{
    char *p = path;
    while (*p) {
        if (*p == '\\')
            *p = '/';
        p++;
    }
}
/* Returns 1 if `n` is exactly "." or "..", otherwise 0. */
static int
is_dot_dirname(const char *n)
{
    if (n[0] != '.')
        return 0;
    if (n[1] == '\0')
        return 1;
    return n[1] == '.' && n[2] == '\0';
}
/*
 * List the entries of directory `dir`.
 *
 * dir     directory to scan
 * a       arena that receives the result strings
 * flags   combination of LS_FILES / LS_DIRS; 0 means LS_FULL
 * filter  optional pattern (see strmatch) applied to the entry name;
 *         a null pointer accepts every name
 *
 * Returns a stretchy buffer (release it with buf_free) of "<dir>/<name>"
 * strings owned by the arena, or 0 if the directory could not be read, the
 * path is too long or nothing matched. The entries "." and ".." are never
 * returned.
 */
static char**
lsdir(const char *dir, struct arena *a, unsigned flags, const char *filter)
{
    char **res = 0;
    char buf[4096];
    char with_slash[4096];
    int n;
#ifdef _MSC_VER
    wchr *ws;
    struct _wfinddata_t data;
#ifdef _WIN64
    const intptr_t none = -1;
    intptr_t z;
#else
    const long none = -1;
    long z;
#endif
#else /* !_MSC_VER */
    const DIR *none = 0;
    DIR *z;
#endif
    flags = (flags == 0) ? LS_FULL: flags;
    n = strscpy(buf,dir,szof(buf));
    if (!n || n >= szof(buf))
        return 0;
    fixpath(buf);
    /* n becomes the length of the directory prefix including a trailing '/' */
    n--;
    if (n > 0 && (buf[n-1] != '/'))
        buf[n++] = '/';
    buf[n] = 0;
    if (!strscpy(with_slash,buf,szof(with_slash)))
        return 0;
#ifdef _MSC_VER
    if (!strscpy(buf+n,"*.*",szof(buf)-n))
        return 0;
    ws = wstr___utf8(buf);
    if (!ws)
        return 0;
    z = _wfindfirst((const wchar_t *)ws, &data);
#else
    z = opendir(dir);
#endif
    if (z != none) {
        int nonempty = 1;
#ifndef _MSC_VER
        struct dirent *data = readdir(z);
        nonempty = (data != 0);
#endif
        if (nonempty) {
            do {int is_dir = 0;
#ifdef _MSC_VER
                char *name = utf8__wstr((const wchr *)data.name);
                if (name == 0) {
                    fprintf(stderr, "Unable to convert '%ls' to utf8!\n", data.name);
                    continue;
                }
                is_dir = (data.attrib & _A_SUBDIR) ? 1: 0;
#else
                char *name = data->d_name;
                if (!strscpy(buf+n,name,szof(buf)-n)) break;
                /* d_type holds an enumerated value, not a bit mask.
                 * NOTE(review): file systems that report DT_UNKNOWN make
                 * every entry look like a regular file here. */
                is_dir = (data->d_type == DT_DIR) ? 1: 0;
#endif
                /* files are taken as they are, directories only when they
                 * are not the "." / ".." pseudo entries */
                if ((is_dir == 0 && (flags & LS_FILES)) ||
                    (is_dir == 1 && (flags & LS_DIRS) && !is_dot_dirname(name))) {
                    if (filter == 0 || strmatch(name, filter))
                        buf_push(res, arena_printf(a, "%s%s", with_slash, name));
                }
            }
#ifdef _MSC_VER
            while (0 == _wfindnext(z, &data));
#else
            while ((data = readdir(z)) != 0);
#endif
        }
#ifdef _MSC_VER
        _findclose(z);
#else
        closedir(z);
#endif
    }
    return res;
}
/* ---------------------------------------------------------------------------
* APP
* --------------------------------------------------------------------------- */
/* Application state filled in by the command line parsing in main(). */
/* arena that owns every path string built by add_path()/lsdir() */
static struct arena arena;
/* stretchy buffers of input file paths, one per command line option */
static char **files_intro = 0; /* -i */
static char **files_pub = 0; /* -g */
static char **files_priv = 0; /* -l */
static char **files_outro = 0; /* -a */
/* -o: output file name; main() writes to stdout while this is null */
static char *file_out = 0;
/* -p: prefix of the <prefix>_IMPLEMENTATION guard; main() requires it */
static char *prefix = 0;
/* -w: directory prepended to every input path by add_path() */
static char *work_dir = "./";
/*
 * Print the command line help for program name `app` to stderr and exit
 * with a failure status (via die()). Never returns.
 * The text reflects what main() enforces: -p is mandatory, -w is optional
 * and defaults to "./".
 */
static void
usage(const char *app)
{
    die("\n"
        "usage: %s -p <prefix> [options]\n"
        "\n"
        "   options:\n"
        "\n"
        "   -w <dir>    Workspace directory. Defaults to the current directory\n"
        "   -p <prefix> Namespacing prefix (required)\n"
        "   -i <files>  specifies a list of intro files\n"
        "   -g <files>  specifies a list of public header source files\n"
        "   -l <files>  specifies a list of private implementation source files\n"
        "   -a <files>  specifies a list of outro files\n"
        "   -o <file>   specifies an output file to store the resulting output\n"
        "   -h          print this help message\n"
        "\n"
        "   wildcards: (Example: ?*.c)\n"
        "\n"
        "   c       matches any literal character c\n"
        "   ?       matches any single character\n"
        "   ^       matches the beginning of the input string\n"
        "   $       matches the end of the input string\n"
        "   *       matches zero or more occurrences of the previous character\n"
        "\n",
        app
    );
}
/* Returns 1 for space, tab and line feed, 0 for everything else. */
static int
iswhite(int ch)
{
    switch (ch) {
    case ' ':
    case '\t':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
/*
 * Shrink the character range [*begin, *end) so that it neither starts nor
 * ends with whitespace (as defined by iswhite). Only the two pointers are
 * adjusted; the characters themselves are not modified.
 */
static void
remove_whitespace(char **begin, char **end)
{
    char *lo = *begin;
    char *hi = *end;
    for (; lo < hi && iswhite(*lo); ++lo)
        ;
    for (; hi > lo && iswhite(hi[-1]); --hi)
        ;
    *begin = lo;
    *end = hi;
}
/* Returns 1 for line feed and carriage return, 0 for everything else. */
static int
isnewline(int ch)
{
    switch (ch) {
    case '\n':
    case '\r':
        return 1;
    default:
        return 0;
    }
}
/*
 * Return a pointer to the start of the line following the one `s` points
 * into. A two character line ending made of one '\r' and one '\n' (in either
 * order) is skipped as a unit. If the line is the last one and has no line
 * ending, the returned pointer addresses the string terminator.
 */
static char*
skipline(char *s)
{
    char *p = s;
    while (*p != 0 && !isnewline(*p))
        p++;
    if (!isnewline(p[0]))
        return p;
    /* p[0] is '\r' or '\n'; the sum only matches when p[1] is the other one */
    if (p[0] + p[1] == '\r' + '\n')
        return p + 2;
    return p + 1;
}
/*
 * Split `full_path` at its last '/' into a directory and a file name.
 *
 * The split happens in place: the separating '/' is overwritten with a
 * terminator, so both results point into `full_path` unless noted below.
 *
 * - "dir/sub/file" -> *path = "dir/sub", *name = "file"
 * - "file"         -> *path = "./" (string literal), *name = "file"
 * - "/file"        -> *path = "/"  (string literal), *name = "file"
 */
static void
path_split(char **path, char **name, char *full_path)
{
    char *slash = strrchr(full_path, '/');
    if (!slash) {
        /* no directory part at all */
        *path = "./";
        *name = full_path;
    } else if (slash == full_path) {
        /* the only separator is the leading one: the directory is the root */
        *path = "/";
        *name = slash + 1;
    } else {
        *slash = '\0';
        *path = full_path;
        *name = slash + 1;
    }
}
/*
 * Append the file(s) described by `arg` to the stretchy buffer `list`.
 *
 * `arg` is resolved relative to work_dir. A plain path is appended as is.
 * If the file name part contains wildcards, the directory is scanned and
 * every matching file is appended instead. Wildcards in the directory part
 * are not supported and produce a diagnostic.
 *
 * Returns the (possibly reallocated) list.
 */
static char**
add_path(char **list, const char *arg)
{
    int i = 0;
    char *full_path = arena_printf(&arena, "%s%s", work_dir, arg);
    fixpath(full_path);
    if (str_is_regex(full_path)) {
        char *path = 0, *name = 0;
        path_split(&path, &name, full_path);
        if (str_is_regex(name)) {
            char **files = lsdir(path, &arena, LS_FILES_ONLY, name);
            for (i = 0; i < buf_cnt(files); ++i)
                buf_push(list, files[i]);
            buf_free(files);
        } else {
            /* stdout may be the destination of the generated file, so
             * diagnostics have to go to stderr */
            fprintf(stderr, "Invalid regex wildcard: %s\n", arg);
        }
    } else buf_push(list, full_path);
    return list;
}
/*
 * Consume file name arguments starting at `argv` until the end of the
 * argument vector or the next argument that begins with '-'.
 *
 * Every argument may hold several comma separated names; surrounding
 * whitespace of each name is trimmed and the name is handed to add_path().
 * The argument strings are modified in place (terminators are written).
 *
 * list  in/out: stretchy buffer receiving the paths
 * Returns the first argv position that was not consumed.
 */
static char**
parse_filenames(char ***list, char **argv)
{
    char **out = *list;
    for (; *argv != 0 && argv[0][0] != '-'; ++argv) {
        char *item = *argv;
        char *comma = 0;
        /* all names that are followed by a comma */
        for (comma = strchr(item, ','); comma != 0; comma = strchr(item, ',')) {
            char *stop = comma;
            remove_whitespace(&item, &stop);
            *stop = '\0';
            out = add_path(out, item);
            item = comma + 1;
        }
        /* the remainder after the last comma, if there is any */
        if (*item != '\0') {
            char *stop = item + strlen(item);
            remove_whitespace(&item, &stop);
            *stop = '\0';
            out = add_path(out, item);
        }
    }
    *list = out;
    return argv;
}
/*
 * Read the whole file `path` (opened in text mode) into memory.
 *
 * path  file to read
 * siz   out: number of bytes actually read
 *
 * Returns a NUL-terminated heap buffer that the caller releases with free().
 * Any failure (open, seek, read, file too large for an int size) terminates
 * the program through die().
 */
static char*
file_load(const char* path, int* siz)
{
    char *buf = 0;
    FILE *fd = 0;
    long ret = 0;
    size_t got = 0;
    fd = fopen(path, "r");
    if (fd == 0) die("Failed to open file: %s\n", path);
    if (fseek(fd, 0, SEEK_END) != 0)
        die("Failed to access file: %s\n", path);
    ret = ftell(fd);
    /* sizes are handled as int, so anything larger cannot be loaded */
    if (ret < 0 || ret >= INT_MAX)
        die("Failed to access file: %s\n", path);
    if (fseek(fd, 0, SEEK_SET) != 0)
        die("Failed to access file: %s\n", path);
    buf = xmalloc((int)ret + 1);
    /* text mode translation may deliver fewer bytes than ftell reported */
    got = fread(buf, 1, (size_t)ret, fd);
    if (ferror(fd))
        die("Failed to read file: %s\n", path);
    fclose(fd);
    buf[got] = 0;
    *siz = (int)got;
    return buf;
}
/*
 * Entry point: concatenates the given source files into one single-header
 * style output.
 *
 * Output layout:
 *   1. intro files wrapped in a C comment
 *   2. the NK_SINGLE_FILE define guard
 *   3. public files, copied verbatim
 *   4. private files inside "#ifdef <prefix>_IMPLEMENTATION"
 *   5. outro files wrapped in a C comment
 *
 * Returns 0 on success; every error terminates the program through
 * die()/usage().
 */
int main(int argc, char **argv)
{
int i = 0;
FILE *fout = 0;
/* parse arguments */
ARG_BEGIN {
case 'p': prefix = ARG_EF(usage(argv0)); break;
/* The file list options consume all following non-option arguments.
 * parse_filenames returns the first unconsumed argument, hence the -1 to
 * compensate for the argv++ of the ARG_BEGIN loop. */
case 'i': argv = parse_filenames(&files_intro, argv+1)-1; break;
case 'g': argv = parse_filenames(&files_pub, argv+1)-1; break;
case 'l': argv = parse_filenames(&files_priv, argv+1)-1; break;
case 'a': argv = parse_filenames(&files_outro, argv+1)-1; break;
case 'o': file_out = ARG_EF(usage(argv0)); break;
case 'h': default: usage(argv0); break;
case 'w': {
int n = 0;
work_dir = ARG_EF(usage(argv0));
n = (int)strlen(work_dir);
/* make sure the working directory ends with a path separator */
if (n && (work_dir[n-1] != '/' && work_dir[n-1] != '\\'))
work_dir = arena_printf(&arena, "%s/", work_dir);
} break;}
ARG_END;
/* the prefix is mandatory */
if (!prefix) usage(argv0);
/* open output file */
if (file_out) {
fout = fopen(file_out, "w");
if (!fout) die("Failed to open output file %s\n", file_out);
} else fout = stdout;
/* output intro files */
fprintf(fout, "/*\n");
for (i = 0; i < buf_cnt(files_intro); ++i) {
int siz = 0;
char *f = file_load(files_intro[i], &siz);
fwrite(f, (size_t)siz, 1, fout);
free(f);
} fprintf(fout, "*/\n");
/* output public prefix */
fprintf(fout, "#ifndef NK_SINGLE_FILE\n");
fprintf(fout, " #define NK_SINGLE_FILE\n");
fprintf(fout, "#endif\n");
fprintf(fout, "\n");
/* output public files */
for (i = 0; i < buf_cnt(files_pub); ++i) {
int siz = 0;
char *f = file_load(files_pub[i], &siz);
fwrite(f, (size_t)siz, 1, fout);
free(f);
}
/* output private files */
fprintf(fout, "#ifdef %s_IMPLEMENTATION\n", prefix);
for (i = 0; i < buf_cnt(files_priv); ++i) {
int siz = 0;
char *f = file_load(files_priv[i], &siz);
char *b = f, *inc = 0;
/* NOTE(review): this matches ".c" anywhere in the path, not only as the
 * file extension - confirm that this is intended */
if (strstr(files_priv[i], ".c") != 0) {
/* skip includes in source files */
/* every line from an occurrence of "#include" to its end is dropped */
while ((inc = strstr(b, "#include")) != 0) {
fwrite(b, (size_t)(inc - b), 1, fout);
b = skipline(inc);
}
} fwrite(b, (size_t)((f + siz) - b), 1, fout);
free(f);
} fprintf(fout, "#endif /* %s_IMPLEMENTATION */\n", prefix);
/* output outro files */
fprintf(fout, "/*\n");
for (i = 0; i < buf_cnt(files_outro); ++i) {
int siz = 0;
char *f = file_load(files_outro[i], &siz);
fwrite(f, (size_t)siz, 1, fout);
free(f);
} fprintf(fout, "*/\n");
/* close output file */
if (fout != stdout) {
fflush(fout);
fclose(fout);
} return 0;
}
@dumblob
Copy link

dumblob commented Apr 10, 2018

This will sound really rude, but I'll do anyway 😉.

Why didn't you write this app paq in POSIX sh? To my knowledge this works on MacOSX, BSDs, Linux, and of course on Windows (the best approach known to me is to install Git for Windows as it works on any Windows system unlike WSL, is small, is maintained, and provides in addition a full seamless access to all cmd.exe applications). This sh solution would be also just very few lines of code long and therefore far less error prone and time saving than the current solution in C.

It's always discouraged to reinvent the wheel - e.g. the "regexes" in this paq.c are not standardized anywhere. Non-standard solutions in the long term are making apps quite unpopular.

@vurtun
Copy link
Author

vurtun commented Apr 11, 2018

This will sound really rude, but I'll do anyway

Don't worry it is not, at least if it comes from you 😄 .

Why didn't you write this app paq in POSIX sh?

Or Python. It works on Windows, Mac and Linux as well 😆. Actually, this is just the C implementation of Nuklear's build.py. The reason I wrote it was to test how much code it would take to write the same functionality in C for comparison, especially since 80% is just copied and pasted from other sources, so the time it took to write was quite short. Another interesting aspect of using C as a scripting language is that it works everywhere once compiled. In addition, it is possible to use tcc in interpreter mode. In that case you don't even have to compile it; you just run it. Basically, I wanted to test Sean Barrett's approach of writing small programs in C, both from a productivity perspective and in terms of running it with the tcc interpreter.

It's always discouraged to reinvent the wheel - e.g. the "regexes" in this paq.c are not standardized anywhere. Non-standard solutions in the long term are making apps quite unpopular.

Honestly, I quite like the regex implemented here. It is a lot simpler than any existing regex implementation but still provides the most commonly used functionality. The only problem is that it is user-facing, which makes it more of a no-go, since everyone else has a different idea of what a regex should look like. Still, what I like about this version is that '.' in a file path actually just works. I have seen that mistake made so many times in file-processing apps that I went further and changed each regex symbol.

Would I use it in production? Probably not. For my stuff however I prefer this small implementation over the real deal.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment