Skip to content

Instantly share code, notes, and snippets.

@rain-1
Created January 8, 2021 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rain-1/f24216a0c75946b8a46984cd31d6f304 to your computer and use it in GitHub Desktop.
Save rain-1/f24216a0c75946b8a46984cd31d6f304 to your computer and use it in GitHub Desktop.
S as a single file
#include <assert.h>
#include <errno.h>
#include <libgen.h>
#include <limits.h>
#include <linux/limits.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
///// headers
/* Selects which backing store a string_port reads from. */
typedef enum { STRPORT_CHAR, STRPORT_FILE } Strport;
/*
 * Character input source backed either by an in-memory string or by a
 * stdio stream; `kind` decides which group of fields below is used.
 */
typedef struct {
Strport kind;
/* kind=STRPORT_CHAR */
char *text; /* NUL-terminated text; the '\0' marks end of input */
int place; /* index into text of the next unread character */
/* kind=STRPORT_FILE */
FILE *fptr; /* stream the characters are read from */
} string_port;
/* Return the next character of the port without consuming it. */
int port_peek(string_port *port);
/* Return nonzero when the port is at end of input. */
int port_eof(string_port *port);
/* Consume and return the next character of the port. */
int port_getc(string_port *port);
/*
* Copy me if you can.
* by 20h
*/
#ifndef ARG_H__
#define ARG_H__
extern char *argv0;
/* use main(int argc, char *argv[]) */
/*
 * ARGBEGIN opens an option-parsing loop over argv.  It stores
 * basename(*argv) in argv0, skips that first element, and then walks
 * every following argument that starts with '-' and has at least one
 * more character.  A lone "--" is consumed and ends the loop.  For each
 * option character the macro assigns it to argc_ and ends in
 * `switch (argc_)`, so the caller supplies the `{ case ...: }` body.
 * The macro modifies argc and argv (and the argv[0] pointers) in place.
 */
#define ARGBEGIN for (argv0 = basename(*argv), argv++, argc--;\
argv[0] && argv[0][0] == '-'\
&& argv[0][1];\
argc--, argv++) {\
char argc_;\
char **argv_;\
int brk_;\
if (argv[0][1] == '-' && argv[0][2] == '\0') {\
argv++;\
argc--;\
break;\
}\
for (brk_ = 0, argv[0]++, argv_ = argv;\
argv[0][0] && !brk_;\
argv[0]++) {\
if (argv_ != argv)\
break;\
argc_ = argv[0][0];\
switch (argc_)
/*
 * ARGEND closes both loops opened by ARGBEGIN and then steps argc/argv
 * back by one, so that argv[1] is the first argument not consumed as an
 * option.
 */
#define ARGEND }\
} argc++; argv--
/* ARGC() yields the option character currently being handled. */
#define ARGC() argc_
/*
 * EARGF(x) yields the argument of the current option: the rest of the
 * current argv element if any, otherwise the next argv element (which is
 * then consumed).  When no argument is available it evaluates x and then
 * calls abort().
 */
#define EARGF(x) ((argv[0][1] == '\0' && argv[1] == NULL)?\
((x), abort(), (char *)0) :\
(brk_ = 1, (argv[0][1] != '\0')?\
(&argv[0][1]) :\
(argc--, argv++, argv[0])))
/* ARGF() is like EARGF() but yields a null pointer when no argument exists. */
#define ARGF() ((argv[0][1] == '\0' && argv[1] == NULL)?\
(char *)0 :\
(brk_ = 1, (argv[0][1] != '\0')?\
(&argv[0][1]) :\
(argc--, argv++, argv[0])))
#endif
/* see LICENSE file for copyright and license details */
/* Larger / smaller of two values; each argument may be evaluated twice. */
#define MAX(A, B) ((A) > (B) ? (A) : (B))
#define MIN(A, B) ((A) < (B) ? (A) : (B))
/* Nonzero when A <= X <= B; X is evaluated twice. */
#define BETWEEN(X, A, B) ((A) <= (X) && (X) <= (B))
/* Element count of a true array (not valid for pointers). */
#define LEN(X) (sizeof(X) / sizeof((X)[0]))
/*
 * Allocation wrappers: each forwards to the libc function of the same
 * name without the leading 'e' and terminates the program through
 * reporterr() when that function returns NULL.
 */
void *ecalloc(size_t nmemb, size_t size);
void *emalloc(size_t size);
void *erealloc(void *p, size_t size);
char *estrdup(char *s);
/* Free p when it is non-NULL. */
void efree(void *p);
/* see LICENSE file for copyright and license details */
/*
 * A region records every allocation made through it so that all of them
 * can be released with one region_free() call.
 */
typedef struct {
size_t len; /* number of pointers currently recorded */
size_t alloc_len; /* capacity of the pointers array, in entries */
void **pointers; /* recorded allocations */
} region;
/* Initialise an empty region. */
void region_create(region *r);
/* Allocate size bytes and record the allocation in r. */
void* region_malloc(region *r, size_t size);
/* Resize v, which must have been obtained from r; exits if v is unknown. */
void* region_realloc(region *r, void *v, size_t size);
/* Free every recorded allocation and the region's bookkeeping array. */
void region_free(region *r);
/* see LICENSE file for copyright and license details */
/* Maximum number of characters in one token (excluding the terminator). */
#define TOK_MAX 4096
/* Size of the token-pointer array allocated for one line. */
#define MAX_TOKS_PER_LINE 4096
/* NOTE(review): no file-scope definition of tok_buf is visible in this file;
 * read_tokens() uses a local buffer of the same name -- confirm this
 * declaration is still needed. */
extern char tok_buf[TOK_MAX];
/* Skip blanks and, if present, one following newline. */
void skip_newline(string_port *stream);
/* Read one line of tokens, allocated from r; NULL on error. */
char **read_tokens(region *r, string_port *stream);
/* see LICENSE file for copyright and license details */
/*
 * Node kinds, listed from tightest to loosest binding.  The order is
 * significant: the parser moves to the next tighter level by computing
 * `type - 1`.
 */
typedef enum {
NODE_COMMAND,
NODE_PIPE,
NODE_CONJ,
NODE_DISJ,
} NodeType;
/* One node of the parsed command tree. */
struct AST {
NodeType type;
union {
char **tokens; /* NODE_COMMAND: NULL-terminated argument vector */
struct {
struct AST *l; /* left operand */
struct AST *r; /* right operand */
} child; /* every other node type */
} node;
};
/* Operator spelling for each binary node type, indexed by NodeType. */
extern char *operator_for[];
/* Read and parse one line from port; *bg_flag reports a trailing "&". */
struct AST *parse(region *r, string_port *port, int *bg_flag);
/* see LICENSE file for copyright and license details */
/* NOTE(review): no definition named variable_name is visible in this file
 * (the buffer actually defined and used is `varname`) -- confirm. */
extern char variable_name[TOK_MAX];
/* Export the positional parameters taken from argv[1..] as "0", "1", ... and "#". */
void vars_set(char **argv);
/* Remove the positional parameters and "#" from the environment. */
void vars_unset(void);
/* Return a copy of tok (length t), allocated from r, with $NAME / ${NAME}
 * references replaced by their environment values; NULL on error. */
char *expand_variables(region *r, char *tok, int t);
/* see LICENSE file for copyright and license details */
/* Execute the tree n in the calling process (dispatches on n->type). */
void interpret(struct AST* n);
/* Parse one line from port and run it; when string_capture is non-NULL the
 * command's standard output is collected into *string_capture.  Returns the
 * status filled in by waitpid(), or 0 when nothing was waited for. */
int parse_and_execute(string_port *port, char **string_capture);
/* Run commands read from f until end of file, then close f. */
void interpreter_loop(FILE *f);
/* see LICENSE file for copyright and license details */
/* report errors or warnings */
extern char *argv0;
extern int debug;
extern int interactive_mode;
/*
 * Print a diagnostic to stderr.  With debug set, the message is prefixed
 * with the source location and "warning"/"error" (E selects "error"
 * unless interactive_mode is set); otherwise it is prefixed with argv0.
 * M must be a string literal; the macros rely on the GNU `##__VA_ARGS__`
 * extension.
 */
#define reportprint(E, M, ...) do { \
if (debug) \
fprintf(stderr, "%s:%d: %s: " M "\n", __FILE__, __LINE__, \
!E || interactive_mode ? "warning" : "error", ##__VA_ARGS__); \
else \
fprintf(stderr, "%s: " M "\n", argv0, ##__VA_ARGS__); \
} while(0)
/* Print an error and terminate with exit(1). */
#define reporterr(M, ...) do { \
reportprint(1, M, ##__VA_ARGS__); \
exit(1); \
} while(0)
/* Print an error and terminate with _exit(1) (no atexit/stdio flushing). */
#define _reporterr(M, ...) do { \
reportprint(1, M, ##__VA_ARGS__); \
_exit(1); \
} while(0)
/* Print a warning; terminate with exit(1) unless in interactive mode. */
#define report(M, ...) do { \
reportprint(0, M, ##__VA_ARGS__); \
if (!interactive_mode) \
exit(1); \
} while(0)
/* Like report(), but in interactive mode returns R from the enclosing
 * function (R may be empty for void functions). */
#define reportret(R, M, ...) do { \
reportprint(0, M, ##__VA_ARGS__); \
if (!interactive_mode) \
exit(1); \
else \
return R; \
} while(0)
/* Store message M in V and return NULL from the enclosing function. */
#define reportvar(V, M) do { \
V = M; \
return NULL; \
} while(0)
/* see LICENSE file for copyright and license details */
/* A command implemented inside the shell itself. */
typedef struct {
char *name; /* command word that selects the builtin */
void (*func)(char **); /* handler; receives the command's token vector */
} Builtin;
/* Run n as a builtin if it names one; returns 1 when it did, 0 otherwise. */
int perform_builtin(struct AST *n);
/* Handlers for the individual builtins; argv[0] is the builtin's name. */
void builtin_source(char **argv);
void builtin_cd(char **argv);
void builtin_set(char **argv);
void builtin_unset(char **argv);
void builtin_exit(char **argv);
/////
/* Version string printed by the -v option. */
#define VERSION "0.1"
/* Program name; assigned by ARGBEGIN from basename(argv[0]). */
char *argv0;
/* Nonzero (set by -d) makes diagnostics include file/line information. */
int debug = 0;
/*
 * SIGINT handler that deliberately does nothing: installing it keeps an
 * interrupt from terminating the shell itself.
 */
void
handler_sigint(int sig)
{
	(void)sig; /* unused; silences -Wextra */
}
/* Write the command-line synopsis to stderr and exit with status eval. */
static void
usage(int eval)
{
	static const char synopsis[] = "usage: %s [-dvh] [SCRIPT ...]\n";

	fprintf(stderr, synopsis, argv0);
	exit(eval);
}
/*
 * Entry point: parse options, then run either the script named by the
 * first non-option argument or, when there is none, standard input.
 */
int
main(int argc, char **argv)
{
FILE *f;
/* -d: debug diagnostics, -v: version, -h: usage; anything else is an error */
ARGBEGIN {
case 'd':
debug = 1;
break;
case 'v':
printf("%s v%s (c) 2014, 2016, 2017 s team\n", argv0, VERSION);
return 0;
case 'h':
usage(0);
return 0;
default:
usage(1);
return 1;
} ARGEND;
/* the handler is empty, so SIGINT no longer kills the shell process */
signal(SIGINT, handler_sigint);
setenv("SHELL", "/bin/s", 1);
/* ARGEND steps argv back by one, so argv[1] is the first non-option argument */
if (argc == 1) {
f = stdin;
interactive_mode = isatty(fileno(stdin));
} else {
if (!(f = fopen(argv[1], "r")))
reporterr("source: %s: could not load file", argv[1]);
/* export the script name and its arguments as positional variables */
vars_set(argv);
interactive_mode = 0;
}
interpreter_loop(f);
return 0;
}
/* Scratch buffers used by builtin_cd for the new and the previous directory. */
char cwd[PATH_MAX];
char owd[PATH_MAX];
/* Table of builtin commands, searched linearly by perform_builtin(). */
Builtin builtins[] = {
{ "source", &builtin_source },
{ "cd", &builtin_cd },
{ "set", &builtin_set },
{ "unset", &builtin_unset },
{ "exit", &builtin_exit },
};
int
perform_builtin(struct AST *n)
{
size_t i;
if (n->type != NODE_COMMAND || !n->node.tokens[0])
return 0;
for (i = 0; i < LEN(builtins); i++)
if (!strcmp(builtins[i].name, n->node.tokens[0])) {
(*builtins[i].func)(n->node.tokens);
return 1;
}
return 0;
}
/*
 * source FILE [ARG ...]: run the commands in FILE in non-interactive
 * mode, with FILE and the remaining words exported as positional
 * variables.  The previous interactive_mode value is restored afterwards.
 */
void
builtin_source(char **argv)
{
	const char *path = argv[1];
	FILE *script;
	int saved_mode;

	if (path == NULL)
		reportret(,"%s: argument required", argv[0]);

	script = fopen(path, "r");
	if (script == NULL)
		reportret(,"%s: %s: could not load file", argv[0], argv[1]);

	saved_mode = interactive_mode;
	interactive_mode = 0;
	vars_set(argv);
	interpreter_loop(script); /* also closes the stream */
	interactive_mode = saved_mode;
}
/*
 * cd [DIR | -]: change the working directory.
 *
 * With no argument the target is $HOME; with "-" it is $OWD (the
 * directory recorded by the previous successful cd), and the target is
 * printed after the change.  On success PWD is set to the new directory
 * and OWD to the directory that was current before the change.
 */
void
builtin_cd(char **argv)
{
	char *dir;
	int isowd = 0;
	int have_owd;

	if (!(dir = argv[1])) {
		if (!(dir = getenv("HOME")))
			reportret(,"%s: invalid $HOME", argv[0]);
	} else if (strcmp(dir, "-") == 0) {
		if (!(dir = getenv("OWD")))
			reportret(,"%s: invalid $OWD", argv[0]);
		isowd = 1;
	}

	/* remember whether the old directory could be determined at all, so
	 * a failed getcwd() never exports stale buffer contents as OWD */
	have_owd = getcwd(owd, sizeof(owd)) != NULL;

	if (chdir(dir)) {
		report("%s: %s: could not change to directory", argv[0], dir);
		return;
	}

	/* dir may point into the environment (getenv result); print it
	 * before any setenv() call, which is allowed to invalidate it */
	if (isowd)
		printf("%s\n", dir);

	if (have_owd)
		setenv("OWD", owd, 1);
	if (getcwd(cwd, sizeof(cwd)))
		setenv("PWD", cwd, 1);
	else
		report("%s: could not determine current directory", argv[0]);
}
/* set NAME VALUE: export NAME with VALUE, replacing any existing value. */
void
builtin_set(char **argv)
{
	const char *name = argv[1];
	const char *value = name ? argv[2] : NULL;

	if (name == NULL || value == NULL) {
		report("%s: two arguments required", argv[0]);
		return;
	}
	/* any nonzero third argument asks setenv() to overwrite */
	setenv(name, value, INT_MAX);
}
/* unset NAME: remove NAME from the environment. */
void
builtin_unset(char **argv)
{
	const char *name = argv[1];

	if (name == NULL) {
		report("%s: argument required", argv[0]);
		return;
	}
	unsetenv(name);
}
/*
 * exit [STATUS]: leave the shell.  STATUS is parsed by strtol() with
 * base 0 (so decimal, octal and hex spellings are accepted); without an
 * argument the status is 0.
 */
void
builtin_exit(char **argv)
{
	long status = 0;

	if (argv[1] != NULL)
		status = strtol(argv[1], NULL, 0);
	exit(status);
}
/*
 * Operator token for each binary node type, indexed by NodeType.  The
 * NODE_COMMAND slot is left unset (NULL) because commands have no operator.
 */
char *operator_for[] = {
[NODE_PIPE] = "|",
[NODE_CONJ] = "&&",
[NODE_DISJ] = "||",
};
/*
 * Parse the NULL-terminated token list toks at precedence level ty.
 *
 * At NODE_COMMAND level the whole list becomes one command node.  At an
 * operator level the list is split at the first token equal to that
 * level's operator (the token is overwritten with NULL to terminate the
 * left half); the left half is parsed one level tighter (ty-1) and the
 * right half at the same level.  With no operator present the list is
 * parsed one level tighter.  Nodes come from region r.  Returns NULL when
 * a sub-list is empty (after reporting it).
 */
static struct AST *
parse_binop(region *r, char **toks, NodeType ty)
{
	struct AST *ast;
	char **cur;

	if (ty == NODE_COMMAND) {
		if (toks[0] == NULL)
			reportret(NULL, "zero-length command");
		ast = region_malloc(r, sizeof(struct AST));
		ast->type = NODE_COMMAND;
		ast->node.tokens = toks;
		return ast;
	}

	for (cur = toks; *cur != NULL; cur++) {
		if (strcmp(operator_for[ty], *cur) != 0)
			continue;

		*cur = NULL; /* cut the list in two at the operator */
		ast = region_malloc(r, sizeof(struct AST));
		ast->type = ty;
		ast->node.child.l = parse_binop(r, toks, ty-1);
		if (ast->node.child.l == NULL)
			return NULL;
		ast->node.child.r = parse_binop(r, cur+1, ty);
		if (ast->node.child.r == NULL)
			return NULL;
		return ast;
	}

	/* no operator of this level: try the next tighter one */
	return parse_binop(r, toks, ty-1);
}
/*
 * Parse a full token list.  A final "&" token is removed and reported
 * through *bg (1 = run in background, 0 otherwise); the remaining tokens
 * are parsed starting at the loosest-binding operator level.
 */
static struct AST *
parse_tokens(region *r, char **toks, int *bg)
{
	int count;
	int last;

	for (count = 0; toks[count] != NULL; count++)
		;

	last = count - 1;
	if (count > 0 && strcmp("&", toks[last]) == 0) {
		toks[last] = NULL;
		*bg = 1;
	} else {
		*bg = 0;
	}
	return parse_binop(r, toks, NODE_DISJ);
}
/*
 * Read one line of tokens from port and parse it into a tree allocated
 * from r.  Returns NULL when tokenising failed or the line was empty.
 */
struct AST *
parse(region *r, string_port *port, int *bg)
{
	char **words = read_tokens(r, port);

	if (words == NULL || words[0] == NULL)
		return NULL;
	return parse_tokens(r, words, bg);
}
enum {
EXPAND_DEFAULT,
EXPAND_NONE,
EXPAND_EVAL,
};
/* Return 1 when c is one of the quote characters " ' `, otherwise 0. */
static int
is_quote(char c)
{
	switch (c) {
	case '"':
	case '\'':
	case '`':
		return 1;
	default:
		return 0;
	}
}
/*
 * Return 1 when c ends a bare word: a blank, tab, newline, carriage
 * return or the comment character '#'.  Returns 0 for anything else,
 * including '\0'.
 */
static int
is_eot(char c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\n':
	case '\r':
	case '#':
		return 1;
	default:
		return 0;
	}
}
/*
 * Escape table for quoted strings: each row is { character following the
 * backslash, character it stands for }.
 */
char escs[][2] = {
{ '\\', '\\' },
{ 't', '\t' },
{ 'n', '\n' },
{ 'r', '\r' },
{ '"', '"' },
{ '\'', '\'' },
{ '`', '`' }
};
/* Return 1 when c may follow a backslash inside a string (see escs). */
static int
is_esc(int c)
{
	size_t row = LEN(escs);

	while (row-- > 0)
		if (escs[row][0] == c)
			return 1;
	return 0;
}
/* Consume blanks and tabs from stream, stopping at anything else or at EOF. */
static void
skip_spaces(string_port *stream)
{
	for (;;) {
		int next;

		if (port_eof(stream))
			return;
		next = port_peek(stream);
		if (next != ' ' && next != '\t')
			return;
		port_getc(stream);
	}
}
/*
 * Consume leading blanks, tabs and '#' comments.  A comment is discarded
 * up to and including its terminating newline, after which skipping
 * continues on the following line.
 */
static void
skip_spaces_and_comments(string_port *stream)
{
	while (!port_eof(stream)) {
		int next = port_peek(stream);

		if (next == ' ' || next == '\t') {
			port_getc(stream);
		} else if (next == '#') {
			/* swallow the rest of the line, newline included */
			while (!port_eof(stream) && port_getc(stream) != '\n')
				;
		} else {
			break;
		}
	}
}
/* Skip blanks and tabs, then consume one newline if it comes next. */
void
skip_newline(string_port *stream)
{
	skip_spaces(stream);
	if (port_peek(stream) != '\n')
		return;
	port_getc(stream);
}
/* returns -1 on failure, length of the token on success
 * a word token cannot have length 0 but a string token can
 *
 * Reads the next token of the current line from stream into tok_buf
 * (which must hold TOK_MAX+1 bytes) and NUL-terminates it.
 *
 * *out_should_expand is set to EXPAND_DEFAULT for bare words and "..."
 * strings, EXPAND_NONE for '...' strings and EXPAND_EVAL for `...`
 * strings.
 *
 * Returns -1 when there is no further token on this line (newline,
 * comment or end of input reached, or input ended inside a token) and
 * -2 (interactive mode only; otherwise the process exits) for malformed
 * strings.  A token longer than TOK_MAX terminates the program.
 *
 * Implementation note: port_eof() on a FILE port reflects feof(), which
 * only becomes true after a read has hit end of file.  The word states
 * therefore peek before asking port_eof(), so that a word ending exactly
 * at end of file is not extended with the EOF value.
 */
static int
read_token(char *tok_buf, string_port *stream, int *out_should_expand)
{
	size_t len = 0;
	size_t i = 0;
	int escape_char = 0, var = 0;
	int next;
	char c, quote = '\0';
	/* TOK(c) adds a character c to the buffer, erroring if it went over the limit */
#define TOK(c) \
	do { \
		if (len >= TOK_MAX) \
			reporterr("token too long"); \
		tok_buf[len++] = c; \
	} while(0)
	*out_should_expand = EXPAND_DEFAULT;
	/* this routine is used to read the next token in a 'line' of tokens therefore
	 * we need to exit if we hit a newline or a comment */
st_restart:
	skip_spaces(stream);
	if (port_eof(stream) ||
	    port_peek(stream) == '\n' ||
	    port_peek(stream) == '#')
		return -1;
	goto st_tok; /* parse using a state machine */
st_tok:
	c = port_getc(stream);
	if (is_quote(c)) {
		quote = c;
		escape_char = 0;
		if (c == '\'')
			*out_should_expand = EXPAND_NONE;
		else if (c == '`')
			*out_should_expand = EXPAND_EVAL;
		goto st_string;
	} else if (c == '~') {
		/* a leading '~' is rewritten to ${HOME} and expanded later */
		TOK('$');
		TOK('{');
		TOK('H');
		TOK('O');
		TOK('M');
		TOK('E');
		TOK('}');
		goto st_word_continue;
	} else if (c == '\\') {
		/* backslash-newline joins two physical lines */
		if (port_peek(stream) == '\n') {
			port_getc(stream);
			goto st_restart;
		} else
			goto st_word;
	} else if (c == '$') {
		var = 1; /* lets '#' directly after '$' belong to the word ("$#") */
		goto st_word;
	} else {
		goto st_word;
	}
st_word:
	if (c == '\\') {
		/* peek first so a FILE port notices end of file before we ask */
		(void)port_peek(stream);
		if (port_eof(stream))
			return -1;
		c = port_getc(stream);
	}
	TOK(c);
st_word_continue:
	/* peek before port_eof(): see the implementation note above */
	next = port_peek(stream);
	if ((port_eof(stream) || is_eot(next)) &&
	    !(next == '#' && var)) {
		if (len)
			goto st_accept;
		else
			return -1;
	} else {
		c = port_getc(stream);
		goto st_word;
	}
st_string:
	if (port_eof(stream))
		return -1;
	c = port_getc(stream);
	if (!escape_char && c == quote) {
		/* check that the very next char is not another string */
		if (is_quote(port_peek(stream)))
			reportret(-2, "strings too close together");
		goto st_accept;
	} else if (!escape_char && c == '\\') {
		escape_char = 1;
		goto st_string;
	} else if (escape_char && is_esc(c)) {
		escape_char = 0;
		for (i = 0; i < (size_t)LEN(escs); i++)
			if (escs[i][0] == c) {
				TOK(escs[i][1]);
				goto st_string;
			}
		reportret(-2, "impossible escape");
	} else {
		if (escape_char)
			reportret(-2, "escaped a non-escapable char");
		var = 0;
		TOK(c);
		goto st_string;
	}
st_accept:
	tok_buf[len] = '\0';
	return len;
}
/*
 * Read the tokens of one line from stream.
 *
 * Returns a NULL-terminated array of strings, all allocated from region
 * r, or NULL on error (malformed string, failed expansion, failed
 * backtick command, or too many tokens).  Depending on how a token was
 * quoted it is variable-expanded, copied verbatim, or -- for `...` --
 * executed, with the command's captured output becoming the token.
 */
char **
read_tokens(region *r, string_port *stream)
{
	char tok_buf[TOK_MAX+1]; // +1 is for the terminating \0
	char **tokens, *result;
	const char *captured;
	int i = 0, len, should_expand;
	string_port port;

	tokens = region_malloc(r, sizeof(char*)*MAX_TOKS_PER_LINE);
	skip_spaces_and_comments(stream);
	while ((len = read_token(tok_buf, stream, &should_expand)) > -1) {
		switch (should_expand) {
		case EXPAND_DEFAULT:
			if (!(tokens[i] = expand_variables(r, tok_buf, len)))
				return NULL;
			break;
		case EXPAND_NONE:
			/* tok_buf[len] is the terminator, so len + 1 bytes is the whole string */
			tokens[i] = region_malloc(r, len + 1);
			memcpy(tokens[i], tok_buf, len + 1);
			break;
		case EXPAND_EVAL:
			port = (string_port){ .kind=STRPORT_CHAR, .text=tok_buf, .place=0 };
			result = NULL;
			if (parse_and_execute(&port, &result)) {
				efree(result);
				reportret(NULL, "eval failed");
			}
			/* Nothing is captured when the command was a builtin or
			 * empty; use "" so the token list is not cut short by a
			 * NULL entry.  The capture is copied into the region so it
			 * is released together with the rest of the line. */
			captured = result ? result : "";
			tokens[i] = region_malloc(r, strlen(captured) + 1);
			strcpy(tokens[i], captured);
			efree(result);
			break;
		}
		if (++i >= MAX_TOKS_PER_LINE)
			reportret(NULL, "line too long");
	}
	if (len == -2)
		return NULL;
	tokens[i] = NULL;
	return tokens;
}
/* Name of the variable most recently parsed by read_variable_prefix(). */
char varname[TOK_MAX];
/* Reason for the most recent read_variable_prefix() failure. */
char *varerr;
/* set positional variables before loading file
 *
 * argv[0] is skipped; argv[1] is exported as "0", argv[2] as "1", and so
 * on.  "#" is set to the index of the last variable exported, i.e. the
 * number of arguments that follow the script name, or "0" when nothing
 * was exported. */
void
vars_set(char **argv)
{
	int i = 0;
	char var[8] = "0"; /* defined value for "#" even when the loop never runs */
	/* char varcat[ARG_MAX_STRLEN]; */
	long max = sysconf(_SC_ARG_MAX);

	/* var holds at most 7 digits; also covers sysconf() failing with -1 */
	if (max <= 0 || max > 9999999)
		max = 9999999;
	if (argv && *argv) {
		for (argv++; *argv && i < max; argv++, i++) {
			snprintf(var, sizeof(var), "%d", i);
			setenv(var, *argv, 1);
		}
	}
	setenv("#", var, 1);
}
/* remove positional variables to prevent leakage
 *
 * Unsets "0", "1", "2", ... in order, stopping at the first number that
 * is not set, and then unsets "#". */
void
vars_unset(void)
{
	char name[8];
	long limit = sysconf(_SC_ARG_MAX);
	int idx;

	for (idx = 0; idx < limit; idx++) {
		snprintf(name, sizeof(name), "%d", idx);
		if (getenv(name) == NULL)
			break;
		unsetenv(name);
	}
	unsetenv("#");
}
/* Return 1 when c may appear in a variable name: '_', an ASCII letter or a digit. */
static int
variable_character(char c)
{
	if (c == '_')
		return 1;
	if (BETWEEN(c, 'A', 'Z') || BETWEEN(c, 'a', 'z'))
		return 1;
	return BETWEEN(c, '0', '9');
}
/*
 * Parse the variable reference at the start of tok, which must begin
 * with '$', in either the $NAME or the ${NAME} form.  The name is stored
 * in the global varname; a '#' is accepted only as the first character of
 * the name.
 *
 * Returns a pointer to the first character after the reference, or NULL
 * with varerr describing the problem (missing '}' or empty name).
 */
static char *
read_variable_prefix(char *tok)
{
	int n = 0;
	int braced;

	assert(*tok == '$');
	tok++;

	/* NOTE: no bounds check on varname here; tok is already no longer
	 * than a token, and varname is sized for one */
	braced = (*tok == '{');
	if (braced)
		tok++;

	for (; variable_character(*tok) || (n == 0 && *tok == '#'); tok++)
		varname[n++] = *tok;

	if (braced) {
		if (*tok != '}')
			reportvar(varerr, "missing '}'");
		tok++; /* step over the closing brace */
	}
	varname[n] = '\0';
	if (n == 0)
		reportvar(varerr, "length 0 variable");
	return tok;
}
/*
 * Build a copy of tok in which every $NAME / ${NAME} reference is
 * replaced by the value of that environment variable.
 *
 * r   region the result is allocated from
 * tok NUL-terminated token text
 * t   length of tok, used to size the initial buffer
 *
 * Returns the expanded string, or NULL (after reporting) when a
 * reference is malformed, names an unset variable, or the buffer size
 * would exceed TOK_MAX.
 */
char *
expand_variables(region *r, char *tok, int t)
{
	char *whole = tok; /* kept for diagnostics */
	char *out, *value;
	int capacity = t + 1;
	int used = 0;
	int vlen;

	out = region_malloc(r, capacity);
	while (*tok != '\0') {
		if (*tok != '$') {
			out[used++] = *tok++;
			continue;
		}

		tok = read_variable_prefix(tok);
		if (tok == NULL)
			reportret(NULL, "problem parsing variable '%s' at character %d: %s",
			    whole, used, varerr);
		value = getenv(varname);
		if (value == NULL)
			reportret(NULL, "reference to undefined variable '%s'", whole);

		/* grow by the value's length; the reference itself was already
		 * counted in the original size */
		vlen = strlen(value);
		capacity += vlen;
		if (capacity > TOK_MAX)
			reportret(NULL, "variable expansion blew up token size too large");
		out = region_realloc(r, out, capacity);
		memcpy(out + used, value, vlen);
		used += vlen;
	}
	out[used] = '\0';
	return out;
}
/* Nonzero while commands come from a terminal; in that mode report() and
 * reportret() print a warning instead of terminating the shell. */
int interactive_mode = 0;
/*
 * Replace the current process with the program named by a command node,
 * looked up through PATH.  Does not return: if execvp() fails, an error
 * is printed and the process ends with _exit(1).
 */
void
interpret_command(struct AST* n)
{
	char **words;

	assert(n->type == NODE_COMMAND);
	words = n->node.tokens;
	execvp(words[0], words);
	/* only reached when execvp() failed */
	_reporterr("%s: command not found", words[0]);
}
/*
 * Execute an "&&" (NODE_CONJ) or "||" (NODE_DISJ) node.
 *
 * The left operand runs in a forked child and is waited for.  For "&&"
 * the right operand runs only when the left exited with status 0; for
 * "||" only when it exited with a nonzero status.  Otherwise this process
 * ends with the left operand's exit status.
 */
void
interpret_junction(struct AST* n)
{
	pid_t child;
	int status;
	int left_ok, is_conj;

	if (n->type == NODE_COMMAND)
		interpret_command(n);

	child = fork();
	if (child == -1) {
		_reporterr("fork() failure");
		return;
	}
	if (child == 0) {
		interpret(n->node.child.l);
		return;
	}

	waitpid(child, &status, 0);
	left_ok = !WEXITSTATUS(status);
	is_conj = (n->type == NODE_CONJ);
	/* "&&" stops on failure, "||" stops on success: in both cases the
	 * chain ends exactly when the two flags differ */
	if (left_ok != is_conj)
		_exit(WEXITSTATUS(status));
	interpret(n->node.child.r);
}
/*
 * Execute a pipeline node.
 *
 * A pipe is created and the process forks: the child redirects its
 * standard output to the pipe and executes the left command; the parent
 * redirects its standard input from the pipe and continues with the
 * right side, which may itself be a further pipeline or a single command.
 * Failure of pipe(), fork() or dup2() prints an error and ends the
 * process with _exit(1).
 */
void
interpret_pipe(struct AST* n)
{
	int fd[2];
	pid_t f;

	if (n->type == NODE_COMMAND)
		interpret_command(n);

	if (pipe(fd) == -1)
		_reporterr("pipe() failure");
	f = fork();
	if (f == -1) {
		_reporterr("fork() failure");
	} else if (!f) { /* child */
		close(fd[0]);
		if (dup2(fd[1], STDOUT_FILENO) == -1)
			_reporterr("dup2() failure");
		/* if the pipe already landed on fd 1, closing it would undo the redirect */
		if (fd[1] != STDOUT_FILENO)
			close(fd[1]);
		interpret_command(n->node.child.l);
	} else { /* parent */
		close(fd[1]);
		if (dup2(fd[0], STDIN_FILENO) == -1)
			_reporterr("dup2() failure");
		if (fd[0] != STDIN_FILENO)
			close(fd[0]);
		interpret_pipe(n->node.child.r);
	}
}
void
interpret(struct AST* n)
{
switch (n->type) {
case NODE_COMMAND:
interpret_command(n);
break;
case NODE_CONJ:
case NODE_DISJ:
interpret_junction(n);
break;
case NODE_PIPE:
interpret_pipe(n);
break;
}
}
/*
 * Obtain the next interactive input line.
 *
 * Line reading is currently disabled (see the commented-out code), so in
 * interactive mode this always returns 1, meaning "no line available".
 * In non-interactive mode it returns 0 and leaves port untouched.
 */
int
prompt(string_port *port)
{
	(void)port; /* only used by the disabled code below */

	if (!interactive_mode)
		return 0;
	/*
	if ((line = getline(geteuid() == 0 ? "s# " : "s$ "))) {
	*port = (string_port){ .kind=STRPORT_CHAR, .text=line, .place=0 };
	return 0;
	}
	*/
	return 1;
}
/*
 * Read everything available from fd until end of file and hand it back
 * as a NUL-terminated string.
 *
 * fd   descriptor to read from (not closed here)
 * out  receives a malloc()ed buffer that the caller must free()
 *
 * Any '\0' bytes in the data are removed so the result is a valid C
 * string.  Reads interrupted by a signal are retried; any other read
 * error ends the collection with what was gathered so far.  Running out
 * of memory prints a message and exits with status 1, matching the
 * policy of the e*alloc wrappers.
 */
void
drain_pipe(int fd, char **out)
{
	size_t cap = 1000;
	size_t len = 0;
	size_t src, dst;
	ssize_t n;
	char *buf, *grown;

	*out = NULL;
	if (!(buf = malloc(cap))) {
		fprintf(stderr, "drain_pipe: out of memory\n");
		exit(1);
	}

	/* read everything from the pipe into a buffer; one byte is always
	 * kept in reserve for the terminator */
	for (;;) {
		n = read(fd, buf + len, cap - 1 - len);
		if (n < 0 && errno == EINTR)
			continue;
		if (n <= 0)
			break;
		len += (size_t)n;
		if (len > cap / 2) {
			if (!(grown = realloc(buf, cap * 2))) {
				free(buf);
				fprintf(stderr, "drain_pipe: out of memory\n");
				exit(1);
			}
			buf = grown;
			cap *= 2;
		}
	}

	/* now strip out the \0 characters by compacting in place */
	for (src = dst = 0; src < len; src++)
		if (buf[src] != '\0')
			buf[dst++] = buf[src];
	buf[dst] = '\0';
	*out = buf;
}
/*
 * Parse one line from port and execute it.
 *
 * Builtins run in the shell process itself.  Anything else runs in a
 * forked child; unless the line ended in "&" the child is waited for.
 *
 * string_capture  when non-NULL, the child's standard output is collected
 *                 and stored in *string_capture (malloc()ed, caller
 *                 frees).  It is set to NULL when nothing was captured,
 *                 e.g. for a builtin or an empty line.
 *
 * Returns the status filled in by waitpid(), 0 when nothing was waited
 * for, or (interactive mode only; otherwise the process exits) 1 when
 * pipe() or fork() failed.
 */
int
parse_and_execute(string_port *port, char **string_capture)
{
	pid_t p;
	region r;
	struct AST *n;
	int bg;
	int status = 0;
	int fd[2];

	if (string_capture)
		*string_capture = NULL;
	region_create(&r);
	n = parse(&r, port, &bg);
	if (n && !perform_builtin(n)) {
		if (string_capture && pipe(fd) == -1) {
			region_free(&r);
			reportret(1, "pipe() failure");
		}
		p = fork();
		if (p == -1) {
			/* without this check waitpid(-1, ...) would wait for any child */
			if (string_capture) {
				close(fd[0]);
				close(fd[1]);
			}
			region_free(&r);
			reportret(1, "fork() failure");
		}
		if (!p) {
			if (string_capture) {
				close(fd[0]);
				if (dup2(fd[1], STDOUT_FILENO) == -1)
					_reporterr("dup2() failure");
				/* drop the extra reference to the pipe's write end */
				if (fd[1] != STDOUT_FILENO)
					close(fd[1]);
			}
			interpret(n);
			_reporterr("== SHOULD NEVER GET HERE ==");
		}
		if (string_capture) {
			/* close our write end first so the read loop sees EOF */
			close(fd[1]);
			drain_pipe(fd[0], string_capture);
			close(fd[0]);
		}
		if (!bg)
			waitpid(p, &status, 0);
	}
	region_free(&r);
	return status;
}
void
interpreter_loop(FILE *f)
{
string_port port;
if (interactive_mode) {
/*
linenoiseSetEncodingFunctions(
linenoiseUtf8PrevCharLen,
linenoiseUtf8NextCharLen,
linenoiseUtf8ReadCode);
*/
}
else
port = (string_port){ .kind=STRPORT_FILE, .fptr=f };
do {
if (prompt(&port)) {
if (errno == EAGAIN) {
errno = 0;
continue;
} else break;
}
parse_and_execute(&port, NULL);
if (interactive_mode) {
/* TODO: Only add if command was sucessful? */
// linenoiseHistoryAdd(port.text);
efree(port.text);
} else {
skip_newline(&port);
}
} while (!feof(f));
fclose(f);
vars_unset();
}
/* Initialise r as an empty region with room for two recorded allocations. */
void
region_create(region *r)
{
	enum { INITIAL_SLOTS = 2 };

	r->pointers = emalloc(sizeof(void*) * INITIAL_SLOTS);
	r->alloc_len = INITIAL_SLOTS;
	r->len = 0;
}
/*
 * Allocate size bytes and record the allocation in r so region_free()
 * releases it.  The bookkeeping array doubles whenever it is full.
 * Allocation failure terminates the program (see emalloc/erealloc).
 */
void *
region_malloc(region *r, size_t size)
{
	void *chunk;

	if (r->len >= r->alloc_len) {
		r->alloc_len *= 2;
		r->pointers = erealloc(r->pointers, r->alloc_len * sizeof(void*));
	}
	chunk = emalloc(size);
	r->pointers[r->len] = chunk;
	r->len++;
	return chunk;
}
/*
 * Resize the allocation v, which must have been returned by
 * region_malloc()/region_realloc() on the same region, and return its
 * (possibly moved) address.  If v is not recorded in r the program
 * terminates with an error.
 */
void *
region_realloc(region *r, void *v, size_t size)
{
	void **slot = r->pointers;
	void **end = r->pointers + r->len;

	for (; slot != end; slot++) {
		if (*slot != v)
			continue;
		*slot = erealloc(*slot, size);
		return *slot;
	}
	reporterr("unable to realloc region [%p]", v);
}
/*
 * Release every allocation recorded in r together with the bookkeeping
 * array.  Afterwards the region is empty; calling region_free() on it
 * again is harmless, but region_create() must be called before it is
 * used for new allocations.
 */
void
region_free(region *r)
{
	size_t i;

	for (i = 0; i < r->len; i++)
		efree(r->pointers[i]);
	efree(r->pointers);
	r->pointers = NULL;
	/* forget the freed entries so a second call does not walk a NULL table */
	r->len = 0;
}
/*
 * Return the next character of port without consuming it.  For a string
 * port this is the character at the current position ('\0' at the end);
 * for a file port the character is read and pushed back, so the result
 * is EOF at end of file.  An unknown port kind terminates the program.
 */
int
port_peek(string_port *port)
{
	if (port->kind == STRPORT_CHAR)
		return port->text[port->place];
	if (port->kind == STRPORT_FILE) {
		int ch = fgetc(port->fptr);

		ungetc(ch, port->fptr);
		return ch;
	}
	reporterr("port set to wrong kind");
}
/*
 * Return nonzero when port has no more input: the terminating '\0' has
 * been reached (string port) or the stream's end-of-file indicator is
 * set (file port).  An unknown port kind terminates the program.
 */
int
port_eof(string_port *port)
{
	if (port->kind == STRPORT_CHAR)
		return port->text[port->place] == '\0';
	if (port->kind == STRPORT_FILE)
		return feof(port->fptr);
	reporterr("port set to wrong kind");
}
/*
 * Consume and return the next character of port.  A string port returns
 * '\0' at its end without advancing; a file port returns whatever
 * fgetc() yields (EOF at end of file).  An unknown port kind terminates
 * the program.
 */
int
port_getc(string_port *port)
{
	if (port->kind == STRPORT_CHAR) {
		int ch = port->text[port->place];

		if (ch != '\0')
			port->place++;
		return ch;
	}
	if (port->kind == STRPORT_FILE)
		return fgetc(port->fptr);
	reporterr("port set to wrong kind");
}
/* calloc() that terminates the program with an error instead of returning NULL. */
void *
ecalloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		reporterr("calloc: out of memory");
	return mem;
}
/* malloc() that terminates the program with an error instead of returning NULL. */
void *
emalloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		reporterr("malloc: out of memory");
	return mem;
}
/* realloc() that terminates the program with an error instead of returning NULL. */
void *
erealloc(void *p, size_t size)
{
	void *grown = realloc(p, size);

	if (grown == NULL)
		reporterr("realloc: out of memory");
	return grown;
}
/* strdup() that terminates the program with an error instead of returning NULL. */
char *
estrdup(char *s)
{
	char *copy = strdup(s);

	if (copy == NULL)
		reporterr("strdup: out of memory");
	return copy;
}
/* Release p; a NULL pointer is accepted and ignored. */
void
efree(void *p)
{
	/* free() itself is a no-op for NULL */
	free(p);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment