Skip to content

Instantly share code, notes, and snippets.

@starwing starwing/lua_lexer.c
Last active Jan 31, 2017

Embed
What would you like to do?
lua lexer - a Lua C module implementing a lexer for Lua.
#define LL_IMPLEMENTATION
#include "lua_lexer.h"
#define LUA_LIB
#include <lua.h>
#include <lauxlib.h>
#include <errno.h>
#define LEXER_NAME "Lexer"
/* utils */
/* Reader state used by file_reader() when lexing directly from a FILE*. */
typedef struct LoadFileCtx {
int n; /* number of bytes pre-loaded in buff that must be handed out first */
FILE *fp; /* source file; set to NULL by file_reader() once it is closed */
char buff[LL_BUFFERSIZE]; /* read buffer passed back to the lexer */
} LoadFileCtx;
/*
 * Consume an optional UTF-8 byte-order mark at the start of ctx->fp.
 *
 * Returns the first character that is not part of a complete BOM (possibly
 * EOF).  When only a prefix of the BOM matched, the matched bytes are left
 * in ctx->buff (ctx->n counts them) so the lexer still sees them.
 */
static int skipBOM(LoadFileCtx *ctx) {
    static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF }; /* UTF-8 BOM mark */
    size_t i;
    ctx->n = 0;
    for (i = 0; i < sizeof bom; ++i) {
        int c = getc(ctx->fp);
        if (c == EOF || c != bom[i])
            return c;
        ctx->buff[ctx->n++] = c; /* to be read by the parser */
    }
    ctx->n = 0; /* full BOM matched: discard it */
    return getc(ctx->fp);
}
/*
 * Skip an optional BOM and an optional first line starting with '#'
 * (Unix executable script header).
 *
 * *cp receives the first character that still has to be lexed.
 * Returns 1 when a '#' line was skipped, 0 otherwise.
 */
static int skipcomment(LoadFileCtx *ctx, int *cp) {
    int ch = skipBOM(ctx);
    *cp = ch;
    if (ch != '#')
        return 0; /* no comment */
    /* discard the rest of the first line */
    while (ch != EOF && ch != '\n')
        ch = getc(ctx->fp);
    *cp = getc(ctx->fp); /* skip end-of-line, if present */
    return 1; /* there was a comment */
}
/*
 * ll_Reader callback that feeds the lexer from a FILE*.
 *
 * ud is a LoadFileCtx.  With a non-NULL plen the function returns the next
 * chunk (first any bytes pre-loaded in ctx->buff, then data from fread) and
 * stores its size in *plen.  With plen == NULL (the call made by
 * ll_cleanup), or at end of file / read error, the file is closed and NULL
 * is returned.  A read error is recorded as the lexer's message.
 */
static const char *file_reader(ll_State *ls, void *ud, size_t *plen) {
    LoadFileCtx *ctx = (LoadFileCtx*)ud;
    if (ctx->fp == NULL) return NULL; /* already closed */
    if (plen != NULL) {
        size_t bytes;
        if (ctx->n != 0) { /* pre-loaded bytes go first */
            *plen = ctx->n;
            ctx->n = 0;
            return ctx->buff;
        }
        bytes = fread(ctx->buff, 1, LL_BUFFERSIZE, ctx->fp);
        if (bytes != 0) {
            *plen = bytes;
            return ctx->buff;
        }
        if (ferror(ctx->fp))
            /* never use the system text as a format string: it may hold '%' */
            ll_setmesasge(ls, "%s", strerror(errno));
    }
    fclose(ctx->fp);
    ctx->fp = NULL;
    return NULL;
}
/*
 * Push the conventional failure triple onto the Lua stack:
 * nil, the lexer's current message, and the current line number.
 * Returns the number of pushed values (3).
 */
static int push_error(lua_State *L, ll_State *ls) {
    const char *msg = ll_message(ls);
    lua_pushnil(L);
    lua_pushstring(L, msg);
    lua_pushinteger(L, ll_linenumber(ls));
    return 3;
}
/*
 * Push a token onto the Lua stack and return how many values were pushed.
 *
 * Single-character tokens are pushed as a one-byte string.  Other tokens
 * push their name from ll_token(); LL_INDENT/LL_INT additionally push the
 * integer value, LL_FLT the number value, and LL_NAME/LL_STRING/LL_COMMENT
 * the token text.
 */
static int push_token(lua_State *L, ll_State *ls, int tok) {
    if (tok < LL_FIRST_RESERVED) {
        char single = tok;
        lua_pushlstring(L, &single, 1);
        return 1;
    }
    lua_pushstring(L, ll_token(tok));
    if (tok == LL_INDENT || tok == LL_INT) {
        lua_pushinteger(L, ll_integer(ls));
        return 2;
    }
    if (tok == LL_FLT) {
        lua_pushnumber(L, ll_number(ls));
        return 2;
    }
    if (tok == LL_NAME || tok == LL_STRING || tok == LL_COMMENT) {
        size_t textlen;
        const char *text = ll_string(ls, &textlen);
        lua_pushlstring(L, text, textlen);
        return 2;
    }
    return 1; /* token carries no payload */
}
/*
 * Anchor the value on top of the Lua stack in the registry (popping it).
 * An existing reference slot is overwritten in place; LUA_NOREF allocates a
 * new slot.  Returns the reference now holding the value.
 */
static int ref(lua_State *L, int ref) {
    if (ref == LUA_NOREF)
        return luaL_ref(L, LUA_REGISTRYINDEX);
    lua_rawseti(L, LUA_REGISTRYINDEX, ref);
    return ref;
}
/*
 * Release a registry reference if one is held.
 * Always returns LUA_NOREF so callers can reset their slot in one statement.
 */
static int unref(lua_State *L, int ref) {
    if (ref == LUA_NOREF)
        return LUA_NOREF;
    luaL_unref(L, LUA_REGISTRYINDEX, ref);
    return LUA_NOREF;
}
/* routines */
/* Lua userdata payload: the lexer state plus the registry references it owns. */
typedef struct Lll_State {
ll_State ls; /* must stay first: the binding casts Lll_State* <-> ll_State* */
int ref; /* registry ref passed as argument to the reader function; NOTE(review): never assigned in this file, stays LUA_NOREF */
int ref_func; /* registry ref of the Lua reader function */
int ref_data; /* registry ref keeping the current chunk (string or file context) alive */
} Lll_State;
/*
 * ll_Reader callback that pulls chunks from a Lua function.
 *
 * ud is the lua_State.  The function stored in ll->ref_func is called with
 * the value stored in ll->ref; a nil result ends the stream, a string result
 * is anchored in the registry (ll->ref_data) so it stays valid while the
 * lexer reads from it.  Any other result raises a Lua error.
 *
 * size == NULL is the "close" notification sent by ll_cleanup(); there is
 * nothing to close here, so the Lua function must not be called and nothing
 * may be written through size.
 */
static const char *generic_reader (ll_State *ls, void *ud, size_t *size) {
    const char *res;
    Lll_State *ll = (Lll_State*)ls;
    lua_State *L = (lua_State*)ud;
    if (size == NULL) /* cleanup request: no resources held by this reader */
        return NULL;
    luaL_checkstack(L, 2, "too many nested functions");
    lua_rawgeti(L, LUA_REGISTRYINDEX, ll->ref_func);
    lua_rawgeti(L, LUA_REGISTRYINDEX, ll->ref);
    lua_call(L, 1, 1);
    if (lua_isnil(L, -1)) {
        lua_pop(L, 1);
        *size = 0;
        return NULL;
    }
    else if (!lua_isstring(L, -1))
        luaL_error(L, "reader function must return a string");
    res = lua_tolstring(L, -1, size);
    ll->ref_data = ref(L, ll->ref_data); /* pops and anchors the chunk */
    return res;
}
/*
 * Lua: lexer.new() -> Lexer
 * Create a fresh lexer userdata with no source loaded and no registry
 * references, and attach the LEXER_NAME metatable to it.
 */
static int Lnew(lua_State *L) {
    Lll_State *self = (Lll_State*)lua_newuserdata(L, sizeof *self);
    ll_initstate(&self->ls);
    self->ref_data = LUA_NOREF;
    self->ref_func = LUA_NOREF;
    self->ref = LUA_NOREF;
    luaL_setmetatable(L, LEXER_NAME);
    return 1;
}
/*
 * Lua: lexer.load([self,] chunk) -> Lexer | nil, message, line
 *
 * chunk is either a string (lexed from memory) or a reader function.  When
 * the first argument is not a Lexer, a new one is created and inserted at
 * stack index 1.  The chunk / function is anchored in the registry for the
 * lifetime of the load.
 */
static int Lload(lua_State *L) {
    Lll_State *ll = luaL_testudata(L, 1, LEXER_NAME);
    const char *chunk;
    size_t chunklen;
    int status;
    if (ll == NULL) { /* called without a lexer: make one */
        Lnew(L);
        lua_insert(L, 1);
        ll = (Lll_State*)lua_touserdata(L, 1);
    }
    chunk = lua_tolstring(L, 2, &chunklen);
    if (chunk == NULL) { /* not a string: must be a reader function */
        luaL_checktype(L, 2, LUA_TFUNCTION);
        lua_pushvalue(L, 2);
        ll->ref_func = ref(L, ll->ref_func);
        status = ll_load(&ll->ls, generic_reader, L);
    }
    else {
        lua_pushvalue(L, 2);
        ll->ref_data = ref(L, ll->ref_data);
        status = ll_loadbuffer(&ll->ls, chunk, chunklen);
    }
    if (status != 0)
        return push_error(L, &ll->ls);
    lua_settop(L, 1); /* return the lexer itself */
    return 1;
}
/*
 * Lua: lexer.loadfile([self,] filename) -> Lexer | nil, message, line
 *
 * Opens filename, skips an optional UTF-8 BOM and '#' first line, and
 * installs file_reader() as the lexer's input.  When the first argument is
 * not a Lexer, a new one is created and inserted at stack index 1.  The
 * read context is a Lua userdata anchored in the registry (ref_data).
 */
static int Lloadfile(lua_State *L) {
    Lll_State *ll = luaL_testudata(L, 1, LEXER_NAME);
    LoadFileCtx *ctx;
    const char *fn;
    FILE *fp;
    int ch;
    if (ll == NULL) {
        Lnew(L);
        lua_insert(L, 1);
        ll = (Lll_State*)lua_touserdata(L, 1);
    }
    fn = luaL_checkstring(L, 2);
    if ((fp = fopen(fn, "r")) == NULL) {
        /* pass the system text as data, not as a printf format */
        ll_setmesasge(&ll->ls, "%s", strerror(errno));
        return push_error(L, &ll->ls);
    }
    ctx = (LoadFileCtx*)lua_newuserdata(L, sizeof(LoadFileCtx));
    ctx->fp = fp;
    ctx->n = 0;
    if (skipcomment(ctx, &ch)) /* keep line numbers right after a '#' line */
        ctx->buff[ctx->n++] = '\n';
    if (ch != EOF) /* first real character was already consumed */
        ctx->buff[ctx->n++] = ch;
    if (ll_load(&ll->ls, file_reader, ctx) < 0) {
        fclose(ctx->fp);
        ctx->fp = NULL; /* do not leave a dangling FILE* behind */
        return push_error(L, &ll->ls);
    }
    ll->ref_data = ref(L, ll->ref_data); /* anchors ctx (top of stack) */
    lua_settop(L, 1);
    return 1;
}
/*
 * Lua: lexer.delete(self) / __gc
 * Release the lexer's resources and registry references.  The object is
 * left in a freshly initialised state, so calling this twice is harmless.
 * Raises an argument error when self is not a Lexer (the previous
 * luaL_testudata result was dereferenced without a NULL check).
 */
static int Ldelete(lua_State *L) {
    Lll_State *ll = (Lll_State*)luaL_checkudata(L, 1, LEXER_NAME);
    ll_cleanup(&ll->ls);
    ll->ref = unref(L, ll->ref);
    ll->ref_func = unref(L, ll->ref_func);
    ll->ref_data = unref(L, ll->ref_data);
    return 0;
}
/*
 * Lua: __tostring
 * Lexer objects render as "Lexer: <address>"; any other value falls back
 * to luaL_tolstring.
 */
static int Ltostring(lua_State *L) {
    Lll_State *self = (Lll_State*)luaL_testudata(L, 1, LEXER_NAME);
    if (self != NULL)
        lua_pushfstring(L, LEXER_NAME ": %p", self);
    else
        luaL_tolstring(L, 1, NULL);
    return 1;
}
/*
 * Lua: lexer:escape() -> string | nil, message, line
 * Return the decoded value of the current string token as produced by
 * ll_escape(); on NULL the usual error triple is returned.
 */
static int Lescape(lua_State *L) {
    ll_State *lexer = (ll_State*)luaL_checkudata(L, 1, LEXER_NAME);
    size_t n;
    const char *decoded = ll_escape(lexer, &n);
    if (decoded != NULL) {
        lua_pushlstring(L, decoded, n);
        return 1;
    }
    return push_error(L, lexer);
}
/*
 * Lua: lexer:current() -> token [, value] | nil, message, line
 * Report the token the lexer is currently positioned on.
 */
static int Lcurrent(lua_State *L) {
    ll_State *lexer = (ll_State*)luaL_checkudata(L, 1, LEXER_NAME);
    int token = ll_current(lexer);
    return (token < 0) ? push_error(L, lexer)
                       : push_token(L, lexer, token);
}
/*
 * Lua: lexer:next() / __call -> token [, value] | nil, message, line
 * Advance to the next token and report it.
 */
static int Lnext(lua_State *L) {
    ll_State *lexer = (ll_State*)luaL_checkudata(L, 1, LEXER_NAME);
    int token = ll_next(lexer);
    return (token < 0) ? push_error(L, lexer)
                       : push_token(L, lexer, token);
}
/*
 * Lua: lexer:linenumber() / __len -> integer
 * Return the line number reported by ll_linenumber().
 */
static int Llinenumber(lua_State *L) {
    ll_State *lexer = (ll_State*)luaL_checkudata(L, 1, LEXER_NAME);
    int line = ll_linenumber(lexer);
    lua_pushinteger(L, line);
    return 1;
}
/*
 * Module entry point: require "lexer".
 *
 * Creates (once) the LEXER_NAME metatable, fills it with the metamethods
 * and methods below, makes it its own __index, and returns it as the
 * module table.
 */
LUALIB_API int luaopen_lexer(lua_State *L) {
    static const luaL_Reg libs[] = {
        /* metamethods */
        { "__gc",       Ldelete },
        { "__tostring", Ltostring },
        { "__call",     Lnext },
        { "__len",      Llinenumber },
        /* methods */
        { "new",        Lnew },
        { "load",       Lload },
        { "loadfile",   Lloadfile },
        { "delete",     Ldelete },
        { "current",    Lcurrent },
        { "next",       Lnext },
        { "escape",     Lescape },
        { "linenumber", Llinenumber },
        { NULL, NULL }
    };
    if (!luaL_newmetatable(L, LEXER_NAME))
        return 1; /* metatable already set up: just return it */
    luaL_setfuncs(L, libs, 0);
    lua_pushvalue(L, -1);
    lua_setfield(L, -2, "__index");
    return 1;
}
/* win32cc: flags+='-s -O2 -mdll -DLUA_BUILD_AS_DLL ' output='lexer.dll'
* win32cc: libs+='-llua53' output='lexer.dll'
* maccc: flags+='-O2 -shared -undefined dynamic_lookup' output='lexer.so' */
#ifndef lua_lexer_h
#define lua_lexer_h
/* LL_NS_BEGIN / LL_NS_END wrap the declarations in extern "C" when compiled
 * as C++ and expand to nothing in C.  Define LL_NS_BEGIN before including
 * this header to override both. */
#ifndef LL_NS_BEGIN
# ifdef __cplusplus
# define LL_NS_BEGIN extern "C" {
# define LL_NS_END }
# else
# define LL_NS_BEGIN
# define LL_NS_END
# endif
#endif /* LL_NS_BEGIN */
#ifdef LL_STATIC_API
# ifndef LL_IMPLEMENTATION
# define LL_IMPLEMENTATION
# endif
# if __GNUC__
# define LL_API static __attribute((unused))
# else
# define LL_API static
# endif
#endif
#ifndef LL_API
# define LL_API extern
#endif
#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS)
# define _CRT_SECURE_NO_WARNINGS
#endif
#include <setjmp.h>
#include <stdarg.h>
#include <stddef.h>
LL_NS_BEGIN
/* Lexer state; the full definition appears further down in this header. */
typedef struct ll_State ll_State;
/* Input callback.  Called with a non-NULL plen to fetch the next chunk: it
 * returns a pointer to the data and stores its size in *plen; returning NULL
 * or a zero size ends the input.  ll_cleanup() calls it once with
 * plen == NULL so the reader can release its resources. */
typedef const char *ll_Reader (ll_State *ls, void *ud, size_t *plen);
/* Integer type of LL_INT tokens; define LL_NO_LONGLONG to use ptrdiff_t. */
#if LL_NO_LONGLONG
typedef ptrdiff_t ll_Integer;
#else
typedef long long ll_Integer;
#endif
/* Floating type of LL_FLT tokens; define LL_NO_DOUBLE to use float. */
#if LL_NO_DOUBLE
typedef float ll_Number;
#else
typedef double ll_Number;
#endif
/* Status codes.  The negative ones are also returned in place of a token by
 * ll_next() / ll_current(). */
#define LL_OK (0)
#define LL_ERROR (-1)
#define LL_ERRMEM (-2)
/* Token codes below this value are plain single-character tokens. */
#define LL_FIRST_RESERVED 257
/* Multi-character token codes.  The order must match the lx_tokens name
 * table in the implementation section, which is indexed by
 * (token - LL_FIRST_RESERVED). */
enum LL_RESERVED {
/* terminal symbols denoted by reserved words */
LL_AND = LL_FIRST_RESERVED, LL_BREAK,
LL_DO, LL_ELSE, LL_ELSEIF, LL_END, LL_FALSE, LL_FOR, LL_FUNCTION,
LL_GOTO, LL_IF, LL_IN, LL_LOCAL, LL_NIL, LL_NOT, LL_OR, LL_REPEAT,
LL_RETURN, LL_THEN, LL_TRUE, LL_UNTIL, LL_WHILE,
/* other terminal symbols */
LL_IDIV, LL_CONCAT, LL_DOTS, LL_EQ, LL_GE, LL_LE, LL_NE,
LL_SHL, LL_SHR,
LL_DBCOLON, LL_EOS,
LL_FLT, LL_INT, LL_NAME, LL_STRING,
/* LL_COMMENT and LL_INDENT are reported as tokens by this lexer; LL_LAST is
 * one past the last valid code */
LL_COMMENT, LL_INDENT, LL_LAST
};
/* Zero the state and initialise its token and message buffers. */
LL_API void ll_initstate (ll_State *ls);
/* Notify the reader (plen == NULL), free both buffers and re-initialise. */
LL_API void ll_cleanup (ll_State *ls);
/* Replace the error message with printf-style formatted text.  (The
 * misspelt name is part of the public interface.) */
LL_API void ll_setmesasge (ll_State *ls, const char *fmt, ...);
/* Start lexing from a reader callback; returns LL_OK. */
LL_API int ll_load (ll_State *ls, ll_Reader *reader, void *ud);
/* Start lexing from a memory buffer of len bytes; the buffer is not copied. */
LL_API int ll_loadbuffer (ll_State *ls, const char *s, size_t len);
/* Same as ll_loadbuffer with len = strlen(s). */
LL_API int ll_loadstring (ll_State *ls, const char *s);
/* Current token; lexes the first token if none has been read yet. */
LL_API int ll_current (ll_State *ls);
/* Advance and return the next token, or a negative status code on error. */
LL_API int ll_next (ll_State *ls);
/* Current line number, starting at 1. */
LL_API int ll_linenumber (ll_State *ls);
/* Integer payload of the current token (value of LL_INT, width of LL_INDENT). */
LL_API ll_Integer ll_integer (ll_State *ls);
/* Floating-point payload of the current LL_FLT token. */
LL_API ll_Number ll_number (ll_State *ls);
/* Text of the last error message. */
LL_API const char *ll_message (ll_State *ls);
/* Raw text of the current token; its length is stored in *plen if non-NULL. */
LL_API const char *ll_string (ll_State *ls, size_t *plen);
/* Decoded value of the current LL_STRING token, or NULL on error or when the
 * current token is not a string. */
LL_API const char *ll_escape (ll_State *ls, size_t *plen);
/* Printable name of a multi-character token code, or NULL if out of range. */
LL_API const char *ll_token (int token);
/* lex buffer */
/* Growable byte buffer that starts out on a fixed inline array. */
typedef struct ll_Buffer ll_Buffer;
/* Size of the inline storage and the starting point for growth. */
#define LL_BUFFERSIZE 1024
/* Accessors: start of the data, number of bytes used, and "make empty". */
#define ll_buffer(B) ((B)->buff)
#define ll_buffsize(B) ((B)->size)
#define ll_resetbuffer(B) ((B)->size = 0)
/* Append a NUL-terminated string (evaluates s twice). */
#define ll_addstring(B, s) ll_addlstring((B),(s),strlen(s))
/* Set up an empty buffer; jbuf (may be NULL) is the longjmp target used on
 * allocation failure. */
LL_API void ll_initbuffer (ll_Buffer *B, jmp_buf *jbuf);
/* Release heap storage, if any, and reset to the inline array. */
LL_API void ll_freebuffer (ll_Buffer *B);
/* Ensure room for len more bytes and return the write position. */
LL_API char *ll_prepbuffsize (ll_Buffer *B, size_t len);
/* Append operations; each returns the new size, or 0 on failure. */
LL_API int ll_addchar (ll_Buffer *B, int ch);
LL_API int ll_addlstring (ll_Buffer *B, const char *s, size_t len);
LL_API int ll_addvfstring (ll_Buffer *B, const char *fmt, va_list l);
LL_API int ll_addfstring (ll_Buffer *B, const char *fmt, ...);
/* structs */
/* Byte buffer: uses init_buffer until it overflows, then heap storage. */
struct ll_Buffer {
size_t size, capacity; /* bytes in use / bytes available in buff */
jmp_buf *jbuf; /* longjmp target on allocation failure, or NULL to return NULL instead */
char *buff; /* current storage: init_buffer or a malloc'ed block */
char init_buffer[LL_BUFFERSIZE]; /* inline storage used before any allocation */
};
/* Complete lexer state.  Callers should go through the ll_* functions. */
struct ll_State {
jmp_buf jbuf; /* error recovery point set by the public entry points */
ll_Reader *reader; /* input callback, or NULL when lexing a memory buffer */
void *ud; /* opaque argument passed to reader */
size_t n; /* bytes still unread */
const char *p; /* position of the next unread byte */
int line; /* current line number */
int current; /* current char */
int seplen; /* length of string delimiter */
int token; /* current token */
ll_Integer integer; /* value of an LL_INT token, or width of an LL_INDENT token */
ll_Number number; /* value of an LL_FLT token */
ll_Buffer buffer; /* token data */
ll_Buffer errmsg; /* error message */
};
LL_NS_END
#endif /* lua_lexer_h */
#if defined(LL_IMPLEMENTATION) && !defined(ll_implemented)
#define ll_implemented
#include <assert.h>
#include <limits.h>
#include <locale.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Decimal-point character of the current C locale, used as a fallback when
 * parsing numerals.  May be predefined by the includer; hard-wired to '.' on
 * Android. */
#if !defined(ll_getlocaledecpoint)
# ifdef __ANDROID__
# define ll_getlocaledecpoint() '.'
# else
# define ll_getlocaledecpoint() (localeconv()->decimal_point[0])
# endif /* __ANDROID__ */
#endif
LL_NS_BEGIN
/* lex buffer */
/*
 * Put B into its empty state: data lives in the inline array, nothing is
 * used, and jbuf (possibly NULL) is remembered as the out-of-memory
 * longjmp target.
 */
LL_API void ll_initbuffer(ll_Buffer *B, jmp_buf *jbuf) {
    B->buff = B->init_buffer;
    B->capacity = LL_BUFFERSIZE;
    B->size = 0;
    B->jbuf = jbuf;
}
/*
 * Release any heap storage owned by B and return it to the empty inline
 * state.  Note that the longjmp target is cleared (set to NULL) as well.
 */
LL_API void ll_freebuffer(ll_Buffer *B) {
    char *heap = (B->buff == B->init_buffer) ? NULL : B->buff;
    free(heap); /* free(NULL) is a no-op */
    ll_initbuffer(B, NULL);
}
/*
 * Make sure B can take len more bytes and return the address where they
 * should be written (B->size is not changed).
 *
 * The capacity grows by doubling from LL_BUFFERSIZE.  On allocation failure
 * the function longjmps to *B->jbuf with LL_ERRMEM when a target is set,
 * otherwise it returns NULL; the existing contents stay intact.
 */
LL_API char *ll_prepbuffsize(ll_Buffer *B, size_t len) {
    if (B->size + len > B->capacity) {
        size_t grown = LL_BUFFERSIZE;
        char *storage;
        while (grown < B->size + len && grown < ~(size_t)0/2)
            grown *= 2;
        if (B->buff == B->init_buffer) {
            /* first overflow: move the inline contents to the heap */
            storage = (char*)malloc(grown);
            if (storage != NULL)
                memcpy(storage, B->buff, B->size);
        }
        else
            storage = (char*)realloc(B->buff, grown);
        if (storage == NULL) {
            if (B->jbuf) longjmp(*B->jbuf, LL_ERRMEM);
            return NULL;
        }
        B->buff = storage;
        B->capacity = grown;
    }
    return &B->buff[B->size];
}
/*
 * Append one byte to B.
 * Returns the new size, or 0 when no room could be obtained.
 */
LL_API int ll_addchar(ll_Buffer *B, int ch) {
    char *slot = (char*)ll_prepbuffsize(B, 1);
    if (slot == NULL)
        return 0;
    *slot = ch;
    B->size += 1;
    return B->size;
}
/*
 * Append len bytes from s to B.
 *
 * Room is reserved for the whole string plus one byte (the original code
 * reserved a single byte and could write past the end of the storage).  A
 * NUL is written after the data without being counted in the size, so a
 * buffer built from appended strings can be read as a C string, as
 * ll_message() does.
 *
 * Returns the new size, or 0 when no room could be obtained.
 */
LL_API int ll_addlstring(ll_Buffer *B, const char *s, size_t len) {
    char *ptr = (char*)ll_prepbuffsize(B, len + 1);
    if (ptr == NULL) return 0;
    if (len != 0) /* s may be NULL for an empty append */
        memcpy(ptr, s, len);
    ptr[len] = '\0';
    return B->size += len;
}
/*
 * Append printf-style formatted text to B.
 *
 * The text is first formatted into init_size+1 reserved bytes; if it does
 * not fit, exactly len+1 bytes are reserved and it is formatted again.  The
 * size passed to vsnprintf always includes the terminating NUL (the
 * original passed one byte too few, which dropped the last character of any
 * output of init_size characters or more).  The terminator is written but
 * not counted in the buffer size.
 *
 * Returns the new size, or 0 on formatting / allocation failure.
 */
LL_API int ll_addvfstring(ll_Buffer *B, const char *fmt, va_list l) {
    const int init_size = 80;
    char *ptr;
    int len;
    va_list l_count;
    if ((ptr = (char*)ll_prepbuffsize(B, init_size+1)) == NULL)
        return 0;
    va_copy(l_count, l); /* l itself may be needed for the second pass */
    len = vsnprintf(ptr, (size_t)init_size+1, fmt, l_count);
    va_end(l_count);
    if (len < 0) {
        *ptr = '\0'; /* leave the buffer readable as a C string */
        return 0;
    }
    if (len > init_size) { /* truncated: redo with the exact size */
        if ((ptr = (char*)ll_prepbuffsize(B, (size_t)len+1)) == NULL)
            return 0;
        vsnprintf(ptr, (size_t)len+1, fmt, l);
    }
    return B->size += len;
}
/*
 * Variadic front end of ll_addvfstring(): append printf-style formatted
 * text to B and return what ll_addvfstring() returns.
 */
LL_API int ll_addfstring(ll_Buffer *B, const char *fmt, ...) {
    va_list args;
    int newsize;
    va_start(args, fmt);
    newsize = ll_addvfstring(B, fmt, args);
    va_end(args);
    return newsize;
}
/* utils */
/* Maximum number of hexadecimal digits accumulated into the mantissa by
 * lx_strx2number; further digits only adjust the exponent. */
#define LX_XNUM_MAXSIGDIG 30
/* Size of the scratch buffer filled (from the end) by lx_encodeutf8. */
#define LX_UTF8_BUFFERSIZE 8
#define lx_isnewline(ch) ((ch) == '\n' || (ch) == '\r')
/* Locale-independent character classification.  lx_charmap holds one bit
 * per lx_Type class; the character is reduced to its low 8 bits and offset
 * by one before indexing, so any int (including negative char values) is a
 * safe argument.  ch is evaluated exactly once. */
#define lx_mask(F) (1 << LX_##F)
#define lx_checkmask(ch,mask) ((lx_charmap[((ch)&0xFF)+1] & (mask)) != 0)
#define lx_isalpha(ch) lx_checkmask(ch, lx_mask(ALPHA))
#define lx_isdigit(ch) lx_checkmask(ch, lx_mask(DIGIT))
#define lx_isprint(ch) lx_checkmask(ch, lx_mask(PRINT))
#define lx_isspace(ch) lx_checkmask(ch, lx_mask(SPACE))
#define lx_isxdigit(ch) lx_checkmask(ch, lx_mask(XDIGIT))
#define lx_isalnum(ch) lx_checkmask(ch, lx_mask(ALPHA)|lx_mask(DIGIT))
/* Bit positions of the character classes inside an lx_charmap entry. */
enum lx_Type { LX_ALPHA, LX_DIGIT, LX_PRINT, LX_SPACE, LX_XDIGIT };
/* Character class table used by the lx_is* macros.  Entry [c + 1] describes
 * byte value c as a bit set: 0x01 alpha (letters and '_'), 0x02 digit,
 * 0x04 printable, 0x08 space, 0x10 hexadecimal digit.  Each comment on the
 * right names the high nibble of the 16 byte values covered by that pair of
 * rows.
 * NOTE(review): lx_checkmask masks its argument with 0xFF before adding 1,
 * so LX_EOZ (-1) is looked up in the last entry (byte 0xFF) rather than in
 * the leading "EOZ" slot; both are 0x00, so the result is the same. */
static const unsigned char lx_charmap[UCHAR_MAX + 2] = {
0x00, /* EOZ */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0. */
0x00, 0x08, 0x08, 0x08, 0x08, 0x08, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 1. */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0c, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, /* 2. */
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, /* 3. */
0x16, 0x16, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x04, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x05, /* 4. */
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, /* 5. */
0x05, 0x05, 0x05, 0x04, 0x04, 0x04, 0x04, 0x05,
0x04, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x05, /* 6. */
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, /* 7. */
0x05, 0x05, 0x05, 0x04, 0x04, 0x04, 0x04, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8. */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 9. */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* a. */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* b. */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c. */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* d. */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* e. */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* f. */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
/*
 * Numeric value (0..15) of a hexadecimal digit character.
 * Any character that is not a hexadecimal digit yields 0.
 */
static int lx_hexavalue(int ch) {
    int value = 0;
    if (ch >= 'a' && ch <= 'f')
        value = 10 + (ch - 'a');
    else if (ch >= 'A' && ch <= 'F')
        value = 10 + (ch - 'A');
    else if (ch >= '0' && ch <= '9')
        value = ch - '0';
    return value;
}
/*
 * Encode code point x (at most 0x10FFFF) as UTF-8.
 *
 * The bytes are written right-aligned into buff, which must hold
 * LX_UTF8_BUFFERSIZE bytes: the sequence occupies the last n bytes, where n
 * is the return value.
 */
static int lx_encodeutf8(char *buff, unsigned long x) {
    int count = 1; /* bytes produced so far, counting from the end */
    unsigned int limit = 0x3f; /* largest value that fits in the lead byte */
    assert(x <= 0x10FFFF);
    if (x < 0x80) { /* plain ASCII: a single byte */
        buff[LX_UTF8_BUFFERSIZE - 1] = (char)x;
        return count;
    }
    do { /* emit continuation bytes, least significant first */
        buff[LX_UTF8_BUFFERSIZE - count] = (char)(0x80 | (x & 0x3f));
        ++count;
        x >>= 6;
        limit >>= 1; /* each continuation byte costs the lead byte one bit */
    } while (x > limit);
    buff[LX_UTF8_BUFFERSIZE - count] = (char)((~limit << 1) | x); /* lead byte */
    return count;
}
/*
 * Consume an optional sign at *s.
 * Returns 1 (and skips the character) for '-', 0 otherwise; a leading '+'
 * is skipped as well.
 */
static int lx_checkneg(const char **s) {
    const char sign = **s;
    if (sign != '-' && sign != '+')
        return 0;
    ++*s;
    return sign == '-';
}
/*
 * Convert a complete decimal or 0x-prefixed hexadecimal numeral.
 *
 * The whole string must be consumed: on success *endptr points at the
 * terminating NUL.  On failure 0 is returned and *endptr points just past
 * the optional sign.  The value is accumulated in unsigned arithmetic, so
 * overlong numerals wrap around.
 */
static ll_Integer lx_str2integer(const char *s, char **endptr) {
    unsigned long long acc = 0;
    int seen_digit = 0;
    const int negative = lx_checkneg(&s);
    *endptr = (char*)s;
    if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { /* hex? */
        s += 2; /* skip '0x' */
        while (lx_isxdigit(*s & 0xFF)) {
            acc = acc * 16 + lx_hexavalue(*s);
            seen_digit = 1;
            ++s;
        }
    }
    else { /* decimal */
        while (lx_isdigit(*s & 0xFF)) {
            acc = acc * 10 + *s - '0';
            seen_digit = 1;
            ++s;
        }
    }
    if (!seen_digit || *s != '\0') /* something wrong in the numeral */
        return 0;
    *endptr = (char*)s;
    return (ll_Integer)(negative ? 0ull - acc : acc);
}
/*
 * Convert a hexadecimal floating-point numeral ("0x" digits, optional '.',
 * optional binary exponent introduced by 'p' or 'P').
 *
 * *endptr is set to the end of the part that was accepted; it is left at the
 * start of s when the text is not a hexadecimal numeral at all.  The result
 * is mantissa * 2^e computed with ldexp.
 */
static ll_Number lx_strx2number(const char *s, char **endptr) {
ll_Number r = 0.0;
int sigdig = 0; /* number of significant digits */
int nosigdig = 0; /* number of non-significant digits */
int e = 0; /* exponent correction */
int neg; /* 1 if number is negative */
int hasdot = 0; /* true after seen a dot */
*endptr = (char*)s; /* nothing is valid yet */
while (lx_isspace(*s)) s++; /* skip initial spaces */
neg = lx_checkneg(&s);
if (!(*s == '0' && (*(s + 1) == 'x' || *(s + 1) == 'X')))
return 0.0;
for (s += 2; ; s++) {
if (*s == '.') {
if (hasdot) break;
else hasdot = 1;
}
else if (lx_isxdigit(*s)) {
/* leading zeros carry no precision and are only counted */
if (sigdig == 0 && *s == '0')
nosigdig++;
else if (++sigdig <= LX_XNUM_MAXSIGDIG)
r = (r * (ll_Number)16.0) + lx_hexavalue(*s);
/* digits beyond the precision limit only scale the value */
else e++;
/* every digit after the dot divides the value by 16 */
if (hasdot) e--;
}
else break;
}
if (nosigdig + sigdig == 0)
return 0.0;
*endptr = (char*)s;
e *= 4; /* each digit multiplies/divides value by 2^4 */
if (*s == 'p' || *s == 'P') {
int exp1 = 0; /* exponent value */
int neg1; /* exponent signal */
s++; /* skip 'p' */
neg1 = lx_checkneg(&s); /* signal */
if (!lx_isdigit(*s))
return 0.0; /* invalid; must have at least one digit */
while (lx_isdigit(*s)) /* read exponent */
exp1 = exp1 * 10 + *(s++) - '0';
if (neg1) exp1 = -exp1;
e += exp1;
*endptr = (char*)s; /* valid up to here */
}
if (neg) r = -r;
return (ll_Number)ldexp(r, e);
}
/* lexer */
/* Pseudo-characters stored in ll_State.current. */
#define LX_EOZ (-1) /* end of buffer */
#define LX_START (-2) /* start of buffer */
/* Append a character to the token buffer. */
#define lx_save(ls,ch) ll_addchar(&(ls)->buffer,(ch))
/* NUL-terminate the token buffer without counting the terminator in its size. */
#define lx_endstring(ls) (*ll_prepbuffsize(&(ls)->buffer, 1) = '\0')
/* Append the current character to the token buffer, then read the next one. */
#define lx_savenext(ls) (lx_save(ls,(ls)->current), lx_next(ls))
/* Append the "[in]:<line>: " position prefix (1-based line) to the error
 * message. */
static void lx_addinfo(ll_State *ls) {
    const int shown = ls->line + 1;
    ll_addfstring(&ls->errmsg, "[in]:%d: ", shown);
}
/* Printable names of the multi-character tokens, indexed by
 * (token - LL_FIRST_RESERVED); the order must match enum LL_RESERVED. */
static const char *const lx_tokens[] = {
"and", "break", "do", "else", "elseif",
"end", "false", "for", "function", "goto", "if",
"in", "local", "nil", "not", "or", "repeat",
"return", "then", "true", "until", "while",
"//", "..", "...", "==", ">=", "<=", "~=",
"<<", ">>", "::", "<eof>",
"<number>", "<integer>", "<name>", "<string>",
"<comment>", "<indent>"
};
/*
 * Advance to the next input character and return it (also stored in
 * ls->current).  Returns LX_EOZ at end of input, and keeps returning it on
 * every later call.
 *
 * When the current chunk is exhausted the reader, if any, is asked for
 * more.  The unread counter is only decremented when a byte is actually
 * consumed: the original post-decrement left ls->n wrapped around to
 * SIZE_MAX on the end-of-input path, so a second call after LX_EOZ read
 * past the buffer (or through a NULL pointer).
 */
static int lx_next(ll_State *ls) {
    if (ls->n == 0) { /* chunk exhausted */
        size_t size = 0;
        const char *buff;
        if (ls->current == LX_EOZ || ls->reader == NULL)
            return ls->current = LX_EOZ;
        buff = ls->reader(ls, ls->ud, &size);
        if (buff == NULL || size == 0) {
            ls->n = 0;
            ls->p = NULL;
            return ls->current = LX_EOZ;
        }
        ls->n = size;
        ls->p = buff;
    }
    --ls->n;
    return ls->current = (unsigned char)*ls->p++;
}
/*
 * If the current character equals c, save it, advance and return 1;
 * otherwise leave the input untouched and return 0.
 */
static int lx_checknext1(ll_State *ls, int c) {
    if (ls->current != c)
        return 0;
    lx_savenext(ls);
    return 1;
}
/*
 * Like lx_checknext1, but accepts either character of the two-character
 * string set.
 */
static int lx_checknext2(ll_State *ls, const char *set) {
    const int cur = ls->current;
    assert(set[2] == '\0');
    if (cur != set[0] && cur != set[1])
        return 0;
    lx_savenext(ls);
    return 1;
}
/*
 * Append a human-readable rendering of token to the error message:
 * the quoted token text for names, strings and numerals, the quoted
 * character for single-character tokens, the quoted spelling for keywords
 * and operators, and the bare placeholder name (e.g. <eof>) for the rest.
 */
static void lx_addtoken(ll_State *ls, int token) {
    ll_Buffer *err = &ls->errmsg;
    if (token == LL_NAME || token == LL_STRING ||
        token == LL_FLT || token == LL_INT) {
        lx_endstring(ls);
        ll_addfstring(err, "'%s'", ll_buffer(&ls->buffer));
    }
    else if (token < LL_FIRST_RESERVED) { /* single-byte symbols? */
        assert(token == (unsigned char)token);
        ll_addfstring(err, "'%c'", token);
    }
    else {
        const char *text = lx_tokens[token - LL_FIRST_RESERVED];
        if (token >= LL_EOS)
            ll_addstring(err, text);
        else
            ll_addfstring(err, "'%s'", text);
    }
}
/*
 * Report a lexical error and unwind to the active setjmp with LL_ERROR.
 *
 * With a non-NULL msg the error message is rebuilt as "<position><msg>";
 * with msg == NULL the caller has already written the message.  A non-zero
 * token appends " near <token>".  Does not return.
 */
static void lx_error(ll_State *ls, const char *msg, int token) {
    ll_Buffer *err = &ls->errmsg;
    if (msg != NULL) {
        ll_resetbuffer(err);
        lx_addinfo(ls);
        ll_addlstring(err, msg, strlen(msg));
    }
    if (token != 0) {
        ll_addlstring(err, " near ", strlen(" near "));
        lx_addtoken(ls, token);
    }
    longjmp(ls->jbuf, LL_ERROR);
}
/*
 * Read a long-bracket prefix: the current '[' or ']' followed by any number
 * of '='.  All of it is saved in the token buffer.
 *
 * Returns the number of '=' when the sequence is followed by a matching
 * second bracket (which is not consumed), otherwise -(count) - 1.
 */
static int lx_sep(ll_State *ls) {
    const int bracket = ls->current;
    int level;
    assert(bracket == '[' || bracket == ']');
    lx_savenext(ls);
    for (level = 0; ls->current == '='; ++level)
        lx_savenext(ls);
    if (ls->current == bracket)
        return level;
    return (-level) - 1;
}
/*
 * Consume one line break ("\n", "\r", "\r\n" or "\n\r") and bump the line
 * counter; raises a lexical error when the counter reaches INT_MAX.
 */
static void lx_newline(ll_State *ls) {
    const int first = ls->current;
    assert(lx_isnewline(ls->current));
    lx_next(ls); /* skip '\n' or '\r' */
    if (ls->current != first && lx_isnewline(ls->current))
        lx_next(ls); /* skip '\n\r' or '\r\n' */
    ls->line += 1;
    if (ls->line >= INT_MAX)
        lx_error(ls, "chunk has too many lines", 0);
}
/*
 * Measure the indentation at the start of a line.
 *
 * Spaces count 1 and tabs count 8; the total is left in ls->integer.
 * Lines consisting only of blanks are skipped.  Returns LL_INDENT when a
 * line with content was found, LL_EOS when the input ended first.
 */
static int lx_indent(ll_State *ls) {
    for (;;) {
        ls->integer = 0;
        for (;;) {
            if (ls->current == ' ')
                ls->integer += 1;
            else if (ls->current == '\t')
                ls->integer += 8;
            else
                break;
            lx_next(ls);
        }
        if (ls->current == LX_EOZ)
            return LL_EOS;
        if (!lx_isnewline(ls->current))
            return LL_INDENT;
        lx_newline(ls); /* blank line: measure the next one */
    }
}
/*
 * Read a short string literal delimited by del (the current character).
 *
 * The raw source text, including both delimiters and the escape sequences
 * as written, is stored in the token buffer; decoding is done later by
 * ll_escape().  Line breaks inside "\<newline>" and "\z" sequences are
 * normalised to '\n'.  Raises "unfinished string" at end of input or at an
 * unescaped line break.  Returns LL_STRING and sets ls->seplen to 1.
 */
static int lx_string(ll_State *ls, int del) {
    lx_savenext(ls); /* keep delimiter (for error messages) */
    while (ls->current != del) {
        switch (ls->current) {
        case LX_EOZ:
            lx_error(ls, "unfinished string", LL_EOS);
            break;
        case '\n':
        case '\r':
            lx_error(ls, "unfinished string", LL_STRING);
            break;
        case '\\': /* escape sequences */
            lx_savenext(ls);
            switch (ls->current) {
            case LX_EOZ:
                /* backslash at end of input: do not save the end marker or
                 * read past the end; the outer loop reports the error */
                break;
            case '\r': case '\n':
                lx_newline(ls), lx_save(ls, '\n');
                break;
            case 'z':
                lx_savenext(ls);
                while (lx_isspace(ls->current)) {
                    if (!lx_isnewline(ls->current))
                        lx_savenext(ls);
                    else
                        lx_newline(ls), lx_save(ls, '\n');
                }
                break;
            default: /* keep the escaped character verbatim */
                lx_savenext(ls);
            }
            break;
        default:
            lx_savenext(ls);
        }
    }
    lx_savenext(ls); /* skip delimiter */
    ls->seplen = 1;
    return LL_STRING;
}
/*
 * Read the body of a long string or long comment whose opening bracket has
 * sep '=' characters; the current character is the second '['.
 *
 * Everything up to and including the matching closing bracket is saved in
 * the token buffer, with line breaks normalised to '\n'.  iscomment only
 * selects the wording of the "unfinished long ..." error raised at end of
 * input.  That message now starts from an empty buffer, names the right
 * construct (the wording was inverted) and reports the starting line
 * 1-based, like the position prefix.
 */
static void lx_longstring(ll_State *ls, int iscomment, int sep) {
    int line = ls->line; /* initial line (for error message) */
    lx_savenext(ls); /* skip 2nd '[' */
    for (;;) {
        switch (ls->current) {
        case LX_EOZ:
            ll_resetbuffer(&ls->errmsg); /* drop any stale message */
            lx_addinfo(ls);
            ll_addfstring(&ls->errmsg,
                    "unfinished long %s (starting at line %d)",
                    (iscomment ? "comment" : "string"), line + 1);
            lx_error(ls, NULL, LL_EOS);
            break;
        case ']':
            if (lx_sep(ls) == sep) {
                lx_savenext(ls); /* skip 2nd ']' */
                return;
            }
            break;
        case '\n': case '\r':
            lx_newline(ls), lx_save(ls, '\n');
            break;
        default:
            lx_savenext(ls);
        }
    }
}
/*
 * Classify and convert the NUL-terminated numeral in the token buffer.
 *
 * Tried in order: integer (decimal or hex), hexadecimal float, strtod, and
 * finally strtod with '.' replaced by the locale's decimal point.  The
 * value is stored in ls->integer or ls->number.  Returns LL_INT or LL_FLT;
 * raises "malformed number" when nothing consumes the whole text.
 *
 * After the locale attempt the decimal points are put back, so the token
 * text is unchanged for the caller and for the error message.  (The
 * original wrote a single '.' over the terminating NUL instead.)
 */
static int lx_checknumber(ll_State *ls) {
    int i, ok, size = ll_buffsize(&ls->buffer);
    char *s = ll_buffer(&ls->buffer), *endptr;
    char decpoint;
    ls->integer = lx_str2integer(s, (char**)&endptr);
    if (endptr-s == size)
        return LL_INT;
    ls->number = lx_strx2number(s, (char**)&endptr);
    if (endptr-s == size)
        return LL_FLT;
    ls->number = (ll_Number)strtod(s, (char**)&endptr);
    if (endptr-s == size)
        return LL_FLT;
    /* last resort: the C locale may use a different decimal point */
    decpoint = ll_getlocaledecpoint();
    for (i = 0; i < size; ++i)
        if (s[i] == '.') s[i] = decpoint;
    ls->number = (ll_Number)strtod(s, (char**)&endptr);
    ok = (endptr-s == size);
    for (i = 0; i < size; ++i) /* undo the substitution */
        if (s[i] == decpoint) s[i] = '.';
    if (ok)
        return LL_FLT;
    lx_error(ls, "malformed number", LL_FLT);
    return 0;
}
/*
 * Read a numeral starting at the current digit.
 *
 * The scan is deliberately permissive: it collects hexadecimal digits,
 * dots and exponent marks ('e'/'E', or 'p'/'P' after a 0x prefix) with an
 * optional sign, and leaves validation to lx_checknumber(), whose result
 * (LL_INT or LL_FLT) is returned.
 */
static int lx_numeral(ll_State *ls) {
    const int lead = ls->current;
    const char *expmarks = "Ee";
    assert(lx_isdigit(ls->current));
    lx_savenext(ls);
    if (lead == '0' && lx_checknext2(ls, "xX")) /* hexadecimal? */
        expmarks = "Pp";
    for (;;) {
        if (lx_checknext2(ls, expmarks)) /* exponent part? */
            lx_checknext2(ls, "-+"); /* optional exponent sign */
        if (!lx_isxdigit(ls->current) && ls->current != '.')
            break;
        lx_savenext(ls);
    }
    lx_endstring(ls);
    return lx_checknumber(ls);
}
static int lx_checkkeyword(const char *s, size_t len) {
switch (*s) {
#define KW(str, tok) \
if (len == sizeof(str)-1 && memcmp(s+1, "" str+1, sizeof(str)-2) == 0) \
return tok;
case 'a': KW("and", LL_AND); break;
case 'b': KW("break", LL_BREAK); break;
case 'd': KW("do", LL_DO); break;
case 'e': KW("else", LL_ELSE); KW("elseif", LL_ELSEIF);
KW("end", LL_END); break;
case 'f': KW("false", LL_FALSE); KW("for", LL_FOR);
KW("function", LL_FUNCTION); break;
case 'g': KW("goto", LL_GOTO); break;
case 'i': KW("if", LL_IF); KW("in", LL_IN); break;
case 'l': KW("local", LL_LOCAL); break;
case 'n': KW("nil", LL_NIL); KW("not", LL_NOT); break;
case 'o': KW("or", LL_OR); break;
case 'r': KW("repeat", LL_REPEAT); KW("return", LL_RETURN); break;
case 't': KW("then", LL_THEN); break;
case 'u': KW("until", LL_UNTIL); break;
case 'w': KW("while", LL_WHILE); break;
#undef KW
}
return LL_NAME;
}
/*
 * Scan and return the next token.
 *
 * The token text (where one is kept) is accumulated in ls->buffer, which is
 * emptied first.  Besides the usual Lua tokens this lexer reports comments
 * (LL_COMMENT) and, at the start of the input and after every line break,
 * the indentation of the next non-blank line (LL_INDENT, width in
 * ls->integer).  Single-character tokens are returned as their character
 * code.  Errors unwind through lx_error().
 *
 * '=' has its own case so that "==" is reported as LL_EQ; without it the
 * operator was returned as two separate '=' tokens.
 */
static int lx_lexer(ll_State *ls) {
    ll_resetbuffer(&ls->buffer);
    for (;;) {
        switch (ls->current) {
        case LX_START: /* nothing read yet */
            lx_next(ls);
            return lx_indent(ls);
        case '\n': case '\r': /* line breaks */
            lx_newline(ls);
            return lx_indent(ls);
        case ' ': case '\f': case '\t': case '\v': /* blanks inside a line */
            lx_next(ls);
            break;
        case '-': /* '-' or comment */
            lx_savenext(ls);
            if (ls->current != '-') return '-';
            lx_savenext(ls);
            if (ls->current == '[') {
                int sep = lx_sep(ls);
                if (sep >= 0) {
                    lx_longstring(ls, 1, sep);
                    return LL_COMMENT;
                }
            }
            /* else short comment */
            while (!lx_isnewline(ls->current) && ls->current != LX_EOZ)
                lx_savenext(ls); /* until end of line (or end of source) */
            return LL_COMMENT;
        case '[': /* long string or simply '[' */
            {
                int sep = lx_sep(ls);
                if (sep < -1) /* '[=...' missing second bracket */
                    lx_error(ls, "invalid long string delimiter", LL_STRING);
                else if (sep >= 0) {
                    lx_longstring(ls, 0, sep);
                    ls->seplen = sep + 2; /* '[' + sep '=' + '[' */
                    return LL_STRING;
                }
            }
            return '[';
        case '=': /* '=' or '==' */
            lx_savenext(ls);
            if (lx_checknext1(ls, '=')) return LL_EQ;
            return '=';
        case '<':
            lx_savenext(ls);
            if (lx_checknext1(ls, '=')) return LL_LE;
            else if (lx_checknext1(ls, '<')) return LL_SHL;
            return '<';
        case '>':
            lx_savenext(ls);
            if (lx_checknext1(ls, '=')) return LL_GE;
            else if (lx_checknext1(ls, '>')) return LL_SHR;
            return '>';
        case '/':
            lx_savenext(ls);
            if (lx_checknext1(ls, '/')) return LL_IDIV;
            return '/';
        case '~':
            lx_savenext(ls);
            if (lx_checknext1(ls, '=')) return LL_NE;
            return '~';
        case ':':
            lx_savenext(ls);
            if (lx_checknext1(ls, ':')) return LL_DBCOLON;
            return ':';
        case '"': case '\'': /* short literal strings */
            return lx_string(ls, ls->current);
        case '.': /* '.', '..', '...', or number */
            lx_savenext(ls);
            if (lx_checknext1(ls, '.')) {
                if (lx_checknext1(ls, '.'))
                    return LL_DOTS; /* '...' */
                return LL_CONCAT; /* '..' */
            }
            if (!lx_isdigit(ls->current)) return '.';
            return lx_numeral(ls);
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            return lx_numeral(ls);
        case LX_EOZ:
            return LL_EOS;
        default:
            if (lx_isalpha(ls->current)) { /* identifier or reserved word? */
                do {
                    lx_savenext(ls);
                } while (lx_isalnum(ls->current));
                lx_endstring(ls);
                return lx_checkkeyword(ll_buffer(&ls->buffer),
                        ll_buffsize(&ls->buffer));
            }
            else { /* single-char tokens (+ - / ...) */
                int c = ls->current;
                lx_next(ls);
                return c;
            }
        }
    }
}
/* escape routines */
/* Raise the lexical error msg (reported near the string token) unless
 * check holds. */
static void lx_checkescape(ll_State *ls, int check, const char *msg) {
    if (check)
        return;
    lx_error(ls, msg, LL_STRING);
}
/* Return the value of hexadecimal digit ch, raising a lexical error when
 * ch is not a hexadecimal digit. */
static int lx_checkhexa(ll_State *ls, int ch) {
    const int is_hex = lx_isxdigit(ch);
    lx_checkescape(ls, is_hex, "hexadecimal digit expected");
    return lx_hexavalue(ch);
}
/*
 * Decode a "\xXX" escape; s points at the 'x'.
 * Saves the resulting byte and returns how far the caller must advance s
 * to stand on the last character of the escape (always 2).
 */
static int lx_hexaesc(ll_State *ls, const char *s) {
    const int high = lx_checkhexa(ls, s[1]);
    const int low = lx_checkhexa(ls, s[2]);
    lx_save(ls, (high << 4) + low);
    return 2;
}
/*
 * Decode a "\u{XXX}" escape; s points at the 'u'.
 * Saves the UTF-8 encoding of the code point and returns how far the
 * caller must advance s to stand on the closing '}'.  Raises a lexical
 * error for a missing brace, a missing digit or a value above 0x10FFFF.
 */
static int lx_utf8esc(ll_State *ls, const char *s) {
    const char *start = s;
    char utf8[LX_UTF8_BUFFERSIZE];
    unsigned long code;
    int left;
    ++s;
    lx_checkescape(ls, *s == '{', "missing '{'");
    ++s;
    code = lx_checkhexa(ls, *s); /* must have at least one digit */
    for (++s; lx_isxdigit(*s); ++s) {
        code = (code << 4) + lx_hexavalue(*s);
        lx_checkescape(ls, code <= 0x10FFFF, "UTF-8 value too large");
    }
    lx_checkescape(ls, *s == '}', "missing '}'");
    /* the encoder fills the scratch buffer from its end */
    for (left = lx_encodeutf8(utf8, code); left > 0; --left)
        lx_save(ls, utf8[LX_UTF8_BUFFERSIZE - left]);
    return s - start;
}
/*
 * Decode a decimal escape "\d", "\dd" or "\ddd"; s points at the first
 * digit (the caller has already checked that it is one).
 *
 * Saves the resulting byte and returns how far the caller must advance s
 * to stand on the last digit of the escape (0, 1 or 2).  Raises a lexical
 * error when the value exceeds UCHAR_MAX.
 *
 * The original skipped the first digit, accumulated ls->current instead of
 * the digits of the string, and returned a negative advance.
 */
static int lx_decesc(ll_State *ls, const char *s) {
    const char *os = s;
    int i, r = 0; /* result accumulator */
    for (i = 0; i < 3 && lx_isdigit(*s); ++i, ++s)
        r = 10*r + (*s - '0');
    lx_checkescape(ls, r <= UCHAR_MAX, "decimal escape too large");
    lx_save(ls, r);
    return (int)(s - os) - 1;
}
/*
 * Decode the short string literal held in the token buffer.
 *
 * The buffer contains the raw text including both delimiters.  Decoding is
 * done in place: the buffer is emptied and the decoded bytes are appended
 * while the raw text is still being read from the same storage.  This
 * relies on every escape sequence being at least as long as its decoded
 * form, so the write position never overtakes the read position.
 *
 * Returns the NUL-terminated decoded string and stores its length in *plen
 * (if non-NULL).  Malformed escapes raise a lexical error via lx_error().
 */
static const char *lx_escape(ll_State *ls, size_t *plen) {
    const char *s = ll_buffer(&ls->buffer);
    int del = *s++; /* opening delimiter; the same character ends the text */
    ll_resetbuffer(&ls->buffer);
    for (; *s != del; ++s) {
        if (*s != '\\') { lx_save(ls, *s); continue; }
        switch (*++s) {
        case 'a': lx_save(ls, '\a'); break;
        case 'b': lx_save(ls, '\b'); break;
        case 'f': lx_save(ls, '\f'); break;
        case 'n': lx_save(ls, '\n'); break;
        case 'r': lx_save(ls, '\r'); break;
        case 't': lx_save(ls, '\t'); break;
        case 'v': lx_save(ls, '\v'); break;
        case '\n': case '\r':
            lx_save(ls, '\n'); break;
        case '\\': case '\"': case '\'':
            lx_save(ls, *s); break;
        case 'x': s += lx_hexaesc(ls, s); break;
        case 'u': s += lx_utf8esc(ls, s); break;
        case 'z': /* zap following span of spaces */
            /* stop ON the last space: the loop increment then moves to the
             * first character after the span instead of skipping it */
            while (lx_isspace(s[1]))
                ++s;
            break;
        default:
            lx_checkescape(ls, lx_isdigit(*s),
                    "invalid escape sequence");
            s += lx_decesc(ls, s);
            break;
        }
    }
    lx_endstring(ls);
    if (plen) *plen = ll_buffsize(&ls->buffer);
    return ll_buffer(&ls->buffer);
}
/* interface routines */
/* Return the text of the most recent error message. */
LL_API const char *ll_message(ll_State *ls) {
    return ls->errmsg.buff;
}
/* Return the integer payload of the current token. */
LL_API ll_Integer ll_integer(ll_State *ls) {
    const ll_Integer value = ls->integer;
    return value;
}
/* Return the floating-point payload of the current token. */
LL_API ll_Number ll_number(ll_State *ls) {
    const ll_Number value = ls->number;
    return value;
}
/* Return the current line number, counting from 1 (the state counts from 0). */
LL_API int ll_linenumber(ll_State *ls) {
    return 1 + ls->line;
}
/* Return the current token; if nothing has been lexed since the source was
 * loaded, lex the first token. */
LL_API int ll_current(ll_State *ls) {
    if (ls->current != LX_START)
        return ls->token;
    return ll_next(ls);
}
/*
 * Start lexing the len bytes at s.  The buffer is not copied and must stay
 * valid while it is being lexed.  Returns the result of ll_load().
 *
 * The buffer is installed after ll_load(): ll_load() may run ll_cleanup(),
 * which zeroes the whole state, so setting p/n first (as the original did)
 * lost the buffer whenever a reader had been loaded before.
 */
LL_API int ll_loadbuffer(ll_State *ls, const char *s, size_t len) {
    int res = ll_load(ls, NULL, NULL);
    ls->p = s;
    ls->n = len;
    return res;
}
/* Start lexing the NUL-terminated string s (see ll_loadbuffer). */
LL_API int ll_loadstring(ll_State *ls, const char *s) {
    const size_t len = strlen(s);
    return ll_loadbuffer(ls, s, len);
}
/*
 * Post-process a status code coming back from setjmp: for LL_ERRMEM the
 * error message is replaced by "<position>out of memory".  Returns ret
 * unchanged.
 *
 * The message buffer is emptied first so the text is not appended to an
 * older message, and so that writing it fits in the existing storage and
 * cannot itself fail.  It is written with the formatting routine so that
 * it is NUL-terminated.
 */
static int ll_result(ll_State *ls, int ret) {
    if (ret == LL_ERRMEM) {
        ll_resetbuffer(&ls->errmsg);
        lx_addinfo(ls);
        ll_addfstring(&ls->errmsg, "out of memory");
    }
    return ret;
}
/*
 * Bring ls into its pristine state: every field zero, both buffers empty
 * and wired to ls->jbuf as their out-of-memory longjmp target.
 */
LL_API void ll_initstate(ll_State *ls) {
    jmp_buf *recover = &ls->jbuf;
    memset(ls, 0, sizeof *ls);
    ll_initbuffer(&ls->buffer, recover);
    ll_initbuffer(&ls->errmsg, recover);
}
/*
 * Release everything held by ls: the reader (if any) is called once with a
 * NULL length pointer so it can close its source, both buffers are freed,
 * and the state is re-initialised.
 */
LL_API void ll_cleanup(ll_State *ls) {
    ll_Reader *closer = ls->reader;
    if (closer != NULL)
        closer(ls, ls->ud, NULL);
    ll_freebuffer(&ls->buffer);
    ll_freebuffer(&ls->errmsg);
    ll_initstate(ls);
}
/*
 * Replace the error message with printf-style formatted text.  A NULL or
 * empty fmt clears the message.
 *
 * This function can be called from inside a reader callback, i.e. while
 * ll_next() is active and ls->jbuf is its recovery point.  It therefore
 * uses a private jmp_buf for its own out-of-memory handling instead of
 * overwriting ls->jbuf, which would have left the lexer jumping into a
 * dead stack frame on its next error.
 */
LL_API void ll_setmesasge(ll_State *ls, const char *fmt, ...) {
    jmp_buf local;
    jmp_buf *saved = ls->errmsg.jbuf;
    ll_resetbuffer(&ls->errmsg);
    ll_buffer(&ls->errmsg)[0] = '\0'; /* an empty message reads as "" */
    if (fmt == NULL || fmt[0] == '\0') return;
    ls->errmsg.jbuf = &local;
    if (setjmp(local) == 0) {
        va_list l;
        va_start(l, fmt);
        ll_addvfstring(&ls->errmsg, fmt, l);
        va_end(l);
    }
    else {
        /* allocation failed: report that instead, without jumping again */
        ls->errmsg.jbuf = NULL;
        ll_result(ls, LL_ERRMEM);
    }
    ls->errmsg.jbuf = saved;
}
/*
 * Start lexing from reader (called with ud).  A previously installed
 * reader is closed through ll_cleanup() first.  Returns LL_OK.
 *
 * reader == NULL means "lex the memory buffer described by ls->p / ls->n",
 * which the caller sets up (see ll_loadbuffer); those two fields are left
 * alone in that case.  When a reader is installed, any leftover buffer
 * from an earlier load is dropped so the first read goes to the reader.
 * The token and line counter are reset so that a state that already
 * reached LL_EOS can be reused.
 */
LL_API int ll_load(ll_State *ls, ll_Reader *reader, void *ud) {
    if (ls->reader != NULL) ll_cleanup(ls);
    if (reader != NULL) { /* forget any previous in-memory chunk */
        ls->p = NULL;
        ls->n = 0;
    }
    ls->reader = reader;
    ls->ud = ud;
    ls->current = LX_START;
    ls->token = 0;
    ls->line = 0;
    return LL_OK;
}
/*
 * Advance to the next token and return it.
 *
 * Returns a token code (>= 0), or LL_ERROR / LL_ERRMEM with the details
 * available from ll_message().  Once LL_EOS has been returned, further
 * calls fail with "end of stream".
 *
 * setjmp is used as the controlling expression of a switch, one of the few
 * contexts in which the C standard defines its behaviour (it was previously
 * passed as a function argument).  The "end of stream" text is written
 * with the formatting routine so that it is NUL-terminated.
 */
LL_API int ll_next(ll_State *ls) {
    if (ls->token == LL_EOS) {
        ll_resetbuffer(&ls->errmsg);
        ll_addfstring(&ls->errmsg, "end of stream");
        return LL_ERROR;
    }
    switch (setjmp(ls->jbuf)) {
    case 0: /* direct return: run the scanner */
        return ls->token = lx_lexer(ls);
    case LL_ERRMEM: /* allocation failure inside the scanner */
        return ls->token = ll_result(ls, LL_ERRMEM);
    default: /* lexical error; message already set */
        return ls->token = LL_ERROR;
    }
}
/* Return the text of the current token as stored in the token buffer; its
 * length is written to *plen when plen is not NULL. */
LL_API const char *ll_string(ll_State *ls, size_t *plen) {
    const ll_Buffer *text = &ls->buffer;
    if (plen != NULL)
        *plen = text->size;
    return text->buff;
}
/* Return the printable name of a multi-character token code, or NULL when
 * token is outside [LL_FIRST_RESERVED, LL_LAST). */
LL_API const char *ll_token(int token) {
    if (token >= LL_FIRST_RESERVED && token < LL_LAST)
        return lx_tokens[token - LL_FIRST_RESERVED];
    return NULL;
}
/*
 * Return the value of the current LL_STRING token: escape sequences decoded
 * for short strings, brackets (and one leading line break) stripped for
 * long strings.  The length is stored in *plen when plen is not NULL.
 * Returns NULL when the current token is not a string or when decoding
 * fails (see ll_message()).
 *
 * Short strings are decoded in place, which destroys the raw text.  After
 * a successful decode seplen is set to 0 to mark the buffer as decoded, so
 * a repeated call returns the same result instead of decoding it again.
 *
 * setjmp is used as the controlling expression of a switch, one of the
 * contexts in which the C standard defines its behaviour.
 */
LL_API const char *ll_escape(ll_State *ls, size_t *plen) {
    size_t len = ll_buffsize(&ls->buffer);
    char *s;
    if (ls->token != LL_STRING) return NULL;
    if (ls->seplen == 0) /* short string already decoded by an earlier call */
        return ll_string(ls, plen);
    if (ls->seplen == 1) { /* short string: decode escapes */
        const char *decoded;
        switch (setjmp(ls->jbuf)) {
        case 0:
            break;
        case LL_ERRMEM:
            ll_result(ls, LL_ERRMEM);
            return NULL;
        default:
            return NULL;
        }
        decoded = lx_escape(ls, plen);
        ls->seplen = 0;
        return decoded;
    }
    /* long string: drop the opening and closing brackets */
    s = ll_buffer(&ls->buffer) + ls->seplen;
    len -= ls->seplen*2;
    s[len] = '\0';
    if (lx_isnewline(*s)) ++s, --len; /* first line break is not content */
    if (plen) *plen = len;
    return s;
}
LL_NS_END
#endif /* LL_IMPLEMENTATION */
/* win32cc: flags+='-O3 -shared -DLL_IMPLEMENTATION -xc' output='lexer.dll'
* unixcc: flags+='-O3 -shared -DLL_IMPLEMENTATION -xc' output='lexer.so' */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.