Skip to content

Instantly share code, notes, and snippets.

@coline-carle
Created November 13, 2016 18:35
Show Gist options
  • Save coline-carle/b17c1c36e2b1ff06b70abed8c89c03e1 to your computer and use it in GitHub Desktop.
Save coline-carle/b17c1c36e2b1ff06b70abed8c89c03e1 to your computer and use it in GitHub Desktop.
#include "luaparser.h"
#define EXC_ENCODING rb_utf8_encoding()
static VALUE mLuaTable;
static VALUE CNaN;
static VALUE eParserError, eNestingError;
/* unicode */
/*
 * Lookup table indexed by byte value. The entries for '0'-'9', 'A'-'F' and
 * 'a'-'f' hold the hexadecimal digit value (0-15); every other entry is -1,
 * which unescape_unicode() treats as "not a hex digit".
 */
static const char digit_values[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
-1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1
};
/*
 * Decode the four hexadecimal digits at p[0..3] into one 16-bit value.
 *
 * p must point at four readable bytes. Returns the decoded value, or
 * UNI_REPLACEMENT_CHAR as soon as one of the bytes is not a hex digit.
 */
static UTF32 unescape_unicode(const unsigned char *p)
{
    UTF32 result = 0;
    int i;

    for (i = 0; i < 4; i++) {
        /*
         * Plain char may be unsigned, in which case the table's -1 entries
         * read back as 255 and never compare below zero. Going through
         * signed char keeps the sentinel detectable on every platform.
         */
        signed char digit = (signed char) digit_values[p[i]];

        if (digit < 0) return UNI_REPLACEMENT_CHAR;
        result = (result << 4) | (unsigned char) digit;
    }
    return result;
}
/*
 * Encode the code point ch as UTF-8 into buf and return the number of bytes
 * written (1 to 4). buf must have room for four bytes. Values above
 * 0x1fffff are written as a single '?'.
 */
static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
{
    /* One byte: plain ASCII. */
    if (ch <= 0x7F) {
        buf[0] = (char) ch;
        return 1;
    }

    /* Two bytes: lead byte carries the top bits, then one continuation. */
    if (ch <= 0x07FF) {
        buf[0] = (char) ((ch >> 6) | 0xC0);
        buf[1] = (char) ((ch & 0x3F) | 0x80);
        return 2;
    }

    /* Three bytes. */
    if (ch <= 0xFFFF) {
        buf[0] = (char) ((ch >> 12) | 0xE0);
        buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
        buf[2] = (char) ((ch & 0x3F) | 0x80);
        return 3;
    }

    /* Four bytes. */
    if (ch <= 0x1fffff) {
        buf[0] = (char) ((ch >> 18) | 0xF0);
        buf[1] = (char) (((ch >> 12) & 0x3F) | 0x80);
        buf[2] = (char) (((ch >> 6) & 0x3F) | 0x80);
        buf[3] = (char) ((ch & 0x3F) | 0x80);
        return 4;
    }

    /* Out of range: emit a placeholder. */
    buf[0] = '?';
    return 1;
}
/* Debug helper: prints v.inspect through Kernel.puts. */
static void d(VALUE v) {
    ID puts_id = rb_intern("puts");
    ID inspect_id = rb_intern("inspect");
    VALUE inspected = rb_funcall(v, inspect_id, 0);

    rb_funcall(rb_mKernel, puts_id, 1, inspected);
}
void Init_luaparser(void)
{
mLuaTable = rb_define_module("LuaTable");
rb_define_singleton_method(mLuaTable, "parse", cParser_parse, 1);
CNaN = rb_const_get(mLuaTable, rb_intern("NaN"));
eParserError = rb_path2class("LuaTable::ParserError");
eNestingError = rb_path2class("LuaTable::NestingError");
}
%%{
machine LuaTable_common;
# Shared character classes and token definitions. The other machines pull
# these in with an include statement.
cr = '\n';
# Any character except newline. This has to be an unquoted or-expression:
# in quotes it would be a literal string instead of a character class, and
# the comment rule below could never match the text of a comment.
cr_neg = [^\n];
ws =[ \t\r\n];
value_separator = ',';
# A line comment: two dashes, then everything up to and including newline.
comment = '--' cr_neg* cr;
ignore = ws | comment;
begin_number = digit | '-';
Vnil = 'nil';
Vfalse = 'false';
Vtrue = 'true';
VNaN = 'NaN';
begin_string = '"';
begin_variable_name = alpha | '_';
begin_value = [Nnft\"\-] | digit;
begin_key = "[";
begin_name = begin_key | begin_variable_name;
begin_assignation = begin_name;
valid_variable_character = alnum | '_';
}%%
// "
/*
 * Ragel machine LuaTable_value: recognizes one value, optionally surrounded
 * by whitespace and comments.
 *
 * The literals nil, true, false and NaN store Qnil, Qtrue, Qfalse and CNaN
 * in *result. A leading digit or minus sign hands the input to
 * LuaTable_parse_integer(); a leading double quote hands it to
 * LuaTable_parse_string(). The parse_number action always ends with
 * fhold; fbreak;, whether or not the integer parser returned a position.
 */
%%{
machine LuaTable_value;
include LuaTable_common;
write data;
action parse_nil {
*result = Qnil;
}
action parse_false {
*result = Qfalse;
}
action parse_true {
*result = Qtrue;
}
action parse_nan {
*result = CNaN;
}
action parse_number {
char *np = LuaTable_parse_integer(lua, p, pe, result);
if(np != NULL) fexec np;
fhold; fbreak;
}
action parse_string {
char *np = LuaTable_parse_string(lua, fpc, pe, result);
if (np == NULL){
fhold; fbreak;
}
else {
fexec np;
}
}
action exit { fhold; fbreak; }
main := ignore* ( Vnil @parse_nil |
Vtrue @parse_true |
Vfalse @parse_false |
VNaN @parse_nan |
begin_number >parse_number |
begin_string >parse_string
) ignore* %*exit;
}%%
/*
 * Run the LuaTable_value machine over [p, pe) and store the recognized value
 * in *result.
 *
 * Returns the position at which the machine stopped when it ended in a final
 * state, NULL otherwise. current_nesting is accepted but not used here.
 */
static char* LuaTable_parse_value(Lua_Parser *lua, char *p, char *pe, VALUE *result, int current_nesting) {
    int cs;

    %% write init;
    %% write exec;

    return (cs >= LuaTable_value_first_final) ? p : NULL;
}
/*
 * Ragel machine LuaTable_assignation: recognizes  name = value .
 *
 * The first character of the name triggers parse_name, which delegates to
 * LuaTable_parse_name() and leaves the key in key_name. After the equals
 * sign (whitespace and comments allowed on both sides) the first character
 * of the value triggers parse_value, which delegates to
 * LuaTable_parse_value() and stores the result in the hash *context under
 * key_name. Either action stops the machine when its helper returns NULL.
 */
%%{
machine LuaTable_assignation;
include LuaTable_common;
write data;
action parse_value {
VALUE v = Qnil;
char *np = LuaTable_parse_value(lua, fpc, pe, &v, current_nesting);
if (np == NULL) {
fhold; fbreak;
} else {
rb_hash_aset(*context, key_name, v);
fexec np;
}
}
action parse_name {
char *np;
np = LuaTable_parse_name(lua, fpc, pe, &key_name);
if (np == NULL) { fhold; fbreak; } else fexec np;
}
action exit { fhold; fbreak; }
main := begin_name >parse_name ignore* '=' ignore* begin_value >parse_value @exit;
}%%
/*
 * Run the LuaTable_assignation machine over [p, pe); the parsed key/value
 * pair is stored in the hash *context by the machine's actions.
 *
 * Raises eNestingError when lua->max_nesting is non-zero and current_nesting
 * exceeds it. Returns one past the position where the machine stopped when
 * it ended in a final state, NULL otherwise.
 */
static char *LuaTable_parse_assignation(Lua_Parser *lua, char *p, char *pe, VALUE *context, int current_nesting)
{
    VALUE key_name = Qnil;
    int cs;

    if (lua->max_nesting != 0 && lua->max_nesting < current_nesting) {
        rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
    }

    %% write init;
    %% write exec;

    if (cs < LuaTable_assignation_first_final) {
        return NULL;
    }
    return p + 1;
}
/*
 * Ragel machine LuaTable_name: recognizes the left-hand side of an
 * assignment, either a bare variable name or a bracketed key.
 *
 * parse_variable_name hands the input to LuaTable_parse_variable_name() and
 * parse_key_name hands the bracket content to LuaTable_parse_string(). Both
 * actions judge success by whether *result is nil afterwards and then mark
 * the result as UTF-8 with FORCE_UTF8.
 *
 * NOTE(review): both actions discard the pointer returned by the helper and
 * advance with fexec p + 1 instead -- confirm this is intended.
 */
%%{
machine LuaTable_name;
include LuaTable_common;
write data;
action parse_variable_name {
LuaTable_parse_variable_name(lua, fpc, pe, result);
if (NIL_P(*result)) {
fhold;
fbreak;
} else {
FORCE_UTF8(*result);
fexec p + 1;
}
}
action parse_key_name {
LuaTable_parse_string(lua, fpc, pe, result);
if (NIL_P(*result)) {
fhold;
fbreak;
} else {
FORCE_UTF8(*result);
fexec p + 1;
}
}
action exit { fhold; fbreak; }
main := (( begin_variable_name >parse_variable_name) |
('[' ignore* (^[\]])* >parse_key_name ignore* ']'))
@exit;
}%%
/*
 * Run the LuaTable_name machine over [p, pe) and store the parsed key in
 * *result (reset to Qnil before the machine runs).
 *
 * Returns one past the position where the machine stopped when it ended in
 * a final state, NULL otherwise.
 */
static char *LuaTable_parse_name(Lua_Parser *lua, char *p, char *pe, VALUE *result)
{
    int cs;
    /* The whole input is in the buffer, so end-of-file coincides with pe. */
    char *eof = pe;

    *result = Qnil;

    %% write init;
    %% write exec;

    /* Ragel contract: every state numbered >= first_final is final. */
    if (cs >= LuaTable_name_first_final) {
        return p + 1;
    }
    return NULL;
}
/*
 * Ragel machine LuaTable_integer: an optional minus sign followed by either
 * a single 0 or a non-zero digit and any number of further digits. The
 * first non-digit character after the number runs the exit action
 * (fhold; fbreak;), which stops the machine.
 */
%%{
machine LuaTable_integer;
action exit { fhold; fbreak; }
main := '-' ? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
}%%
%% write data;
/*
 * Run the LuaTable_integer machine over [p, pe) and store the parsed integer
 * in *result.
 *
 * lua->memo is set to the start of the number. Returns one past the position
 * where the machine stopped when it ended in a final state, NULL otherwise.
 */
static char *LuaTable_parse_integer(Lua_Parser *lua, char *p, char *pe, VALUE *result)
{
    int cs;

    %% write init;
    lua->memo = p;
    %% write exec;

    /* Ragel contract: every state numbered >= first_final is final. */
    if (cs >= LuaTable_integer_first_final) {
        long len = p - lua->memo;
        /*
         * Convert via a Ruby string instead of a malloc'ed copy: the
         * temporary is garbage collected, so nothing leaks and there is no
         * allocation result to check.
         */
        VALUE digits = rb_str_new(lua->memo, len);

        *result = rb_str2inum(digits, 10);
        RB_GC_GUARD(digits);
        return p + 1;
    }
    return NULL;
}
/*
 * Ragel machine LuaTable_variable_name: a letter or underscore followed by
 * any number of alphanumerics or underscores.
 *
 * When the run of name characters is left, parse_variable_name prints the
 * span from lua->memo to p and passes that span, together with *result, to
 * LuaTable_string_unescape(), storing the return value back in *result. A
 * nil result stops the machine; otherwise the result is marked as UTF-8.
 */
%%{
machine LuaTable_variable_name;
include LuaTable_common;
write data;
action parse_variable_name {
printf("vn: %.*s\n",(int)(p - lua->memo), lua->memo);
*result = LuaTable_string_unescape(*result, lua->memo, p);
if (NIL_P(*result)) {
fhold;
fbreak;
} else {
FORCE_UTF8(*result);
fexec p + 1;
}
}
action exit { fhold; fbreak; }
main := begin_variable_name valid_variable_character* %parse_variable_name !valid_variable_character @exit;
}%%
//"'
/*
 * Run the LuaTable_variable_name machine over [p, pe). *result is replaced
 * by a fresh empty string buffer that the machine's action fills with the
 * name; lua->memo is set to the start of the name.
 *
 * Returns one past the position where the machine stopped when it ended in
 * a final state, NULL otherwise.
 */
static char *LuaTable_parse_variable_name(Lua_Parser *lua, char *p, char *pe, VALUE *result)
{
    int cs;
    /*
     * The whole input is in the buffer, so end-of-file coincides with pe.
     * Initialized so that generated code never compares against an
     * indeterminate pointer.
     */
    char *eof = pe;

    *result = rb_str_buf_new(0);

    %% write init;
    lua->memo = p;
    %% write exec;

    if (cs >= LuaTable_variable_name_first_final) {
        return p + 1;
    }
    return NULL;
}
/*
 * Ragel machine luatable_string: a double-quoted string.
 *
 * When the closing quote is reached, parse_string passes the span between
 * the quotes (lua->memo + 1 up to p), together with *result, to
 * LuaTable_string_unescape() and stores the return value back in *result.
 * A nil result stops the machine; otherwise the result is marked as UTF-8.
 */
%%{
machine luatable_string;
# Machine names are case sensitive; the shared definitions live in
# LuaTable_common.
include LuaTable_common;
write data;
action parse_string {
*result = LuaTable_string_unescape(*result, lua->memo + 1, p);
if (NIL_P(*result)) {
fhold;
fbreak;
} else {
FORCE_UTF8(*result);
fexec p + 1;
}
}
action exit { fhold; fbreak; }
# A backslash always consumes the character after it, so an escaped quote
# does not end the string and a string cannot end on a lone backslash.
main := '"' ( ^[\"\\] | '\\' any )* %parse_string '"' @exit;
}%%
//"'
/*
 * Run the luatable_string machine over [p, pe). *result is replaced by a
 * fresh empty string buffer that the machine's action fills with the
 * unescaped content; lua->memo is set to the start of the string.
 *
 * Returns one past the position where the machine stopped when it ended in
 * a final state, NULL otherwise.
 */
static char *LuaTable_parse_string(Lua_Parser *lua, char *p, char *pe, VALUE *result)
{
    int cs;

    *result = rb_str_buf_new(0);

    %% write init;
    lua->memo = p;
    %% write exec;

    if (cs >= luatable_string_first_final) {
        return p + 1;
    }
    return NULL;
}
/*
 * Append the bytes in [string, stringEnd) to result, translating backslash
 * escapes on the way.
 *
 * Handled escapes: n, r, t, double quote, backslash, b, f, and u followed by
 * four hex digits (a high surrogate is combined with a second u escape that
 * follows it). Any other escaped character is copied through without its
 * backslash.
 *
 * Returns result, or Qnil when a u escape does not fit before stringEnd.
 */
static VALUE LuaTable_string_unescape(VALUE result, char *string, char *stringEnd)
{
    /* p marks the start of the pending literal run, pe is the scan cursor. */
    char *p = string, *pe = string, *unescape;
    int unescape_len;
    char buf[4];

    while (pe < stringEnd) {
        if (*pe == '\\') {
            unescape = (char *) "?";
            unescape_len = 1;
            /* Flush the literal text collected before this escape. */
            if (pe > p) rb_str_buf_cat(result, p, pe - p);
            switch (*++pe) {
                case 'n':
                    unescape = (char *) "\n";
                    break;
                case 'r':
                    unescape = (char *) "\r";
                    break;
                case 't':
                    unescape = (char *) "\t";
                    break;
                case '"':
                    unescape = (char *) "\"";
                    break;
                case '\\':
                    unescape = (char *) "\\";
                    break;
                case 'b':
                    unescape = (char *) "\b";
                    break;
                case 'f':
                    unescape = (char *) "\f";
                    break;
                case 'u':
                    /*
                     * pe is on the 'u'; the four hex digits occupy pe+1 to
                     * pe+4 and must all lie before stringEnd, so pe may be
                     * at most stringEnd - 5.
                     */
                    if (pe > stringEnd - 5) {
                        return Qnil;
                    } else {
                        UTF32 ch = unescape_unicode((unsigned char *) ++pe);
                        pe += 3;
                        if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
                            /* High surrogate: expect a second u escape. */
                            pe++;
                            if (pe > stringEnd - 6) return Qnil;
                            if (pe[0] == '\\' && pe[1] == 'u') {
                                UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
                                ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
                                        | (sur & 0x3FF));
                                pe += 5;
                            } else {
                                unescape = (char *) "?";
                                break;
                            }
                        }
                        unescape_len = convert_UTF32_to_UTF8(buf, ch);
                        unescape = buf;
                    }
                    break;
                default:
                    /* Unknown escape: drop the backslash, keep the character. */
                    p = pe;
                    continue;
            }
            rb_str_buf_cat(result, unescape, unescape_len);
            p = ++pe;
        } else {
            pe++;
        }
    }
    /* Flush whatever literal text follows the last escape. */
    rb_str_buf_cat(result, p, pe - p);
    return result;
}
/*
 * Return source as a UTF-8 string.
 *
 * A string tagged ASCII-8BIT is relabelled with FORCE_UTF8 (a frozen one is
 * duplicated first); any other encoding goes through rb_str_conv_enc().
 */
static VALUE convert_encoding(VALUE source)
{
    if (rb_enc_get(source) != rb_ascii8bit_encoding()) {
        return rb_str_conv_enc(source, NULL, rb_utf8_encoding());
    }

    if (OBJ_FROZEN(source)) {
        source = rb_str_dup(source);
    }
    FORCE_UTF8(source);
    return source;
}
/*
 * Ragel machine LuaTable (top level): one assignment, optionally surrounded
 * by whitespace and comments. The first character of the assignment hands
 * the input to LuaTable_parse_assignation(), which fills the hash result.
 */
%%{
machine LuaTable;
write data;
include LuaTable_common;
action parse_assignation {
char *np = LuaTable_parse_assignation(lua, fpc, pe, &result, 0);
# Resume after the text the sub-parser consumed, or stop here on failure so
# that the caller reports a parse error.
if (np == NULL) { fhold; fbreak; } else fexec np;
}
main := ignore* (
begin_assignation >parse_assignation
) ignore*;
}%%
/* action parse_value {
char *np = LuaTable_parse_value(lua, fpc, pe, &result);
if (np == NULL) { fhold; fbreak; } else fexec np;
} */
/*
 * LuaTable.parse(source): parse source and return the resulting hash.
 *
 * source is converted to a UTF-8 string first. Raises eParserError when the
 * LuaTable machine does not end in a final state or stops before the end of
 * the input. self is not used.
 */
static VALUE cParser_parse(VALUE self, VALUE source)
{
    /*
     * The parser state lives on the stack: nothing leaks when a nested
     * parser raises. It is zeroed so every field starts defined; in
     * particular max_nesting is 0, which LuaTable_parse_assignation treats
     * as no nesting limit.
     */
    Lua_Parser state;
    Lua_Parser *lua = &state;
    VALUE result;
    int cs;
    char *p, *pe;

    source = convert_encoding(StringValue(source));
    memset(lua, 0, sizeof *lua);
    lua->source = RSTRING_PTR(source);
    lua->len = RSTRING_LEN(source);
    result = rb_hash_new();

    %% write init;
    p = lua->source;
    pe = p + lua->len;
    %% write exec;

    /* Keep the string alive while raw pointers into its buffer are in use. */
    RB_GC_GUARD(source);

    if (cs >= LuaTable_first_final && p == pe) {
        return result;
    } else {
        rb_enc_raise(rb_utf8_encoding(), eParserError, "%u: unexpected token at '%s'", __LINE__, p);
        return Qnil;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment