sergof/expression.c

## expression.c
/*
 * Copyright (c) 2017-2018 Sergej Reich
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 *    1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 *
 *    2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 *
 *    3. This notice may not be removed or altered from any source
 *    distribution.
 */

#include <ctype.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <setjmp.h>
#include <math.h>
#include <assert.h>
#ifdef _MSC_VER
    #include <malloc.h> /* alloca */
#endif
// TODO might need to include alloca.h for BSD

#if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#define SSRE_X64
#endif

#include "expression.h"

#define arraycount(x) (sizeof(x) / sizeof(x[0]))

enum {
    /* binary operators */
    VMOP_ADD = 0,
    VMOP_SUB,
    VMOP_MUL,
    VMOP_DIV,
    VMOP_MOD,
    VMOP_POW,
    /* unary operators */
    VMOP_NEG,
    /* comparison */
    VMOP_CMPEQ,
    VMOP_CMPGT,
    VMOP_CMPGE,
    VMOP_CMPLT,
    VMOP_CMPLE,
    VMOP_ASSIGN,
    /* builtin functions */
    VMOP_SQRT,
    VMOP_POW_FUNC,
    VMOP_MOD_FUNC,
    VMOP_SIN,
    VMOP_COS,
    VMOP_TAN,
    VMOP_COUNT, /* number of vmpos */
    VMOP_FUNCTIONS_START = VMOP_SQRT,
    VMOP_FUNCTIONS_END = VMOP_TAN,
};
struct { char type,   num_args, precedence, associativity, string[16];} vmops[] = {
       {  VMOP_ADD,      2,         1,          'l',          "+"     },
       {  VMOP_SUB,      2,         1,          'l',          "-"     },
       {  VMOP_MUL,      2,         2,          'l',          "*"     },
       {  VMOP_DIV,      2,         2,          'l',          "/"     },
       {  VMOP_MOD,      2,         2,          'l',          "%"     },
       {  VMOP_POW,      2,         3,          'r',          "**"    },
       {  VMOP_NEG,      1,         4,          'r',          "-"     },

       {  VMOP_CMPEQ,    2,         1,          'l',          "=="    },
       {  VMOP_CMPGT,    2,         1,          'l',          ">"     },
       {  VMOP_CMPGE,    2,         1,          'l',          ">="    },
       {  VMOP_CMPLT,    2,         1,          'l',          "<"     },
       {  VMOP_CMPLE,    2,         1,          'l',          "<="    },

       {  VMOP_ASSIGN,   2,         0,          'r',          "="     },

       {  VMOP_SQRT,     1,        -1,          'l',         "sqrt"   },
       {  VMOP_POW_FUNC, 2,        -1,          'l',         "pow"    },
       {  VMOP_MOD_FUNC, 2,        -1,          'l',         "mod"    },
       {  VMOP_SIN,      1,        -1,          'l',         "sin"    },
       {  VMOP_COS,      1,        -1,          'l',         "cos"    },
       {  VMOP_TAN,      1,        -1,          'l',         "tan"    },
};
static_assert(arraycount(vmops) == VMOP_COUNT, "vmop enum and array not in sync");

/* either a packed instruction or a floating point constant */
enum {
    CODE_VAR = 0x1ff, /* sign = 0, exponent == 255, 1 mantissa bit = 1 */
    CODE_OP  = 0x3ff, /* sign = 1, exponent == 255, 1 mantissa bit = 1 */
};
typedef union {
    struct {
        uint32_t data : 22; /* opcode or variable index */
        uint32_t code : 10; /* CODE_VAR -> variable, CODE_OP -> opcode, else -> constant */
    };
    float f;
    uint32_t u;
} instruction;

enum {
    TOKEN_END,
    TOKEN_NUMBER,
    TOKEN_VARIABLE,
    TOKEN_OPERATOR,
    TOKEN_FUNCTION,
    TOKEN_PAREN_OPEN,
    TOKEN_PAREN_CLOSE,
    TOKEN_SEPARATOR,
    TOKEN_INVALID,
};
typedef struct {
    int type;
    instruction inst;
} token;
typedef struct {
    const char *pos;
    token curr;
} tokenizer;

typedef struct {
    uint32_t *bytecode;
    int bytecode_max;
    const char **constants;
    float *constant_values;
    int num_constants;
    const char **variables;
    int num_variables;
    int stack_count;
    tokenizer *tn;
    jmp_buf error_jmp;
} parse_context;

static inline int maxi(int a, int b) { return (a > b) ? a : b; }

static inline bool is_space(char c) { return (c == ' ' || c == '\t' || c == '\v' || c == '\f' || c == '\r'); }

static bool cmp_str_token(const char *s, const char *tstr, int tlen)
{
    int i;
    for (i = 0; i < tlen; i++) {
        if (s[i] == '\0' || tstr[i] != s[i])
            return false;
    }
    return s[i] == '\0';
}
static int find_token_in_string_table(const char **table, int num_entries, const char *tstr, int tlen)
{
    int index = -1;
    for (int i = 0; i < num_entries; i++) {
        if (cmp_str_token(table[i], tstr, tlen)) {
            index = i;
            break;
        }
    }
    return index;
}

/* used to distinguish between one and two character operators */
#define CASE2(c1, c2, op1, op2)     \
    case (c1):                      \
        t.type = TOKEN_OPERATOR;    \
        if (*pc->tn->pos == (c2)) { \
            t.inst.data = (op2);    \
            pc->tn->pos++;          \
        }                           \
        else {                      \
            t.inst.data = (op1);    \
        }                           \
        break;

/* we parse as much as possible here, since it makes the rest of the parsing simpler and we have the context anyway */
static token next_token(parse_context *pc)
{
    token t = {0};
    t.inst.code = CODE_OP;
    while (is_space(*pc->tn->pos)) pc->tn->pos++;
    const char *start  = pc->tn->pos;

    bool is_unary = pc->tn->curr.type == TOKEN_OPERATOR ||
                    pc->tn->curr.type == TOKEN_PAREN_OPEN ||
                    pc->tn->curr.type == TOKEN_END;
    char c = *pc->tn->pos++;
    switch (c) {
    case '+':
        if (is_unary) /* skip unary + */
            return next_token(pc);
        t.type = TOKEN_OPERATOR;
        t.inst.data = VMOP_ADD;
        break;
    case '-': t.type = TOKEN_OPERATOR; t.inst.data = (is_unary) ? VMOP_NEG : VMOP_SUB; break;
    CASE2('*', '*', VMOP_MUL, VMOP_POW)
    case '/': t.type = TOKEN_OPERATOR; t.inst.data = VMOP_DIV; break;
    case '^': t.type = TOKEN_OPERATOR; t.inst.data = VMOP_POW; break; // TODO remove this
    case '%': t.type = TOKEN_OPERATOR; t.inst.data = VMOP_MOD; break;
    CASE2('=', '=', VMOP_ASSIGN, VMOP_CMPEQ)
    CASE2('>', '=', VMOP_CMPGT, VMOP_CMPGE)
    CASE2('<', '=', VMOP_CMPLT, VMOP_CMPLE)
    case '(': t.type = TOKEN_PAREN_OPEN; break;
    case ')': t.type = TOKEN_PAREN_CLOSE; break;
    case ',': t.type = TOKEN_SEPARATOR; break;
    default:
        if (isalpha(c)) { /* identifier */
            c = *pc->tn->pos;
            while (isalnum(c) || c == '_') {
                c = *++pc->tn->pos;
            }
            int len = pc->tn->pos - start;
            int index = -1;
            for (size_t i = 0; i < arraycount(vmops); i++) {
                if (cmp_str_token(vmops[i].string, start, len)) {
                    t.type = TOKEN_FUNCTION;
                    t.inst.data = vmops[i].type;
                    index = 0;
                    break;
                }
            }
            if (index >= 0) {
                break;
            }
            else if ((index = find_token_in_string_table(pc->constants, pc->num_constants, start, len)) >= 0) {
                t.type = TOKEN_NUMBER;
                t.inst.f = pc->constant_values[index];
            }
            else if ((index = find_token_in_string_table(pc->variables, pc->num_variables, start, len)) >= 0) {
                t.type = TOKEN_VARIABLE;
                t.inst.code = CODE_VAR;
                t.inst.data = index;
            }
            else {
                t.type = TOKEN_INVALID; /* unknown identifier */
                //longjmp(pc->error_jmp, SSRE_ERROR_UNKNOWN_IDENTIFIER);
            }
            break;
        }
        if (isdigit(c)) {
            char *end;
            t.inst.f = strtof(pc->tn->pos-1, &end);
            pc->tn->pos = end;
            t.type = TOKEN_NUMBER;
            break;
        }
        if (c == '\0') {
            break;
        }
        //longjmp(pc->error_jmp, SSRE_ERROR_UNKNOWN_IDENTIFIER);
        t.type = TOKEN_INVALID; /* unknown identifier */
    }
    pc->tn->curr = t;
    return t;

}

#undef CASE2

static inline token curr_token(parse_context *pc)
{
    token t = pc->tn->curr;
    if (t.type == TOKEN_INVALID) /* we do this here because we don't always want to jump out of next_token() */
        longjmp(pc->error_jmp, SSRE_ERROR_UNKNOWN_IDENTIFIER);
    return t;
}

static bool is_token(parse_context *pc, int token_type)
{
    return curr_token(pc).type == token_type;
}
static bool expect_token(parse_context *pc, int token_type)
{
    if (curr_token(pc).type == token_type) {
        next_token(pc);
        return true;
    }
    return false;
}

static void write_bytecode(parse_context *pc, instruction inst)
{
    if (inst.code == CODE_OP)
        pc->stack_count -= (vmops[inst.data].num_args - 1);
    else /* variable or constant */
        pc->stack_count++;

    int i = pc->bytecode[0]++;
    if (i >= pc->bytecode_max)
        longjmp(pc->error_jmp, SSRE_ERROR_CODE_ARRAY_TOO_SMALL);

    pc->bytecode[1] = maxi(pc->bytecode[1], pc->stack_count);

    pc->bytecode[i+2] = inst.u; /* +2 to move past header */
}

static void parse_expression(parse_context *pc, int min_precedence);
static void parse_atom(parse_context *pc)
{
    token t = curr_token(pc);
    switch (t.type) {
    case TOKEN_NUMBER:
    case TOKEN_VARIABLE:
        write_bytecode(pc, t.inst);
        next_token(pc);
        break;
    case TOKEN_PAREN_OPEN:
        //next_token(pc);
        parse_expression(pc, 1);
        if (!expect_token(pc, TOKEN_PAREN_CLOSE))
                longjmp(pc->error_jmp, SSRE_ERROR_MISSING_CLOSE_PAREN);
        break;
    case TOKEN_PAREN_CLOSE:
        longjmp(pc->error_jmp, SSRE_ERROR_MISSING_OPEN_PAREN);
    case TOKEN_FUNCTION:
        next_token(pc);
        if (!is_token(pc, TOKEN_PAREN_OPEN))
            longjmp(pc->error_jmp, SSRE_ERROR_MISSING_OPEN_PAREN);
        if (vmops[t.inst.data].num_args > 0) {
            parse_expression(pc, 1);
            for (int i = 1; i < vmops[t.inst.data].num_args; i++) {
                if (!is_token(pc, TOKEN_SEPARATOR))
                    longjmp(pc->error_jmp, SSRE_ERROR_TOO_FEW_FUNCTION_ARGUMENTS);
                parse_expression(pc, 1);
            }
        }
        if (!expect_token(pc, TOKEN_PAREN_CLOSE)) {
            if (curr_token(pc).type == TOKEN_SEPARATOR)
                longjmp(pc->error_jmp, SSRE_ERROR_TOO_MANY_FUNCTION_ARGUMENTS);
            else
                longjmp(pc->error_jmp, SSRE_ERROR_MISSING_CLOSE_PAREN);
        }
        write_bytecode(pc, t.inst);
        break;
    case TOKEN_END:
        longjmp(pc->error_jmp, SSRE_ERROR_UNEXPECTED_SYMBOL);
    default:
        break;
    }
}
static void parse_expression(parse_context *pc, int min_precedence)
{
    next_token(pc);
    parse_atom(pc);
    token t = curr_token(pc);
    switch (t.type) {
    case TOKEN_OPERATOR:
        if (vmops[t.inst.data].num_args == 1) {
            //next_token(pc);
            parse_expression(pc, min_precedence);
            write_bytecode(pc, t.inst);
            break;
        }
        for (;;) {
            t = curr_token(pc);
            int p = vmops[t.inst.data].precedence;
            if (t.type != TOKEN_OPERATOR || p < min_precedence)
                break;

            //next_token(pc);
            if (vmops[t.inst.data].associativity == 'r')
                parse_expression(pc, p);
            else
                parse_expression(pc, p+1);
            write_bytecode(pc, t.inst);
        }
        break;
    //case TOKEN_END:
        //printf("expression token end\n");
        //return;
    default:
        break;
    }
}

#define OP1(op) stack[sp-1] = op stack[sp-1];
#define OP2(op) stack[sp-2] = stack[sp-2] op stack[sp-1]; sp--;
#define FUNC1(func) stack[sp-1] = func(stack[sp-1]);
#define FUNC2(func) stack[sp-2] = func(stack[sp-2], stack[sp-1]); sp--;
#define FUNC3(func) stack[sp-3] = func(stack[sp-3], stack[sp-2], stack[sp-1]); sp-=2;
static int eval_vmop(uint32_t op, float *stack, int sp)
{
    switch (op) {
    case (VMOP_ADD): OP2(+); break;
    case (VMOP_SUB): OP2(-); break;
    case (VMOP_MUL): OP2(*); break;
    case (VMOP_DIV): OP2(/); break;
    case (VMOP_MOD): FUNC2(fmodf); break;
    case (VMOP_POW): FUNC2(powf); break;
    case (VMOP_NEG): OP1(-); break;
    case (VMOP_SQRT): FUNC1(sqrtf); break;
    case (VMOP_POW_FUNC): FUNC2(powf); break;
    case (VMOP_MOD_FUNC): FUNC2(fmodf); break;
    case (VMOP_SIN): FUNC1(sinf); break;
    case (VMOP_COS): FUNC1(cosf); break;
    case (VMOP_TAN): FUNC1(tanf); break;
    default:
        assert(0); /* unknown opcode, should only happen if bytecode invalid */
        return sp;
    }
    return sp;
}

static void optimize(uint32_t *bytecode)
{
    // TODO detect madd/msub and maybe also mnadd/mnsub
    size_t stack_count = (size_t)bytecode[1]; /* second element is the number of max stack entries */
    float *stack = (float*)alloca(sizeof(float) * stack_count);
    for (;;) {
        int sp = 0; /* points to the next stack entry */
        int count = (int)bytecode[0]; /* first element is the number of instructions */
        stack[0] = 0.0f; /* default return value for 0 instructions */
        uint32_t *bc = bytecode+2; /* pointer to start of instructions for convenience */

        bool done = true;
        uint32_t stack_count = 1;
        uint32_t max_stack_count = 1;
        for (int i = 0; i < count; i++) {
            instruction inst;
            inst.u = bc[i];
            if (inst.code == CODE_VAR) {
                sp++;
                max_stack_count = maxi(max_stack_count, stack_count++);
            }
            else if (inst.code == CODE_OP) {
                sp = eval_vmop(inst.data, stack, sp);
                int nargs = vmops[inst.data].num_args;
                max_stack_count = maxi(max_stack_count, stack_count -= (nargs - 1));
                bool fold = true;
                for (int j = 1; j <= nargs; j++) { /* check if all preceding instructions were constants */
                    inst.u = bc[i-j];
                    if (inst.code == CODE_OP || inst.code == CODE_VAR)
                        fold = false;
                }
                if (fold) { /* all arguments constant, all our functions are pure -> replace code with constant */
                    inst.f = stack[sp-1];
                    bc[i-nargs] = inst.u;
                    memmove(&bc[i-nargs+1], &bc[i+1], sizeof(uint32_t) * (count-i-1));
                    bytecode[0] -= nargs; /* reduce instruction count */
                    done = false;
                    break;
                }
            }
            else { /* constant */
                stack[sp++] = inst.f;
                max_stack_count = maxi(max_stack_count, stack_count++);
            }
        }
        if (done) {
            bytecode[1] = max_stack_count;
            break;
        }
    }
}

int ssre_calc_code_size(const char **constants, float *constant_values, int num_constants,
                        const char **variables, int num_variables,
                        const char *expression_str)
{
    // FIXME adjust for x64
    int count = 2; /* header */

    if (expression_str == NULL || expression_str[0] == '\0')
        return count * sizeof(uint32_t);

    tokenizer tn = {expression_str, {0}};
    parse_context pc = {NULL, 0, constants, constant_values, num_constants, variables, num_variables, 0, &tn};
    for (;;) {
        token t = next_token(&pc);
        if (t.type == TOKEN_END)
            break;
        if (t.type == TOKEN_FUNCTION || t.type == TOKEN_NUMBER || t.type == TOKEN_OPERATOR || t.type == TOKEN_VARIABLE)
            count++;
    }

    return count * sizeof(uint32_t);
}

static int compile_bytecode(uint8_t *code, int code_max,
                                 const char **constants, float *constant_values, int num_constants,
                                 const char **variables, int num_variables,
                                 const char *expression_str)
{
    uint32_t *bytecode = (uint32_t*)code;
    int bytecode_max = code_max * sizeof(uint32_t) - 2; /* account for header */

    if (bytecode_max < 2) /* check if there's enough space for header */
        return SSRE_ERROR_CODE_ARRAY_TOO_SMALL;

    bytecode[0] = 0;
    bytecode[1] = 1; /* reserve at least one stack entry */

    if (expression_str == NULL || expression_str[0] == '\0')
        return SSRE_ERROR_NONE; /* empty expression, nothing to do */

    tokenizer tn = {expression_str, {0}};
    //parse_context pc = {bytecode, bytecode_max, constants, constant_values, num_constants, variables, num_variables, 0, &tn, {0}};
    parse_context pc = {bytecode, bytecode_max, constants, constant_values, num_constants, variables, num_variables, 0, &tn, {{{0}}}};
    int err = setjmp(pc.error_jmp); /* setjmp makes error handling much cleaner */
    if (err)
        return err;

    //next_token(&pc);
    parse_expression(&pc, 1);
    if (!is_token(&pc, TOKEN_END)) {
        if (is_token(&pc, TOKEN_PAREN_CLOSE))
            return SSRE_ERROR_MISSING_OPEN_PAREN;
        return SSRE_ERROR_UNEXPECTED_SYMBOL;
    }
    optimize(bytecode);
    return SSRE_ERROR_NONE;
}

#if !defined(SSRE_X64) || defined(SSRE_FORCE_VM)

/* the first bytecode entry stores the number of instructions and the second stores the needed stack count */
int ssre_compile(uint8_t *code, int code_max,
                 const char **constants, float *constant_values, int num_constants,
                 const char **variables, int num_variables,
                 const char *expression_str)
{
    return compile_bytecode(code, code_max, constants, constant_values, num_constants,
                            variables, num_variables, expression_str);
}

float ssre_evaluate(uint8_t *code, float *variables)
{
    uint32_t *bytecode = (uint32_t*)code;
    int sp = 0; /* points to the next stack entry */
    int count = (int)*bytecode++; /* first element is the number of instructions */
    size_t stack_count = (size_t)*bytecode++; /* second element is the number of max stack entries */
    float *stack = (float*)alloca(sizeof(float) * stack_count);
    stack[0] = 0.0f; /* default return value for 0 instructions */

    for (int i = 0; i < count; i++) {
        instruction inst;
        inst.u = bytecode[i];
        if (inst.code == CODE_VAR)
            stack[sp++] = variables[inst.data];
        else if (inst.code == CODE_OP)
            sp = eval_vmop(inst.data, stack, sp);
        else /* constant */
            stack[sp++] = inst.f;
    }
    return stack[0];
}

#elif defined(SSRE_X64)

static int bytecode_to_x64(uint8_t *code, int code_max, uint32_t *bytecode);

int ssre_compile(uint8_t *code, int code_max,
                        const char **constants, float *constant_values, int num_constants,
                        const char **variables, int num_variables,
                        const char *expression_str)
{
    /* We go through bytecode so we don't have to duplicate parsing and optimization code for now */
    int bytecode_size = ssre_calc_code_size(constants, constant_values, num_constants,
                                             variables, num_variables, expression_str);
    uint8_t *bytecode = (uint8_t*)alloca(bytecode_size);
    int err = compile_bytecode(bytecode, bytecode_size, constants, constant_values, num_constants,
                           variables, num_variables, expression_str);
    if (err != SSRE_ERROR_NONE)
        return err;

    return bytecode_to_x64(code, code_max, (uint32_t*)bytecode);
}

typedef float eval_func(float *variables, float *constants, uintptr_t *functions);

float ssre_evaluate(uint8_t *code, float *variables)
{
    // TODO use (RIP?) relative offsets so we don't have to pass constants and functions as arguments
    uint32_t constants_offset = *((uint32_t*)&code[0]);
    uint32_t functions_offset = *((uint32_t*)&code[4]);
    float *constants = (float*)&code[constants_offset];
    uintptr_t *functions = (uintptr_t*)&code[functions_offset];
    eval_func *f = (eval_func*)&code[8];
    return f(variables, constants, functions);
}

enum { /* special cases: RSP/R12: + sib (0x24), RBP/R13: rip + offset if indirect */
    RAX = 0, RCX = 1, RDX =  2, RBX =  3, RSP =  4, RBP =  5, RSI =  6, RDI =  7,
    R8  = 8, R9  = 9, R10 = 10, R11 = 11, R12 = 12, R13 = 13, R14 = 14, R15 = 15,

    // TODO use distinct values so we can differentiate gp and sse registers
    XMM0 = 0, XMM1 = 1, XMM2  =  2, XMM3  =  3, XMM4  =  4, XMM5  =  5, XMM6  =  6, XMM7  =  7,
    XMM8 = 8, XMM9 = 9, XMM10 = 10, XMM11 = 11, XMM12 = 12, XMM13 = 13, XMM14 = 14, XMM15 = 15,

#ifdef _WIN32 /* Windows calling convention */
    ARG0 = RCX, ARG1 = RDX, ARG2 =  R8, ARG3 =  R9,
#else /* System V calling convention */
    ARG0 = RDI, ARG1 = RSI, ARG2 = RDX, ARG3 = RCX,
#endif
    /* non-volatile registers for both calling conventions */
    VARS = RBX, CONSTS = R14, FUNCS = R15, NV3 = RBP
};

/* these two need to be kept in sync */
enum {
    OP_ADD1, OP_ADD4, OP_SUB1, OP_SUB4, OP_MOV, OP_CALL, OP_PUSH, OP_POP, OP_RET,
    OP_ADDSS, OP_SUBSS, OP_MULSS, OP_DIVSS,
    OP_MAXSS, OP_MINSS,
    OP_SQRTSS, OP_RSQRTSS,
    OP_ROUNDSS, /* sse 4.1 */
    OP_XORPS,
    OP_MOVSS_LOAD, OP_MOVSS_STORE,
};
static struct { uint8_t prefix; uint8_t opcode[4];} opcodes[] = {
    {0x00, {0x83, 0}}, /* add 1 byte immediate */
    {0x00, {0x81, 0}}, /* add 4 byte immediate */
    {0x00, {0x83, 0}}, /* sub 1 byte immediate */
    {0x00, {0x81, 0}}, /* sub 4 byte immediate */
    {0x00, {0x8b, 0}}, /* load (mov) */
    //{0x00, {0x89, 0}}, /* store (mov) */
    {0x00, {0xff, 0}}, /* call */
    {0x00, {0x50, 0}}, /* push */
    {0x00, {0x58, 0}}, /* pop */
    {0x00, {0xc3, 0}}, /* ret */

    {0xf3, {0x0f, 0x58, 0}}, /* addss */
    {0xf3, {0x0f, 0x5c, 0}}, /* subss */
    {0xf3, {0x0f, 0x59, 0}}, /* mulss */
    {0xf3, {0x0f, 0x5e, 0}}, /* divss */

    {0xf3, {0x0f, 0x5f, 0}}, /* maxss */
    {0xf3, {0x0f, 0x5d, 0}}, /* minss */

    {0xf3, {0x0f, 0x51, 0}}, /* sqrtss */
    {0xf3, {0x0f, 0x52, 0}}, /* rsqrtss */

    {0x66, {0x0f, 0x3a, 0x0a, 0}}, /* roundss */

    {0x00, {0x0f, 0x57, 0}}, /* xorps */

    {0xf3, {0x0f, 0x10, 0}}, /* loadss (movss) */
    {0xf3, {0x0f, 0x11, 0}}, /* storess (movss) */
};

static bool op_is_commutative(int op)
{
    if (op == OP_ADDSS || op == OP_MULSS || op == OP_MAXSS || op == OP_MINSS)
        return true;
    return false;
}

typedef struct {
    uint8_t *code;
    int code_max;
    int sp; /* points to the next stack entry */
    int ip; /* points to the next code array entry */
    int reg_sp; /* points to the next register stack entry index */
    int reg_sp_max; /* max number of register stack entries */
    jmp_buf error_jmp;
} codegen_context;

static void write8(codegen_context *C, uint8_t x)
{
    if ((C->ip+1) >= C->code_max)
        longjmp(C->error_jmp, SSRE_ERROR_CODE_ARRAY_TOO_SMALL);
    C->code[C->ip++] = x;
}
static void write32(codegen_context *C, uint32_t x)
{
    if ((C->ip+1) >= C->code_max)
        longjmp(C->error_jmp, SSRE_ERROR_CODE_ARRAY_TOO_SMALL);
    *((uint32_t*)&C->code[C->ip]) = x;
    C->ip += 4;
}
static void write64(codegen_context *C, uint64_t x)
{
    if ((C->ip+1) >= C->code_max)
        longjmp(C->error_jmp, SSRE_ERROR_CODE_ARRAY_TOO_SMALL);
    *((uint64_t*)&C->code[C->ip]) = x;
    C->ip += 8;
}

enum write_flag {
    WRITE_DIRECT = (1<<0),
    WRITE_MEM64 = (1<<1),
    WRITE_FORCE_OFFSET = (1<<2),
    WRITE_SIB = (1<<3),
};
/* if direct adressing is used rx == dst and rm == src registers, otherwise it depends on opcode
 * disp is either an offset or immediate operand */
static void write_instruction(codegen_context *C, int op, uint8_t rx, uint8_t rm, int32_t disp, uint8_t flag)
{
    if (opcodes[op].prefix)
        write8(C, opcodes[op].prefix);

    uint8_t rex = (flag & WRITE_MEM64) ? 0x08 : 0;
    rex |= ((rx & 0x8) >> 1);
    rex |= ((rm & 0x8) >> 3);
    if (rex) { rex |= 0x40; write8(C, rex); }

    uint8_t *b = opcodes[op].opcode;
    while (*b) {
        write8(C, *b);
        b++;
    }

    uint8_t modrm;
    if (flag & WRITE_DIRECT) {
        modrm = 0xc0; /* direct */
    }
    else {
        if (disp > INT8_MAX || disp < INT8_MIN)
            modrm = 0x80; /* indirect 4 byte offset */
        else if (disp != 0)
            modrm = 0x40; /* indirect 1 byte offset */
        else
            modrm = 0; /* indirect */
    }
    modrm |= ((rx & 0x7) << 3);
    modrm |= (rm & 0x7);
    write8(C, modrm);

    if (flag & WRITE_SIB) write8(C, 0x24);

    if (disp != 0 || flag & WRITE_FORCE_OFFSET) {
        if (disp <= INT8_MAX && disp >= INT8_MIN) {
            write8(C, disp);
        }
        else {
            write32(C, disp);
        }
    }
}
static void write_instruction1(codegen_context *C, int op, uint8_t reg)
{
    uint8_t rex = ((reg & 0x8) >> 3);
    if (rex) {
        rex |= 0x40;
        write8(C, rex);
    }
    write8(C, opcodes[op].opcode[0] + (reg & 0x7));
}

#define ADD(reg, val) write_instruction(C, (val > 127) ? OP_ADD4 : OP_ADD1, 0, reg, val, WRITE_DIRECT|WRITE_MEM64)
#define SUB(reg, val) write_instruction(C, (val > 127) ? OP_SUB4 : OP_SUB1, 5, reg, val, WRITE_DIRECT|WRITE_MEM64)
#define MOV(dst, src) write_instruction(C, OP_MOV, dst, src, 0, WRITE_DIRECT|WRITE_MEM64)
//#define STORE(dst, src) write_instruction(C, OP_MOV_STORE, src, dst, 0, WRITE_DIRECT|WRITE_MEM64)
#define CALL(reg, disp) write_instruction(C, OP_CALL, 2, reg, disp, 0)
#define PUSH(reg) write_instruction1(C, OP_PUSH, reg)
#define POP(reg) write_instruction1(C, OP_POP, reg)
#define RET() write8(C, opcodes[OP_RET].opcode[0])
#define ADDSS(dst, src) write_instruction(C, OP_ADDSS, dst, src, 0, WRITE_DIRECT)
#define SUBSS(dst, src) write_instruction(C, OP_SUBSS, dst, src, 0, WRITE_DIRECT)
#define MULSS(dst, src) write_instruction(C, OP_MULSS, dst, src, 0, WRITE_DIRECT)
#define DIVSS(dst, src) write_instruction(C, OP_DIVSS, dst, src, 0, WRITE_DIRECT)
#define MAXSS(dst, src) write_instruction(C, OP_MAXSS, dst, src, 0, WRITE_DIRECT)
#define MINSS(dst, src) write_instruction(C, OP_MINSS, dst, src, 0, WRITE_DIRECT)
#define ADDSS_MEM(dst, src, disp) write_instruction(C, OP_ADDSS, dst, src, disp, (src == RSP) ? WRITE_SIB : 0)
#define SUBSS_MEM(dst, src, disp) write_instruction(C, OP_SUBSS, dst, src, disp, (src == RSP) ? WRITE_SIB : 0)
#define MULSS_MEM(dst, src, disp) write_instruction(C, OP_MULSS, dst, src, disp, (src == RSP) ? WRITE_SIB : 0)
#define DIVSS_MEM(dst, src, disp) write_instruction(C, OP_DIVSS, dst, src, disp, (src == RSP) ? WRITE_SIB : 0)
#define MAXSS_MEM(dst, src, disp) write_instruction(C, OP_MAXSS, dst, src, disp, (src == RSP) ? WRITE_SIB : 0)
#define MINSS_MEM(dst, src, disp) write_instruction(C, OP_MINSS, dst, src, disp, (src == RSP) ? WRITE_SIB : 0)
#define SQRTSS(dst) write_instruction(C, OP_SQRTSS, dst, dst, 0, WRITE_DIRECT)
#define RSQRTSS(dst) write_instruction(C, OP_RSQRTSS, dst, dst, 0, WRITE_DIRECT)
#define FLOORSS(dst) write_instruction(C, OP_ROUNDSS, dst, dst, 1, WRITE_DIRECT)
#define CEILSS(dst) write_instruction(C, OP_ROUNDSS, dst, dst, 2, WRITE_DIRECT)
#define ROUNDSS(dst) write_instruction(C, OP_ROUNDSS, dst, dst, 0, WRITE_DIRECT|WRITE_FORCE_OFFSET)
#define XORPS(dst, src) write_instruction(C, OP_XORPS, dst, src, 0, WRITE_DIRECT)
#define MOVSS(dst, src) write_instruction(C, OP_MOVSS_LOAD, dst, src, 0, WRITE_DIRECT)
#define LOADSS(dst, src, disp) write_instruction(C, OP_MOVSS_LOAD, dst, src, disp, (src == RSP) ? WRITE_SIB : 0)
#define STORESS(dst, src, disp) write_instruction(C, OP_MOVSS_STORE, src, dst, disp, (dst == RSP) ? WRITE_SIB : 0)

enum { SE_VAR, SE_CONST, SE_RET, SE_STACK };
typedef struct {
    int type;
    int index;
} stack_entry;

static void check_args(int *ret_pos, codegen_context *C, stack_entry *stack, int num_args)
{
    assert(C->sp >= num_args);

    *ret_pos = -1;
    bool ret_not_used = true;
    for (int i = 0; i < num_args; i++) {
        if (stack[C->sp-i-1].type == SE_RET) {
            *ret_pos = num_args-i-1;
            ret_not_used = false;
            break;
        }
    }
    if (ret_not_used) {
        /* check if we have a return value and push it on the stack */
        bool found = false;
        for (int i = C->sp - num_args; i >= 0; i--) {
            if (stack[i].type == SE_RET) {
                stack[i].type = SE_STACK;
                stack[i].index = C->reg_sp;
                found = true;
                break;
            }
        }
        if (found) {
            STORESS(RSP, XMM0, C->reg_sp * sizeof(float));
            C->reg_sp++;
            C->reg_sp_max = maxi(C->reg_sp_max, C->reg_sp);
        }
    }
}

static int get_mem(codegen_context *C, stack_entry se)
{
    switch (se.type) {
    case SE_VAR:
        return VARS;
        break;
    case SE_CONST:
        return CONSTS;
        break;
    case SE_RET:
        return XMM0; // hmm...
        break;
    case SE_STACK:
        C->reg_sp--;
        return RSP;
        break;
    }
    assert(0);
    return -1;
}
static void load_reg(codegen_context *C, uint8_t reg, stack_entry se)
{
    int mem = get_mem(C, se);
    if (mem != XMM0) { /* xmm0 handled elsewhere */
        LOADSS(reg, mem, se.index * sizeof(float));
    }
}
static void prepare_func(codegen_context *C, stack_entry *stack, int num_args)
{
    int ret_pos;
    check_args(&ret_pos, C, stack, num_args);
    C->sp -= num_args;
    if (ret_pos > 0) { /* last return value is used in one of the registers */
        MOVSS(ret_pos, XMM0);
    }
    for (int i = 0; i < num_args; i++) {
        load_reg(C, i, stack[C->sp + i]);
    }
    stack[C->sp++].type = SE_RET;
}

#define WRITE_OP2(op) {                                           \
    int ret_pos;                                                  \
    check_args(&ret_pos, C, stack, 2);                            \
    stack_entry arg1 = stack[--C->sp];                            \
    stack_entry arg0 = stack[--C->sp];                            \
    if (arg0.type == SE_RET) {                                    \
        int mem = get_mem(C, arg1);                               \
        op##_MEM(XMM0, mem, arg1.index * sizeof(float));          \
    }                                                             \
    else if (arg1.type == SE_RET && op_is_commutative(OP_##op)) { \
        int mem = get_mem(C, arg0);                               \
        op##_MEM(XMM0, mem, arg0.index * sizeof(float));          \
    }                                                             \
    else if (arg1.type == SE_RET) {                               \
        int mem = get_mem(C, arg0);                               \
        LOADSS(XMM1, mem, arg0.index * sizeof(float));            \
        op(XMM1, XMM0);                                           \
        MOVSS(XMM0, XMM1);                                        \
    }                                                             \
    else {                                                        \
        load_reg(C, XMM0, arg0);                                  \
        int mem = get_mem(C, arg1);                               \
        op##_MEM(XMM0, mem, arg1.index * sizeof(float));          \
    }                                                             \
    stack[C->sp++].type = SE_RET;                                 \
}

static int align_stack_size(int size)
{
    int mod = size % 16; // size & 15
    return mod ? size + (16 - mod) : size;
}
static int add_constant(float *constants, int *num_constants, float f)
{
    for (int i = 0; i < *num_constants; i++) {
        if (constants[i] == f)
            return i;
    }
    /* value not found, add new */
    int r = (*num_constants)++;
    constants[r] = f;
    return r;
}

/* code aray layout: [4 byte constant_offset|4 byte function_offset|code|constants|functions] */
static int bytecode_to_x64(uint8_t *code, int code_max, uint32_t *bytecode)
{
    codegen_context c = {code, code_max, 0, 0, 0, 0, {{{0}}}};
    codegen_context *C = &c;

    int err = setjmp(C->error_jmp); /* setjmp makes error handling much cleaner */
    if (err)
        return err;

    int count = (int)*bytecode++; /* first element is the number of instructions */
    size_t stack_count = (size_t)*bytecode++; /* second element is the number of max stack entries */
    stack_entry *stack = (stack_entry*)alloca(sizeof(stack_entry) * stack_count);

    /* count number of constants and functions */
    int max_constants = 0;
    int max_functions = 0;
    for (int i = 0; i < count; i++) {
        instruction inst;
        inst.u = bytecode[i];
        if (inst.code == CODE_OP) {
                // TODO add vmop_is_function() ?
                if (inst.data == VMOP_MOD || inst.data == VMOP_POW ||
                    inst.data == VMOP_POW_FUNC || inst.data == VMOP_MOD_FUNC ||
                    inst.data == VMOP_SIN || inst.data == VMOP_COS || inst.data == VMOP_TAN) {
                    max_functions++;
                }
        }
        else if (inst.code != CODE_VAR) {
            max_constants++;
        }
    }

    int num_constants = 0;
    float *constants = (float*)alloca(sizeof(float) * (max_constants + 1));
    //int num_functions = 0;
    //float *functions = (uintptr_t*)alloca(sizeof(uintptr_t) * (max_functions));

    enum { SIN_OFFSET = 0, COS_OFFSET = 8, TAN_OFFSET = 16, POW_OFFSET = 24, MOD_OFFSET = 32 };
    uintptr_t functions[] = {(uintptr_t)sinf, (uintptr_t)cosf, (uintptr_t)tanf, (uintptr_t)powf, (uintptr_t)fmodf};

    if (count == 0) { /* empty expression, return 0 */
        write32(C, 8 + 4); /* header + 4 bytes of code */
        write32(C, 0); /* no functions */
        XORPS(XMM0, XMM0);
        RET();
        return SSRE_ERROR_NONE;
    }
    else if (count == 1) {
        write32(C, 8 + 5); /* header + 5 bytes of code */
        write32(C, 0); /* no functions */
        instruction inst;
        inst.u = bytecode[0];
        if (inst.code == CODE_VAR) {
            LOADSS(XMM0, ARG0, inst.data * sizeof(float));
            RET();
        }
        else { /* constant expression */
            LOADSS(XMM0, ARG1, 0);
            RET();
            write32(C, inst.u);
        }
        return SSRE_ERROR_NONE;
    }

    C->ip += 8; /* reserve space for header */

    /* back up function arguments in non-volatile registers, so we don't lose them when calling other functions */
    PUSH(VARS);
    PUSH(CONSTS);
    PUSH(FUNCS);
    MOV(VARS, ARG0);
    MOV(CONSTS, ARG1);
    MOV(FUNCS, ARG2);

    int stack_space_pos = C->ip; /* remember where to put stack space reservation */

    for (int i = 0; i < count; i++) {
        instruction inst;
        inst.u = bytecode[i];
        if (inst.code == CODE_VAR) {
            stack_entry se;
            se.type = SE_VAR;
            se.index = inst.data;
            stack[C->sp++] = se;
        }
        else if (inst.code == CODE_OP) {
            switch (inst.data) {
            case (VMOP_ADD): WRITE_OP2(ADDSS); break;
            case (VMOP_SUB): WRITE_OP2(SUBSS); break;
            case (VMOP_MUL): WRITE_OP2(MULSS); break;
            case (VMOP_DIV): WRITE_OP2(DIVSS); break;
            case (VMOP_MOD): prepare_func(C, stack, 2); CALL(FUNCS, MOD_OFFSET); break;
            case (VMOP_POW): prepare_func(C, stack, 2); CALL(FUNCS, POW_OFFSET); break;
            case (VMOP_NEG): {
                int ret_pos;
                check_args(&ret_pos, C, stack, 1);
                int sign_mask_index = add_constant(constants, &num_constants, -0.0f);
                load_reg(C, XMM0, stack[C->sp-1]);
                LOADSS(XMM1, CONSTS, sign_mask_index * sizeof(float));
                XORPS(XMM0, XMM1);
                stack[C->sp-1].type = SE_RET;
                } break;
            case (VMOP_SQRT): load_reg(C, XMM0, stack[C->sp-1]); SQRTSS(XMM0); stack[C->sp-1].type = SE_RET; break;
            case (VMOP_POW_FUNC): prepare_func(C, stack, 2); CALL(FUNCS, POW_OFFSET); break;
            case (VMOP_MOD_FUNC): prepare_func(C, stack, 2); CALL(FUNCS, MOD_OFFSET); break;
            case (VMOP_SIN): prepare_func(C, stack, 1); CALL(FUNCS, SIN_OFFSET); break;
            case (VMOP_COS): prepare_func(C, stack, 1); CALL(FUNCS, COS_OFFSET); break;
            case (VMOP_TAN): prepare_func(C, stack, 1); CALL(FUNCS, TAN_OFFSET); break;
            }
        }
        else { /* constant */
            stack_entry se;
            se.type = SE_CONST;
            se.index = add_constant(constants, &num_constants, inst.f);
            stack[C->sp++] = se;
        }
    }

    #ifdef _WIN32 // TODO look into if this is really enough
    int so = 32; /* stack offset to account for shadow space */
    #else
    int so = 0;
    #endif

    int stack_space = align_stack_size(C->reg_sp_max * sizeof(float) + so);

    if (stack_space)
        ADD(RSP, stack_space);
    POP(FUNCS);
    POP(CONSTS);
    POP(VARS);

    RET();

    int constants_offset = C->ip;
    for (int i = 0; i < num_constants; i++) { /* write constants */
        write32(C, *(uint32_t*)(&constants[i]));
    }
    int functions_offset = C->ip;
    if (max_functions) { /* write function pointers */
        for (int i = 0; i < (int)arraycount(functions); i++) { // TODO only write used functions
            write64(C, functions[i]);
        }
    }

    int stack_space_offset = 0;
    if (stack_space) { /* write stack space reservation now that we know how much we need */
        int len = C->ip - stack_space_pos;
        C->ip = stack_space_pos;
        stack_space_offset = (stack_space > 127) ? 7 : 4;
        // FIXME check if code array has enough space to fit in len bytes
        memmove(code + C->ip + stack_space_offset, code + C->ip, len);
        SUB(RSP, stack_space);
    }

    C->ip = 0;
    write32(C, constants_offset + stack_space_offset);
    write32(C, functions_offset + stack_space_offset);

    return SSRE_ERROR_NONE;
}

#endif

## expression.h
/*
 * Copyright (c) 2017-2018 Sergej Reich
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 *    1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 *
 *    2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 *
 *    3. This notice may not be removed or altered from any source
 *    distribution.
 */

/* Mathematical expression compiler and evaluator
 * Small, staightforward C code, doesn't use heap allocation.
 * Generates x64 machine code or virtual machine bytecode.
 */

#pragma once

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

enum { /* only error code, we could also return more context (symbol, position) if needed */
    SSRE_ERROR_NONE = 0,
    SSRE_ERROR_CODE_ARRAY_TOO_SMALL = -1,
    SSRE_ERROR_UNKNOWN_IDENTIFIER = -2,
    SSRE_ERROR_UNEXPECTED_SYMBOL = -3,
    SSRE_ERROR_MISSING_OPEN_PAREN = -4,
    SSRE_ERROR_MISSING_CLOSE_PAREN = -5,
    SSRE_ERROR_TOO_FEW_FUNCTION_ARGUMENTS = -6,
    SSRE_ERROR_TOO_MANY_FUNCTION_ARGUMENTS = -7,
};

/* does a dry run of the parser to calculate the size of the code array, doesn't check errors */
int ssre_calc_code_size(const char **constants, float *constant_values, int num_constants,
                        const char **variables, int num_variables,
                        const char *expression_str);


/* Fills code array with vm bytecode or machine code if supported.
 * Will return error if array too small or expression invalid, SSRE_ERROR_NONE on success.
 * If expression_str is empty, generated bytecode will evaluate to 0 */
int ssre_compile(uint8_t *code, int code_max,
                 const char **constants, float *constant_values, int num_constants,
                 const char **variables, int num_variables,
                 const char *expression_str);

/* variable_values has to be in sync with the variables array passed to the compiler
 * code array needs to be in executable memory if machine code is used */
float ssre_evaluate(uint8_t *code, float *variable_values);

#ifdef __cplusplus
}
#endif

## expression_test.c
#if 0
cc ${0} -O3 -Wall -Wextra -Wno-missing-field-initializers -lm -o _script_
./_script_ "${@}"
ret=$?
rm _script_
exit $ret
#endif


#include <stdio.h>
#include <math.h>

#include "expression.h"
#include "expression.c"

float pi = 3.14159265358979323846f;
float e = 2.71828182845904523536f;
float a = 1.1f;
float b = 2.2f;
float c = 3.3f;
float d = 4.4f;
float x = 2.123456f;
float y = 3.123456f;
float z = 4.123456f;
float w = 5.123456f;
const char *constants[] = {"pi", "e"};
float constant_values[] = {3.14159265358979323846f, 2.71828182845904523536f};
const char *variables[] = {"a", "b", "c", "d", "x", "y", "z", "w", "sqrt", "a3"};
float variable_values[] = {1.1f, 2.2f, 3.3f, 4.4f, 2.123456f, 3.123456f, 4.123456f, 5.123456f, 3.1f, 33.456f};
bool expect_error(int err, int expected, int line)
{
    if (err != expected) {
        switch(err) {
        case SSRE_ERROR_NONE:
            printf("Line: %i: error: SSRE_ERROR_NONE", line);
            break;
        case SSRE_ERROR_CODE_ARRAY_TOO_SMALL:
            printf("Line: %i: error: SSRE_ERROR_CODE_ARRAY_TOO_SMALL", line);
            break;
        case SSRE_ERROR_UNKNOWN_IDENTIFIER:
            printf("Line: %i: error: SSRE_ERROR_UNKNOWN_IDENTIFIER", line);
            break;
        case SSRE_ERROR_UNEXPECTED_SYMBOL:
            printf("Line: %i: error: SSRE_ERROR_UNEXPECTED_SYMBOL", line);
            break;
        case SSRE_ERROR_MISSING_OPEN_PAREN:
            printf("Line: %i: error: SSRE_ERROR_MISSING_OPEN_PAREN", line);
            break;
        case SSRE_ERROR_MISSING_CLOSE_PAREN:
            printf("Line: %i: error: SSRE_ERROR_MISSING_CLOSE_PAREN", line);
            break;
        case SSRE_ERROR_TOO_FEW_FUNCTION_ARGUMENTS:
            printf("Line: %i: error: SSRE_ERROR_TOO_FEW_FUNCTION_ARGUMENTS", line);
            break;
        case SSRE_ERROR_TOO_MANY_FUNCTION_ARGUMENTS:
            printf("Line: %i: error: SSRE_ERROR_TOO_MANY_FUNCTION_ARGUMENTS", line);
            break;
        }
        return false;
    }
    return true;
}

void print_x64_code(uint8_t *code)
{
    for (int i = 8;; i++) {
        printf("%02x ", code[i]);
        if (code[i] == 0xc3) /* ret */
            break;
    }
    printf("\n");
}

void parser_test(uint8_t *code, int code_max, const char *expression_str, int expected_error, int line)
{
    int err = ssre_compile(code, code_max,
                           constants, constant_values, arraycount(constants),
                           variables, arraycount(variables),
                           expression_str);
    if (!expect_error(err, expected_error, line)) {
        printf(" in test \"%s\"\n", expression_str);
        abort();
    }
}

void eval_test(uint8_t *code, int code_max, const char *expression_str, float expected_value, int line)
{
    int err = ssre_compile(code, code_max,
                           constants, constant_values, arraycount(constants),
                           variables, arraycount(variables),
                           expression_str);
    if (!expect_error(err, SSRE_ERROR_NONE, line)) {
        printf(" in test \"%s\"\n", expression_str);
        abort();
    }

    float value = ssre_evaluate(code, variable_values);
    if (value != expected_value) {
        printf("Line: %i: test \"%s\" failed, expected: %f, got: %f\n", line, expression_str, expected_value, value);
        abort();
    }
    //else {
        //printf("Test passed, %s = %f = %f\n", expression_str, value, expected_value);
    //}
}

#define EXPECT_ERROR(error, expected) expect_error(error, expected, __LINE__)
#define PARSER_TEST(expression, error) parser_test(code, code_max, expression, error, __LINE__)
#define EVAL_TEST(expression, expected) eval_test(code, code_max, expression, expected, __LINE__)
#define EVAL_TEST1(expression) EVAL_TEST(#expression, expression)

void run_tests(uint8_t *code, int code_max)
{
    PARSER_TEST("bs", SSRE_ERROR_UNKNOWN_IDENTIFIER);
    PARSER_TEST("b)", SSRE_ERROR_MISSING_OPEN_PAREN);
    PARSER_TEST("(b", SSRE_ERROR_MISSING_CLOSE_PAREN);
    PARSER_TEST("+", SSRE_ERROR_UNEXPECTED_SYMBOL);
    PARSER_TEST("b+", SSRE_ERROR_UNEXPECTED_SYMBOL);
    PARSER_TEST("pow(1)", SSRE_ERROR_TOO_FEW_FUNCTION_ARGUMENTS);
    PARSER_TEST("sqrt(1,1)", SSRE_ERROR_TOO_MANY_FUNCTION_ARGUMENTS);

    EVAL_TEST("", 0.0f);
    EVAL_TEST(NULL, 0.0f);

    EVAL_TEST("1", 1.0f);
    EVAL_TEST("(1)", 1.0f);
    EVAL_TEST("a", 1.1f);
    EVAL_TEST("b", 2.2f);
    EVAL_TEST("((((((((((a))))))))))", 1.1f);
    EVAL_TEST("+a", 1.1f);
    EVAL_TEST("++a", 1.1f);
    EVAL_TEST("-a", -1.1f);
    EVAL_TEST("--a", 1.1f);
    EVAL_TEST("---a", -1.1f);
    EVAL_TEST("1---1", 0.0f);
    EVAL_TEST("a---a", 0.0f);
    EVAL_TEST("(1.0+-a)", (1.0f+-a));
    EVAL_TEST("a-a+-a", a-a+-a);
    EVAL_TEST("a+b*c", a+b*c);
    EVAL_TEST("(a+b)*c", (a+b)*c);
    EVAL_TEST("a-b/c", a-b/c);
    EVAL_TEST("(a-b)/c", (a-b)/c);
    EVAL_TEST("a-(b+c)", a-(b+c));
    EVAL_TEST("a/(b+c)", a/(b+c));
    EVAL_TEST("((x/y)-(50.0*w))", ((x/y)-(50.0f*w)));
    EVAL_TEST("a/pow(b, c)", a/powf(b, c));
    EVAL_TEST("a**b**c", powf(a, powf(b, c)));
    EVAL_TEST("a*a**b*b**c*c", a*powf(a, b)*powf(b, c)*c);
    EVAL_TEST("sqrt(b)", sqrtf(b));
    EVAL_TEST("sqrt(sqrt(sqrt(b)))", sqrtf(sqrtf(sqrtf(b))));

    EVAL_TEST("((2*a-b+(d-0.4)/2)*0.5+((w-x)-z+-x-1))/2", ((2.0f*a-b+(d-0.4f)/2.0f)*0.5f+((w-x)-z+-x-1))/2.0f);

    EVAL_TEST("x/y/(((x-y)+(7.321+w))-(x*y))", x/y/(((x-y)+(7.321f+w))-(x*y)));

    EVAL_TEST("((((((x-(y/(7.123*w)))/(((x-y)*7.123)-w))/((((x+y)*7.123)-w)-((x+y)-(7.123*w))))/(((((x-y)*7.123)-w)*((x+y)-(7.123/w)))*(((x+y)-(7.123*w))+((x/y)+(7.123+w)))))/((((x-((y-7.321)*w))+((x/y)-(7.321*w)))+(((x/y)-(7.321/w))*((x-y)+(7.321+w))))+((((x/y)-(7.321*w))/((x-y)*(7.321+w)))/(((x-y)+(7.321+w))-((x*y)*(7.321+w)))))))", ((((((x-(y/(7.123f*w)))/(((x-y)*7.123f)-w))/((((x+y)*7.123f)-w)-((x+y)-(7.123f*w))))/(((((x-y)*7.123f)-w)*((x+y)-(7.123f/w)))*(((x+y)-(7.123f*w))+((x/y)+(7.123f+w)))))/((((x-((y-7.321f)*w))+((x/y)-(7.321f*w)))+(((x/y)-(7.321f/w))*((x-y)+(7.321f+w))))+((((x/y)-(7.321f*w))/((x-y)*(7.321f+w)))/(((x-y)+(7.321f+w))-((x*y)*(7.321f+w))))))));

    EVAL_TEST("(b-cos(sin(tan(cos(((sin(((((sin(((((3.70+(2.48+sin((b/(sin(((3.46*sin(sin(((cos((cos(sin(((tan(((((0.74*((sin((0.10-((cos(cos((tan(a)/a)))+2.68)-b)))*b)*b))+pi)-2.83)-b))+a)+pi)))*0.89))/pi)+b))))+3.72))*a)))))+e)+e)/pi))*1.03)*2.24)+b)/a))/pi)+1.53))))))", (b-cosf(sinf(tanf(cosf(((sinf(((((sinf(((((3.70f+(2.48f+sinf((b/(sinf(((3.46f*sinf(sinf(((cosf((cosf(sinf(((tanf(((((0.74f*((sinf((0.10f-((cosf(cosf((tanf(a)/a)))+2.68f)-b)))*b)*b))+pi)-2.83f)-b))+a)+pi)))*0.89f))/pi)+b))))+3.72f))*a)))))+e)+e)/pi))*1.03f)*2.24f)+b)/a))/pi)+1.53f)))))));

    /* constant expressions */
    EVAL_TEST("(2.2-cos(sin(tan(cos(((sin(((((sin(((((3.70+(2.48+sin((2.2/(sin(((3.46*sin(sin(((cos((cos(sin(((tan(((((0.74*((sin((0.10-((cos(cos((tan(1.1)/1.1)))+2.68)-2.2)))*2.2)*2.2))+pi)-2.83)-2.2))+1.1)+pi)))*0.89))/pi)+2.2))))+3.72))*1.1)))))+e)+e)/pi))*1.03)*2.24)+2.2)/1.1))/pi)+1.53))))))", (2.2f-cosf(sinf(tanf(cosf(((sinf(((((sinf(((((3.70f+(2.48f+sinf((2.2f/(sinf(((3.46f*sinf(sinf(((cosf((cosf(sinf(((tanf(((((0.74f*((sinf((0.10f-((cosf(cosf((tanf(1.1f)/1.1f)))+2.68f)-2.2f)))*2.2f)*2.2f))+pi)-2.83f)-2.2f))+1.1f)+pi)))*0.89f))/pi)+2.2f))))+3.72f))*1.1f)))))+e)+e)/pi))*1.03f)*2.24f)+2.2f)/1.1f))/pi)+1.53f)))))));

    EVAL_TEST("(1.1+0.9)/2-1", (1.1f+0.9f)/2.0f-1.0f);

    EVAL_TEST("a+b+1*2-4", a+b+1*2-4);

    EVAL_TEST("a+b*2/sin(c)", a+b*2/sin(c));

    printf("All tests passed\n");
}

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#else
#include <sys/mman.h>
#endif

int main()
{
#ifdef _WIN32
    uint8_t *code = (uint8_t*)VirtualAlloc(NULL, 4096, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
    uint8_t *code = (uint8_t*)mmap(NULL, 4096, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
#endif
    run_tests(code, 4096);
    return 0;
}
	/*
	* Copyright (c) 2017-2018 Sergej Reich
	*
	* This software is provided 'as-is', without any express or implied
	* warranty. In no event will the authors be held liable for any damages
	* arising from the use of this software.
	*
	* Permission is granted to anyone to use this software for any purpose,
	* including commercial applications, and to alter it and redistribute it
	* freely, subject to the following restrictions:
	*
	* 1. The origin of this software must not be misrepresented; you must not
	* claim that you wrote the original software. If you use this software
	* in a product, an acknowledgment in the product documentation would be
	* appreciated but is not required.
	*
	* 2. Altered source versions must be plainly marked as such, and must not be
	* misrepresented as being the original software.
	*
	* 3. This notice may not be removed or altered from any source
	* distribution.
	*/

	/* Mathematical expression compiler and evaluator
	* Small, staightforward C code, doesn't use heap allocation.
	* Generates x64 machine code or virtual machine bytecode.
	*/

	#pragma once

	#include <stdint.h>

	#ifdef __cplusplus
	extern "C" {
	#endif

	enum { /* only error code, we could also return more context (symbol, position) if needed */
	SSRE_ERROR_NONE = 0,
	SSRE_ERROR_CODE_ARRAY_TOO_SMALL = -1,
	SSRE_ERROR_UNKNOWN_IDENTIFIER = -2,
	SSRE_ERROR_UNEXPECTED_SYMBOL = -3,
	SSRE_ERROR_MISSING_OPEN_PAREN = -4,
	SSRE_ERROR_MISSING_CLOSE_PAREN = -5,
	SSRE_ERROR_TOO_FEW_FUNCTION_ARGUMENTS = -6,
	SSRE_ERROR_TOO_MANY_FUNCTION_ARGUMENTS = -7,
	};

	/* does a dry run of the parser to calculate the size of the code array, doesn't check errors */
	int ssre_calc_code_size(const char *constants, float constant_values, int num_constants,
	const char **variables, int num_variables,
	const char *expression_str);


	/* Fills code array with vm bytecode or machine code if supported.
	* Will return error if array too small or expression invalid, SSRE_ERROR_NONE on success.
	* If expression_str is empty, generated bytecode will evaluate to 0 */
	int ssre_compile(uint8_t *code, int code_max,
	const char *constants, float constant_values, int num_constants,
	const char **variables, int num_variables,
	const char *expression_str);

	/* variable_values has to be in sync with the variables array passed to the compiler
	* code array needs to be in executable memory if machine code is used */
	float ssre_evaluate(uint8_t code, float variable_values);

	#ifdef __cplusplus
	}
	#endif
	#if 0
	cc ${0} -O3 -Wall -Wextra -Wno-missing-field-initializers -lm -o _script_
	./_script_ "${@}"
	ret=$?
	rm _script_
	exit $ret
	#endif


	#include <stdio.h>
	#include <math.h>

	#include "expression.h"
	#include "expression.c"

	float pi = 3.14159265358979323846f;
	float e = 2.71828182845904523536f;
	float a = 1.1f;
	float b = 2.2f;
	float c = 3.3f;
	float d = 4.4f;
	float x = 2.123456f;
	float y = 3.123456f;
	float z = 4.123456f;
	float w = 5.123456f;
	const char *constants[] = {"pi", "e"};
	float constant_values[] = {3.14159265358979323846f, 2.71828182845904523536f};
	const char *variables[] = {"a", "b", "c", "d", "x", "y", "z", "w", "sqrt", "a3"};
	float variable_values[] = {1.1f, 2.2f, 3.3f, 4.4f, 2.123456f, 3.123456f, 4.123456f, 5.123456f, 3.1f, 33.456f};
	bool expect_error(int err, int expected, int line)
	{
	if (err != expected) {
	switch(err) {
	case SSRE_ERROR_NONE:
	printf("Line: %i: error: SSRE_ERROR_NONE", line);
	break;
	case SSRE_ERROR_CODE_ARRAY_TOO_SMALL:
	printf("Line: %i: error: SSRE_ERROR_CODE_ARRAY_TOO_SMALL", line);
	break;
	case SSRE_ERROR_UNKNOWN_IDENTIFIER:
	printf("Line: %i: error: SSRE_ERROR_UNKNOWN_IDENTIFIER", line);
	break;
	case SSRE_ERROR_UNEXPECTED_SYMBOL:
	printf("Line: %i: error: SSRE_ERROR_UNEXPECTED_SYMBOL", line);
	break;
	case SSRE_ERROR_MISSING_OPEN_PAREN:
	printf("Line: %i: error: SSRE_ERROR_MISSING_OPEN_PAREN", line);
	break;
	case SSRE_ERROR_MISSING_CLOSE_PAREN:
	printf("Line: %i: error: SSRE_ERROR_MISSING_CLOSE_PAREN", line);
	break;
	case SSRE_ERROR_TOO_FEW_FUNCTION_ARGUMENTS:
	printf("Line: %i: error: SSRE_ERROR_TOO_FEW_FUNCTION_ARGUMENTS", line);
	break;
	case SSRE_ERROR_TOO_MANY_FUNCTION_ARGUMENTS:
	printf("Line: %i: error: SSRE_ERROR_TOO_MANY_FUNCTION_ARGUMENTS", line);
	break;
	}
	return false;
	}
	return true;
	}

	void print_x64_code(uint8_t *code)
	{
	for (int i = 8;; i++) {
	printf("%02x ", code[i]);
	if (code[i] == 0xc3) /* ret */
	break;
	}
	printf("\n");
	}

	void parser_test(uint8_t code, int code_max, const char expression_str, int expected_error, int line)
	{
	int err = ssre_compile(code, code_max,
	constants, constant_values, arraycount(constants),
	variables, arraycount(variables),
	expression_str);
	if (!expect_error(err, expected_error, line)) {
	printf(" in test \"%s\"\n", expression_str);
	abort();
	}
	}

	void eval_test(uint8_t code, int code_max, const char expression_str, float expected_value, int line)
	{
	int err = ssre_compile(code, code_max,
	constants, constant_values, arraycount(constants),
	variables, arraycount(variables),
	expression_str);
	if (!expect_error(err, SSRE_ERROR_NONE, line)) {
	printf(" in test \"%s\"\n", expression_str);
	abort();
	}

	float value = ssre_evaluate(code, variable_values);
	if (value != expected_value) {
	printf("Line: %i: test \"%s\" failed, expected: %f, got: %f\n", line, expression_str, expected_value, value);
	abort();
	}
	//else {
	//printf("Test passed, %s = %f = %f\n", expression_str, value, expected_value);
	//}
	}

	#define EXPECT_ERROR(error, expected) expect_error(error, expected, __LINE__)
	#define PARSER_TEST(expression, error) parser_test(code, code_max, expression, error, __LINE__)
	#define EVAL_TEST(expression, expected) eval_test(code, code_max, expression, expected, __LINE__)
	#define EVAL_TEST1(expression) EVAL_TEST(#expression, expression)

	void run_tests(uint8_t *code, int code_max)
	{
	PARSER_TEST("bs", SSRE_ERROR_UNKNOWN_IDENTIFIER);
	PARSER_TEST("b)", SSRE_ERROR_MISSING_OPEN_PAREN);
	PARSER_TEST("(b", SSRE_ERROR_MISSING_CLOSE_PAREN);
	PARSER_TEST("+", SSRE_ERROR_UNEXPECTED_SYMBOL);
	PARSER_TEST("b+", SSRE_ERROR_UNEXPECTED_SYMBOL);
	PARSER_TEST("pow(1)", SSRE_ERROR_TOO_FEW_FUNCTION_ARGUMENTS);
	PARSER_TEST("sqrt(1,1)", SSRE_ERROR_TOO_MANY_FUNCTION_ARGUMENTS);

	EVAL_TEST("", 0.0f);
	EVAL_TEST(NULL, 0.0f);

	EVAL_TEST("1", 1.0f);
	EVAL_TEST("(1)", 1.0f);
	EVAL_TEST("a", 1.1f);
	EVAL_TEST("b", 2.2f);
	EVAL_TEST("((((((((((a))))))))))", 1.1f);
	EVAL_TEST("+a", 1.1f);
	EVAL_TEST("++a", 1.1f);
	EVAL_TEST("-a", -1.1f);
	EVAL_TEST("--a", 1.1f);
	EVAL_TEST("---a", -1.1f);
	EVAL_TEST("1---1", 0.0f);
	EVAL_TEST("a---a", 0.0f);
	EVAL_TEST("(1.0+-a)", (1.0f+-a));
	EVAL_TEST("a-a+-a", a-a+-a);
	EVAL_TEST("a+bc", a+bc);
	EVAL_TEST("(a+b)c", (a+b)c);
	EVAL_TEST("a-b/c", a-b/c);
	EVAL_TEST("(a-b)/c", (a-b)/c);
	EVAL_TEST("a-(b+c)", a-(b+c));
	EVAL_TEST("a/(b+c)", a/(b+c));
	EVAL_TEST("((x/y)-(50.0w))", ((x/y)-(50.0fw)));
	EVAL_TEST("a/pow(b, c)", a/powf(b, c));
	EVAL_TEST("abc", powf(a, powf(b, c)));
	EVAL_TEST("aabb*cc", apowf(a, b)powf(b, c)*c);
	EVAL_TEST("sqrt(b)", sqrtf(b));
	EVAL_TEST("sqrt(sqrt(sqrt(b)))", sqrtf(sqrtf(sqrtf(b))));

	EVAL_TEST("((2a-b+(d-0.4)/2)0.5+((w-x)-z+-x-1))/2", ((2.0fa-b+(d-0.4f)/2.0f)0.5f+((w-x)-z+-x-1))/2.0f);

	EVAL_TEST("x/y/(((x-y)+(7.321+w))-(xy))", x/y/(((x-y)+(7.321f+w))-(xy)));

	EVAL_TEST("((((((x-(y/(7.123w)))/(((x-y)7.123)-w))/((((x+y)7.123)-w)-((x+y)-(7.123w))))/(((((x-y)7.123)-w)((x+y)-(7.123/w)))(((x+y)-(7.123w))+((x/y)+(7.123+w)))))/((((x-((y-7.321)w))+((x/y)-(7.321w)))+(((x/y)-(7.321/w))((x-y)+(7.321+w))))+((((x/y)-(7.321w))/((x-y)(7.321+w)))/(((x-y)+(7.321+w))-((xy)(7.321+w)))))))", ((((((x-(y/(7.123fw)))/(((x-y)7.123f)-w))/((((x+y)7.123f)-w)-((x+y)-(7.123fw))))/(((((x-y)7.123f)-w)((x+y)-(7.123f/w)))(((x+y)-(7.123fw))+((x/y)+(7.123f+w)))))/((((x-((y-7.321f)w))+((x/y)-(7.321fw)))+(((x/y)-(7.321f/w))((x-y)+(7.321f+w))))+((((x/y)-(7.321fw))/((x-y)(7.321f+w)))/(((x-y)+(7.321f+w))-((xy)(7.321f+w))))))));

	EVAL_TEST("(b-cos(sin(tan(cos(((sin(((((sin(((((3.70+(2.48+sin((b/(sin(((3.46sin(sin(((cos((cos(sin(((tan(((((0.74((sin((0.10-((cos(cos((tan(a)/a)))+2.68)-b)))b)b))+pi)-2.83)-b))+a)+pi)))0.89))/pi)+b))))+3.72))a)))))+e)+e)/pi))1.03)2.24)+b)/a))/pi)+1.53))))))", (b-cosf(sinf(tanf(cosf(((sinf(((((sinf(((((3.70f+(2.48f+sinf((b/(sinf(((3.46fsinf(sinf(((cosf((cosf(sinf(((tanf(((((0.74f((sinf((0.10f-((cosf(cosf((tanf(a)/a)))+2.68f)-b)))b)b))+pi)-2.83f)-b))+a)+pi)))0.89f))/pi)+b))))+3.72f))a)))))+e)+e)/pi))1.03f)2.24f)+b)/a))/pi)+1.53f)))))));

	/* constant expressions */
	EVAL_TEST("(2.2-cos(sin(tan(cos(((sin(((((sin(((((3.70+(2.48+sin((2.2/(sin(((3.46sin(sin(((cos((cos(sin(((tan(((((0.74((sin((0.10-((cos(cos((tan(1.1)/1.1)))+2.68)-2.2)))2.2)2.2))+pi)-2.83)-2.2))+1.1)+pi)))0.89))/pi)+2.2))))+3.72))1.1)))))+e)+e)/pi))1.03)2.24)+2.2)/1.1))/pi)+1.53))))))", (2.2f-cosf(sinf(tanf(cosf(((sinf(((((sinf(((((3.70f+(2.48f+sinf((2.2f/(sinf(((3.46fsinf(sinf(((cosf((cosf(sinf(((tanf(((((0.74f((sinf((0.10f-((cosf(cosf((tanf(1.1f)/1.1f)))+2.68f)-2.2f)))2.2f)2.2f))+pi)-2.83f)-2.2f))+1.1f)+pi)))0.89f))/pi)+2.2f))))+3.72f))1.1f)))))+e)+e)/pi))1.03f)2.24f)+2.2f)/1.1f))/pi)+1.53f)))))));

	EVAL_TEST("(1.1+0.9)/2-1", (1.1f+0.9f)/2.0f-1.0f);

	EVAL_TEST("a+b+12-4", a+b+12-4);

	EVAL_TEST("a+b2/sin(c)", a+b2/sin(c));

	printf("All tests passed\n");
	}

	#ifdef _WIN32
	#define WIN32_LEAN_AND_MEAN
	#include <Windows.h>
	#else
	#include <sys/mman.h>
	#endif

	int main()
	{
	#ifdef _WIN32
	uint8_t code = (uint8_t)VirtualAlloc(NULL, 4096, MEM_RESERVE\|MEM_COMMIT, PAGE_EXECUTE_READWRITE);
	#else
	uint8_t code = (uint8_t)mmap(NULL, 4096, PROT_READ\|PROT_WRITE\|PROT_EXEC, MAP_PRIVATE\|MAP_ANONYMOUS, -1, 0);
	#endif
	run_tests(code, 4096);
	return 0;
	}