Ruhrpottpatriot/lexer.h

## lexer.h
#pragma once

#include <stdint.h>

// Normally located in Parser.c
#define ulex_token(name) Tok_ ## name

#define ulex_token_type uint32_t
#define ulex_trans(name) (0x80000000 | (uint32_t)ulex_token(name) << 16)
#define ulex_eof(name) (uint16_t)ulex_token(name)


struct ulex_lexer {
    const uint32_t *equiv_table;
    const uint32_t *trans_table;
    const uint16_t *eof_table;

    uint32_t state;

    // The fields below make up a string
    // Note: The intptr_t are only ever used inside the lexer
    // They probably should be uintptr_t instead
    const uint8_t *source;
    intptr_t source_index;
    intptr_t source_size;
};

enum {
    ulex_error_none = 0,                // No error
    ulex_error_insufficient_buffer = 1, // Buffer too small
};
typedef uint32_t ulex_error;

enum {
    ulex_flags_chunk = 0x1, // input is partial, no eof
};
typedef uint32_t ulex_flags;

static intptr_t lex(
        struct ulex_lexer *lexer,
        uint32_t *__restrict trans_buffer,
        uint32_t *__restrict offset_buffer,
        intptr_t buffer_size
) {
    const uint32_t *__restrict equiv_table = lexer->equiv_table;
    const uint32_t *__restrict trans_table = lexer->trans_table;

    uint32_t state = lexer->state;

    const uint8_t *source = lexer->source;
    intptr_t source_offset = lexer->source_index;
    intptr_t source_size = lexer->source_size;

    intptr_t buffer_offset = 0;
    buffer_size *= 4;

    // Loop until we have either reached the maximum size, i.e. we have run out of space, for the given buffers,
    // or we have handled the complete buffer
    while ((source_offset < source_size) & (buffer_offset < buffer_size)) {
        // The below code is equivalent to
        // uint32_t equiv = equiv_table[source[source_offset]];
        // uint32_t trans = *(const uint32_t*)((const char*)trans_table + equiv + state);
        //
        // *(uint32_t*)((char*)trans_buffer + buffer_offset) = trans;
        // *(uint32_t*)((char*)offset_buffer + buffer_offset) = (uint32_t)source_offset;
        //
        // state = trans & 0xffff;
        // buffer_offset += trans >> 29;
        // source_offset += 1;

        // source: u8 const *
        uint8_t equiv_index = *(source + source_offset);

        // equiv_table: u32 const *
        uint32_t equiv = *(equiv_table + equiv_index);

        // Cast the trans_table pointer from a u32 to a char so that the addition
        // will move the pointer with `equiv` and `state` by BYTEs and not a DWORDs.
        // After that cast it back to u32, so we have the correct type
        // trans_table: u32 const*
        const char* trans_table_char_ptr = (const char*)trans_table + equiv + state;
        uint32_t trans = *(const uint32_t*)trans_table_char_ptr;

        // Same reason as directly above for the casting to char* and back to u32*
        *(uint32_t *) ((char *) trans_buffer + buffer_offset) = trans;
        *(uint32_t *) ((char *) offset_buffer + buffer_offset) = (uint32_t) source_offset;

        // Take the lower 16 bit
        state = trans & 0xffff;

        // Shift 29 bits to the right
        buffer_offset += trans >> 29;

        // Increment source offset, i.e. move one token ahead
        source_offset += 1;
    }

    // Set the new state of the state machine and update the current index into the source file
    lexer->state = state;
    lexer->source_index = source_offset;

    // Return the number of tokens to the caller
    return buffer_offset / 4;
}

ulex_error ulex_lex(
        struct ulex_lexer *lexer,   // The lexer responsible for the source lexing
        ulex_flags flags,           // The flags describing the source code
        uint32_t *token_buffer,     // Holds a list of all tokens from the source file
        uint32_t *offset_buffer,
        intptr_t buffer_size,       // The size of the two  buffers above
        intptr_t *out_token_count   // Returns how many tokens are in the source file
) {
    // Steps taken:
    // * Desugar `arr[i]` to `*(arr + i)`
    // * Desugar `i++` to `i += 1`;
    // * Added more variables to remove nested array access

    // first transition gets special handling
    if (lexer->state == (uint32_t) -1) {
        // The below code is equivalent to:
        // uint32_t equiv = lexer->equiv_table[lexer->source[lexer->source_index++]];
        // lexer->state = *(const uint32_t*)((const char*)lexer->trans_table + equiv) & 0xffff;


        // Get the current index in the source string, return it, then increment the source index
        uint32_t source_idx = lexer->source_index;
        lexer->source_index += 1;

        // Get the byte/uint8 at the current source index and implicitly cast it to u32 so we can use it to index
        // the equiv_table
        // lexer->source: u8* => lexer->source[i]: u8
        uint32_t current_char = *(lexer->source + source_idx);

        // Access the equiv table with the current char cast to uint32
        uint32_t equiv = *(lexer->equiv_table + current_char);

        // Cast the trans_table pointer from a u32 to a char so that the addition
        // will move the pointer by a BYTE and not a DWORD.
        // After that cast it back to u32, so we have the correct type
        const char *trans_table_char_ptr = (const char *) lexer->trans_table + equiv;
        const uint32_t *trans_table_u32_ptr = (const uint32_t *) trans_table_char_ptr;

        // Dereference the table ptr and use a 16 bit mask to get the lower 16 bits
        // This allows us to use the lower bits as index into the state machine
        lexer->state = *trans_table_u32_ptr & 0xffff;
    }

    // Call the actual lexing function, which returns the number of tokens that are present
    // in the buffer
    intptr_t token_count = lex(lexer, token_buffer, offset_buffer, buffer_size);

    // transform transitions to tokens
    for (intptr_t i = 0; i < token_count; ++i) {
        // The below code is equivalent to:
        // token_buffer[i] = (token_buffer[i] >> 16) & 0xfff;

        // Take the token at index i and shift it 16 bits to the right, then take the lower 12 bits of it
        // This is equivalent to getting bytes 16-27
        // token_buffer:  uint32_t *

        uint32_t element = *(token_buffer + i);
        uint32_t element_shifted = element >> 16;
        uint32_t element_masked = element_shifted & 0xfff;

        *(token_buffer + i) = element_masked;
    }

    // Test if everything fit into the buffers
    int source_end = lexer->source_index == lexer->source_size;

    // handle end of file by adding an end of file token
    // to the token buffer and incrementing the token counter by one
    // It's also rested whether the given source file is a chunk,
    // i.e. not a complete source code file and therefore has no EOF marker
    if ((flags & ulex_flags_chunk) == 0 && source_end) {
        // The below code is equivalent to:
        // token_buffer[token_count] = lexer->eof_table[lexer->state / 4];
        // offset_buffer[token_count] = (uint32_t) lexer->source_size;
        // ++token_count;

        uint32_t index = lexer->state / 4;
        *(token_buffer + token_count) = *(lexer->eof_table + index);
        *(offset_buffer + token_count) = (uint32_t) lexer->source_size;

        token_count += 1;
    }

    // Assign the outbound count with the number of tokens in the source file
    *out_token_count = token_count;

    // If we could store all tokens inside the buffers we are good,
    // else we want to return an error indicating that the provided buffer wasn't big enough
    return source_end ? ulex_error_none : ulex_error_insufficient_buffer;
}

// Predefined tables
static const uint32_t ulex_equiv_table[0x100] = {
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xfc, 0x1f8, 0xfc, 0xfc, 0x2f4, 0x0, 0x0,
        /* Remaining members omitted */
};

static const uint32_t ulex_trans_table[0x7e0] = {
        0x0, 0x34, 0x4, 0x2c, 0x10, ulex_trans(Error) | 0x0, ulex_trans(Error) | 0x0, ulex_trans(Error) | 0x0, ulex_trans(Error) | 0x0, ulex_trans(Error) | 0x0, 0x2c, 0x2c, 0x10, 0x34, 0x34, ulex_trans(IntDec) | 0x0, ulex_trans(Newline) | 0x0, ulex_trans(Colon) | 0x0, 0xa4, ulex_trans(Lt) | 0x0, ulex_trans(Assign) | 0x0, ulex_trans(Gt) | 0x0, ulex_trans(Word) | 0x0, ulex_trans(LBrack) | 0x0, ulex_trans(Div) | 0x0, ulex_trans(RBrack) | 0x0, ulex_trans(Dot) | 0x0, ulex_trans(Sub) | 0x0, ulex_trans(Ne) | 0x0, ulex_trans(Comma) | 0x0, ulex_trans(String) | 0x0, ulex_trans(Add) | 0x0, ulex_trans(ModAssign) | 0x0, ulex_trans(Con) | 0x0, ulex_trans(MulAssign) | 0x0, ulex_trans(AddAssign) | 0x0, ulex_trans(SubAssign) | 0x0, ulex_trans(DivAssign) | 0x0, ulex_trans(Mul) | 0x0, ulex_trans(Eq) | 0x0, ulex_trans(IntDec) | 0x0, 0xec, ulex_trans(RParen) | 0x0, ulex_trans(Le) | 0x0, ulex_trans(Ge) | 0x0, ulex_trans(Whitespace) | 0x0, ulex_trans(LParen) | 0x0, ulex_trans(Mod) | 0x0, ulex_trans(DocComment) | 0x0, ulex_trans(Dis) | 0x0, 0x2c, ulex_trans(Float) | 0x0, ulex_trans(IntHex) | 0x0, ulex_trans(BlockComment) | 0x0, ulex_trans(Not) | 0x0, ulex_trans(IntDec) | 0x0, ulex_trans(IntDec) | 0x0, ulex_trans(Word) | 0x0, ulex_trans(IntDec) | 0x0, 0xec, ulex_trans(Float) | 0x0, ulex_trans(Float) | 0x0, ulex_trans(Whitespace) | 0x0,
        /* Remaining members omitted */
};

static const uint16_t ulex_eof_table[0x3f] = {
        ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(IntDec), ulex_eof(Newline), ulex_eof(Colon), ulex_eof(LineComment), ulex_eof(Lt), ulex_eof(Assign), ulex_eof(Gt), ulex_eof(Word), ulex_eof(LBrack), ulex_eof(Div), ulex_eof(RBrack), ulex_eof(Dot), ulex_eof(Sub), ulex_eof(Ne), ulex_eof(Comma), ulex_eof(String), ulex_eof(Add), ulex_eof(ModAssign), ulex_eof(Con), ulex_eof(MulAssign), ulex_eof(AddAssign), ulex_eof(SubAssign), ulex_eof(DivAssign), ulex_eof(Mul), ulex_eof(Eq), ulex_eof(IntDec), ulex_eof(LineComment), ulex_eof(RParen), ulex_eof(Le), ulex_eof(Ge), ulex_eof(Whitespace), ulex_eof(LParen), ulex_eof(Mod), ulex_eof(DocComment), ulex_eof(Dis), ulex_eof(String), ulex_eof(Float), ulex_eof(IntHex), ulex_eof(BlockComment), ulex_eof(Not), ulex_eof(IntDec), ulex_eof(IntDec), ulex_eof(Word), ulex_eof(IntDec), ulex_eof(LineComment), ulex_eof(Float), ulex_eof(Float), ulex_eof(Whitespace),
};

## lexer_v1.rs
use std::ptr::null_mut;

struct Lexer {
    equiv_table: *const u32,
    trans_table: *const u32,
    eof_table: *const u32,

    state: Option<u32>,

    source: *mut u8,

    source_index: isize,
    source_size: isize,
}

impl Lexer {
    fn reset(&mut self) {
        self.state = None;
    }

    unsafe fn new() -> Self {
        Lexer {
            trans_table: ULEX_TRANS_TABLE.as_ptr(),
            equiv_table: ULEX_EQUIV_TABLE.as_ptr(),
            eof_table: ULEX_EOF_TABLE.as_ptr(),

            state: None,
            source: null_mut(),
            source_index: 0,
            source_size: 0,
        }
    }


    unsafe fn ulex_lex(
        &mut self,
        flags: Flags,
        token_buffer: *mut u32,
        offset_buffer: *mut u32,
        buffer_size: isize,
    ) -> Result<isize, Error> {

        // First transition gets special handling
        if self.state.is_none() {
            // Get the index first, then increment it
            // This is equivalent to `x := arr[i++]`
            let idx = self.source_index;
            self.source_index += 1;

            // Get the current char from the source string
            let current_char = *(self.source.offset(idx) as *const isize);
            let equivalence = *(self.equiv_table.offset(current_char) as *const usize);


            let trans = *((self.trans_table as *const char).add(equivalence) as *const u32);
            let trans = trans & 0xffff;

            self.state = Some(trans);
        }

        let mut token_count = self.lex(token_buffer, offset_buffer, buffer_size);

        for i in 0..token_count {
            *token_buffer.offset(i) = (*token_buffer.offset(i) >> (16 as u32)) & 0xfff;
        }

        // handle end of file
        let source_end = self.source_index == self.source_size;
        if flags != Flags::Chunk && source_end {
            let state = (self.state.unwrap() / 4) as usize;
            *token_buffer.offset(token_count) = *self.eof_table.add(state);

            *offset_buffer.offset(token_count) = self.source_size as u32;

            token_count += 1;
        }

        Ok(token_count)
    }

    unsafe fn lex(
        &mut self,
        trans_buffer: *mut u32,
        offset_buffer: *mut u32,
        buffer_size: isize,
    ) -> isize {
        let equiv_table = self.equiv_table;
        let trans_table = self.trans_table;

        // Can only be called by ulex_lex, which will always set the state
        let mut state = self.state.unwrap();

        let source = self.source;
        let mut source_offset = self.source_index;
        let source_size = self.source_size;

        let mut buffer_offset = 0;
        let buffer_size = buffer_size / 4;

        while (source_offset < source_size) & (buffer_offset < buffer_size) {
            let idx = (*source.offset(source_offset)) as isize;
            let equiv = *equiv_table.offset(idx);


            let idx = (equiv + state) as usize;
            let trans = *((trans_table as *const char).add(idx) as *const u32);

            // Cast the buffers to a char ptr so we can walk the offset byte by byte and not by dword by dword

            // unsafe
            *((trans_buffer as *mut char).offset(buffer_offset) as *mut u32) = trans;
            *((offset_buffer as *mut char).offset(buffer_offset) as *mut u32) = source_offset as u32;

            state = trans & 0xffff;
            buffer_offset += (trans >> 29) as isize;
            source_offset += 1;
        }

        self.state = Some(state);
        self.source_index = source_offset;

        return buffer_offset / 4;
    }
}

#[derive(PartialEq, Eq)]
enum Error {
    InsufficientBuffer
}

#[derive(PartialEq, Eq)]
enum Flags {
    Chunk = 0x1
}


// Predefined lexer tables

// Equiv table, each two rows are sixteen elements
static ULEX_EQUIV_TABLE: &'static [u32; 0x100] = &[
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0xfc, 0x1f8, 0xfc, 0xfc, 0x2f4, 0x0, 0x0,
    /* Rest of members omitted for brevity  */
];

// static ULEX_TRANS_TABLE: &'static [u32; 0x7e0] = todo!(); // Replace with actual table
static ULEX_TRANS_TABLE: &'static [u32; 0x1] = &[0x0 as u32];

// static ULEX_EOF_TABLE: &'static [u32; 0x3f] = todo!();    // Replace with actual table
static ULEX_EOF_TABLE: &'static [u32; 0x1] = &[0x0 as u32];

## lexer_v2.rs
// Predefined lexer tables

// Equiv table, each two rows are sixteen elements
static ULEX_EQUIV_TABLE: &'static [u32; 0x100] = &[
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0xfc, 0x1f8, 0xfc, 0xfc, 0x2f4, 0x0, 0x0,
    /* Rest of members omitted for brevity  */
];

// static ULEX_TRANS_TABLE: &'static [u32; 0x7e0] = todo!(); // Replace with actual table
static ULEX_TRANS_TABLE: &'static [u32; 0x1] = &[0x0 as u32];

// static ULEX_EOF_TABLE: &'static [u32; 0x3f] = todo!();    // Replace with actual table
static ULEX_EOF_TABLE: &'static [u32; 0x1] = &[0x0 as u32];struct Lexer<'a> {
    equiv_table: &'a [u32],
    trans_table: &'a [u32],
    eof_table: &'a [u32],

    state: Option<u32>,

    source: Vec<u8>,

    source_index: usize,
    source_size: usize,
}

impl Lexer<'_> {
    fn reset(&mut self) {
        self.state = None;
    }

    unsafe fn new() -> Self {
        Lexer {
            trans_table: ULEX_TRANS_TABLE,
            equiv_table: ULEX_EQUIV_TABLE,
            eof_table: ULEX_EOF_TABLE,

            state: None,
            source: Vec::new(),
            source_index: 0,
            source_size: 0,
        }
    }

    fn ulex_lex(
        &mut self,
        flags: Flags,
        token_buffer: &mut [u32],
        offset_buffer: &mut [u32],
        buffer_size: isize,
    ) -> Result<usize, Error> {

        // First transition gets special handling
        if self.state.is_none() {
            // Get the index first, then increment it
            // This is equivalent to `x := arr[i++]`
            let idx = self.source_index;
            self.source_index += 1;


            // Get the current char from the source string
            let current_char = self.source[idx] as usize;
            let equivalence = self.equiv_table[current_char] as usize;

            let trans = get_middle_index(self.trans_table, equivalence);
            let trans = trans & 0xffff;
            self.state = Some(trans);
        }

        let mut token_count = self.lex(token_buffer, offset_buffer, buffer_size);

        for i in 0..token_count {
            token_buffer[i] = token_buffer[i] >> 16 & 0xfff;
        }

        // handle end of file
        let source_end = self.source_index == self.source_size;
        if flags != Flags::Chunk && source_end {
            let state = (self.state.unwrap() / 4) as usize;

            token_buffer[token_count] = self.eof_table[state];
            offset_buffer[token_count] = self.source_size as u32;

            token_count += 1;
        }

        Ok(token_count)
    }

    fn lex(
        &mut self,
        trans_buffer: &mut [u32],
        offset_buffer: &mut [u32],
        buffer_size: isize,
    ) -> usize {
        let equiv_table = self.equiv_table;
        let trans_table = self.trans_table;

        // Can only be called by ulex_lex, which will always set the state
        let mut state = self.state.unwrap();

        let source = &self.source;
        let mut source_offset = self.source_index as usize;
        let source_size = self.source_size as usize;

        let mut buffer_offset = 0 as usize;
        let buffer_size = buffer_size as usize / 4;

        while (source_offset < source_size) & (buffer_offset < buffer_size) {
            let idx = source[source_offset] as usize;
            let equiv = equiv_table[idx];


            let idx = (equiv + state) as usize;

            let trans = get_middle_index(trans_table, idx);

            assign_in_the_middle(trans_buffer, buffer_offset, trans);
            assign_in_the_middle(offset_buffer, buffer_offset, source_offset as u32);

            state = trans & 0xffff;
            buffer_offset += (trans >> 29) as usize;
            source_offset += 1;
        }

        self.state = Some(state);
        self.source_index = source_offset;

        return buffer_offset / 4;
    }
}

fn assign_in_the_middle(buffer: &mut [u32], offset: usize, data: u32) {
    let ptr = buffer.as_mut_ptr();

    unsafe {
        // Cast the buffers to a char ptr so we can walk the offset by BYTE and not by DWORD
        *((ptr as *mut char).add(offset) as *mut u32) = data;
    }
}

fn get_middle_index(table: &[u32], offset: usize) -> u32 {
    let table_ptr = table.as_ptr();

    unsafe {
        // Cast the buffers to a char ptr so we can walk the offset by BYTE and not by DWORD
        *((table_ptr as *const char).add(offset) as *const u32)
    }
}

#[derive(PartialEq, Eq)]
enum Error {
    InsufficientBuffer
}

#[derive(PartialEq, Eq)]
enum Flags {
    Chunk = 0x1
}

// Predefined lexer tables

// Equiv table, each two rows are sixteen elements
static ULEX_EQUIV_TABLE: &'static [u32; 0x100] = &[
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0xfc, 0x1f8, 0xfc, 0xfc, 0x2f4, 0x0, 0x0,
    /* Rest of members omitted for brevity  */
];

// static ULEX_TRANS_TABLE: &'static [u32; 0x7e0] = todo!(); // Replace with actual table
static ULEX_TRANS_TABLE: &'static [u32; 0x1] = &[0x0 as u32];

// static ULEX_EOF_TABLE: &'static [u32; 0x3f] = todo!();    // Replace with actual table
static ULEX_EOF_TABLE: &'static [u32; 0x1] = &[0x0 as u32];
	#pragma once

	#include <stdint.h>

	// Normally located in Parser.c
	#define ulex_token(name) Tok_ ## name

	#define ulex_token_type uint32_t
	#define ulex_trans(name) (0x80000000 \| (uint32_t)ulex_token(name) << 16)
	#define ulex_eof(name) (uint16_t)ulex_token(name)


	struct ulex_lexer {
	const uint32_t *equiv_table;
	const uint32_t *trans_table;
	const uint16_t *eof_table;

	uint32_t state;

	// The fields below make up a string
	// Note: The intptr_t are only ever used inside the lexer
	// They probably should be uintptr_t instead
	const uint8_t *source;
	intptr_t source_index;
	intptr_t source_size;
	};

	enum {
	ulex_error_none = 0, // No error
	ulex_error_insufficient_buffer = 1, // Buffer too small
	};
	typedef uint32_t ulex_error;

	enum {
	ulex_flags_chunk = 0x1, // input is partial, no eof
	};
	typedef uint32_t ulex_flags;

	static intptr_t lex(
	struct ulex_lexer *lexer,
	uint32_t *__restrict trans_buffer,
	uint32_t *__restrict offset_buffer,
	intptr_t buffer_size
	) {
	const uint32_t *__restrict equiv_table = lexer->equiv_table;
	const uint32_t *__restrict trans_table = lexer->trans_table;

	uint32_t state = lexer->state;

	const uint8_t *source = lexer->source;
	intptr_t source_offset = lexer->source_index;
	intptr_t source_size = lexer->source_size;

	intptr_t buffer_offset = 0;
	buffer_size *= 4;

	// Loop until we have either reached the maximum size, i.e. we have run out of space, for the given buffers,
	// or we have handled the complete buffer
	while ((source_offset < source_size) & (buffer_offset < buffer_size)) {
	// The below code is equivalent to
	// uint32_t equiv = equiv_table[source[source_offset]];
	// uint32_t trans = (const uint32_t)((const char*)trans_table + equiv + state);
	//
	// (uint32_t)((char*)trans_buffer + buffer_offset) = trans;
	// (uint32_t)((char*)offset_buffer + buffer_offset) = (uint32_t)source_offset;
	//
	// state = trans & 0xffff;
	// buffer_offset += trans >> 29;
	// source_offset += 1;

	// source: u8 const *
	uint8_t equiv_index = *(source + source_offset);

	// equiv_table: u32 const *
	uint32_t equiv = *(equiv_table + equiv_index);

	// Cast the trans_table pointer from a u32 to a char so that the addition
	// will move the pointer with `equiv` and `state` by BYTEs and not a DWORDs.
	// After that cast it back to u32, so we have the correct type
	// trans_table: u32 const*
	const char* trans_table_char_ptr = (const char*)trans_table + equiv + state;
	uint32_t trans = (const uint32_t)trans_table_char_ptr;

	// Same reason as directly above for the casting to char* and back to u32*
	(uint32_t ) ((char *) trans_buffer + buffer_offset) = trans;
	(uint32_t ) ((char *) offset_buffer + buffer_offset) = (uint32_t) source_offset;

	// Take the lower 16 bit
	state = trans & 0xffff;

	// Shift 29 bits to the right
	buffer_offset += trans >> 29;

	// Increment source offset, i.e. move one token ahead
	source_offset += 1;
	}

	// Set the new state of the state machine and update the current index into the source file
	lexer->state = state;
	lexer->source_index = source_offset;

	// Return the number of tokens to the caller
	return buffer_offset / 4;
	}

	ulex_error ulex_lex(
	struct ulex_lexer *lexer, // The lexer responsible for the source lexing
	ulex_flags flags, // The flags describing the source code
	uint32_t *token_buffer, // Holds a list of all tokens from the source file
	uint32_t *offset_buffer,
	intptr_t buffer_size, // The size of the two buffers above
	intptr_t *out_token_count // Returns how many tokens are in the source file
	) {
	// Steps taken:
	// * Desugar `arr[i]` to `*(arr + i)`
	// * Desugar `i++` to `i += 1`;
	// * Added more variables to remove nested array access

	// first transition gets special handling
	if (lexer->state == (uint32_t) -1) {
	// The below code is equivalent to:
	// uint32_t equiv = lexer->equiv_table[lexer->source[lexer->source_index++]];
	// lexer->state = (const uint32_t)((const char*)lexer->trans_table + equiv) & 0xffff;


	// Get the current index in the source string, return it, then increment the source index
	uint32_t source_idx = lexer->source_index;
	lexer->source_index += 1;

	// Get the byte/uint8 at the current source index and implicitly cast it to u32 so we can use it to index
	// the equiv_table
	// lexer->source: u8* => lexer->source[i]: u8
	uint32_t current_char = *(lexer->source + source_idx);

	// Access the equiv table with the current char cast to uint32
	uint32_t equiv = *(lexer->equiv_table + current_char);

	// Cast the trans_table pointer from a u32 to a char so that the addition
	// will move the pointer by a BYTE and not a DWORD.
	// After that cast it back to u32, so we have the correct type
	const char trans_table_char_ptr = (const char ) lexer->trans_table + equiv;
	const uint32_t trans_table_u32_ptr = (const uint32_t ) trans_table_char_ptr;

	// Dereference the table ptr and use a 16 bit mask to get the lower 16 bits
	// This allows us to use the lower bits as index into the state machine
	lexer->state = *trans_table_u32_ptr & 0xffff;
	}

	// Call the actual lexing function, which returns the number of tokens that are present
	// in the buffer
	intptr_t token_count = lex(lexer, token_buffer, offset_buffer, buffer_size);

	// transform transitions to tokens
	for (intptr_t i = 0; i < token_count; ++i) {
	// The below code is equivalent to:
	// token_buffer[i] = (token_buffer[i] >> 16) & 0xfff;

	// Take the token at index i and shift it 16 bits to the right, then take the lower 12 bits of it
	// This is equivalent to getting bytes 16-27
	// token_buffer: uint32_t *

	uint32_t element = *(token_buffer + i);
	uint32_t element_shifted = element >> 16;
	uint32_t element_masked = element_shifted & 0xfff;

	*(token_buffer + i) = element_masked;
	}

	// Test if everything fit into the buffers
	int source_end = lexer->source_index == lexer->source_size;

	// handle end of file by adding an end of file token
	// to the token buffer and incrementing the token counter by one
	// It's also rested whether the given source file is a chunk,
	// i.e. not a complete source code file and therefore has no EOF marker
	if ((flags & ulex_flags_chunk) == 0 && source_end) {
	// The below code is equivalent to:
	// token_buffer[token_count] = lexer->eof_table[lexer->state / 4];
	// offset_buffer[token_count] = (uint32_t) lexer->source_size;
	// ++token_count;

	uint32_t index = lexer->state / 4;
	(token_buffer + token_count) = (lexer->eof_table + index);
	*(offset_buffer + token_count) = (uint32_t) lexer->source_size;

	token_count += 1;
	}

	// Assign the outbound count with the number of tokens in the source file
	*out_token_count = token_count;

	// If we could store all tokens inside the buffers we are good,
	// else we want to return an error indicating that the provided buffer wasn't big enough
	return source_end ? ulex_error_none : ulex_error_insufficient_buffer;
	}

	// Predefined tables
	static const uint32_t ulex_equiv_table[0x100] = {
	0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xfc, 0x1f8, 0xfc, 0xfc, 0x2f4, 0x0, 0x0,
	/* Remaining members omitted */
	};

	static const uint32_t ulex_trans_table[0x7e0] = {
	0x0, 0x34, 0x4, 0x2c, 0x10, ulex_trans(Error) \| 0x0, ulex_trans(Error) \| 0x0, ulex_trans(Error) \| 0x0, ulex_trans(Error) \| 0x0, ulex_trans(Error) \| 0x0, 0x2c, 0x2c, 0x10, 0x34, 0x34, ulex_trans(IntDec) \| 0x0, ulex_trans(Newline) \| 0x0, ulex_trans(Colon) \| 0x0, 0xa4, ulex_trans(Lt) \| 0x0, ulex_trans(Assign) \| 0x0, ulex_trans(Gt) \| 0x0, ulex_trans(Word) \| 0x0, ulex_trans(LBrack) \| 0x0, ulex_trans(Div) \| 0x0, ulex_trans(RBrack) \| 0x0, ulex_trans(Dot) \| 0x0, ulex_trans(Sub) \| 0x0, ulex_trans(Ne) \| 0x0, ulex_trans(Comma) \| 0x0, ulex_trans(String) \| 0x0, ulex_trans(Add) \| 0x0, ulex_trans(ModAssign) \| 0x0, ulex_trans(Con) \| 0x0, ulex_trans(MulAssign) \| 0x0, ulex_trans(AddAssign) \| 0x0, ulex_trans(SubAssign) \| 0x0, ulex_trans(DivAssign) \| 0x0, ulex_trans(Mul) \| 0x0, ulex_trans(Eq) \| 0x0, ulex_trans(IntDec) \| 0x0, 0xec, ulex_trans(RParen) \| 0x0, ulex_trans(Le) \| 0x0, ulex_trans(Ge) \| 0x0, ulex_trans(Whitespace) \| 0x0, ulex_trans(LParen) \| 0x0, ulex_trans(Mod) \| 0x0, ulex_trans(DocComment) \| 0x0, ulex_trans(Dis) \| 0x0, 0x2c, ulex_trans(Float) \| 0x0, ulex_trans(IntHex) \| 0x0, ulex_trans(BlockComment) \| 0x0, ulex_trans(Not) \| 0x0, ulex_trans(IntDec) \| 0x0, ulex_trans(IntDec) \| 0x0, ulex_trans(Word) \| 0x0, ulex_trans(IntDec) \| 0x0, 0xec, ulex_trans(Float) \| 0x0, ulex_trans(Float) \| 0x0, ulex_trans(Whitespace) \| 0x0,
	/* Remaining members omitted */
	};

	static const uint16_t ulex_eof_table[0x3f] = {
	ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(Error), ulex_eof(IntDec), ulex_eof(Newline), ulex_eof(Colon), ulex_eof(LineComment), ulex_eof(Lt), ulex_eof(Assign), ulex_eof(Gt), ulex_eof(Word), ulex_eof(LBrack), ulex_eof(Div), ulex_eof(RBrack), ulex_eof(Dot), ulex_eof(Sub), ulex_eof(Ne), ulex_eof(Comma), ulex_eof(String), ulex_eof(Add), ulex_eof(ModAssign), ulex_eof(Con), ulex_eof(MulAssign), ulex_eof(AddAssign), ulex_eof(SubAssign), ulex_eof(DivAssign), ulex_eof(Mul), ulex_eof(Eq), ulex_eof(IntDec), ulex_eof(LineComment), ulex_eof(RParen), ulex_eof(Le), ulex_eof(Ge), ulex_eof(Whitespace), ulex_eof(LParen), ulex_eof(Mod), ulex_eof(DocComment), ulex_eof(Dis), ulex_eof(String), ulex_eof(Float), ulex_eof(IntHex), ulex_eof(BlockComment), ulex_eof(Not), ulex_eof(IntDec), ulex_eof(IntDec), ulex_eof(Word), ulex_eof(IntDec), ulex_eof(LineComment), ulex_eof(Float), ulex_eof(Float), ulex_eof(Whitespace),
	};
	use std::ptr::null_mut;

	struct Lexer {
	equiv_table: *const u32,
	trans_table: *const u32,
	eof_table: *const u32,

	state: Option<u32>,

	source: *mut u8,

	source_index: isize,
	source_size: isize,
	}

	impl Lexer {
	fn reset(&mut self) {
	self.state = None;
	}

	unsafe fn new() -> Self {
	Lexer {
	trans_table: ULEX_TRANS_TABLE.as_ptr(),
	equiv_table: ULEX_EQUIV_TABLE.as_ptr(),
	eof_table: ULEX_EOF_TABLE.as_ptr(),

	state: None,
	source: null_mut(),
	source_index: 0,
	source_size: 0,
	}
	}


	unsafe fn ulex_lex(
	&mut self,
	flags: Flags,
	token_buffer: *mut u32,
	offset_buffer: *mut u32,
	buffer_size: isize,
	) -> Result<isize, Error> {

	// First transition gets special handling
	if self.state.is_none() {
	// Get the index first, then increment it
	// This is equivalent to `x := arr[i++]`
	let idx = self.source_index;
	self.source_index += 1;

	// Get the current char from the source string
	let current_char = (self.source.offset(idx) as const isize);
	let equivalence = (self.equiv_table.offset(current_char) as const usize);


	let trans = ((self.trans_table as const char).add(equivalence) as *const u32);
	let trans = trans & 0xffff;

	self.state = Some(trans);
	}

	let mut token_count = self.lex(token_buffer, offset_buffer, buffer_size);

	for i in 0..token_count {
	token_buffer.offset(i) = (token_buffer.offset(i) >> (16 as u32)) & 0xfff;
	}

	// handle end of file
	let source_end = self.source_index == self.source_size;
	if flags != Flags::Chunk && source_end {
	let state = (self.state.unwrap() / 4) as usize;
	token_buffer.offset(token_count) = self.eof_table.add(state);

	*offset_buffer.offset(token_count) = self.source_size as u32;

	token_count += 1;
	}

	Ok(token_count)
	}

	unsafe fn lex(
	&mut self,
	trans_buffer: *mut u32,
	offset_buffer: *mut u32,
	buffer_size: isize,
	) -> isize {
	let equiv_table = self.equiv_table;
	let trans_table = self.trans_table;

	// Can only be called by ulex_lex, which will always set the state
	let mut state = self.state.unwrap();

	let source = self.source;
	let mut source_offset = self.source_index;
	let source_size = self.source_size;

	let mut buffer_offset = 0;
	let buffer_size = buffer_size / 4;

	while (source_offset < source_size) & (buffer_offset < buffer_size) {
	let idx = (*source.offset(source_offset)) as isize;
	let equiv = *equiv_table.offset(idx);


	let idx = (equiv + state) as usize;
	let trans = ((trans_table as const char).add(idx) as *const u32);

	// Cast the buffers to a char ptr so we can walk the offset byte by byte and not by dword by dword

	// unsafe
	((trans_buffer as mut char).offset(buffer_offset) as *mut u32) = trans;
	((offset_buffer as mut char).offset(buffer_offset) as *mut u32) = source_offset as u32;

	state = trans & 0xffff;
	buffer_offset += (trans >> 29) as isize;
	source_offset += 1;
	}

	self.state = Some(state);
	self.source_index = source_offset;

	return buffer_offset / 4;
	}
	}

	#[derive(PartialEq, Eq)]
	enum Error {
	InsufficientBuffer
	}

	#[derive(PartialEq, Eq)]
	enum Flags {
	Chunk = 0x1
	}


	// Predefined lexer tables

	// Equiv table, each two rows are sixteen elements
	static ULEX_EQUIV_TABLE: &'static [u32; 0x100] = &[
	0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0x0, 0xfc, 0x1f8, 0xfc, 0xfc, 0x2f4, 0x0, 0x0,
	/* Rest of members omitted for brevity */
	];

	// static ULEX_TRANS_TABLE: &'static [u32; 0x7e0] = todo!(); // Replace with actual table
	static ULEX_TRANS_TABLE: &'static [u32; 0x1] = &[0x0 as u32];

	// static ULEX_EOF_TABLE: &'static [u32; 0x3f] = todo!(); // Replace with actual table
	static ULEX_EOF_TABLE: &'static [u32; 0x1] = &[0x0 as u32];