Skip to content

Instantly share code, notes, and snippets.

@Chubek
Created July 19, 2023 12:15
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Chubek/6919519bd50e7b9047e68732b3cf0fd6 to your computer and use it in GitHub Desktop.
Save Chubek/6919519bd50e7b9047e68732b3cf0fd6 to your computer and use it in GitHub Desktop.
Pike's Go LL(1) in C
/*
More than a decade ago, Rob Pike demonstrated a method of LL(1) lexing (not LP, just the L) in Sydny. He used Go, as this
was mostly a demonstration of Go's abilities. However, the basis is applicable to C, as you can see in this code.
You can view the video at: https://www.youtube.com/watch?v=HxaD_trXwRE
I have added two sentinels instead of one (NULL, `nil` in Go) and this is currently extremely barebonse. I plan to use it
to parse PDF's DSL. You can use this method to easily hand-write your own DSLs. That is, if the language grammar is
meant to be parsed using Top-Down parsing, and is not left-recursive.
To run, just call `gcc -ggdb3 rob_pike_go_ll1_in.c && gdb a.out`
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
#include <limits.h>
#include <wchar.h>
#include <sys/types.h>
#ifdef __GNUC__
#define funcattr_ALWAYS_INLINE static inline __attribute__((always_inline))
#elif _MSV_VER
#define funcattr_ALWAYS_INLINE static inline __forceinline
#endif
#define funcattr_REENTRANT
#define funcattr_ADDRESSABLE
#define sentinel_LEX_ERROR ((void*)-1)
#define sentinel_LEX_TERM ((void*)-2)
#define lexstate_OK(STATE) (STATE != sentinel_LEX_ERROR && STATE != sentinel_LEX_TERM)
typedef wchar_t unicode_t, *unicodestr_t;
typedef uint64_t unicodepos_t;
typedef uint8_t unicodewidth_t;
typedef size_t unicodelen_t;
typedef struct {
unicodestr_t text;
unicodelen_t text_size;
unicodelen_t current_size;
unicodepos_t current_index;
unicodewidth_t last_width;
} lexCONTEXT, *lexCONTEXTp;
typedef void *lexSTATEp;
typedef lexSTATEp ((*lexSTATEfn)(lexCONTEXTp));
typedef enum {
LexSuccess,
LexFailure,
} lexRESULT;
funcattr_ALWAYS_INLINE
lexSTATEp lex_state_init(lexCONTEXTp lexctx);
funcattr_ALWAYS_INLINE
lexSTATEp lex_state_validate(lexCONTEXTp lexctx);
funcattr_ALWAYS_INLINE
lexSTATEp lex_state_terminate(lexCONTEXTp lexctx);
lexSTATEp lex_state_init(lexCONTEXTp lexctx) {
lexctx->last_width += 1;
return lex_state_validate;
}
lexSTATEp lex_state_validate(lexCONTEXTp lexctx) {
lexctx->last_width += 1;
return lex_state_terminate;
}
lexSTATEp lex_state_terminate(lexCONTEXTp lexctx) {
lexctx->last_width += 1;
return (lexSTATEp)sentinel_LEX_TERM;
}
funcattr_REENTRANT
lexRESULT run_lex_dfa(lexCONTEXTp lexctx, lexSTATEp initfn) {
lexSTATEp nextstate;
for (nextstate = (lexSTATEfn)initfn; lexstate_OK(nextstate); nextstate = ((lexSTATEfn)nextstate)(lexctx));
return (nextstate == sentinel_LEX_ERROR) ? LexFailure : LexSuccess;
}
int main() {
lexCONTEXT lexctx = {0};
run_lex_dfa(&lexctx, lex_state_init);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment