Created
July 19, 2023 12:15
-
-
Save Chubek/6919519bd50e7b9047e68732b3cf0fd6 to your computer and use it in GitHub Desktop.
Pike's Go LL(1) in C
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
More than a decade ago, Rob Pike demonstrated a method of LL(1) lexing (not LP, just the L) in Sydny. He used Go, as this | |
was mostly a demonstration of Go's abilities. However, the basis is applicable to C, as you can see in this code. | |
You can view the video at: https://www.youtube.com/watch?v=HxaD_trXwRE | |
I have added two sentinels instead of one (NULL, `nil` in Go) and this is currently extremely barebonse. I plan to use it | |
to parse PDF's DSL. You can use this method to easily hand-write your own DSLs. That is, if the language grammar is | |
meant to be parsed using Top-Down parsing, and is not left-recursive. | |
To run, just call `gcc -ggdb3 rob_pike_go_ll1_in.c && gdb a.out` | |
*/ | |
#include <assert.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <stddef.h> | |
#include <limits.h> | |
#include <wchar.h> | |
#include <sys/types.h> | |
#ifdef __GNUC__ | |
#define funcattr_ALWAYS_INLINE static inline __attribute__((always_inline)) | |
#elif _MSV_VER | |
#define funcattr_ALWAYS_INLINE static inline __forceinline | |
#endif | |
#define funcattr_REENTRANT | |
#define funcattr_ADDRESSABLE | |
#define sentinel_LEX_ERROR ((void*)-1) | |
#define sentinel_LEX_TERM ((void*)-2) | |
#define lexstate_OK(STATE) (STATE != sentinel_LEX_ERROR && STATE != sentinel_LEX_TERM) | |
typedef wchar_t unicode_t, *unicodestr_t; | |
typedef uint64_t unicodepos_t; | |
typedef uint8_t unicodewidth_t; | |
typedef size_t unicodelen_t; | |
typedef struct { | |
unicodestr_t text; | |
unicodelen_t text_size; | |
unicodelen_t current_size; | |
unicodepos_t current_index; | |
unicodewidth_t last_width; | |
} lexCONTEXT, *lexCONTEXTp; | |
typedef void *lexSTATEp; | |
typedef lexSTATEp ((*lexSTATEfn)(lexCONTEXTp)); | |
typedef enum { | |
LexSuccess, | |
LexFailure, | |
} lexRESULT; | |
funcattr_ALWAYS_INLINE | |
lexSTATEp lex_state_init(lexCONTEXTp lexctx); | |
funcattr_ALWAYS_INLINE | |
lexSTATEp lex_state_validate(lexCONTEXTp lexctx); | |
funcattr_ALWAYS_INLINE | |
lexSTATEp lex_state_terminate(lexCONTEXTp lexctx); | |
lexSTATEp lex_state_init(lexCONTEXTp lexctx) { | |
lexctx->last_width += 1; | |
return lex_state_validate; | |
} | |
lexSTATEp lex_state_validate(lexCONTEXTp lexctx) { | |
lexctx->last_width += 1; | |
return lex_state_terminate; | |
} | |
lexSTATEp lex_state_terminate(lexCONTEXTp lexctx) { | |
lexctx->last_width += 1; | |
return (lexSTATEp)sentinel_LEX_TERM; | |
} | |
funcattr_REENTRANT | |
lexRESULT run_lex_dfa(lexCONTEXTp lexctx, lexSTATEp initfn) { | |
lexSTATEp nextstate; | |
for (nextstate = (lexSTATEfn)initfn; lexstate_OK(nextstate); nextstate = ((lexSTATEfn)nextstate)(lexctx)); | |
return (nextstate == sentinel_LEX_ERROR) ? LexFailure : LexSuccess; | |
} | |
int main() { | |
lexCONTEXT lexctx = {0}; | |
run_lex_dfa(&lexctx, lex_state_init); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment