Skip to content

Instantly share code, notes, and snippets.

@rain-1 rain-1/grammar-1.rkt
Last active Feb 7, 2019

Embed
What would you like to do?
parsing with grammars
#lang racket
;(start, (ACCEPT))
;(start, (READ #\a, sym))
;(start, (READ #\b, sym))
;(start, (READ #\(, "sexps"), PUSH(#\)))
;(sym, (ACCEPT))
;(sym, (READ #\a, sym))
;(sym, (READ #\b, sym))
;(sexps, (ACCEPT))
;(sexps, (READ #\a, sym, PUSH(sexps))
;(sexps, (READ #\b, sym, PUSH(sexps))
;(sexps, (READ #\(, sexps), PUSH #\), PUSH sexps)
(define (push stack . elements)
(append elements stack))
(define (complete-stack stack)
;; (write `(draining ,stack))(newline)
(if (null? stack)
#t
(let ((head (car stack))
(stack (cdr stack)))
(cond ((char? head)
(let ((ch (read-char)))
(if (equal? ch head)
(complete-stack stack)
(error "xy problem" (cons ch head)))))
((procedure? head)
(head stack))
(else (error "stack"))))))
(define (start stack)
(let ((ch (peek-char)))
(if (eof-object? ch)
(complete-stack stack)
(case ch
((#\a) (read-char) (sym stack))
((#\b) (read-char) (sym stack))
((#\() (read-char) (sexps (push stack #\))))
(else (complete-stack stack))))))
(define (sym stack)
(let ((ch (peek-char)))
(if (eof-object? ch)
(complete-stack stack)
(case ch
((#\a) (read-char) (sym stack))
((#\b) (read-char) (sym stack))
(else (complete-stack stack))))))
(define (sexps stack)
(let ((ch (peek-char)))
(if (eof-object? ch)
(complete-stack stack)
(case ch
((#\a) (read-char) (sym (push stack sexps)))
((#\b) (read-char) (sym (push stack sexps)))
((#\() (read-char) (sexps (push stack #\) sexps)))
(else (complete-stack stack))))))
(define (go str)
(with-input-from-string str
(lambda ()
(start '()))))
(go "ab")
(go "(ababab)")
(go "((a(ba)b(ab))(ab(a)a)(b(b)aaa)a)")
#lang racket
(require racket/trace)
(define (complete-stack stack)
; (write `(draining ,stack))(newline)
(if (null? stack)
#t
(let ((head (car stack))
(stack (cdr stack)))
(cond ((char? head)
(let ((ch (read-char)))
(if (equal? ch head)
(complete-stack stack)
(error "xy problem" (cons ch head)))))
((procedure? head)
(head stack))
(else (error "stack" head))))))
(define-syntax define-parse-rule
(syntax-rules ()
((_ <name> () (<char> . <work>) ...)
(define-parse-rule^ stack <name> (complete-stack stack) (<char> . <work>) ...))
((_ <name> (<char> . <work>) ...)
(define-parse-rule^ stack <name> (error "early-eof" '<name>) (<char> . <work>) ...))))
(define-syntax define-parse-rule^
(syntax-rules ()
((_ stack <name> <eof> (<char> . <work>) ...)
(define (<name> stack)
(let ((ch (peek-char)))
(if (eof-object? ch)
<eof>
(case ch
((<char>) (read-char) (complete-stack (append (list . <work>) stack)))
...
(else <eof>))))))))
(define-parse-rule sexp
(#\a)
(#\' sexp)
(#\( sexps #\)))
(define-parse-rule sexps
()
(#\a sexps1)
(#\' sexp sexps1)
(#\( sexps #\) sexps1))
(define-parse-rule sexps1
()
(#\. sexp)
(#\a sexps1)
(#\' sexp sexps1)
(#\( sexps #\) sexps1))
;(trace sexp)
;(trace sexps)
;(trace sexps^)
(define (go str)
(with-input-from-string str
(lambda ()
(sexp '()))))
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#define DEBUG
//
#define die(msg) do { fprintf(stderr, "Oops in %s, line %d: %s\n", __FUNCTION__, __LINE__, msg); exit(1); } while(0)
int fpeek(FILE *fptr) {
int c = fgetc(fptr);
ungetc(c, fptr);
return c;
}
//
typedef enum {
TOKEN_ATOM,
TOKEN_OPEN_PAREN,
TOKEN_CLOSE_PAREN,
TOKEN_DOT,
TOKEN_QUOTE,
TOKEN_EOF,
PARSE_SEXP,
PARSE_SEXPS,
PARSE_SEXPS1,
} scm_token_type;
typedef struct {
scm_token_type ty;
int len;
char *text;
} scm_token;
void display_token(scm_token t) {
switch(t.ty) {
case TOKEN_ATOM:
printf("%s", t.text);
break;
case TOKEN_OPEN_PAREN:
printf("(");
break;
case TOKEN_CLOSE_PAREN:
printf(")");
break;
case TOKEN_DOT:
printf(".");
break;
case TOKEN_QUOTE:
printf(".");
break;
case TOKEN_EOF:
printf("<eof>");
break;
case PARSE_SEXP:
printf("<sexp>");
break;
case PARSE_SEXPS:
printf("<sexps");
break;
case PARSE_SEXPS1:
printf("<sexps1>");
break;
default:
die("No such token.");
}
}
int whitespace_char(int c) {
return c == ' ' || c == '\n';
}
int atom_char(int c) {
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-';
}
scm_token buffered_token;
int have_buffered_token = 0;
void unread_token(scm_token t) {
assert(!have_buffered_token);
have_buffered_token = 1;
buffered_token = t;
}
scm_token next_token(FILE* fptr) {
if(have_buffered_token) {
have_buffered_token = 0;
return buffered_token;
}
// skip whitespace
do {
if(whitespace_char(fpeek(fptr)))
fgetc(fptr);
else if(fpeek(fptr) == ';')
while(fgetc(fptr) != '\n') {}
else
break;
} while(1);
scm_token t;
int c = fgetc(fptr);
if(c == EOF)
t.ty = TOKEN_EOF;
else if(c == '(')
t.ty = TOKEN_OPEN_PAREN;
else if(c == ')')
t.ty = TOKEN_CLOSE_PAREN;
else if(c == '.')
t.ty = TOKEN_DOT;
else if(c == '\'')
t.ty = TOKEN_QUOTE;
else if(atom_char(c)) {
t.ty = TOKEN_ATOM;
t.len = 0;
t.text = malloc(1);
ungetc(c, fptr);
while(atom_char(fpeek(fptr))) {
t.text = realloc(t.text, t.len+1);
t.text[t.len] = fgetc(fptr);
t.len++;
}
t.text[t.len] = 0;
}
else {
die("Invalid character");
}
return t;
}
//
#define STACK_DEPTH 256
scm_token parser_work[STACK_DEPTH];
int parser_work_top = 0;
scm_token parser_res[STACK_DEPTH];
int parser_res_top = 0;
void parse(FILE *fptr) {
scm_token w, t;
while(parser_work_top > 0) {
t = next_token(fptr);
#ifdef DEBUG
printf("T:");
display_token(t);
puts("");
#endif
w = parser_work[--parser_work_top];
switch(w.ty) {
case PARSE_SEXP:
switch(t.ty) {
case TOKEN_ATOM:
break;
case TOKEN_QUOTE:
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXP };
break;
case TOKEN_OPEN_PAREN:
parser_work[parser_work_top++] = (scm_token){ .ty = TOKEN_CLOSE_PAREN };
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXPS };
break;
default:
die("parse sexp: unexpected token");
}
break;
case PARSE_SEXPS:
switch(t.ty) {
case TOKEN_ATOM:
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXPS1 };
break;
case TOKEN_QUOTE:
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXPS1 };
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXPS };
break;
case TOKEN_OPEN_PAREN:
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXPS1 };
parser_work[parser_work_top++] = (scm_token){ .ty = TOKEN_CLOSE_PAREN };
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXPS };
break;
default:
unread_token(t);
}
break;
case PARSE_SEXPS1:
switch(t.ty) {
case TOKEN_DOT:
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXP };
break;
case TOKEN_ATOM:
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXPS1 };
break;
case TOKEN_QUOTE:
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXPS1 };
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXP };
break;
case TOKEN_OPEN_PAREN:
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXPS1 };
parser_work[parser_work_top++] = (scm_token){ .ty = TOKEN_CLOSE_PAREN };
parser_work[parser_work_top++] = (scm_token){ .ty = PARSE_SEXPS };
break;
default:
unread_token(t);
}
break;
default:
if(t.ty != w.ty) {
#ifdef DEBUG
puts("wanted: ");
display_token(w);
puts("");
puts("got: ");
display_token(t);
puts("");
#endif
die("parse: xy problem");
}
}
}
}
//
int main(void) {
scm_token t;
t.ty = PARSE_SEXP;
parser_work[parser_work_top++] = t;
parse(stdin);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.