Skip to content

Instantly share code, notes, and snippets.

@luser-dr00g
Created March 3, 2016 19:44
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save luser-dr00g/a2c35f698e1f5f4e23b0 to your computer and use it in GitHub Desktop.
Save luser-dr00g/a2c35f698e1f5f4e23b0 to your computer and use it in GitHub Desktop.
listing of parser code and supplementary headers from https://github.com/luser-dr00g/inca/tree/99518e3d2b82751b5d9910b341c61fda14fcbf2e
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat ex.h
// predicate table contains predicate functions
// and associated enum values
#define PREDTAB(_) \
_( ANY = 1, qa, 1 ) \
_( VAR = 2, qp, gettag(x)==PROG \
|| (gettag(x)==PCHAR && getval(x)!=0x2190 /*leftarrow*/ ) ) \
_( NOUN = 4, qn, gettag(x)==LITERAL \
|| gettag(x)==CHAR \
|| gettag(x)==ARRAY ) \
_( VRB = 8, qv, gettag(x)==VERB ) \
_( DEX = 16, qx, 0 ) /*dextri-monadic verb*/\
_( ADV = 32, qo, gettag(x)==ADVERB && ((verb)getptr(x))->monad ) \
_( LEV = 64, qe, 0 ) /*sinister adverb*/\
_( CONJ = 128, qj, gettag(x)==ADVERB && ((verb)getptr(x))->dyad ) \
_( MARK = 256, qm, gettag(x)==MARKOBJ ) \
_( ASSN = 512, qc, gettag(x)==PCHAR && getval(x) == 0x2190 ) \
_( LPAR = 1024, ql, gettag(x)==LPAROBJ ) \
_( RPAR = 2048, qr, gettag(x)==RPAROBJ ) \
_( NUL = 4096, qu, gettag(x)==NULLOBJ ) \
/**/
// declare predicate functions
#define PRED_DECL(X,Y,...) int Y(int);
PREDTAB(PRED_DECL)
// declare predicate enums and composed patterns
#define PRED_ENUM(X,...) X,
enum predicate { PREDTAB(PRED_ENUM)
EDGE = MARK+ASSN+LPAR,
AVN = VRB+NOUN+ADV };
// execute an expression e with environment st
int execute_expression(array e, symtab st);
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat ex.c
#if 0
/*
* Parsing and Execution
* Execution in APL proceeds right-to-left and this is
* accomplished with a relatively straightforward algorithm.
* We have 2 stacks (it could also be done with a queue and
* a stack) called the left-stack and the right-stack.
* The left stack starts at the left edge and expands
* on the right.
|- 0 1 2 3 top
* The right stack is the opposite, anchored at the right
* and growing to the left.
top 3 2 1 0 -|
* Of course these are just conceptual distinctions: they're
* both just stacks. The left stack is initialized with a
* mark object (illustrated here as ^) to indicate the left
* edge, followed by the entire expression. The right stack
* has a single null object (illustrated here as $) to indicate
* the right edge.
|-^2*1+⍳4 $-|
* At each step, we A) move one object to the right stack,
|-^2*1+⍳ 4$-|
* Until there are at least 4 objects on the right stack, we do
* nothing else.
|-^2*1+ ⍳4$-|
|-^2*1 +⍳4$-|
* If there are at least 4 objects on the right stack, then
* we B) classify the top 4 elements with a set of predicate
* functions and then check through the list of grammatical patterns,
* but this configuration (VERB VERB NOUN NULLOBJ) doesn't match anything.
* Move another object and try again.
|-^2* 1+⍳4$-|
* Now, the above case (NOUN VERB VERB NOUN) matches this production:
/* p[0] p[1] p[2] p[3] func pre x y z post,2*/\
_(L1, EDGE+AVN, VRB, VRB, NOUN, monad, -1, 2,3,-1, 1, 0) \
* application of a monadic verb. The numbers in the production indicate
* which elements should be preserved, and which should be passed to the
* handler function. The result from the handler function is interleaved
* back onto the right stack.
|-^2* 1+A$-| A←⍳4
* where A represents the array object returned from the iota function.
* (Incidentally this is a lazy array, generating its values on-demand.)
|-^2 *1+A$-| dyad
|-^2 *B$-| B←1+A
|-^ 2*B$-|
|- ^2*B$-| dyad
|- ^C$-| C←2*B
* Eventually the expression ought to reduce to 3 objects: a mark,
* some result object, and a null. Anything else is an error
* TODO handle this error.
|- ^C$-|
^
|
result
*/
#endif
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "ar.h"
#include "en.h"
#include "st.h"
#include "wd.h"
#include "vb.h"
#include "ex.h"
typedef int object;
#include "ex_private.h"
#include "debug.h"
// execute expression e using environment st and yield result
//TODO check/handle extra elements on stack (interpolate?, enclose and cat?)
int execute_expression(array e, symtab st){
int i,j,n = e->dims[0];
stack *lstk,*rstk;
int docheck;
init_stacks(&lstk, &rstk, e, n);
while(lstk->top){ //left stack not empty
object x = stackpop(lstk);
DEBUG("->%08x(%d,%d)\n", x, gettag(x), getval(x));
if (qp(x)){ // x is a pronoun?
if (parse_and_lookup_name(lstk, rstk, x, st) == null)
return null;
} else stackpush(rstk,x);
docheck = 1;
while (docheck){ //check rstk with patterns and reduce
docheck = 0;
if (rstk->top>=4){
int c[4];
for (j=0; j<4; j++)
c[j] = classify(rstk->a[rstk->top-1-j]);
for (i=0; i<sizeof ptab/sizeof*ptab; i++)
if (check_pattern(c, ptab, i)) {
object t[4];
move_top_four_to_temp(t, rstk);
switch(i){
PARSETAB(PARSETAB_ACTION)
}
docheck = 1; //stack changed: check again
break;
}
}
}
}
return extract_result_and_free_stacks(lstk,rstk);
}
size_t sum_symbol_lengths(array e, int n){
int i,j;
for (i=j=0; i<n; i++) { // sum symbol lengths
if (gettag(e->data[i])==PROG) {
//printf("%p\n", getptr(e->data[i]));
j+=((array)getptr(e->data[i]))->dims[0];
}
}
return j;
}
void init_stacks(stack **lstkp, stack **rstkp, array e, int n){
int i,j;
#define lstk (*lstkp) /* by-reference */
#define rstk (*rstkp)
j=sum_symbol_lengths(e,n);
stackinit(lstk,n+j+1);
stackpush(lstk,mark);
for (i=0; i<n; i++) stackpush(lstk,e->data[i]); // push expression
stackinit(rstk,n+j+1);
stackpush(rstk,null);
#undef lstk
#undef rstk
}
object extract_result_and_free_stacks(stack *lstk, stack *rstk){
object x;
stackpop(rstk); // pop mark
x = stackpop(rstk);
free(lstk);
free(rstk);
return x;
}
int check_pattern(int *c, parsetab *ptab, int i){
return c[0] & ptab[i].c[0]
&& c[1] & ptab[i].c[1]
&& c[2] & ptab[i].c[2]
&& c[3] & ptab[i].c[3];
}
void move_top_four_to_temp(object *t, stack *rstk){
t[0] = stackpop(rstk);
t[1] = stackpop(rstk);
t[2] = stackpop(rstk);
t[3] = stackpop(rstk);
}
/* Parser Actions,
each function is called with x y z parameters defined in PARSETAB
*/
int monad(int f, int y, int dummy, symtab st){
DEBUG("monad\n");
verb v = getptr(f);
if (!v->monad) {
printf("monad undefined\n");
return null;
}
return v->monad(y,v);
}
int dyad(int x, int f, int y, symtab st){
DEBUG("dyad\n");
verb v = getptr(f);
if (!v->dyad) {
printf("dyad undefined\n");
return null;
}
return v->dyad(x,y,v);
}
int adv(int f, int g, int dummy, symtab st){
DEBUG("adverb\n");
verb v = getptr(g);
if (!v->monad) {
printf("adv undefined\n");
return null;
}
return v->monad(f,v);
}
int conj_(int f, int g, int h, symtab st){
DEBUG("conj\n");
verb v = getptr(g);
if (!v->dyad) {
printf("conj undefined\n");
return null;
}
return v->dyad(f,h,v);
}
//specification
int spec(int name, int v, int dummy, symtab st){
def(st, name, v);
return v;
}
int punc(int x, int dummy, int dummy2, symtab st){
return x;
}
// lookup name in environment unless to the left of assignment
// if the full name is not found, but a defined prefix is found,
// push the prefix back to the left stack and continue lookup
// with remainder. push value to right stack.
int parse_and_lookup_name(stack *lstk, stack *rstk, object x, symtab st){
if (rstk->top && qc(stacktop(rstk))){ //assignment: no lookup
stackpush(rstk,x);
} else {
DEBUG("lookup\n");
int *s;
int n;
switch(gettag(x)){
case PCHAR: { // single char
s = &x;
n = 1;
} break;
case PROG: { // longer name
array a = getptr(x);
s = a->data;
n = a->dims[0];
} break;
}
int *p = s;
symtab tab = findsym(st,&p,&n,0);
if (tab->val == null) {
printf("error undefined prefix\n");
return null;
}
while (n){ //while name
DEBUG("%d\n", n);
stackpush(lstk,tab->val); //pushback value
s = p;
tab = findsym(st,&p,&n,0); //lookup remaining name
if (tab->val == null) {
printf("error undefined internal\n");
return null;
}
}
//replace name with defined value
DEBUG("==%08x(%d,%d)\n", tab->val, gettag(tab->val), getval(tab->val));
stackpush(rstk,tab->val);
}
return 0;
}
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat ex_private.h
/* stack type
the size is generously pre-calculated
and so we can skip all bounds checking.
stkp->top is the size (index of next empty slot for next push)
stkp->top-1 is the topmost element
*/
typedef struct stack { int top; object a[1];} stack; /* top==0::empty */
#define stackinit(stkp,sz) (stkp=malloc(sizeof*stkp + (sz)*sizeof*stkp->a)), \
(stkp->top=0)
#define stackpush(stkp,el) ((stkp)->a[(stkp)->top++]=(el))
#define stackpop(stkp) ((stkp)->a[--((stkp)->top)])
#define stacktop(stkp) ((stkp)->a[(stkp)->top-1])
/* predicate functions are instantiated according to the table
defined in the ex.h and are also exported.
the q[] function array is used by classify to apply all
predicate functions yielding a sum of all applicable codes
defined in the table. Specific qualities or combinations
may then be determined easily by masking.
*/
#define PRED_FUNC(X,Y,...) int Y(object x){ return __VA_ARGS__; }
PREDTAB(PRED_FUNC)
#define PRED_ENT(X,Y,...) Y,
static int (*q[])(object) = { PREDTAB(PRED_ENT) };
/* encode predicate applications into a binary number
which can be compared to a pattern with a mask */
static inline int classify(object x){
int i,v,r;
for (i=0, v=1, r=0; i<sizeof q/sizeof*q; i++, v*=2)
if (q[i](x))
r |= v;
return r;
}
// the Parse Table defines the grammar of the language
// At each stack move, the top four elements of the right stack
// are checked against each of these patterns. A matching pattern
// returns element t[pre] from the temp area to the right stack
// then calls func(t[x],t[y],t[z]) and pushes the result to the
// right stack, then pushes t[post] and t[post2].
// A -1 in any of these positions means do nothing, or do not
// bother to pass anything meaningful. That is, any of the x,y,z
// parameters marked -1 correspond to a "dummy" argument of
// the function which is there in order that all handler
// functions have the same signature.
#define PARSETAB(_) \
/* p[0] p[1] p[2] p[3] func pre x y z post,2*/\
_(L0, EDGE, VRB, NOUN, ANY, monad, 3, 1,2,-1, 0,-1) \
_(L1, EDGE+AVN, VRB, VRB, NOUN, monad, -1, 2,3,-1, 1, 0) \
_(L2, ANY, NOUN, DEX, ANY, monad, 3, 2,1,-1, 0,-1) \
_(L3, EDGE+AVN, NOUN, VRB, NOUN, dyad, -1, 1,2,3, 0,-1) \
_(L4, EDGE+AVN, NOUN+VRB, ADV, ANY, adv, 3, 1,2,-1, 0,-1) \
_(L5, ANY, LEV, NOUN+VRB, ANY, adv, 3, 2,1,-1, 0,-1) \
_(L6, EDGE+AVN, NOUN+VRB, CONJ, NOUN+VRB, conj_, -1, 1,2,3, 0,-1) \
_(L7, VAR, ASSN, AVN, ANY, spec, 3, 0,2,-1, -1,-1) \
_(L8, LPAR, ANY, RPAR, ANY, punc, 3, 1,-1,-1, -1,-1) \
_(L9, MARK, ANY, RPAR, ANY, punc, 3, 1,-1,-1, 0,-1) \
_(L10,ANY, LPAR, ANY, NUL, punc, 3, 2,-1,-1, 0,-1) \
/**/
// generate labels to coordinate table and execution
#define PARSETAB_INDEX(label, ...) label,
enum { PARSETAB(PARSETAB_INDEX) };
// create parsetab array of structs containing the patterns
#define PARSETAB_PAT(label, pat1, pat2, pat3, pat4, ...) \
{pat1, pat2, pat3, pat4},
typedef struct parsetab { int c[4]; } parsetab;
static parsetab ptab[] = { PARSETAB(PARSETAB_PAT) };
// perform the grammar production, transforming the stack
#define PARSETAB_ACTION(label,p1,p2,p3,p4, func, pre,x,y,z,post,post2) \
case label: { \
if (pre>=0) stackpush(rstk,t[pre]); \
stackpush(rstk,func(x>=0?t[x]:0,y>=0?t[y]:0,z>=0?t[z]:0,st)); \
if (post>=0) stackpush(rstk,t[post]); \
if (post2>=0) stackpush(rstk,t[post2]); \
} break;
void init_stacks(stack **lstkp, stack **rstkp, array e, int n);
object extract_result_and_free_stacks(stack *lstk, stack *rstk);
int parse_and_lookup_name(stack *lstk, stack *rstk, object x, symtab st);
int check_pattern(int *c, parsetab *ptab, int i);
void move_top_four_to_temp(object *t, stack *rstk);
size_t sum_symbol_lengths(array e, int n);
int monad(int f, int y, int dummy, symtab st);
int dyad(int x, int f, int y, symtab st);
int adv(int f, int g, int dummy, symtab st);
int conj_(int f, int g, int h, symtab st);
int spec(int name, int v, int dummy, symtab st);
int punc(int x, int dummy, int dummy2, symtab st);
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat ar.h
#ifndef AR_H_
#define AR_H_
#include "../ppnarg.h"
typedef struct ar {
int type;
int rank; // number of dimensions
int *dims; // size of each dimension
int cons; // constant term of the indexing formula
int *weight; // corresponding coefficient in the indexing formula
int *data; // address of first array element
int *(*func)(struct ar *,int); // data function (if function type)
} *array;
enum type {
normal,
indirect,
function
};
int productdims(int rank, int dims[]);
array array_new_dims(int rank, int dims[]);
array array_new_function(int rank, int dims[],
int *data, int datan, int *(*func)(array,int)); // type=function
int *constant(array a,int idx);
int *j_vector(array a,int idx);
void loaddimsv(int rank, int dims[], va_list ap);
array (array_new)(int rank, ...);
#define array_new(...) (array_new)(PP_NARG(__VA_ARGS__),__VA_ARGS__)
array cast_dims(int data[], int rank, int dims[]); // type=indirect
array (cast)(int data[], int rank, ...); // type=indirect
#define cast(data,...) (cast)(data,PP_NARG(__VA_ARGS__),__VA_ARGS__)
array clone(array a); // type=indirect
array copy(array a);
int *elema(array a, int ind[]);
int *elemv(array a, va_list ap);
int *elem(array a, ...);
int *vector_index(int ind, int dims[], int n, int vec[]);
int ravel_index(int vec[], int dims[], int n);
void transpose2(array a);
void transpose(array a, int shift);
void transposea(array a, int spec[]);
array slice(array a, int i); // type=indirect
array slicea(array a, int spec[]); // type=indirect
array slices(array a, int s[], int f[]); // type=indirect
array extend(array a, int extra); // type=indirect
array cat(array x, array y);
array iota(int n); // type=function
array scalar(int n);
array (vector)(int n, ...);
#define vector(...) (vector)(PP_NARG(__VA_ARGS__),__VA_ARGS__)
int issolid(array a);
array makesolid(array a);
#endif
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat en.h
typedef struct datum { // these two should be reversed for Big-Endian
unsigned int val:24;
unsigned int tag:8; // hi-bit of tag should overlay the sign bit
} datum;
typedef union integer {
datum data;
int32_t int32;
} integer;
enum tag {
LITERAL, /* val is a 24-bit 2's comp integer */
NUMBER, /* val is an index in the number table */
CHAR, /* val is a 21-bit Unicode code point padded with zeros */
PCHAR, /* val is a an executable char */
PROG, /* val is an (index to an) executable code fragment (ARRAY of PCHAR)*/
ARRAY, /* val is a(n index to a) boxed array */
SYMTAB, /* val is a(n index to a) symbol table */
NULLOBJ, /* val is irrelevant (s.b. 0) */
VERB, /* val is a(n index to a) verb object */
ADVERB, /* val is a(n index to a) verb object */
MARKOBJ, /* val is irrelevant (s.b. 0) */
LPAROBJ,
RPAROBJ,
};
extern int null;
extern int mark;
void init_en();
int gettag(int d);
int getval(int d);
int newdata(int tag, int val);
int cache(int tag, void *ptr);
void *getptr(int d);
int getfill(int d);
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat st.h
/* symbol table */
typedef struct st {
int key;
int val;
int n;
struct st **tab /*[n]*/ ;
} *symtab;
symtab makesymtab(int n);
/* mode=0: prefix match
mode=1: defining search */
symtab findsym(symtab st, int **spp, int *n, int mode);
void def(symtab st, int name, int v);
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat wd.h
// scan up to n chars from s and produce 1D array of encoded expression
array scan_expression(int *s, int n);
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat vb.h
#define MODE1(x) (x|1<<7)
#define VERBTAB(_) \
/*base monad dyad f g h mr lr rr*/ \
_('+', vid, vplus, 0, 0, 0, 0, 0, 0 ) \
_('-', vneg, vminus, 0, 0, 0, 0, 0, 0 ) \
_('*', vsignum, vtimes, 0, 0, 0, 0, 0, 0 ) \
_(MODE1('+'), vrecip, vdivide, 0, 0, 0, 0, 0, 0 ) \
_(0x2374/*rho*/, vshapeof, vreshape, 0, 0, 0, 0, 0, 0 ) \
_('#', vtally, 0, 0, 0, 0, 0, 0, 0 ) \
_(0x2373/*iota*/, viota, 0, 0, 0, 0, 0, 0, 0 ) \
_('{', vhead, vtake, 0, 0, 0, 0, 1, 0 ) \
_('}', vbehead, vdrop, 0, 0, 0, 0, 0, 0 ) \
_(',', vravel, vcat, 0, 0, 0, 0, 0, 0 ) \
_(';', vprenul, vlink, 0, 0, 0, 0, 0, 0 ) \
_('[', 0, vindexright,0, 0, 0, 0, 0, 0 ) \
_(']', 0, vindexleft, 0, 0, 0, 0, 0, 0 ) \
/**/
typedef struct verb {
int id;
int (*monad)(int,struct verb*);
int (*dyad)(int,int,struct verb*);
int f,g,h; /* operator arguments */
int mr,lr,rr; /* monadic,left,right rank*/
} *verb;
void init_vb(symtab st);
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat debug.h
#ifdef DEBUGMODE
#define DEBUG(...) fprintf(stderr, __VA_ARGS__)
#else
#define DEBUG(...)
#endif
josh@LAPTOP-ILO10OOF ~/inca/olmec
$
@luser-dr00g
Copy link
Author

for completeness, here is the one other referenced header file:

josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat ../ppnarg.h
/*
 * The PP_NARG macro evaluates to the number of arguments that have been
 * passed to it.
 *
 * Laurent Deniau, "__VA_NARG__," 17 January 2006, <comp.std.c> (29 November 2007).
 */
#define PP_NARG(...)    PP_NARG_(__VA_ARGS__,PP_RSEQ_N())
#define PP_NARG_(...)   PP_ARG_N(__VA_ARGS__)

#define PP_ARG_N( \
        _1, _2, _3, _4, _5, _6, _7, _8, _9,_10,  \
        _11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \
        _21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \
        _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \
        _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \
        _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \
        _61,_62,_63,N,...) N

#define PP_RSEQ_N() \
        63,62,61,60,                   \
        59,58,57,56,55,54,53,52,51,50, \
        49,48,47,46,45,44,43,42,41,40, \
        39,38,37,36,35,34,33,32,31,30, \
        29,28,27,26,25,24,23,22,21,20, \
        19,18,17,16,15,14,13,12,11,10, \
        9,8,7,6,5,4,3,2,1,0

josh@LAPTOP-ILO10OOF ~/inca/olmec
$ 

@luser-dr00g
Copy link
Author

The following script will download this gist and convert it to a self-extracting shell archive which can be run with . ./parsing.shar to extract the files into the current directory (no special clobbering controls: be careful).

josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat extract.sh
wget https://gist.githubusercontent.com/luser-dr00g/a2c35f698e1f5f4e23b0/raw/069e1c4701243d665ef009a451e6f77248fdf4bd/parsing.typescript

sed -e 1d parsing.typescript |\
        sed -e 's/^\$ cat \(.*\)/cat >\1 <<ENDOFFILE/' |\
        sed -e 's/josh@LAPTOP-ILO10OOF ~\/inca\/olmec/ENDOFFILE/' |\
        sed -e 's/^\$//' >parsing.shar


josh@LAPTOP-ILO10OOF ~/inca/olmec
$ . ./extract.sh
--2016-03-03 15:00:21--  https://gist.githubusercontent.com/luser-dr00g/a2c35f698e1f5f4e23b0/raw/069e1c4701243d665ef009a451e6f77248fdf4bd/parsing.typescript
Resolving gist.githubusercontent.com (gist.githubusercontent.com)... 23.235.40.133
Connecting to gist.githubusercontent.com (gist.githubusercontent.com)|23.235.40.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 18579 (18K) [text/plain]
Saving to: ‘parsing.typescript.5’

parsing.typescript. 100%[===================>]  18.14K  --.-KB/s    in 0.03s   

2016-03-03 15:00:22 (592 KB/s) - ‘parsing.typescript.5’ saved [18579/18579]


josh@LAPTOP-ILO10OOF ~/inca/olmec
$ head parsing.shar
cat >ex.h <<ENDOFFILE

// predicate table contains predicate functions
// and associated enum values
#define PREDTAB(_) \
_( ANY  =    1, qa, 1 ) \
_( VAR  =    2, qp, gettag(x)==PROG \
                || (gettag(x)==PCHAR && getval(x)!=0x2190 /*leftarrow*/ ) ) \
_( NOUN =    4, qn, gettag(x)==LITERAL \
                 || gettag(x)==CHAR \

josh@LAPTOP-ILO10OOF ~/inca/olmec
$ 

@luser-dr00g
Copy link
Author

This gist was created to accompany this comp.lang.c message: https://groups.google.com/d/msg/comp.lang.c/X7BzyIKpogg/uKB3MsPwAwAJ

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment