Skip to content

Instantly share code, notes, and snippets.

@aap

aap/obc.b Secret

Last active June 23, 2023 10:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aap/6df9b4c53c63592437d97dadab533649 to your computer and use it in GitHub Desktop.
Save aap/6df9b4c53c63592437d97dadab533649 to your computer and use it in GitHub Desktop.
attempt at B compiler reconstruction
main() {
extrn printf, xprintf;
extrn argv, fout, fin, nerror, eof, chain;
printf = xprintf; /* hack */
if(argv[0]<3) {
error("Arg count");
exit(1);
}
if((fin=open(argv[2],0))<0) {
error("Can't find %s", argv[2]);
exit(1);
}
if((fout=creat(argv[3], 0664))<0) {
error("Can't create %s", argv[3]);
exit(1);
}
keyw("auto", Auto);
keyw("extrn", Extern);
keyw("goto", Goto);
keyw("return", Return);
keyw("if", If);
keyw("while", While);
keyw("else", Else);
keyw("switch", Switch);
keyw("case", Case);
while(!eof) {
extdef();
enddef();
}
printf("f l%d*n", chain);
exit(nerror!=0);
}
keyw(s, t) {
extrn namsiz, symbuf;
auto n, i, j, c, np;
n = namsiz;
i = j = 0;
while(n--) {
lchar(symbuf, i++, c = char(s, j++));
if(c == '*e')
j--;
}
np = lookup();
np[0] = KeyType;
np[1] = t;
}
extdef() {
extrn csym, cval, loc, chain, peeksym, stksym, nauto, setstk;
auto o, i, n;
if(((o=symbol())==0) | o==Semi)
return;
if(o!=Name)
goto syntax;
csym[0] = Extern;
printf("g %p*nl %p*n", csym+2, csym+2);
n = 1;
if((peeksym=symbol())==LBrack) {
peeksym = -1;
n = 0;
if((o=symbol())==Const) {
n = cval;
o = symbol();
}
if(o!=RBrack)
goto syntax;
label(loc);
printf("v l%d*n", chain);
chain=loc++;
}
loop:
switch(o=symbol()) {
case LParen:
/* Stack layout:
* auton
* ....
* auto0
* argn
* 3 ....
* 2 arg0
* 1 return address
* 0 old fp <- fp
*/
nauto = 2;
stksym = loc++;
printf("v 1f*nl 1*n");
declare(Param);
setstk = 1;
stmt();
etc(11);
printf("q l%d,%d*n", stksym, nauto);
return;
case Semi:
done:
if(n > 0)
printf("s %d*n", n);
return;
case Const:
printf("w %o*n", cval);
semi:
n--;
if((o=symbol()) == Semi)
goto done;
if(o != Comma)
goto syntax;
goto loop;
case String:
getstr();
goto semi;
/* cannot handle names */
case 0:
return;
}
syntax:
error("Definition syntax");
errflush(o);
}
enddef() {
extrn symsz, symtab, stablen, symused;
auto s;
s = symtab;
while(s < &symtab[stablen]) {
if(s[2]) {
if(s[0] == 0)
error("%p undefined", s+2);
if(s[0] == Intern)
printf("l l%d*nv l%d*n", s[1], s[1]+1);
}
if(s[0] != KeyType) {
s[2] = 0;
symused--;
}
s =+ symsz;
}
putchar('*n');
}
declare(kw) {
extrn csym, cval, nauto;
auto o;
while((o=symbol())==Name) {
if(*csym!=0)
error("%p redeclared", csym+2);
*csym = kw;
if(kw!=Extern) {
*csym = Auto;
csym[1] = nauto++;
}
o = symbol();
if(kw==Auto & o==Const) {
printf("*ty %d*n", csym[1]);
nauto =+ cval;
o = symbol();
}
if(o != Comma)
goto done;
}
done:
if(o==Semi & kw!=Param | o==RParen & kw==Param)
return;
syntax:
error("Declaration syntax");
errflush(o);
}
stmt() {
extrn pop, stksym, setstk, loc, csym, cval, eof, peekc, peeksym,
swp, swtab, swsiz, nsw;
auto o, l1, l2, l3, p, sswp, snsw;
pop = '*ti';
switch(o = symbol()) {
case 0:
error("Unexpected EOF");
return;
case RBrace:
peeksym = o;
case Semi:
return;
case LBrace:
while(!eof) {
if((o=symbol())==RBrace)
return;
peeksym = o;
stmt();
}
error("Missing '}'");
return;
case Keyword:
switch(cval) {
case Auto:
case Extern:
declare(cval);
goto stmt;
case Goto:
expr();
etc(6);
goto semi;
case Return:
if((peeksym=symbol())==LParen) {
pexpr();
etc(7);
goto semi;
}
etc(11);
goto semi;
case If:
pexpr();
jmpc(l1=loc++);
stmt();
if((o=symbol())==Keyword & cval==Else) {
jmp(l2=loc++);
label(l1);
stmt();
label(l2);
return;
}
peeksym = o;
label(l1);
return;
case While:
l1 = loc++;
label(l2 = loc++);
pexpr();
jmpc(l1);
stmt();
jmp(l2);
label(l1);
return;
case Case:
if((o=symbol())!=Const)
goto syntax;
if((o=symbol())!=Colon)
goto syntax;
if(swp==0) {
error("Not in switch");
goto stmt;
}
if(swp>=swtab+swsiz) {
error("Switch table overflow");
} else {
*swp++ = loc;
*swp++ = cval;
nsw++;
label(loc++);
}
goto stmt;
case Switch:
expr();
printf("*tz l%d*n", l2=loc++);
sswp = swp;
snsw = nsw;
if(swp==0)
swp = swtab;
p = swp;
nsw = 0;
stmt();
jmp(l1=loc++);
/* PDP-11 runtime can't handle this */
if(nsw==0)
error("No cases in switch");
printf("l l%d*nw %o*n", l2, nsw);
while(nsw--) {
printf("w %o*nv l%d*n", p[1], p[0]);
p =+ 2;
}
label(l1);
swp = sswp;
nsw = snsw;
return;
}
error("Unknown keyword");
goto syntax;
case Name:
if(peekc==':') {
peekc = 0;
if(csym[0]>0) {
error("Redefinition");
goto stmt;
}
csym[0] = Intern;
if(csym[1]==0) {
csym[1] = loc++;
loc++;
}
label(csym[1]+1);
setstk = 1;
goto stmt;
}
}
/* Expression statement */
peeksym = o;
expr();
semi:
if((o=symbol())==Semi)
return;
syntax:
error("Statement syntax");
errflush(o);
goto stmt;
}
label(n) printf("l l%d*n", n);
jmp(n) printf("*tt l%d*n", n);
jmpc(n) {
extrn pop;
printf("*tf l%d*n", n);
pop = '*ti';
}
etc(n) { printf("*tn%d*n", n); }
leaf(l) {
extrn pop, xlv;
putchar(pop);
pop = '*t';
if(xlv)
if(l) putchar('v');
else error("Illegal lvalue");
}
xop;
xarg;
xlv;
build(op, arg) {
extrn pop, xop, xarg, xlv, opdope;
/* Check if last operator and this one should be combined.
* A bit awkward. */
switch(op) {
case Star:
if(xop==Amp) {
xop = 0;
return;
}
if(xop==Plus) {
xop = Vector;
return;
}
if(xop==Name & xlv==1) {
xlv = 0;
return;
}
goto gen;
case Amp:
if(xop==Star) {
xop = 0;
return;
}
if(xop==Vector) {
xop = Plus;
return;
}
if(xop==Name & xlv==0) {
xlv = 1;
return;
}
error("Illegal lvalue");
return;
case Neg:
if(xop == Const) {
xarg = -xarg;
return;
}
goto gen;
case Call:
if(xop == MCall) {
etc(1);
xop = 0;
return;
}
goto gen;
}
gen:
/* unary lvalues */
if((opdope[op]&5)==4)
xlv = 1;
/* Output previous operator so we can remember new one */
if(xop) {
switch(xop) {
case Name:
leaf(1);
switch(xarg[0]) {
case Extern:
printf("x %p*n", xarg+2);
goto ret;
case 0: /* undefined label */
case Intern:
printf("x l%d*n", xarg[1]);
goto ret;
case Auto:
printf("a %d*n", xarg[1]);
goto ret;
}
error("invalid type*n");
exit(1);
case Const:
leaf(0);
printf("c %o*n", xarg);
goto ret;
case String:
leaf(0);
printf("vx 1f*n*tt 2f*nl 1*n");
getstr();
printf("l 2*n");
goto ret;
case Star:
if(xlv)
goto ret;
goto out;
case Vector:
if(xlv)
printf("*tb14*n");
else
etc(4);
goto ret;
case MCall:
etc(2);
goto ret;
case Call:
etc(3);
case Comma: /* TODO? should pop stack if not in call */
goto ret;
}
if(xlv)
error("Illegal lvalue");
out:
if(xop < Assign)
error("UNKNOWN OP %d", xop);
else if(xop <= Div)
printf("*tb%o*n", xop-Assign+1);
else if(xop <= ADiv)
printf("*tb%o*n", xop-AOr+0102);
else if(xop <= Postdec)
printf("*tu%o*n", xop-Amp+1);
}
ret:
xop = op;
xarg = arg;
xlv = 0;
}
pexpr() {
auto t;
if((t = symbol()) != LParen)
goto syntax;
expr();
if((t = symbol()) == RParen)
return;
syntax:
error("Statement syntax");
errflush(t);
}
expr() {
extrn csym, cval, peekc, peeksym, loc, opdope;
extrn pop, xop, xlv, setstk, stksym;
auto op, opst 20, pp, prst 20, andflg, o, p, os, l;
op = opst;
pp = prst;
*op = 0;
*pp = 6;
andflg = 0;
if(setstk) {
printf("*ts l%d*n", stksym);
setstk = 0;
pop = '*t';
}
advanc:
switch(o=symbol()) {
case Name:
/* Have to be very careful here because symbol()
* can peek a token itself after a '='.
* In particular it can overwrite csym then.
* e.g. 'a =b' will return '=' and peek b,
* overwriting a's csym. */
if(*csym==0) {
/* this is the best we can do to recognize a call */
if(peekc=='(')
csym[0] = Extern;
else
/* otherwise assume it's a label */
if(csym[1]==0) {
csym[1] = loc++;
loc++;
}
}
build(o, csym);
goto tand;
case Const:
build(o, cval);
goto tand;
case String:
build(o);
build(0); /* have to flush */
tand:
if(andflg)
goto syntax;
andflg = 1;
goto advanc;
/* unary operators */
case Preinc:
case Predec:
if(andflg)
o =+ 2;
goto oponst;
case Not:
if(andflg)
goto syntax;
goto oponst;
case Minus:
if(andflg)
andflg = 0;
else
o = Neg;
goto oponst;
case And:
case Times:
if(andflg)
andflg = 0;
else if(o==And)
o = Amp;
else
o = Star;
goto oponst;
/* special tokens */
case LParen:
if(andflg) {
/* function call */
if((o=symbol())==RParen) {
/* empty call */
o = MCall;
} else {
peeksym = o;
o = Call;
andflg = 0;
}
}
goto oponst;
case RParen:
case RBrack:
if(!andflg)
goto syntax;
goto oponst;
}
/* binary operators */
if(!andflg)
goto syntax;
andflg = 0;
oponst:
p = (opdope[o]>>6) & 077;
/* new op binds tighter, so push onto stack */
if(p>*pp | p==*pp & (opdope[o]&2)!=0) {
/* Mark last thing we generated as lvalue if we
* see an assignment operator. */
if((opdope[o]&5)==5)
xlv = 1;
switch(o) {
case Quest:
build(0);
jmpc(l=loc++);
*++op = l;
goto putin;
case Colon:
if(*op != Quest) {
error("Illegal conditional");
goto syntax;
}
build(0);
jmp(l=loc++);
pp--;
label(*--op);
*op = l;
pop = '*ti';
goto putin;
/* Make parens (and calls) such low precedence
* that only closing paren can pop. */
case Call:
build(MCall);
case LParen:
case LBrack:
p = 04;
}
putin:
if(op>=opst+20) {
error("expression overflow");
exit(1);
}
*++op = o;
*++pp = p;
goto advanc;
}
/* new op binds less tightly, so pop off stack */
--pp;
switch(os = *op--) {
case 0:
peeksym = o;
build(0);
return;
case Colon:
build(0);
label(*op--);
goto oponst;
case Call:
if(o!=RParen)
goto syntax;
build(os);
goto advanc;
case MCall:
build(os);
os = Call;
goto fbuild;
case LParen:
if(o!=RParen)
goto syntax;
goto advanc;
case LBrack:
if(o!=RBrack)
goto syntax;
build(Vector);
goto advanc;
}
fbuild:
build(os);
goto oponst;
syntax:
error("Expression syntax");
errflush(o);
}
errflush(t) {
extrn peeksym;
while(t != Semi & t != LBrace & t != RBrace)
t = symbol();
peeksym = t;
}
/*
* Lexical analysis
*/
lookup() {
extrn symbuf, nwps, symtab, stabsz, stablen, symused, symsz;
auto i, j, np, sp, rp;
i = 0;
sp = symbuf;
j = nwps;
while(j--)
i =+ *sp++;
if(i < 0)
i = -i;
i =% stabsz;
i =* symsz;
while(*(np = &symtab[i+2])) {
sp = symbuf;
j = nwps;
while(j--)
if(*np++ != *sp++)
goto no;
return(&symtab[i]);
no:
if((i =+ symsz) >= stablen)
i = 0;
}
if(++symused >= stabsz) {
error("Symbol table overflow");
exit(1);
}
rp = np = &symtab[i];
sp = symbuf;
*np++ = 0;
*np++ = 0;
j = nwps;
while(j--)
*np++ = *sp++;
return(rp);
}
symbol() {
extrn peeksym, eof, ctab, symbuf, namsiz, csym, cval;
auto b, c, i;
next:
if(peeksym >= 0) {
c = peeksym;
peeksym = -1;
return(c);
}
c = getchr();
if(eof)
return(0);
if(c >= 128) {
error("unknown char <%c>", c);
goto next;
}
switch(ctab[c]) {
case Space:
goto next;
case Plus:
return(subseq('+', Plus, Preinc));
case Minus:
return(subseq('-', Minus, Predec));
case Not:
return(subseq('=', Not, Neq));
case Less:
if(subseq('<', 0, 1))
return(LShift);
return(subseq('=', Less, Leq));
case Greater:
if(subseq('>', 0, 1))
return(RShift);
return(subseq('=', Greater, Geq));
case Assign:
/* First handle ==, === to avoid recursion later */
if((c = getchr()) == '=')
return(subseq('=', Eq, AEq));
/* NB: does not handle comments! */
if(ctab[c] == Space)
return(Assign);
ungetchr(c);
/* BUG: this is a big kludge! */
c = symbol();
if(c >= Or & c <= Div)
return(c - Or + AOr);
if(peeksym >= 0) error("OVERWRITING peeked symbol (%o %o)", peeksym, c);
peeksym = c;
return(Assign);
case Div:
if(subseq('**', 1, 0))
return(Div);
/* find end of comment */
while(1) {
c = getchr();
if(eof) {
error("Unterminated comment");
return(0);
}
if(c == '**') {
c = getchr();
if(c == '/')
goto next;
}
}
case Digit:
cval = 0;
if(c=='0')
b = 8;
else
b = 10;
while(ctab[c]==Digit) {
cval = cval*b + c-'0';
c = getchr();
}
ungetchr(c);
return(Const);
case SQuote:
return(getcc());
case Letter:
i = 0;
while(ctab[c]==Letter | ctab[c]==Digit) {
if(i < namsiz) lchar(symbuf, i++, c);
c = getchr();
}
while(i<namsiz)
lchar(symbuf, i++, 0);
ungetchr(c);
csym = lookup();
if(csym[0]==KeyType) {
cval = csym[1];
return(Keyword);
}
return(Name);
}
return(ctab[c]);
}
subseq(c,a,b) {
auto chr;
if((chr=getchr())==c)
return(b);
ungetchr(chr);
return(a);
}
getstr() {
extrn cval, loc;
auto c, i, buf;
buf = i = 0;
while((c=mapch('"'))>=0) {
lchar(&buf, i++, c);
if(i==NCPW) {
printf("w %o*n", buf);
buf = i = 0;
}
}
printf("w %o*n", buf);
}
getcc() {
extrn cval;
auto c, i;
i = 0;
cval = 0;
while((c = mapch('*'')) >= 0)
if(i < NCPW)
lchar(&cval, i++, c);
return(Const);
}
mapch(d) {
extrn eof;
auto c;
c = getchr();
if(eof | c == '*n') {
error("Unterminated string");
ungetchr(c);
return(-1);
}
if(c == d)
return(-1);
if(c == '**') {
c = getchr();
switch(c) {
case '0':
case 'e': return('*0');
case 't': return('*t');
case 'n': return('*n');
}
}
return(c);
}
/*
* Input/Output
*/
getchr() {
extrn peekc, eof, line;
auto c;
if(peekc) {
c = peekc;
peekc = 0;
} else
c = getchar();
if(c <= 0) {
eof = 1;
return(0);
}
if(c == '*n')
line++;
return(c);
}
ungetchr(c) {
extrn peekc, line;
if(c == '*n')
line--;
peekc = c;
}
xprintf(fmt, x1,x2,x3,x4,x5,x6,x7,x8,x9) {
extrn printn, char, putchar, namsiz;
auto adx, x, c, i, j;
i = 0; /* fmt index */
adx = &x1; /* argument pointer */
loop:
while((c=char(fmt,i++)) != '%') {
if(c == '*e')
return;
putchar(c);
}
x = *adx++;
switch(c = char(fmt, i++)) {
case 'd': /* decimal */
case 'o': /* octal */
printn(x, c=='o'?8:10);
goto loop;
case 'c': /* char */
putchar(x);
goto loop;
case 's': /* string */
j = 0;
while((c=char(x,j++)) != '*e')
putchar(c);
goto loop;
case 'p': /* symbol */
c = namsiz;
j = 0;
putchar('_');
while(c--)
if(char(x,j) != '*e')
putchar(char(x,j++));
goto loop;
}
putchar('%');
i--;
adx--;
goto loop;
}
error(s, p1, p2) {
extrn line, fout, nerror;
auto f;
nerror++;
f = fout;
fout = 1;
printf("%d: ", line);
printf(s, p1, p2);
putchar('*n');
fout = f;
}
symbuf[NWPS];
nwps NWPS;
namsiz NCPS;
symsz SYMSZ;
symused 0;
stabsz NSYMS;
stablen SYMTABSZ;
symtab[SYMTABSZ]; /* SYMSZ*NSYMS */
loc 1;
chain 0;
swsiz 120;
swtab[120];
swp 0;
nsw 0;
nauto 0;
stksym 0;
setstk 0; /* have to set stack */
pop '*t'; /* ignore last stack value */
peeksym 01777777777777777777777; /* -1 */
peekc 0;
eof 0;
line 1;
csym 0;
cval 0;
nerror 0;
ctab[128]
Invalid, Invalid, Invalid, Invalid, Invalid, Invalid, Invalid, Invalid,
Invalid, Space, Space, Space, Invalid, Space, Invalid, Invalid,
Invalid, Invalid, Invalid, Invalid, Invalid, Invalid, Invalid, Invalid,
Invalid, Invalid, Invalid, Invalid, Invalid, Invalid, Invalid, Invalid,
Space, Not, String, Invalid, Invalid, Mod, And, SQuote,
LParen, RParen, Times, Plus, Comma, Minus, Letter, Div,
Digit, Digit, Digit, Digit, Digit, Digit, Digit, Digit,
Digit, Digit, Colon, Semi, Less, Assign, Greater, Quest,
Invalid, Letter, Letter, Letter, Letter, Letter, Letter, Letter,
Letter, Letter, Letter, Letter, Letter, Letter, Letter, Letter,
Letter, Letter, Letter, Letter, Letter, Letter, Letter, Letter,
Letter, Letter, Letter, LBrack, Invalid, RBrack, Invalid, Letter,
Invalid, Letter, Letter, Letter, Letter, Letter, Letter, Letter,
Letter, Letter, Letter, Letter, Letter, Letter, Letter, Letter,
Letter, Letter, Letter, Letter, Letter, Letter, Letter, Letter,
Letter, Letter, Letter, LBrace, Or, RBrace, Invalid, Invalid;
opdope[]
/* 00pp00 is precedence.
* 000001 is binary
* 000002 is right assoc.
* 000004 is lval */
000000, /* None */
000000, /* Space */
000000, /* DQuote ---- unused*/
000000, /* SQuote */
003600, /* LParen */
000200, /* RParen */
003600, /* LBrack */
000200, /* RBrack */
000000, /* LBrace */
000000, /* RBrace */
000000, /* Semi */
000000, /* Digit */
000000, /* Letter */
000000, /* Const */
000000, /* String */
000000, /* Name */
000000, /* Keyword */
000703, /* Comma */
001403, /* Colon */
001403, /* Quest */
003601, /* Call */
003601, /* MCall */
000001, /* Vector */
001207, /* Assign */
001601, /* Or */
002001, /* And */
002201, /* Eq */
002201, /* Neq */
002401, /* Leq */
002401, /* Less */
002401, /* Geq */
002401, /* Greater */
002601, /* RShift */
002601, /* LShift */
003001, /* Plus */
003001, /* Minus */
003201, /* Mod */
003201, /* Times */
003201, /* Div */
001207, /* AOr */
001207, /* AAnd */
001207, /* AEq */
001207, /* ANeq */
001207, /* ALeq */
001207, /* ALess */
001207, /* AGeq */
001207, /* AGreater */
001207, /* ARShift */
001207, /* ALShift */
001207, /* APlus */
001207, /* AMinus */
001207, /* AMod */
001207, /* ATimes */
001207, /* ADiv */
003406, /* Amp */
003402, /* Neg */
003402, /* Star */
003402, /* Not */
003406, /* Preinc */
003406, /* Predec */
003406, /* Postinc */
003406 /* Postdec */
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment