Skip to content

Instantly share code, notes, and snippets.

@rondorkerin
Created July 18, 2014 17:31
Show Gist options
  • Save rondorkerin/cb8c5ad6db72adc94658 to your computer and use it in GitHub Desktop.
Save rondorkerin/cb8c5ad6db72adc94658 to your computer and use it in GitHub Desktop.
/*
PL0 Compiler
By: Stephen Bryant
Last update:
October 24, 2010
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_IDENTIFIER_LENGTH 11
#define MAX_NUMBER_LENGTH 5
#define NUM_RESERVED_WORDS 15
#define NUM_SPECIAL_SYMBOLS 14
#define MAX_TABLE_SIZE 256
#define MAX_LEXEME_SIZE 1000
#define MAX_STACK_HEIGHT 2000
#define MAX_LEXI_LEVELS 3
#define NUMBER_TOKEN_TYPES 36
#define MAX_CODE_LENGTH 500
typedef enum { //p-machine operation enumerations
LIT = 1, OPR = 2, LOD = 3, STO = 4, CAL = 5, INC = 6,
JMP = 7, JPC = 8, WRT = 9, SIO = 10} operation;
typedef enum { //tokens used in scanner
nul = 1,nulsym, identsym, numbersym, plussym, minussym,
multsym, slashsym, oddsym, eqsym, neqsym, lessym, leqsym,
gtrsym, geqsym, lparentsym, rparentsym, commasym, semicolonsym,
periodsym, becomessym, beginsym, endsym, ifsym, thensym,
whilesym, dosym, callsym, constsym, varsym, procsym, outsym,
insym , elsesym, colonsym, exclamsym } token_type;
//maps token names
char* token_names[NUMBER_TOKEN_TYPES] = {"","nulsym", "identsym", "numbersym", "plussym", "minussym", //1-6
"multsym", "slashsym", "oddsym", "eqsym", "neqsym", "lessym", "leqsym", //7-13
"gtrsym", "geqsym", "lparentsym", "rparentsym", "commasym", "semicolonsym", //14-19
"periodsym", "becomessym", "beginsym", "endsym", "ifsym", "thensym", //20-25
"whilesym", "dosym", "callsym", "constsym", "varsym", "procsym", "outsym", //26-32
"insym" , "elsesym", "colonsym", "exclamsym"}; //33-36
typedef struct //used in the scanner
{
token_type id; //id of token
char name[MAX_IDENTIFIER_LENGTH]; //name of token
int index; //index in token array
} token;
typedef struct //used in the p-machine
{
operation OP;
int L;
int M;
} instruction;
typedef struct //used in the parser
{
int kind; //const = 1, var = 2, proc = 3
char name[MAX_IDENTIFIER_LENGTH];
char val[MAX_NUMBER_LENGTH]; //number (ascii value)
int level; //L level
int addr; //M addr
} symbol;
/*
Scanner stuff
*/
//reserved word names
char* reservedWords[] = {"null","call","begin","end","if","then","else","while","do","in","out", "odd","const","procedure", "var"};
//internal representation of reserved words
token_type wsym[] = {nulsym,callsym, beginsym, endsym, ifsym, thensym, elsesym, whilesym, dosym, insym, outsym, oddsym, constsym,procsym,varsym};
//special symbol names
char specialSymbols[] = {'+','-','*','/','(',')','=',',','.','<','>',';',':','!'};
//internal representation of special symbols
token_type ssym[] = {plussym,minussym,multsym,slashsym,lparentsym,rparentsym,eqsym,commasym,periodsym,lessym,gtrsym,semicolonsym,colonsym,exclamsym};
token tokenList[MAX_TABLE_SIZE];
int crntTokenListIndex = 0; //current index in token list
int tokenListSize = 0;
/*
Parser Stuff
*/
symbol symbol_table[MAX_TABLE_SIZE];
symbol crntSymbol; //current symbol.
token_type crntToken; //current token
int crntLevel = 0; //current L level
int symbolTableSize = 0;
int tempMaddr = 0; //temp M used for calculating M addressess
/*
Codegen stuff
*/
int tempLvalue;
int mainLineNumber = 0; //the line number of our main method
int mainFlag = 0; //basically, when i set this to 1, it means we've already changed the main function's address and we don't need to do it anymore.
/*
P-machine stuff
*/
int stack[MAX_STACK_HEIGHT];
int SP = 0, BP = 1, PC = 0; //stack pointer, base pointer, program counter
instruction* IR; //instruction register
instruction codeBase[MAX_CODE_LENGTH]; //the code
int codeBaseSize; //current size of the code base
FILE* inputFile;
FILE* outputFile;
/*
basic function declarations
*/
void scan();
void parse();
token_type isReservedWord(char* tempIdentifier);
token_type isSpecialSymbol(char temp);
symbol findSymbol(char* name, int level);
void printSymbolTable();
void insertSymbol();
void addConstantNumber(int temp);
void gettok();
int accept(token_type s);
void error(int num);
void program();
void block();
void statement();
void condition();
void expression();
void term();
void factor();
void codeGen(operation opr, int M, int L);
void changeMvalue(int address, int M);
void printCodeBase();
void printCodeLine(int i);
void printTopOfStack();
void printStack(int tempBP,int tempSP);
void inputToStack();
void fetch();
void execute();
void pmachine();
/*
Program driving function
*/
int main()
{
int i;
printf("scanning:\n");
scan();
for (i = 0; i < tokenListSize; i++)
{
token x = tokenList[i];
printf("%d\n", x.id);
}
printf("parsing:\n");
//
parse();
printf("symbol table:\n");
//
for (i = 0; i < symbolTableSize; i++)
{
printf("name %s kind %d level %d addr %d val %s\n",symbol_table[i].name,symbol_table[i].kind,symbol_table[i].level,symbol_table[i].addr,symbol_table[i].val);
}
printf("printing code base:\n");
//
printCodeBase();
printf("Executing code in p machine!\n");
//
pmachine();
printf("success!\n");
//
}
/*
Populates tokenList with tokens from the program.
*/
void scan()
{
inputFile = fopen("programinput.txt","r");
outputFile = fopen("lexoutput.txt","w");
char temp;
int i; //counter to make sure identifiers and numbers are correct length
while ((temp = fgetc(inputFile)) != EOF) //Token reading loop
{
token t;
token_type type;
i = 0;
if (isalpha(temp))
{
char tempIdentifier[MAX_IDENTIFIER_LENGTH];
do //read characters into identifier
{
tempIdentifier[i] = temp;
i++;
temp = fgetc(inputFile);
} while (isalnum(temp) && temp != EOF);
if (i > MAX_IDENTIFIER_LENGTH) //if the identifier becomes too long
{
outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner
error(27);
}
else //we've reached something that isn't alphanumeric
{
ungetc(temp,inputFile);
tempIdentifier[i] = 0; //add null
type = isReservedWord(tempIdentifier);
if (strcmp(tempIdentifier,"null") == 0)
{
t.id = nulsym;
}
else if (type == nulsym) //it's an identifier
{
t.id = identsym;
strcpy(t.name,tempIdentifier);
}
else
{
t.id = type; //it's a reserved word
strcpy(t.name,tempIdentifier);
}
}
}
else if (isdigit(temp))
{
char value[MAX_NUMBER_LENGTH+1]; //temporary identifier
do
{
value[i] = temp;
i++;
temp = fgetc(inputFile);
}while (isdigit(temp) && temp != EOF);
if (i > MAX_NUMBER_LENGTH)
{
outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner
error(25); //so i made this ad-hoc fix.
}
else
{
ungetc(temp,inputFile);
value[i] = 0; //add null char
t.id = numbersym;
strcpy(t.name,value);
}
}
else if ( (type = isSpecialSymbol(temp)) != nulsym) //assign type to the token_type of temp, make sure it's a token
{
switch (type) //int ssym[] = {plussym,minussym,multsym,slashsym,lparentsym,rparentsym,identsym,commasym,periodsym,lessym,gtrsym,semicolonsym,colonsym};
{
case (slashsym):
{
temp = fgetc(inputFile);
if (isSpecialSymbol(temp) == multsym) //This is a comment
{
do
{
temp = fgetc(inputFile);
}while(isSpecialSymbol(temp) != multsym);
temp = fgetc(inputFile);
t.id = nulsym; //we disregard this token because it's a comment
if (isSpecialSymbol(temp) != slashsym)
{
outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner
error(28);
}
}
else //this is a slash symbol
{
strcpy(t.name,"/");
t.id = slashsym;
ungetc(temp,inputFile);
}
break;
}
case (colonsym):
{
temp = fgetc(inputFile);
if (isSpecialSymbol(temp) == eqsym) //if it is a becoming token.
{
t.id = becomessym;
strcpy(t.name,":=");
}
else
{
ungetc(temp,inputFile);
error(1);
}
break;
}
case (lessym):
{
temp = fgetc(inputFile);
if (isSpecialSymbol(temp) == eqsym) // <= token
{
t.id = leqsym;
strcpy(t.name,"<=");
}
else
{
t.id = lessym;
strcpy(t.name,"<");
ungetc(temp,inputFile);
}
break;
}
case (gtrsym):
{
temp = fgetc(inputFile);
if (isSpecialSymbol(temp) == eqsym) //>= token
{
t.id = geqsym;
strcpy(t.name,">=");
}
else // > token
{
t.id = gtrsym;
strcpy(t.name,">");
ungetc(temp,inputFile);
}
break;
}
case (exclamsym):
{
temp = fgetc(inputFile);
if (isSpecialSymbol(temp) == eqsym) //!= token
{
t.id = neqsym;
strcpy(t.name,"!=");
}
else // ! token
{
outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner
error(29);
ungetc(temp,inputFile);
}
break;
}
default:
{
int j;
t.id = type;
for (j = 0; j < NUM_SPECIAL_SYMBOLS; j++)
{
if (ssym[j] == type)
{
t.name[0] = specialSymbols[j]; //set the name equal to the
t.name[1] = 0; //nulsym
}
}
}
}
}
else if (temp == 32 || temp == 10 || temp == 13 || temp == 9) //it is a whitespace, ignore it.
{
t.id = nulsym; //whitespace
}
else if (temp != 32 && temp != 10 && temp != 13 && temp != 9) //if it's not a white space character
{
fprintf(outputFile,"invalid symbol.[%d]",temp);
}
if (t.id != nulsym) //if id is nulsym, it's a comment and therefore should not be added to a token list. Otherwise, add it to the list.
{
tokenList[crntTokenListIndex] = t;
t.index = crntTokenListIndex;
crntTokenListIndex++;
}
}
tokenListSize = crntTokenListIndex;
fclose(inputFile);
fclose(outputFile);
return;
}
/*
* Scanner helper function.
* returns nulsym if the identifier is not found, the identifier otherwise
**/
token_type isReservedWord(char* tempIdentifier)
{
int i;
for (i = 0; i < NUM_RESERVED_WORDS; i++) //check if its a reserved word
{
if (strcmp(tempIdentifier,reservedWords[i]) == 0)
{
return wsym[i]; //return token_type
}
}
return nulsym;
}
/*
* Scanner helper function
* returns nulsym if the character is not a special symbol, returns the token_type if it is a special symbol
**/
token_type isSpecialSymbol(char temp)
{
int i;
for (i = 0; i < NUM_SPECIAL_SYMBOLS; i++)
{
if (temp == specialSymbols[i])
{
return ssym[i];
}
}
return nulsym;
}
/*
Recursive Descent Parser functions
*/
/*
Parser helper function.
prints entire symbol table
*/
void printSymbolTable()
{
int i;
for (i = 0; i < symbolTableSize; i++)
{
symbol s = symbol_table[i];
if (s.kind == 1)
{
printf("constant %s, value %s\n", s.name, s.val);
}
else if (s.kind == 2)
{
printf("variable %s, L level %d, M addr %d\n", s.name, s.level, s.addr);
}
else if (s.kind == 3)
{
printf("procedure %s, L level %d, M addr %d\n", s.name, s.level, s.addr);
}
}
}
/*
* parser helper function.
* inserts crnt symbol into the table. Updates table if the symbol is already there.
*/
void insertSymbol()
{
int i;
for (i = 0; i < symbolTableSize; i++)
{
if (strcmp(symbol_table[i].name,crntSymbol.name) == 0 && symbol_table[i].level == crntSymbol.level) //if they have the same name AND level, error
{
error(34);
}
}
if (symbolTableSize < MAX_TABLE_SIZE) //the non-duplicate case.
{
symbol_table[symbolTableSize] = crntSymbol;
symbolTableSize++;
return;
}
else
{
fprintf(outputFile,"Error: max symbolTableSize exceeded.");
return;
}
}
/*
* returns the symbol from the symbol table with the specified name and level.
* if there is no symbol of that name at that level, we keep searching deeper until we find one.
*/
symbol findSymbol(char* name, int level)
{
int i, lvl;
for (lvl = level; lvl >= 0; lvl--)
{
for (i = 0; i < symbolTableSize; i++)
{
if (strcmp(symbol_table[i].name,name) == 0 && symbol_table[i].level == lvl)
{
return symbol_table[i];
}
}
}
}
/*
* counts the variables on the current level in the symbol table
*/
int numVariables(int crntLevel)
{
int i, counter = 0;
for (i = 0; i < symbolTableSize; i++)
{
if (symbol_table[i].level == crntLevel && symbol_table[i].kind == 2)
{
counter++;
}
}
return counter;
}
/*
* Parser helper function
* gets the next token from the input and sets it to the "crntToken" variable.
* Also gets name if its an identifier and gets the value if its a number and sets it to the current symbol.
*/
void gettok()
{
if (crntTokenListIndex < tokenListSize) //if we haven't gone through all of the tokens
{
crntToken = (token_type)tokenList[crntTokenListIndex].id;
if (crntToken == identsym) //this is an identifier, so we need to get the name
{
strcpy(crntSymbol.name,tokenList[crntTokenListIndex].name);
}
if (crntToken == numbersym) //Since this is a number token, we're going to update the current symbol with it
{
strcpy(crntSymbol.val,tokenList[crntTokenListIndex].name);
}
crntTokenListIndex++;
}
}
/*
* parser helper function
* returns 1 if the current symbol equals the symbol we're looking or
*/
int accept(token_type s) {
if (crntToken == s) {
return 1;
}
return 0;
}
/*
*
* error: pass in the error number and it prints out the error and quits parsing
*/
void error(int num)
{
printf("parsing failed! error number %d. See parseroutput.txt for details.\n",num);
switch (num)
{
case 1: {fprintf(outputFile, "syntax error: Use = instead of :=");break;} //implemented
case 2: {fprintf(outputFile, "syntax error: = must be followed by a number");break;} //implemented
case 3: {fprintf(outputFile, "syntax error: Identifier must be followed by =");break;} //implemented
case 4: {fprintf(outputFile, "syntax error: const, var, procedure must be followed by identifier");break;} //implemented
case 5: {fprintf(outputFile, "syntax error: semicolon or comma missing");break;} //implemented
case 6: {fprintf(outputFile, "syntax error: incorrect symbol after procedure declaration");break;} //KIND OF implemented?
case 7: {fprintf(outputFile, "syntax error: statement expected.");break;}
case 8: {fprintf(outputFile, "syntax error: incorrect symbol after statement part in block.");break;}
case 9: {fprintf(outputFile, "syntax error: period expected");break;} //IMPLEMENTED
case 10: {fprintf(outputFile, "syntax error: semicolon between statements missing");break;}
case 11: {fprintf(outputFile, "syntax error: undeclared identifier");break;}
case 12: {fprintf(outputFile, "syntax error: assignment to constant or procedure not allowed");break;}
case 13: {fprintf(outputFile, "syntax error: assignment operator expected");break;}
case 14: {fprintf(outputFile, "syntax error: call must be followed by an identifier");break;} //implemented
case 15: {fprintf(outputFile, "syntax error: call of a constant or variable is meaningless");break;}
case 16: {fprintf(outputFile, "syntax error: then expected");break;} //implemented
case 17: {fprintf(outputFile, "syntax error: semicolon or } expected");break;} //semicolon one implemented
case 18: {fprintf(outputFile, "syntax error: do expected");break;} //implemented
case 19: {fprintf(outputFile, "syntax error: incorrect symbol following statement");break;}
case 20: {fprintf(outputFile, "syntax error: relational operator expected");break;} //implemented
case 21: {fprintf(outputFile, "syntax error: expression must not contain a procedure identifier");break;} //implemented
case 22: {fprintf(outputFile, "syntax error: right parentheses missing.");break;} //implemented
case 23: {fprintf(outputFile, "syntax error: the preceding factor cannot begin with this symbol.");break;} //implemented
case 24: {fprintf(outputFile, "syntax error: an expression cannot begin with this symbol");break;} //implemented
case 25: {fprintf(outputFile, "syntax error: the number is too large.");break;} //implemented
case 27: {fprintf(outputFile,"syntax error: maximum identifier length exceeded.");break;} //implemented
case 28: {fprintf(outputFile,"syntax error: comment not terminated properly!");break;} //implemented
case 29: {fprintf(outputFile,"syntax error: '!' not followed by '='");break;} //implemented
case 30: {fprintf(outputFile,"syntax error: begin not followed by end");break;} //implemented
case 31: {fprintf(outputFile,"syntax error: if without then");break;} //implemented
case 32: {fprintf(outputFile,"syntax error: comma expected");break;} //implemented
case 33: {fprintf(outputFile,"syntax error: can not assign a value to a const");break;} //implemented
case 34: {fprintf(outputFile,"syntax error: multiple declarations of the same variable");break;} //implemented
}
//
exit(0);
}
/*
* this is the function we call that does the parser.
*/
void parse()
{
outputFile = fopen("parseroutput.txt","w");
crntTokenListIndex = 0;
program();
fprintf(outputFile,"No errors, program is syntactically correct.\n");
fclose(outputFile);
}
void program()
{
codeGen(JMP, 0, 0); //create a JMP variable, which we will later set to the begin statement of the main program.
gettok();
block();
if (!accept(periodsym))
{
error(9);
}
codeGen(OPR,0,0); //always add a RTN at the end
}
void block()
{
if (accept(constsym)) //insert a const into the table. No code need be generated - we just replace the name of references
{ //with the value of the const, which we find from the symbol table.
do{ //const loop
gettok();
crntSymbol.kind = 1; //since its a var, set its type to 2
crntSymbol.level = 0; //constants have no level
crntSymbol.addr = 0; //constants have no addr
if (!accept(identsym))
{
error(4);
}
gettok();
if (!accept(eqsym))
{
error(3);
}
gettok();
if (!accept(numbersym))
{
error(2);
}
insertSymbol(); //insert constant into symbol table
gettok();
}while (accept(commasym));
if (accept(identsym)) //identifier without a comma. comma expected.
{
error(32);
}
if (!accept(semicolonsym))
{
error(17);
}
gettok();
}
if (accept(varsym)) //insert a var into the table
{
do{
gettok();
crntSymbol.kind = 2; //since its a var, set its type to 2
crntSymbol.level = crntLevel; //this doesnt need to be changed
crntSymbol.addr = tempMaddr + 3; //adds 3 to the address since the first 3 addresses are links
tempMaddr++;
if (!accept(identsym))
{
error(4);
}
insertSymbol(); //insert var into symbol table
gettok();
}while(accept(commasym));
if (accept(identsym)) //identifier without a comma. comma expected.
{
error(32);
}
if (!accept(semicolonsym))
{
error(17);
}
gettok();
}
while(accept(procsym)) //insert a procedure into the table
{
crntLevel++;
crntSymbol.kind = 3; //since its a proc, set its type to 3
crntSymbol.level = crntLevel; //this doesnt need to be changed
crntSymbol.addr = codeBaseSize; //we set the address to codebase size, as a kind of hack. this way we can look it up when we use "call"
tempMaddr = 0; //currently 0, we may need to change it?
gettok();
if (!accept(identsym))
{
error(4);
}
insertSymbol(); //insert procedure into symbol table
gettok();
if (!accept(semicolonsym))
{
error(6);
}
gettok();
block();
codeGen(OPR,0,0); //always add a RTN at the end
if (!accept(semicolonsym))
{
error(6);
}
gettok();
crntLevel--;
}
//int tempAddr = codeBaseSize;
if (mainFlag == 0 && crntLevel == 0)
{
changeMvalue(0,codeBaseSize); //at this point, we've declared everything, so the next line has to be the main line.
printf("%d",codeBaseSize);
mainFlag = 1; //we know the JMP will always be at 0 on the codebase.
}
codeGen(INC, 0, 3 + numVariables(crntLevel)); //create an increment variable
//changeMvalue(tempAddr,3 + numVariables(crntLevel)); //increment size
statement();
}
void statement()
{
if (accept(identsym)) //PARSER: some variable becomes an expression
{
//CODEGEN: find the symbol of the current identifier.
//we pass in the crnt level and we try to get the variable on the current level,
//but if there isn't one there, we look at larger and larger scope
symbol s = findSymbol(tokenList[crntTokenListIndex - 1].name,crntLevel);
if (s.kind == 1) //assigning a value to a const!
{
error(33);
}
gettok();
if (!accept(becomessym))
{
error(1);
}
gettok();
expression();
codeGen(STO,crntLevel - s.level,s.addr);
//CODEGEN: store the result of the expression in the current token's address
//if the current token is say, a global variable (level 0), the L value of the
//generated code will be crnt level - 0.
//For example if crntlevel = 1, then the L value is 1 - 0, which is how many levels
//we must go down when storing.
}
else if (accept(callsym)) //call
{
gettok(); //identifier
if (!accept(identsym))
{
error(14);
}
symbol s = findSymbol(tokenList[crntTokenListIndex - 1].name, crntLevel + 1);
//we find a symbol at the level we're at + 1, because we can call functions at
//higher levels. This will break down if we use multiple nested functions so we'll
//have to do a findprocedure function which finds the procedure from the symbol table.
if (s.kind == 1 || s.kind == 2) //we must only call procedures, these two types are var and const
{
error(15);
}
codeGen(CAL, 0, s.addr); //call a certain function by jumping to it in the code
printf("%d",s.addr);
gettok();
}
else if (accept(beginsym)) //begins a code block
{
gettok();
statement();
while (accept(semicolonsym))
{ //get statements
gettok();
statement();
}
if (!accept(endsym))
{
error(30); //begin not followed by end
}
gettok();
}
else if (accept(ifsym)) //if statement
{
gettok();
condition();
if (!accept(thensym))
{
error(16);
}
gettok();
int tempAddress = codeBaseSize;
//CODEGEN: generate JPC code.
codeGen(JPC,0,0);
codeGen(INC,0,-1); //decrease the top of the stack.
statement();
changeMvalue(tempAddress,codeBaseSize); //change the jumpconditional to whatever the current address is.
}
else if (accept(whilesym)) //while statement
{
gettok();
int conditionAddress = codeBaseSize; //get the address of the condition
condition();
if (!accept(dosym))
{
error(18);
}
//CODEGEN: generate JPC code.
int tempAddress = codeBaseSize; //get the address of the conditional jump
codeGen(JPC,0,0);
codeGen(INC,0,-1); //decrease the top of the stack.
gettok();
statement();
codeGen(JMP,0,conditionAddress); //create a JMP to JMP back to the condition.
changeMvalue(tempAddress,codeBaseSize); //change the jumpconditional to whatever the current address is so we can check the condition again and loop
}
}
void condition()
{
if (accept(oddsym))
{
gettok();
expression();
//CODEGEN: odd operation
codeGen(OPR,0,6);
}
else
{
expression(); //this is the first operand
token_type tempToken = crntToken; //CODEGEN: save this so we can gen the code for it (this is the relation)
if (!accept(eqsym) && !accept(neqsym) && !accept(lessym) && !accept(leqsym) && !accept(gtrsym) && !accept(geqsym))
{
error(20); //not a relation
}
gettok();
expression(); //second operand
switch (tempToken) //CODEGEN: generate code for conditional
{
case (eqsym):
codeGen(OPR,0,8);
break;
case (neqsym):
codeGen(OPR,0,9);
break;
case (lessym):
codeGen(OPR,0,10);
break;
case (leqsym):
codeGen(OPR,0,11);
break;
case (gtrsym):
codeGen(OPR,0,12);
break;
case (geqsym):
codeGen(OPR,0,13);
break;
default: error(20); break;
}
}
}
void expression()
{
if (accept(plussym) || accept(minussym))
{
gettok();
if (accept(minussym))
{
codeGen(OPR,0,1); //CODEGEN: make the current top of the stack negative
}
}
if (accept(slashsym) || accept(multsym)) //expressions cannot begin with these
{
error(24);
}
term();
while (accept(plussym) || accept(minussym))
{
if (accept(plussym))
{
gettok();
if (accept(procsym))
{
error(21);
}
term();
codeGen(OPR,0,2); //CODEGEN: addition
}
else if (accept(minussym))
{
gettok();
if (accept(procsym))
{
error(21);
}
term();
codeGen(OPR,0,3); //CODEGEN: subtraction
}
}
}
void term()
{
factor();
while (accept(multsym) || accept(slashsym))
{
if (accept(multsym))
{
gettok();
factor();
codeGen(OPR,0,4); //CODEGEN: multiplication
}
else if (accept(slashsym))
{
gettok();
factor();
codeGen(OPR,0,5); //CODEGEN: division
}
}
}
//can be an identifier, a number, or an expression
void factor()
{
if (accept(identsym))
{
char* tempIdentName = tokenList[crntTokenListIndex-1].name;
symbol s = findSymbol(tempIdentName,crntLevel);
if (s.kind == 1) //CODEGEN: constant
{
codeGen(LIT,0,atoi(s.val));
}
else if (s.kind == 2) //CODEGEN: var
{
codeGen(LOD,crntLevel - s.level,s.addr); //CODEGEN: load the token given by the current identifier
}
gettok();
}
else if(accept(numbersym))
{
codeGen(LIT,0,atoi(tokenList[crntTokenListIndex - 1].name)); //CODEGEN: load the number associated with this token as a literal
//we have to decrement the index because getTok already incremented it.
gettok();
}
else if (accept(lparentsym)) //expression inside parentheses
{
gettok();
expression();
if (!accept(rparentsym))
{
error(22); //no rparen error
}
gettok();
}
else
{
error(23);
}
}
/**
generates assembly code for a specified operation, M, and L value. Adds it to instruction list.
*/
void codeGen(operation opr, int L, int M)
{
instruction temp;
temp.OP = opr;
temp.L = L;
temp.M = M;
codeBase[codeBaseSize] = temp; //the code
codeBaseSize++;
}
/*
change M value of code at address "address" to "M"
*/
void changeMvalue(int address, int M)
{
codeBase[address].M = M;
}
void printCodeBase()
{
outputFile = fopen("codegenoutput.txt","w");
int i;
for (i = 0; i < codeBaseSize; i++)
{
fprintf(outputFile,"%d %d %d\n",codeBase[i].OP,codeBase[i].L, codeBase[i].M);
printf("%d %d %d\n",codeBase[i].OP,codeBase[i].L, codeBase[i].M);
}
}
/*
P-Code translation machine
By: Stephen Bryant
Thursday, September 02, 2010.
*/
void pmachine(void)
{
//open files
inputFile = fopen("codegenoutput.txt","r");
outputFile = fopen("pmachineoutput.txt","w");
/*Init stack*/
stack[1] = 0;
stack[2] = 0;
stack[3] = 0;
//temp struct
instruction temp;
codeBaseSize = 0;
//print header
fprintf(outputFile,"%s\t%s\t%s\t%s\n","Line","OP","L","M");
while (fscanf(inputFile,"%d %d %d",((int)(codeBase[codeBaseSize].OP)),&(codeBase[codeBaseSize].L),&(codeBase[codeBaseSize].M)) != EOF) //read input.txt into codebase
{
printCodeLine(codeBaseSize); //print current line being read
fprintf(outputFile,"\n");
codeBaseSize++;
}
//header for execution
fprintf(outputFile,"\n\n\n");
fprintf(outputFile,"\t\t\t\t\tPC\tBP\tSP\tStack\n");
fprintf(outputFile,"Initial Values\t\t\t\t%d\t%d\t%d\t",PC,BP,SP);
printStack(BP,SP);
fprintf(outputFile,"\n");
//loop fetch/execute
int flag = 0;
do
{
printCodeLine(PC);
fetch();
//we're done executing after this loop
if (IR->OP == 2 && IR->M == 0 && (BP == 1))
{
flag = 1;
}
execute();
fprintf(outputFile,"\t%d\t%d\t%d\t",PC,BP,SP);
printStack(BP,SP);
fprintf(outputFile,"\n");
} while(flag == 0); //if this were true, it would mean the program is returning and there is nothing left on the stack
fclose(inputFile);
fclose(outputFile);
}
void printCodeLine(int i)
{
fprintf(outputFile,"%d\t", i); //print line number
switch (codeBase[i].OP)
{
case 1: //LIT: push literal "M" to top of the stack
fprintf(outputFile,"lit\t");
break;
case 2: //OPR: perform operation
fprintf(outputFile,"opr\t");
break;
case 3: //LOD: load item at offset M, L levels down to the top of the stack.
fprintf(outputFile,"lod\t");
break;
case 4: //STO: store item at offset M, L levels down from the top of the stack.
fprintf(outputFile,"sto\t");
break;
case 5: //CAL: create a new frame.
fprintf(outputFile,"cal\t");
break;
case 6: //INC: allocate M locals (first 3 are SL, DL, RA)
fprintf(outputFile,"inc\t");
break;
case 7: //JMP: jump to line M of the code base
fprintf(outputFile,"jmp\t");
break;
case 8: //JPC: jump to line M of the code base if the item on top of the stack is 0
fprintf(outputFile,"jpc\t");
break;
case 9: //SOI: print the top of the stack
fprintf(outputFile,"soi\t");
break;
case 10: //SIO: store input to the stack at point SP.
fprintf(outputFile,"sio\t");
break;
default:
break;
}
fprintf(outputFile,"%d\t%d\t",codeBase[i].L,codeBase[i].M);
}
//print entire populated stack.
void printStack(int tempBP, int tempSP)
{
int i;
if (tempBP == 0)
{
return;
}
else
{
printStack(stack[tempBP + 1],tempBP-1);
if (tempBP != 1)
{
fprintf(outputFile,"| ");
}
if (tempBP == tempSP + 1) //this would be if someone used "CAL" recently, the stack pointer would not have incremented and the new AR would need to show up.
{
for (i = tempBP; i < tempBP + 3; i++)
{
fprintf(outputFile,"%d ",stack[i]);
}
}
else //this is a normal AR without a recent "CAL"
{
for (i = tempBP; i <= tempSP; i++)
{
fprintf(outputFile,"%d ",stack[i]);
}
}
}
}
//fetch cycle, retrieves code from codebase to IR
void fetch()
{
IR = &codeBase[PC];
PC++;
}
//execute cycle, executes code in IR
void execute()
{
int op = IR->OP;
switch (op)
{
case 1: //LIT: push literal "M" to top of the stack
SP++;
stack[SP] = IR->M;
break;
case 2: //OPR: perform operation
switch(IR->M)
{
case 0: //RTN: return operation
SP = BP - 1; //top of the stack is the base pointer of the current frame - 1 (since the frame is deleted)
PC = stack[SP + 3]; //new program counter is the return address from the frame
BP = stack[SP + 2]; //new base pointer is the dynamic link from the current frame
break;
case 1: //NEG: negative of the top of the stack
stack[SP] = - stack[SP];
break;
case 2: //ADD: add top two fields, store result on the top of the stack
SP--;
stack[SP] = stack[SP] + stack[SP+1];
break;
case 3: //SUB: subtract top field from the second field, store result on top of the stack
SP--;
stack[SP] = stack[SP] - stack[SP+1];
break;
case 4: //MUL: multiply top two items, store result on the top of the stack
SP--;
stack[SP] = stack[SP] * stack[SP+1];
break;
case 5: //DIV: divide second field by top field, store result
SP--;
stack[SP] = stack[SP] / stack[SP+1];
break;
case 6: //ODD: returns true if the top field is odd
stack[SP] = stack[SP] % 2;
break;
case 7: //MOD: mod the field one level down by the field on top of the stack, store on top of the stack.
SP--;
stack[SP] = stack[SP] % stack[SP+1];
break;
case 8: //EQL: whether the top two fields are equal, adds a boolean value to the top of the stack.
SP--;
stack[SP] = (stack[SP] == stack[SP+1]);
break;
case 9: //NEQ: when the top two fields are not equal, adds a boolean value true to the top of the stack, or false.
SP--;
stack[SP] = !(stack[SP] == stack[SP+1]);
case 10: //LSS: adds to the top of the stack true if the first argument is less than the second argument.
SP--;
stack[SP] = stack[SP] < stack[SP+1];
break;
case 11: //LEQ: adds to the top of the stack true if the first argument is less than or equal to the second argument.
SP--;
stack[SP] = stack[SP] <= stack[SP+1];
break;
case 12: //GTR: adds to the top of the stack true if the first argument is greater than the second argument.
SP--;
stack[SP] = stack[SP] > stack[SP+1];
break;
case 13: //GEQ: adds to the top of the stack "true" if the first argument is greater than or equal to the second argument.
SP--;
stack[SP] = stack[SP] >= stack[SP+1];
break;
default:
exit(0); //error
break;
}
break;
case 3: //LOD: load item at offset M, L levels down to the top of the stack.
SP++;
stack[SP] = stack[base(IR->L,BP) + IR->M];
break;
case 4: //STO: store item at offset M, L levels down from the top of the stack.
stack[base(IR->L,BP) + IR->M] = stack[SP];
SP--;
break;
case 5: //CAL: create a new frame.
stack[SP + 1] = base(IR->L,BP); //static link
stack[SP + 2] = BP; //dynamic link: points to previous stack frame
stack[SP + 3] = PC; //Address in code of next code segment
BP = SP + 1; //base pointer to the bottom of the new frame
PC = IR->M;
break;
case 6: //INC: allocate M locals (first 3 are SL, DL, RA)
SP += IR->M;
break;
case 7: //JMP: jump to line M of the code base
PC = IR->M;
break;
case 8: //JPC: jump to line M of the code base if the item on top of the stack is 0
if (stack[SP] == 0)
{
PC = IR->M;
SP--;
}
break;
case 9: //SOI: print the top of the stack
fprintf(outputFile,"\nTop Of the stack:%d",stack[SP]);
SP--;
break;
case 10: //SIO: store input to the stack at point SP.
fscanf(inputFile,"%d ",&stack[SP]);
SP++;
break;
default:
break;
}
}
/**********************************************/
/* Find base L levels down */
/* */
/**********************************************/
int base(int l,int base) // l stand for L in the instruction format
{
int b1; //find base L levels down
b1 = base;
while (l > 0)
{
b1 = stack[b1]; //base pointer equals the current base pointer (which points down to the previous frame's base)
l--;
}
return b1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment