Skip to content

Instantly share code, notes, and snippets.

@reubenjohn
Last active April 6, 2017 17:16
Show Gist options
  • Save reubenjohn/295e4534d35103e785619c9d7195beb5 to your computer and use it in GitHub Desktop.
Save reubenjohn/295e4534d35103e785619c9d7195beb5 to your computer and use it in GitHub Desktop.
99 line lex analyser
#include <stdio.h>
#include <string.h>
#include <regex.h>
/* Source stream read by nextChar() and sink written by tokenize(); both are
 * opened in main(). */
FILE *in, *out;

/* Size of the lexeme buffer callers hand to nextChar(); main() declares
 * `char buf[100]`. Keep the two in sync. */
enum { LEX_BUF_CAPACITY = 100 };

/**
 * Append the next character of `in` to the NUL-terminated lexeme buffer.
 *
 * @param buf  buffer holding *cnt characters; must have room for
 *             LEX_BUF_CAPACITY bytes.
 * @param cnt  number of characters currently in buf; incremented by one
 *             when a character was appended.
 * @return 1 if a character was appended, 0 at end of input, on a read
 *         error, or when the buffer has no room left. On a 0 return neither
 *         buf nor *cnt is modified.
 */
int nextChar(char buf[], int *cnt)
{
    /* Writing buf[*cnt] and the terminator at buf[*cnt + 1] must stay
     * inside the buffer. */
    if (*cnt < 0 || *cnt + 1 >= LEX_BUF_CAPACITY) {
        fprintf(stderr, "lexeme buffer full (%d characters); stopping\n", *cnt);
        return 0;
    }

    int ch = fgetc(in);
    if (ch == EOF) {
        return 0;
    }

    buf[*cnt] = (char)ch;
    buf[*cnt + 1] = '\0';
    (*cnt)++;
    return 1;
}
/*
 * One lexical rule: a regular expression for the lexeme plus the strings
 * that are allowed to come directly after it.
 */
typedef struct {
    /* Regex source for the lexeme; compare() anchors it and appends a follow string. */
    const char *pattern;
    /* Accepted lookahead strings; tokenize() stops at the entry starting with '$'. */
    const char *follow[100];
    /* Non-zero when tokenize() should write the matched lexeme to the output file. */
    int toBeOutput;
} TokenPattern;
/**
 * Test whether the whole of `s` is `pattern` immediately followed by `follow`.
 *
 * The regex that is compiled is "^(pattern)follow$". The pattern is wrapped
 * in a group so that both anchors apply to every branch of a top-level
 * alternation, not just the first and last one. When `follow` starts with one
 * of + - * / % = ( ) { } or a newline, a backslash is inserted in front of it
 * so that the character is taken literally.
 *
 * @param pattern  POSIX extended regex source for the lexeme.
 * @param follow   lookahead text expected after the lexeme (may be empty).
 * @param s        NUL-terminated text to test.
 * @return 1 on a match (a "Matched ..." line is also printed to stdout);
 *         0 on no match, when the combined regex does not fit the internal
 *         buffer, or when it fails to compile.
 */
int compare(const char *pattern, const char *follow, char *s)
{
    char patternFollow[120];
    regex_t regex;
    int written;

    switch (follow[0]) {
    case '+': case '-': case '*': case '/': case '%': case '=':
    case '(': case ')': case '{': case '}': case '\n':
        written = snprintf(patternFollow, sizeof patternFollow,
                           "^(%s)\\%s$", pattern, follow);
        break;
    default:
        written = snprintf(patternFollow, sizeof patternFollow,
                           "^(%s)%s$", pattern, follow);
        break;
    }

    /* A truncated regex would silently match something else; reject it. */
    if (written < 0 || (size_t)written >= sizeof patternFollow) {
        printf("Regex too long for buffer\n");
        return 0;
    }

    /* After a failed regcomp() there is nothing to release. */
    if (regcomp(&regex, patternFollow, REG_EXTENDED)) {
        printf("Could not compile regex\n");
        return 0;
    }

    const int matched = (regexec(&regex, s, 0, NULL, 0) == 0);
    regfree(&regex); /* every successful regcomp() needs a matching regfree() */

    if (matched) {
        printf("Matched (%s) == (%s) with result: %d\n", patternFollow, s, 1);
    }
    return matched;
}
/*
 * Comma-separated string-literal lists that are spliced into the follow
 * arrays of the rule table. Each macro expands to bare initializer elements,
 * so it can only be used inside a brace-enclosed initializer.
 */

/* Blank, separators, arithmetic/assignment operators, brackets, angle brackets. */
#define SPECIALS \
    " ", ";", ",", \
    "+", "-", "*", "/", "%", "=", \
    "(", ")", "{", "}", \
    "<", ">"

/* Decimal digits. */
#define NUMS \
    "0", "1", "2", "3", "4", \
    "5", "6", "7", "8", "9"

/* Lower-case letters, listed in keyboard-row order. */
#define ALPHAS \
    "q", "w", "e", "r", "t", "y", "u", "i", "o", "p", \
    "a", "s", "d", "f", "g", "h", "j", "k", "l", \
    "z", "x", "c", "v", "b", "n", "m"

/* Relational operators. */
#define RELOPS \
    "==", "!=", "<", ">", "<=", ">="
/*
 * Rule table walked top to bottom by tokenize(); the first rule/follow pair
 * that matches wins, so earlier rows take priority (keywords before
 * identifiers).
 *
 * Each row is {lexeme regex, follow strings terminated by "$", output flag}.
 * tokenize() labels an emitted lexeme with tokenIds[row index], so the row
 * order has to stay aligned with that array. The table ends at the row whose
 * pattern starts with '$'.
 */
const TokenPattern patterns[] = {
    /* 0: line comment up to a newline, or a block comment; not emitted. */
    {"//(.)*\n|/\\*(.)*\\*/", {"", "$"}, 0},
    /* 1: NOTE(review): looks like a placeholder that keeps the row indices
     *    aligned with tokenIds -- confirm before removing. */
    {"asdl", {"", "$"}, 0},
    /* 2: the "#include" directive word. */
    {"#include", {"", "$"}, 1},
    /* 3: angle-bracketed text that is followed by a newline. */
    {"<(.)+>", {"\n", "$"}, 1},
    /* 4: keywords, followed by a blank, '(' or a newline. */
    {"(if|int|float|char|double|void|for|while|do)", {" ", "(", "\n", "$"}, 1},
    /* 5: identifiers. */
    {"[a-zA-Z_][a-zA-Z_0-9]*", {SPECIALS, RELOPS, "$"}, 1},
    /* 6: decimal integers that do not start with 0. */
    {"[1-9][0-9]*", {SPECIALS, RELOPS, "$"}, 1},
    /* 7: punctuation and operators. */
    {"(;|,|\\+|\\-|\\*|\\/|\\%|\\=|\\(|\\)|\\{|\\}|==|!=|<|>|<=|>=|\\+\\+|\\-\\-)", {"\n", ")", ";", ALPHAS, NUMS, "$"}, 1},
    /* 8: a single blank or tab; not emitted. */
    {"( |\t)", {"", "$"}, 0},
    /* 9: a single newline; not emitted. */
    {"\n", {"", "$"}, 0},
    /* Disabled catch-all rule: */
    //{"(.)+",{"\n","$"},0},
    /* Sentinel row. Spelled out with an explicit initializer because the
     * empty `{}` form is not valid ISO C before C23. */
    {"$", {NULL}, 0}
};
/*
 * Labels written after each emitted lexeme. tokenize() indexes this array
 * with the position of the matching rule in the `patterns` table.
 */
const char *tokenIds[] = {
    "mcom", "com", "include", "header", "key",
    "id", "num", "sp", "nl", "*",
};

/*
 * Symbolic names for the first nine label indices above (MCOM == 0 ... NL == 8).
 * Not referenced elsewhere in the visible part of this file.
 */
typedef enum {
    MCOM,
    COM,
    INCLUDE,
    HEADER,
    KEY,
    ID,
    NUM,
    SP,
    NL
} PatternType;
/**
 * Try to recognise one token at the front of the lexeme buffer.
 *
 * Every rule in `patterns` is tried in order with each of its follow
 * strings. A rule matches when the whole buffer is the rule's lexeme
 * immediately followed by the follow string (see compare()). On a match the
 * lexeme is written to `out` as "<lexeme>label\t" if the rule asks for
 * output, and the buffer is reduced to just the follow text so that it can
 * start the next token.
 *
 * @param buf  NUL-terminated text read so far; rewritten on a match.
 * @param cnt  number of characters in buf; set to the length of the follow
 *             text on a match.
 * @return 1 if a token was consumed, 0 if nothing matched.
 */
int tokenize(char buf[], int *cnt)
{
    if (*cnt < 0) {
        return 0;
    }
    const size_t bufLen = (size_t)*cnt;

    for (size_t i = 0; patterns[i].pattern[0] != '$'; i++) {
        /* Point at the rule instead of copying the whole struct each pass. */
        const TokenPattern *rule = &patterns[i];

        for (size_t j = 0; rule->follow[j][0] != '$'; j++) {
            const char *follow = rule->follow[j];
            const size_t followLen = strlen(follow);

            /* A follow string longer than the buffered text cannot match.
             * Skipping it also keeps bufLen - followLen from wrapping
             * around and indexing in front of buf. */
            if (followLen > bufLen) {
                continue;
            }
            if (!compare(rule->pattern, follow, buf)) {
                continue;
            }

            const size_t lexemeLen = bufLen - followLen;
            if (rule->toBeOutput) {
                /* Print only the lexeme part; no scratch copy is needed. */
                fprintf(out, "<%.*s>%s\t", (int)lexemeLen, buf, tokenIds[i]);
            }

            /* Keep the lookahead as the start of the next token. The source
             * and destination overlap, hence memmove. */
            memmove(buf, buf + lexemeLen, followLen);
            buf[followLen] = '\0';
            *cnt = (int)followLen;
            return 1;
        }
    }
    return 0;
}
/**
 * Lex the file "in.c" one character at a time and write the recognised
 * tokens to "out.c". Progress is traced on stdout.
 *
 * @return 0 on success, 1 if either file cannot be opened or the output
 *         file cannot be closed cleanly.
 */
int main(void)
{
    in = fopen("in.c", "r");
    if (in == NULL) {
        perror("in.c");
        return 1;
    }

    out = fopen("out.c", "w");
    if (out == NULL) {
        perror("out.c");
        fclose(in);
        return 1;
    }

    char buf[100] = "";
    int cnt = 0;
    while (nextChar(buf, &cnt)) {
        printf("(%s,%d)\n", buf, cnt);
        /* One new character can complete several tokens in a row (e.g. a
         * lexeme and then the blank that followed it), so keep going until
         * nothing more matches. */
        while (tokenize(buf, &cnt)) {
        }
    }
    printf("---\n");

    fclose(in);
    /* fclose() flushes buffered output; a failure here means lost tokens. */
    if (fclose(out) != 0) {
        perror("out.c");
        return 1;
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment