-
-
Save scharf/a9e7a1ab41faf0896a96 to your computer and use it in GitHub Desktop.
Python parser written in lex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%{ | |
#include "ParserInterface.h" | |
#include <malloc.h> | |
#include <stdio.h> | |
/* Replace any existing yywrap with a macro that always yields 1. */
#undef yywrap
#define yywrap() 1
/*
 * ADV moves the position bookkeeping over the token just matched:
 * gFrom becomes the previous gPos, gPos grows by yyleng, and gTo is the
 * last position covered by the token (gPos-1).
 * It is wrapped in do/while(0) so that "ADV;" is always exactly one
 * statement, whatever context it is used in.
 */
#define ADV do { gFrom=gPos; gPos+=yyleng; gTo=gPos-1; } while(0)
#define TABSIZE 8           /* tab width used by calc_indent() */
#define STACKSIZE 20        /* number of entries in stack[] */
#define STATE_STACKSIZE 20
static int gFrom=0;         /* position of the first character of the current token */
static int gTo=0;           /* position of the last character of the current token */
static int gPos=0;          /* position just past the current token */
static int gIndent;         /* indentation of the current line, set from calc_indent() */
static int gIgnoreIndent=0; /* non-zero while inside a def/class parenthesis list */
/*
 * Overwrite the first len characters of str with c, then print str to
 * stdout.  str is printed with "%s", so it has to be NUL-terminated.
 */
static void handle_string(char*str,int len,char c)
{
    int k = 0;

    while (k < len) {
        str[k] = c;
        k++;
    }
    printf("%s",str);
}
#define eClass 1 | |
#define eFunc 2 | |
typedef struct stack_struct { | |
int itemType; | |
int indent; | |
pi_data data; | |
int free_name; | |
} Stack; | |
Stack stack[STACKSIZE]; | |
static int stackp=0; | |
static pi_data* gData; | |
static void push(int itemType) | |
{ | |
stackp++; | |
if(stackp>=STACKSIZE) { | |
pi_error_msg("stack overflow"); | |
stackp--; | |
} | |
gData=&stack[stackp].data; | |
pi_reset(gData); | |
stack[stackp].indent=gIndent; | |
stack[stackp].itemType=itemType; | |
stack[stackp].free_name=0; | |
switch (itemType) { | |
case eClass: | |
gData->from=gFrom; | |
break; | |
case eFunc: | |
gData->from=gFrom; | |
break; | |
default: | |
pi_error_msg("???"); | |
break; | |
} | |
} | |
static int try_pop(int indent) | |
{ | |
if(stackp>0&&indent<=stack[stackp].indent) { | |
switch (stack[stackp].itemType) { | |
case eClass: | |
gData->to=gTo; | |
pi_end_class(gData); | |
break; | |
case eFunc: | |
/*end_func();*/ | |
break; | |
default: | |
pi_error_msg("???"); | |
break; | |
} | |
if (stack[stackp].free_name) { | |
if (gData->name) | |
free(gData->name); | |
} | |
stackp--; | |
gData=&stack[stackp].data; | |
return 1; | |
} | |
return 0; | |
} | |
static int calc_indent(char*str,int len) | |
{ | |
int i; | |
int indent=0; | |
for(i=1;i<len;i++) { | |
if (str[i]=='\t') | |
indent+=TABSIZE; | |
else if (str[i]==' ') | |
indent++; | |
else | |
break; | |
} | |
return indent; | |
} | |
static void reset_all() | |
{ | |
gPos=1; | |
gFrom=0; | |
gTo=0; | |
gIndent=0; | |
gIgnoreIndent=0; | |
stackp=0; | |
yy_init=1; | |
gData=&stack[stackp].data; | |
} | |
/****************************************************************************/ | |
pi_data data; | |
static void comment_found() | |
{ | |
pi_reset(&data); | |
data.from = gFrom; | |
data.to = gTo; | |
pi_comment(&data); | |
} | |
static int gStringFrom; | |
static void start_string() | |
{ | |
gStringFrom=gFrom; | |
} | |
static void end_string() | |
{ | |
pi_reset(&data); | |
data.from = gStringFrom; | |
data.to = gTo; | |
data.name="?"; | |
pi_macro(&data); | |
} | |
static void function_found(char*name) | |
{ | |
pi_reset(&data); | |
data.name = name; | |
data.from = gFrom; | |
data.to = gTo; | |
data.flags = PI_DECLARATION|PI_PUBLIC|PI_VIRTUAL; | |
data.type = "*"; | |
pi_function(&data); | |
} | |
static void class_name(char *name) | |
{ | |
pi_reset(&data); | |
data.name = name; | |
data.from = gFrom; | |
data.to = gTo; | |
data.flags = PI_CLASS; | |
pi_start_class(&data); | |
} | |
/*
 * Find where the last component of a dotted/whitespace-separated name
 * begins.
 *
 * Scans name[0..len-1] backwards for the last '.', blank, tab or newline
 * and returns the index just after it; returns 0 when there is no such
 * separator (the whole text is the name).
 */
static int pos_of_name(char* name,int len)
{
    int k = len;

    while (k > 0) {
        char ch = name[k-1];

        if (ch == '.' || ch == ' ' || ch == '\t' || ch == '\n')
            return k;
        k--;
    }
    return 0;
}
static void add_base_class(char *name,int len) | |
{ | |
pi_reset(&data); | |
data.name = &name[pos_of_name(name,len)]; | |
data.from = gFrom; | |
data.to = gTo; | |
data.flags = PI_PUBLIC|PI_VIRTUAL; | |
pi_base(&data); | |
} | |
static void member_variable_found(char* name,int len) | |
{ | |
pi_reset(&data); | |
data.name = &name[pos_of_name(name,len)]; | |
data.from = gFrom; | |
data.to = gTo; | |
data.type = "*"; | |
/* this is a bad hack! */ | |
data.flags = PI_VIRTUAL|PI_PUBLIC; | |
pi_variable(&data); | |
} | |
static void keyword(char* name) | |
{ | |
pi_reset(&data); | |
data.name = name; | |
data.type = name; | |
data.from = gFrom; | |
data.to = gTo; | |
data.type = "*"; | |
/* this is a bad hack! */ | |
pi_typedef(&data); | |
} | |
static void variable_found(char* name,int len) | |
{ | |
pi_reset(&data); | |
data.name = &name[pos_of_name(name,len)]; | |
data.from = gFrom; | |
data.to = gTo; | |
data.type = "*"; | |
pi_variable(&data); | |
} | |
static void parameter_found(char* name) | |
{ | |
pi_reset(&data); | |
data.name = name; | |
data.from = gFrom; | |
data.to = gTo; | |
data.type = name; | |
pi_parameter(&data); | |
} | |
static void import_found(char* name) | |
{ | |
static char buffer[256]; | |
int i; | |
buffer[0]='<'; | |
for(i=0;i<240&&name[i];i++) | |
buffer[i+1]=name[i]; | |
buffer[++i]='.'; | |
buffer[++i]='p'; | |
buffer[++i]='y'; | |
buffer[++i]='>'; | |
buffer[++i]=0; | |
pi_reset(&data); | |
pi_include(&data); | |
} | |
int main() | |
{ | |
pi_job *job; | |
FILE *fp; | |
if(pi_init(0)) | |
exit(1); | |
fprintf(stderr,"START\n"); | |
while (job = pi_getjob()) | |
{ | |
fprintf(stderr,"File to parse: \"%s\"\n",job->source_file_name); | |
fp=fopen(job->source_file_name,"r"); | |
if(fp) { | |
yyin=fp; | |
reset_all(); | |
yylex(job); | |
fclose(fp); | |
} else { | |
pi_error_msg("Can't open file"); | |
} | |
pi_endjob(job); | |
} | |
exit(0); | |
} | |
%} | |
/* yy_push_state()/yy_pop_state() are used by the triple-quoted string rules */
%option stack
/* exclusive start conditions */
%x SQUOTE DQUOTE
%x DEF CLASS IMPORT
/* inclusive start conditions: inside the parentheses of def / class */
%s PARAM SUPER
/* one blank, tab or backslash-newline */
ws      ([\t ]|\\\n)
/* identifier */
id      [A-Za-z_][A-Za-z_0-9]*
/* a '.' with optional white space on either side */
dot     {ws}*\.{ws}*
/* a newline plus the blanks/tabs that start the next line */
ind     \n[\t ]*
/* triple-quote delimiters */
sq      '''
dq      \"\"\"
%% | |
{dq}                            { /* opening triple double quote */
                                  ADV; yy_push_state(DQUOTE); start_string(); }
<DQUOTE>{dq}                    { /* closing triple double quote */
                                  ADV; yy_pop_state(); end_string(); }
<DQUOTE>.|\n                    { /* string body */ ADV; }
{sq}                            { /* opening triple single quote */
                                  ADV; yy_push_state(SQUOTE); start_string(); }
<SQUOTE>{sq}                    { /* closing triple single quote */
                                  ADV; yy_pop_state(); end_string(); }
<SQUOTE>.|\n                    { /* string body */ ADV; }
\"([^\n\\\"]|\\.|\\\n)*\"       { /* one-token "..." string */
                                  ADV; start_string(); end_string(); }
'([^\n\\']|\\.|\\\n)*'          { /* one-token '...' string */
                                  ADV; start_string(); end_string(); }
#.*                             { /* comment up to the end of the line */
                                  ADV; comment_found(); }
def                 { /* function header starts: scan its name in DEF */
                      ADV; BEGIN(DEF); keyword(yytext); push(eFunc); }
class               { /* class header starts: scan its name in CLASS */
                      ADV; BEGIN(CLASS); keyword(yytext); push(eClass); }
import|from         { ADV; BEGIN(IMPORT); keyword(yytext); }
<IMPORT>import      { ADV; BEGIN(INITIAL); keyword(yytext); /* from . import :-) */ }
<IMPORT>{id}        { ADV; import_found(yytext); }
<IMPORT>{ws}+       { ADV; }
<IMPORT>.|\n        { /* Anything else ends the import statement.  The
                         character is handed back untouched (no ADV,
                         yyless(0)) so the normal rules scan it; a newline
                         in particular must reach the {ind} rule, otherwise
                         the indentation after an import line is never
                         updated. */
                      BEGIN(INITIAL); yyless(0); }
and         |
break       |
continue    |
del         |
elif        |
else        |
except      |
finally     |
for         |
global      |
if          |
in          |
is          |
not         |
or          |
pass        |
print       |
raise       |
return      |
try         |
while       { /* shared action for every keyword listed above */
              ADV; keyword(yytext); }
<DEF>{ws}*                  { ADV; }
<DEF>{id}                   { /* function name */
                              ADV; function_found(yytext); }
<DEF>\(                     { /* parameter list opens: stop tracking indentation */
                              ADV; gIgnoreIndent=1; BEGIN(PARAM); }
<DEF>.|\n                   { ADV; pi_error_msg("parse error"); }
<PARAM>{id}                 { ADV; parameter_found(yytext); }
<PARAM>\){ws}*:             { /* end of the function header */
                              ADV; gIgnoreIndent=0; BEGIN(INITIAL); }
<CLASS>{ws}*                { ADV; }
<CLASS>{id}                 { /* class name */
                              ADV; class_name(yytext); }
<CLASS>\(                   { /* base class list opens: stop tracking indentation */
                              ADV; gIgnoreIndent=1; BEGIN(SUPER); }
<CLASS>:                    { /* class without base classes */
                              ADV; gIgnoreIndent=0; BEGIN(INITIAL); }
<CLASS>.|\n                 { ADV; pi_error_msg("parse error"); }
<SUPER>{id}({dot}{id})*     { /* possibly dotted base class name */
                              ADV; add_base_class(yytext,yyleng); }
<SUPER>\){ws}*:             { /* end of the class header */
                              ADV; gIgnoreIndent=0; BEGIN(INITIAL); }
"self."{id}/{ws}*=      { /* assignment to an attribute of self */
                          ADV; member_variable_found(yytext,yyleng); }
^{id}/{ws}*=            { /* assignment to a name at the start of a line */
                          ADV; variable_found(yytext,yyleng); }
{id}+                   { /* any other identifier */ ADV; }
{ind}/\n                { ADV; /* ignore empty lines */ }
{ind}/#                 { ADV; /* ignore comment only lines */ }
{ind}                   { /* start of a new line: unless we are inside a
                             parenthesis list, measure its indentation and
                             close every open item it ends */
                          ADV;
                          if(gIgnoreIndent==0) {
                              gIndent=calc_indent(yytext,yyleng);
                              while(try_pop(gIndent)) {
                                  /* keep popping */
                              }
                          }
                        }
.|\n                    { /* everything else is only counted */ ADV; }
<<EOF>>     { /* End of input: close every class/function that is still
                 open (indentation 0 ends them all), THEN stop scanning.
                 yyterminate() returns from yylex(), so it has to come
                 last - placed first it made the clean-up unreachable. */
              gIndent=0;
              while(try_pop(gIndent))
                  ;
              yyterminate();
            }
%% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment