Skip to content

Instantly share code, notes, and snippets.

@scharf
Created June 16, 2015 08:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scharf/a9e7a1ab41faf0896a96 to your computer and use it in GitHub Desktop.
Save scharf/a9e7a1ab41faf0896a96 to your computer and use it in GitHub Desktop.
Python parser written in lex
%{
#include "ParserInterface.h"
#include <malloc.h>
#include <stdio.h>
/* Replace any existing yywrap with a macro that always evaluates to 1. */
#undef yywrap
#define yywrap() 1
/* Position bookkeeping executed at the start of scanner actions: the token
   just matched starts at the old gPos and, after gPos is advanced by yyleng,
   ends at gPos-1.  Expands to three statements, so it is only safe where a
   statement sequence is valid (for example inside a braced action). */
#define ADV gFrom=gPos; gPos+=yyleng; gTo=gPos-1
/* Tab width used by calc_indent() when measuring indentation. */
#define TABSIZE 8
/* Number of elements in the scope stack array. */
#define STACKSIZE 20
/* Not referenced anywhere in the visible part of this file. */
#define STATE_STACKSIZE 20
/* Start position of the most recently matched token (set by ADV). */
static int gFrom=0;
/* End position (inclusive) of the most recently matched token (set by ADV). */
static int gTo=0;
/* Running input position; reset_all() starts it at 1. */
static int gPos=0;
/* Indentation of the current line as computed by calc_indent(); push()
   stores it in the new scope entry. */
static int gIndent;
/* Nonzero while scanning a parenthesised def/class header; the indentation
   rule then leaves gIndent and the scope stack alone. */
static int gIgnoreIndent=0;
/*
 * Overwrite the first len bytes of str with the character c and then print
 * str to stdout using printf("%s").  Nothing is overwritten when len is
 * zero or negative.
 */
static void handle_string(char*str,int len,char c)
{
    char *cursor = str;
    int remaining = len;

    while (remaining-- > 0)
        *cursor++ = c;
    printf("%s",str);
}
/* Kinds of scope entry kept on the scope stack (Stack.itemType). */
#define eClass 1
#define eFunc 2
/* One open class or function scope. */
typedef struct stack_struct {
int itemType; /* eClass or eFunc, as passed to push() */
int indent; /* value of gIndent at the time the entry was pushed */
pi_data data; /* record for this scope: push() sets from, try_pop() sets to for classes */
int free_name; /* when nonzero try_pop() frees data.name; push() always clears it */
} Stack;
/* Scope stack.  push() increments stackp before writing, so stack[0] never
   holds an entry and stackp==0 means that no scope is open. */
Stack stack[STACKSIZE];
static int stackp=0;
/* Points at stack[stackp].data; kept up to date by push(), try_pop() and
   reset_all(). */
static pi_data* gData;
/*
 * Open a new scope entry of kind itemType (eClass or eFunc) on the scope
 * stack.  The entry records the current indentation (gIndent) and, for the
 * two known kinds, the start position of the current token (gFrom).  On
 * success gData points at the data record of the new entry.
 *
 * stack[0] is never used for an entry, so at most STACKSIZE-1 scopes can be
 * open at the same time.  When that limit is reached the error is reported
 * and the push is dropped, so that the entry currently on top (a scope that
 * is still open) is not re-initialised and lost.
 */
static void push(int itemType)
{
    if (stackp + 1 >= STACKSIZE) {
        pi_error_msg("stack overflow");
        return;
    }
    stackp++;
    gData = &stack[stackp].data;
    pi_reset(gData);
    stack[stackp].indent = gIndent;
    stack[stackp].itemType = itemType;
    stack[stackp].free_name = 0;
    switch (itemType) {
    case eClass:
    case eFunc:
        /* both kinds only need their start position recorded here */
        gData->from = gFrom;
        break;
    default:
        pi_error_msg("???");
        break;
    }
}
/*
 * Close the innermost open scope if a line indented by `indent` columns
 * ends it, i.e. if the scope was pushed at an indentation greater than or
 * equal to `indent`.
 *
 * For a class scope the end position is set to gTo and pi_end_class() is
 * called; function scopes are closed silently.  Returns 1 when a scope was
 * closed (callers loop until 0 is returned) and 0 when nothing is open or
 * the innermost scope still encloses the line.
 */
static int try_pop(int indent)
{
    int kind;

    if (stackp <= 0 || indent > stack[stackp].indent)
        return 0;

    kind = stack[stackp].itemType;
    if (kind == eClass) {
        gData->to = gTo;
        pi_end_class(gData);
    } else if (kind != eFunc) {
        pi_error_msg("???");
    }
    /* eFunc: nothing to report when a function ends */

    if (stack[stackp].free_name && gData->name)
        free(gData->name);

    stackp--;
    gData = &stack[stackp].data;
    return 1;
}
/*
 * Compute the indentation, in columns, described by an indentation token.
 *
 * str/len is the text matched by the ind pattern: a newline followed by
 * tabs and spaces.  str[0] (the newline) is skipped.  A space advances one
 * column; a tab advances to the next multiple of TABSIZE, which is how
 * Python itself measures leading whitespace, so " \t" yields TABSIZE and
 * not TABSIZE+1.  Scanning stops at the first character that is neither a
 * tab nor a space.
 */
static int calc_indent(char*str,int len)
{
    int i;
    int indent=0;
    for(i=1;i<len;i++) {
        if (str[i]=='\t')
            indent=(indent/TABSIZE+1)*TABSIZE; /* jump to the next tab stop */
        else if (str[i]==' ')
            indent++;
        else
            break;
    }
    return indent;
}
/*
 * Reset all per-file state before a new input file is scanned.
 *
 * NOTE(review): yy_init is an internal variable of the generated scanner,
 * presumably set here to request re-initialisation on the next yylex()
 * call.  Its polarity is not the same in every flex release, so confirm
 * this against the flex version in use; yyrestart(yyin) is the documented
 * way to point the scanner at a new file.
 */
static void reset_all()
{
    /* scope stack: empty, gData on the unused base entry */
    stackp=0;
    gData=&stack[stackp].data;

    /* indentation tracking */
    gIndent=0;
    gIgnoreIndent=0;

    /* token positions: the first token will start at position 1 */
    gFrom=0;
    gTo=0;
    gPos=1;

    yy_init=1;
}
/****************************************************************************/
/* Scratch record that the reporting helpers in this file fill in and pass
   to the pi_* functions. */
pi_data data;
/*
 * Report the token just matched (positions gFrom..gTo) through
 * pi_comment().
 */
static void comment_found()
{
    pi_reset(&data);
    data.to = gTo;
    data.from = gFrom;
    pi_comment(&data);
}
/* Start position of the string literal currently being scanned. */
static int gStringFrom;
/*
 * Remember the start position of the current token (gFrom) as the start of
 * a string literal; end_string() uses it when the literal is reported.
 */
static void start_string()
{
    gStringFrom = gFrom;
}
/*
 * Report a complete string literal through pi_macro().  The literal spans
 * from the position saved by start_string() to the end of the current token
 * and is given the fixed name "?".
 */
static void end_string()
{
    pi_reset(&data);
    data.name = "?";
    data.to   = gTo;
    data.from = gStringFrom;
    pi_macro(&data);
}
/*
 * Report a function definition called `name` through pi_function().  The
 * record covers the current token (gFrom..gTo), has type "*" and carries
 * the flags PI_DECLARATION, PI_PUBLIC and PI_VIRTUAL.
 */
static void function_found(char*name)
{
    pi_reset(&data);
    data.from  = gFrom;
    data.to    = gTo;
    data.name  = name;
    data.type  = "*";
    data.flags = PI_DECLARATION|PI_PUBLIC|PI_VIRTUAL;
    pi_function(&data);
}
/*
 * Report the start of a class called `name` through pi_start_class().  The
 * record covers the current token (gFrom..gTo) and is flagged PI_CLASS.
 */
static void class_name(char *name)
{
    pi_reset(&data);
    data.from  = gFrom;
    data.to    = gTo;
    data.flags = PI_CLASS;
    data.name  = name;
    pi_start_class(&data);
}
/*
 * Return the index at which the last name component of name[0..len-1]
 * begins: one past the last '.', space, tab or newline in that range, or 0
 * when the range contains none of these characters.
 */
static int pos_of_name(char* name,int len)
{
    int end = len;

    while (end > 0) {
        char ch = name[end - 1];
        if (ch == '.' || ch == ' ' || ch == '\t' || ch == '\n')
            return end;
        end--;
    }
    return 0;
}
/*
 * Report a base class through pi_base().  name/len is the matched, possibly
 * dotted, name; only its last component (see pos_of_name) is used as the
 * record name.  The record covers the current token and carries the flags
 * PI_PUBLIC and PI_VIRTUAL.
 */
static void add_base_class(char *name,int len)
{
    int start = pos_of_name(name,len);

    pi_reset(&data);
    data.from  = gFrom;
    data.to    = gTo;
    data.flags = PI_PUBLIC|PI_VIRTUAL;
    data.name  = name + start;
    pi_base(&data);
}
/*
 * Report a member variable through pi_variable().  name/len is the matched
 * text; the part after its last separator (see pos_of_name) becomes the
 * record name.  The record covers the current token and has type "*".
 */
static void member_variable_found(char* name,int len)
{
    int start = pos_of_name(name,len);

    pi_reset(&data);
    data.from = gFrom;
    data.to   = gTo;
    data.type = "*";
    data.name = name + start;
    /* the original author calls these flags a bad hack */
    data.flags = PI_VIRTUAL|PI_PUBLIC;
    pi_variable(&data);
}
/*
 * Report a language keyword.  The keyword text is used as the record name,
 * the type is the placeholder "*", and the record covers the current token.
 * Keywords are reported through pi_typedef(), which the original author
 * describes as a bad hack.
 */
static void keyword(char* name)
{
    pi_reset(&data);
    data.from = gFrom;
    data.to   = gTo;
    data.name = name;
    data.type = "*";
    pi_typedef(&data);
}
/*
 * Report a variable through pi_variable().  The part of name/len after its
 * last separator (see pos_of_name) becomes the record name; the record
 * covers the current token and has type "*".  No flags are set here.
 */
static void variable_found(char* name,int len)
{
    int start = pos_of_name(name,len);

    pi_reset(&data);
    data.from = gFrom;
    data.to   = gTo;
    data.type = "*";
    data.name = name + start;
    pi_variable(&data);
}
/*
 * Report a function parameter through pi_parameter().  The parameter text
 * is stored as both the name and the type of the record, which covers the
 * current token (gFrom..gTo).
 */
static void parameter_found(char* name)
{
    pi_reset(&data);
    data.from = gFrom;
    data.to   = gTo;
    data.type = name;
    data.name = name;
    pi_parameter(&data);
}
/*
 * Report an imported module through pi_include().
 *
 * The module name is turned into an include-style file name of the form
 * "<name.py>" (at most the first 240 characters of name are used) and that
 * text becomes the name of the reported record.  The text lives in a static
 * buffer, so it stays valid after this function returns but is overwritten
 * by the next call.
 *
 * NOTE(review): from/to are left as pi_reset() leaves them, unlike the
 * other reporters in this file which record gFrom/gTo; consider doing the
 * same here.
 */
static void import_found(char* name)
{
    static char buffer[256];
    int i;
    buffer[0]='<';
    for(i=0;i<240&&name[i];i++)
        buffer[i+1]=name[i];
    /* i is the number of characters copied, so buffer[i] holds the last
       one and the suffix starts at buffer[i+1] */
    buffer[++i]='.';
    buffer[++i]='p';
    buffer[++i]='y';
    buffer[++i]='>';
    buffer[++i]=0;
    pi_reset(&data);
    /* attach the text built above; without this the record had no name */
    data.name=buffer;
    pi_include(&data);
}
/*
 * Program entry point: initialise the parser interface, then fetch jobs
 * with pi_getjob() until it returns a null pointer.  For every job the
 * named source file is opened and scanned with yylex(); a file that cannot
 * be opened is reported with pi_error_msg().  pi_endjob() is called for
 * every job either way.  Exits with status 1 when pi_init() fails and with
 * status 0 otherwise.
 */
int main()
{
    pi_job *job;
    FILE *fp;

    if(pi_init(0))
        exit(1);
    fprintf(stderr,"START\n");

    for (job = pi_getjob(); job; job = pi_getjob()) {
        fprintf(stderr,"File to parse: \"%s\"\n",job->source_file_name);
        fp = fopen(job->source_file_name,"r");
        if (!fp) {
            pi_error_msg("Can't open file");
        } else {
            /* the scanner reads from yyin, so set it before resetting */
            yyin = fp;
            reset_all();
            yylex(job);
            fclose(fp);
        }
        pi_endjob(job);
    }
    exit(0);
}
%}
/* The triple-quote rules use yy_push_state and yy_pop_state, which need the start-condition stack. */
%option stack
/* Exclusive start conditions: only rules tagged with them are active. */
%x SQUOTE DQUOTE DEF CLASS IMPORT
/* Inclusive start conditions: untagged rules stay active as well. */
%s PARAM SUPER
/* One horizontal whitespace character, or a backslash line continuation. */
ws ([\t ]|\\\n)
/* Identifier. */
id [A-Za-z_][A-Za-z_0-9]*
/* A dot with optional whitespace or continuations on either side. */
dot ([\t ]|\\\n)*\.([\t ]|\\\n)*
/* A newline followed by the leading tabs and spaces of the next line. */
ind \n[\t ]*
/* Triple-quote delimiters: three single quotes, three double quotes. */
sq '''
dq \"\"\"
%%
{dq} { ADV; yy_push_state(DQUOTE); start_string(); /* opening triple double quote: note the start, scan the body in DQUOTE */ }
<DQUOTE>{dq} { ADV; yy_pop_state(); end_string(); /* closing delimiter: report the whole literal */ }
<DQUOTE>.|\n { ADV; /* literal body, newlines included: only advance the position */ }
{sq} { ADV; yy_push_state(SQUOTE); start_string(); /* same handling for triple single quotes */ }
<SQUOTE>{sq} { ADV; yy_pop_state(); end_string(); /* closing delimiter: report the whole literal */ }
<SQUOTE>.|\n { ADV; /* literal body, newlines included: only advance the position */ }
\"([^\n\\\"]|\\.|\\\n)*\" { ADV;start_string();end_string(); /* ordinary double-quoted string, matched as one token */ }
'([^\n\\']|\\.|\\\n)*' { ADV;start_string();end_string(); /* ordinary single-quoted string, matched as one token */ }
#.* { ADV; comment_found(); /* comment running to the end of the line */ }
def { ADV; BEGIN(DEF); keyword(yytext); push(eFunc); /* report the keyword, open a function scope, parse the header in DEF */ }
class { ADV; BEGIN(CLASS); keyword(yytext);push(eClass); /* report the keyword, open a class scope, parse the header in CLASS */ }
import|from { ADV; BEGIN(IMPORT);keyword(yytext); /* report the keyword, then collect module names in IMPORT */ }
<IMPORT>import { ADV; BEGIN(INITIAL);keyword(yytext);/* from . import :-) */}
<IMPORT>{id} { ADV; import_found(yytext); /* a module name */ }
<IMPORT>{ws}+ { ADV; /* whitespace between the keyword and the name */ }
<IMPORT>.|\n { BEGIN(INITIAL); yyless(0); /* anything else ends the import statement: give the character back unconsumed (hence no ADV) so the INITIAL rules scan it; a newline then reaches the indentation rules and a dedent after an import line is noticed */ }
and |
del |
for |
in |
print |
break |
elif |
is |
raise |
else |
global |
not |
return |
continue |
except |
if |
or |
try |
finally |
pass |
while { ADV; keyword(yytext); /* shared by every keyword listed above: the bar action means same action as the next rule */ }
<DEF>{ws}* { ADV; }
<DEF>{id} { ADV; function_found(yytext); /* the function name */ }
<DEF>\( { ADV; BEGIN(PARAM); gIgnoreIndent=1; /* parameter list: line breaks inside it must not be treated as indentation changes */ }
<DEF>.|\n { ADV; pi_error_msg("parse error"); /* anything else in a def header; the scanner stays in DEF */ }
<PARAM>{id} { ADV; parameter_found(yytext); /* an identifier in the list that no earlier rule claimed */ }
<PARAM>\){ws}*: { ADV; BEGIN(INITIAL); gIgnoreIndent=0; /* closing parenthesis followed by a colon ends the header */ }
<CLASS>{ws}* { ADV; }
<CLASS>{id} { ADV; class_name(yytext); /* the class name */ }
<CLASS>\( { ADV; BEGIN(SUPER);gIgnoreIndent=1; /* base class list: indentation is ignored until it is closed */ }
<CLASS>: { ADV; BEGIN(INITIAL);gIgnoreIndent=0; /* class without a base class list */ }
<CLASS>.|\n { ADV; pi_error_msg("parse error"); /* anything else in a class header; the scanner stays in CLASS */ }
<SUPER>{id}({dot}{id})* { ADV; add_base_class(yytext,yyleng); /* a possibly dotted base class name */ }
<SUPER>\){ws}*: { ADV; BEGIN(INITIAL);gIgnoreIndent=0; /* closing parenthesis followed by a colon ends the header */ }
"self."{id}/{ws}*= { ADV; member_variable_found(yytext,yyleng); /* attribute of self followed by an equals sign; the lookahead also accepts the first character of a double equals */ }
^{id}/{ws}*= { ADV; variable_found(yytext,yyleng); /* identifier at the very start of a line followed by an equals sign */ }
{id}+ { ADV; /* any other identifier: skipped */ }
{ind}/\n { ADV; /* ignore empty lines */ }
{ind}/# { ADV; /* ignore comment only lines */ }
{ind} { ADV;
/* start of a line with content: unless a parenthesised header is being
   scanned, measure the indentation and close every open scope that was
   pushed at this indentation or deeper */
if(!gIgnoreIndent) {
gIndent=calc_indent(yytext,yyleng);
while(try_pop(gIndent))
;
}
}
.|\n { ADV; /* fallback for every other character: only advance the position */ }
<<EOF>> { /* end of input: close every scope that is still open, then stop; yyterminate returns from yylex, so it has to come last */
gIndent=0;
while(try_pop(gIndent))
;
yyterminate();
}
%%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment