/*
 * GitHub Gist juntalis/3482631, by @juntalis, created August 26, 2012 19:01.
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <io.h>
#include "symbols.h"
/* Shorthand unsigned types used throughout the scanner. */
typedef unsigned int uint;
typedef unsigned char uchar;
/* Number of bytes fill() requests per refill; fill() also grows the buffer
   whenever fewer than this many bytes are free after s->lim. */
#define BSIZE 8192
/* Configuration macros consumed by the matcher that re2c generates inside
   scan(). They rely on a Scanner pointer named `s` and a local `uchar *`
   named `cursor` being in scope. */
/* Type of a single input character. */
#define YYCTYPE uchar
/* Current read position: the local variable `cursor`. */
#define YYCURSOR cursor
/* End of the data currently held in the buffer. */
#define YYLIMIT s->lim
/* Slot re2c uses to remember a position it may need to back up to. */
#define YYMARKER s->ptr
/* Refill request: the requested count n is ignored; fill() is called and
   `cursor` is replaced by the position it returns. */
#define YYFILL(n) {cursor = fill(s, cursor);}
/* Finish a token: save the read position in s->cur so the next scan() call
   resumes there, then return token code i. */
#define RET(i) {s->cur = cursor; return i;}
/*
 * State of one input stream being tokenised by scan().
 *
 * All pointers refer to a single heap buffer that fill() allocates and,
 * when it grows, replaces. A zero-filled Scanner is the valid initial
 * state: fill() allocates the buffer on its first call.
 */
typedef struct Scanner {
	int fd;       /* descriptor handed to _read() by fill() */
	uchar *bot;   /* start of the buffer (the pointer malloc'd/freed by fill) */
	uchar *tok;   /* start of the token currently being matched */
	uchar *ptr;   /* backup position used by the matcher (YYMARKER) */
	uchar *cur;   /* read position saved by RET() between scan() calls */
	uchar *pos;   /* position just after the most recent newline seen */
	uchar *lim;   /* end of the data read so far (YYLIMIT) */
	uchar *top;   /* end of the space fill() reserved for reading */
	uchar *eof;   /* NULL until input ends; then one past the '\n' sentinel */
	uint line;    /* number of newlines consumed so far */
} Scanner;
/*
 * Refill the input buffer when the generated matcher needs more bytes
 * (this is what YYFILL expands to).
 *
 * Nothing happens once end of input has been recorded (s->eof set).
 * Otherwise:
 *   1. Everything before the current token is discarded by sliding the
 *      bytes [s->tok, s->lim) down to s->bot and rebasing ptr, cursor,
 *      pos and lim by the same amount.
 *   2. If fewer than BSIZE bytes are free after s->lim, the data is moved
 *      into a new buffer with room for BSIZE more bytes and the old buffer
 *      is freed.
 *   3. Up to BSIZE bytes are read from s->fd. _read() may return fewer
 *      bytes than requested without being at end of file (pipes, text-mode
 *      CR-LF translation), so reading continues until BSIZE bytes have
 *      arrived, end of file is reached, or a read fails. When the input
 *      ends, a '\n' sentinel is stored after the last byte and s->eof is
 *      set to point just past it; scan() compares its cursor with s->eof
 *      to detect end of input.
 *
 * s      - scanner state; its buffer pointers are updated in place.
 * cursor - the caller's current read position inside the buffer.
 *
 * Returns cursor translated to the same logical position in the buffer,
 * which may have been compacted or reallocated. Terminates the process
 * with EXIT_FAILURE if the buffer cannot be allocated.
 */
uchar *fill(Scanner *s, uchar *cursor){
	if(!s->eof) {
		uint cnt = s->tok - s->bot;
		if(cnt){
			/* Source and destination lie in the same buffer and overlap
			   whenever the kept tail is longer than the discarded prefix,
			   so memmove (not memcpy) is required. */
			memmove(s->bot, s->tok, s->lim - s->tok);
			s->tok = s->bot;
			s->ptr -= cnt;
			cursor -= cnt;
			s->pos -= cnt;
			s->lim -= cnt;
		}
		if((s->top - s->lim) < BSIZE){
			/* s->tok == s->bot here: either nothing had been consumed or
			   the compaction above just reset it. */
			uint used = s->lim - s->bot;
			uchar *buf = (uchar*) malloc((used + BSIZE)*sizeof(uchar));
			if(!buf){
				fprintf(stderr, "fill: out of memory\n");
				exit(EXIT_FAILURE);
			}
			/* On the very first call the old buffer is NULL and used is 0;
			   skip the copy rather than pass NULL to memcpy. */
			if(used) memcpy(buf, s->bot, used);
			s->tok = buf;
			s->ptr = &buf[s->ptr - s->bot];
			cursor = &buf[cursor - s->bot];
			s->pos = &buf[s->pos - s->bot];
			s->lim = &buf[s->lim - s->bot];
			s->top = &s->lim[BSIZE];
			free(s->bot);
			s->bot = buf;
		}
		cnt = 0;
		while(cnt < BSIZE){
			int got = _read(s->fd, (char*) &s->lim[cnt], BSIZE - cnt);
			/* 0 means end of file, -1 means error; both end the input. */
			if(got <= 0) break;
			cnt += (uint) got;
		}
		if(cnt != BSIZE){
			/* Input ended: append the newline sentinel; at most BSIZE-1
			   bytes were read, so the slot is inside the reserved space. */
			s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
		}
		s->lim += cnt;
	}
	return cursor;
}
/*
 * Return the code of the next token read from s, or EOI once the newline
 * sentinel appended by fill() at end of input has been consumed.
 *
 * This is re2c input: each "!re2c" block below is replaced by generated
 * matching code that reads through YYCURSOR/YYLIMIT/YYMARKER and calls
 * YYFILL when it needs more data. The function is a small state machine
 * built from labels:
 *
 *   std           normal C source: keywords, identifiers, constants,
 *                 operators and punctuation. Horizontal whitespace is
 *                 skipped, a newline bumps s->line, and any other byte is
 *                 reported with printf and skipped.
 *   comment       inside a block comment; returns to std at its end.
 *   line_comment  inside a line comment; returns to std at the newline.
 *   preprocessor  entered after '#'; recognises directive and pragma names
 *                 in addition to every std rule, skips unknown bytes, and
 *                 returns to std at an unescaped newline.
 *
 * Every call starts in the std state at s->cur, and s->tok is reset only
 * there, so a token returned from the preprocessor state has s->tok still
 * pointing at the '#'. RET() stores the new read position in s->cur.
 *
 * Changes relative to the earlier version of this rule set:
 *   - the pragma names "conform", "init_seg", "pointers_to_members" and
 *     "vtordisp" used to be spelled with a trailing '1' (apparently a
 *     footnote marker copied along with the name) and so never matched
 *     the real pragmas; the token codes keep their original names.
 *   - a second "error" rule returning PRE_ was removed: an identical
 *     earlier rule always wins, so it could never fire.
 *     NOTE(review): it may have been a placeholder for another directive
 *     (there is no rule for "undef") - confirm against symbols.h.
 *
 * NOTE(review): in the preprocessor state the directive rules "if" and
 * "else" precede the copied keyword rules "if" and "else", so the latter
 * two are shadowed there.
 */
int scan(Scanner *s){
uchar *cursor = s->cur;
/* Start of a new token in ordinary source text. */
std:
s->tok = cursor;
/*!re2c
any = [\000-\377];
O = [0-7];
D = [0-9];
L = [a-zA-Z_];
H = [a-fA-F0-9];
E = [Ee] [+-]? D+;
FS = [fFlL];
IS = [uUlL]*;
ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
*/
/*!re2c
"/*" { goto comment; }
"//" { goto line_comment; }
"#" { goto preprocessor; }
"auto" { RET(AUTO); }
"break" { RET(BREAK); }
"case" { RET(CASE); }
"char" { RET(CHAR); }
"const" { RET(CONST); }
"continue" { RET(CONTINUE); }
"default" { RET(DEFAULT); }
"do" { RET(DO); }
"double" { RET(DOUBLE); }
"else" { RET(ELSE); }
"enum" { RET(ENUM); }
"extern" { RET(EXTERN); }
"float" { RET(FLOAT); }
"for" { RET(FOR); }
"goto" { RET(GOTO); }
"if" { RET(IF); }
"int" { RET(INT); }
"long" { RET(LONG); }
"register" { RET(REGISTER); }
"return" { RET(RETURN); }
"short" { RET(SHORT); }
"signed" { RET(SIGNED); }
"sizeof" { RET(SIZEOF); }
"static" { RET(STATIC); }
"struct" { RET(STRUCT); }
"switch" { RET(SWITCH); }
"typedef" { RET(TYPEDEF); }
"union" { RET(UNION); }
"unsigned" { RET(UNSIGNED); }
"void" { RET(VOID); }
"volatile" { RET(VOLATILE); }
"while" { RET(WHILE); }
"__declspec" { RET(DECLSPEC_DECL); }
"__cdecl" { RET(DECLSPEC_CDECL); }
"__stdcall" { RET(DECLSPEC_STDCALL); }
"__fastcall" { RET(DECLSPEC_FASTCALL); }
"dllimport" { RET(DECLSPEC_DLLIMPORT); }
"dllexport" { RET(DECLSPEC_DLLEXPORT); }
"__VA_ARGS__" { RET(PRE_MACRO_VAARGS); }
"__STDC__" { RET(PRE_MACRO_STDC); }
"__DATE__" { RET(PRE_MACRO_DATE); }
"__FILE__" { RET(PRE_MACRO_FILE); }
"__LINE__" { RET(PRE_MACRO_LINE); }
"__TIMESTAMP__" { RET(PRE_MACRO_TIMESTAMP); }
"__COUNTER__" { RET(PRE_MACRO_COUNTER); }
"__cplusplus" { RET(PRE_MACRO_CPP); }
"__FUNCTION__" { RET(PRE_MACRO_FUNC); }
"__FUNCSIG__" { RET(PRE_MACRO_FUNCSIG); }
"__FUNCDNAME__" { RET(PRE_MACRO_FUNCDN); }
"_DEBUG" { RET(PRE_MACRO_DEBUG); }
"_NDEBUG" { RET(PRE_MACRO_NDEBUG); }
"_WIN32" { RET(PRE_MACRO_WIN32); }
"_WIN64" { RET(PRE_MACRO_WIN64); }
L (L|D)* { RET(ID); }
("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
(['] (ESC|any\[\n\\'])* ['])
{ RET(ICON); }
(D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
{ RET(FCON); }
(["] (ESC|any\[\n\\"])* ["])
{ RET(SCON); }
"..." { RET(ELLIPSIS); }
">>=" { RET(RSHIFTEQ); }
"<<=" { RET(LSHIFTEQ); }
"+=" { RET(ADDEQ); }
"-=" { RET(SUBEQ); }
"*=" { RET(MULEQ); }
"/=" { RET(DIVEQ); }
"%=" { RET(MODEQ); }
"&=" { RET(ANDEQ); }
"^=" { RET(XOREQ); }
"|=" { RET(OREQ); }
">>" { RET(RSHIFT); }
"<<" { RET(LSHIFT); }
"++" { RET(INCR); }
"--" { RET(DECR); }
"->" { RET(DEREF); }
"&&" { RET(ANDAND); }
"||" { RET(OROR); }
"<=" { RET(LEQ); }
">=" { RET(GEQ); }
"==" { RET(EQL); }
"!=" { RET(NEQ); }
";" { RET(SEMICOLON_SYM); }
"{" { RET(LBRACE_SYM); }
"}" { RET(RBRACE_SYM); }
"," { RET(COMMA_SYM); }
":" { RET(COLON_SYM); }
"=" { RET(SETEQ_SYM); }
"(" { RET(LPARN_SYM); }
")" { RET(RPARN_SYM); }
"[" { RET(LBRACK_SYM); }
"]" { RET(RBRACK_SYM); }
"." { RET(DOT_SYM); }
"&" { RET(AND_SYM); }
"!" { RET(EXCLA_SYM); }
"~" { RET(TILDE_SYM); }
"-" { RET(MINUS_SYM); }
"+" { RET(PLUS_SYM); }
"*" { RET(MULTI_SYM); }
"/" { RET(DIVIDE_SYM); }
"%" { RET(PRCNT_SYM); }
"<" { RET(LT_SYM); }
">" { RET(RT_SYM); }
"^" { RET(PWROF_SYM); }
"|" { RET(OR_SYM); }
"?" { RET(QUMRK); }
[ \t\v\f]+ { goto std; }
"\n"
{
if(cursor == s->eof) RET(EOI);
s->pos = cursor; s->line++;
goto std;
}
any
{
printf("unexpected character: %c\n", *s->tok);
goto std;
}
*/
comment:
/*!re2c
"*/" { goto std; }
"\n"
{
if(cursor == s->eof) RET(EOI);
s->tok = s->pos = cursor; s->line++;
goto comment;
}
any { goto comment; }
*/
line_comment:
/*!re2c
"\n"
{
if(cursor == s->eof) RET(EOI);
s->tok = s->pos = cursor; s->line++;
goto std;
}
any { goto line_comment; }
*/
preprocessor:
/*!re2c
"/*" { goto comment; }
"//" { goto line_comment; }
"include" { RET(PRE_INC); }
"import" { RET(PRE_IMP); }
"using" { RET(PRE_USING); }
"line" { RET(PRE_LINE); }
"pragma" { RET(PRE_PRAGMA); }
"error" { RET(PRE_ERROR); }
"defined" { RET(PRE_ISDEF); }
"define" { RET(PRE_DEF); }
"ifdef" { RET(PRE_IFDEF); }
"ifndef" { RET(PRE_IFNDEF); }
"elif" { RET(PRE_ELIF); }
"if" { RET(PRE_IF); }
"else" { RET(PRE_ELSE); }
"endif" { RET(PRE_ENDIF); }
"##" { RET(PRE_PASTETOK); }
"#@" { RET(PRE_CHRTOK); }
"#" { RET(PRE_STRTOK); }
"alloc_text" { RET(PRE_PRAGMA_ALLOC_TEXT); }
"auto_inline" { RET(PRE_PRAGMA_AUTO_INLINE); }
"bss_seg" { RET(PRE_PRAGMA_BSS_SEG); }
"check_stack" { RET(PRE_PRAGMA_CHECK_STACK); }
"code_seg" { RET(PRE_PRAGMA_CODE_SEG); }
"comment" { RET(PRE_PRAGMA_COMMENT); }
"component" { RET(PRE_PRAGMA_COMPONENT); }
"conform" { RET(PRE_PRAGMA_CONFORM1); }
"const_seg" { RET(PRE_PRAGMA_CONST_SEG); }
"data_seg" { RET(PRE_PRAGMA_DATA_SEG); }
"deprecated" { RET(PRE_PRAGMA_DEPRECATED); }
"fenv_access" { RET(PRE_PRAGMA_FENV_ACCESS); }
"float_control" { RET(PRE_PRAGMA_FLOAT_CONTROL); }
"fp_contract" { RET(PRE_PRAGMA_FP_CONTRACT); }
"function" { RET(PRE_PRAGMA_FUNCTION); }
"hdrstop" { RET(PRE_PRAGMA_HDRSTOP); }
"include_alias" { RET(PRE_PRAGMA_INCLUDE_ALIAS); }
"init_seg" { RET(PRE_PRAGMA_INIT_SEG1); }
"inline_depth" { RET(PRE_PRAGMA_INLINE_DEPTH); }
"inline_recursion" { RET(PRE_PRAGMA_INLINE_RECURSION); }
"intrinsic" { RET(PRE_PRAGMA_INTRINSIC); }
"make_public" { RET(PRE_PRAGMA_MAKE_PUBLIC); }
"managed" { RET(PRE_PRAGMA_MANAGED); }
"message" { RET(PRE_PRAGMA_MESSAGE); }
"omp" { RET(PRE_PRAGMA_OMP); }
"once" { RET(PRE_PRAGMA_ONCE); }
"optimize" { RET(PRE_PRAGMA_OPTIMIZE); }
"pack" { RET(PRE_PRAGMA_PACK); }
"pointers_to_members" { RET(PRE_PRAGMA_POINTERS_TO_MEMBERS1); }
"pop_macro" { RET(PRE_PRAGMA_POP_MACRO); }
"push_macro" { RET(PRE_PRAGMA_PUSH_MACRO); }
"region" { RET(PRE_PRAGMA_REGION); }
"endregion" { RET(PRE_PRAGMA_ENDREGION); }
"runtime_checks" { RET(PRE_PRAGMA_RUNTIME_CHECKS); }
"section" { RET(PRE_PRAGMA_SECTION); }
"setlocale" { RET(PRE_PRAGMA_SETLOCALE); }
"strict_gs_check" { RET(PRE_PRAGMA_STRICT_GS_CHECK); }
"unmanaged" { RET(PRE_PRAGMA_UNMANAGED); }
"vtordisp" { RET(PRE_PRAGMA_VTORDISP1); }
"warning" { RET(PRE_PRAGMA_WARNING); }
"auto" { RET(AUTO); }
"break" { RET(BREAK); }
"case" { RET(CASE); }
"char" { RET(CHAR); }
"const" { RET(CONST); }
"continue" { RET(CONTINUE); }
"default" { RET(DEFAULT); }
"do" { RET(DO); }
"double" { RET(DOUBLE); }
"else" { RET(ELSE); }
"enum" { RET(ENUM); }
"extern" { RET(EXTERN); }
"float" { RET(FLOAT); }
"for" { RET(FOR); }
"goto" { RET(GOTO); }
"if" { RET(IF); }
"int" { RET(INT); }
"long" { RET(LONG); }
"register" { RET(REGISTER); }
"return" { RET(RETURN); }
"short" { RET(SHORT); }
"signed" { RET(SIGNED); }
"sizeof" { RET(SIZEOF); }
"static" { RET(STATIC); }
"struct" { RET(STRUCT); }
"switch" { RET(SWITCH); }
"typedef" { RET(TYPEDEF); }
"union" { RET(UNION); }
"unsigned" { RET(UNSIGNED); }
"void" { RET(VOID); }
"volatile" { RET(VOLATILE); }
"while" { RET(WHILE); }
"__declspec" { RET(DECLSPEC_DECL); }
"__cdecl" { RET(DECLSPEC_CDECL); }
"__stdcall" { RET(DECLSPEC_STDCALL); }
"__fastcall" { RET(DECLSPEC_FASTCALL); }
"dllimport" { RET(DECLSPEC_DLLIMPORT); }
"dllexport" { RET(DECLSPEC_DLLEXPORT); }
"__VA_ARGS__" { RET(PRE_MACRO_VAARGS); }
"__STDC__" { RET(PRE_MACRO_STDC); }
"__DATE__" { RET(PRE_MACRO_DATE); }
"__FILE__" { RET(PRE_MACRO_FILE); }
"__LINE__" { RET(PRE_MACRO_LINE); }
"__TIMESTAMP__" { RET(PRE_MACRO_TIMESTAMP); }
"__COUNTER__" { RET(PRE_MACRO_COUNTER); }
"__cplusplus" { RET(PRE_MACRO_CPP); }
"__FUNCTION__" { RET(PRE_MACRO_FUNC); }
"__FUNCSIG__" { RET(PRE_MACRO_FUNCSIG); }
"__FUNCDNAME__" { RET(PRE_MACRO_FUNCDN); }
"_DEBUG" { RET(PRE_MACRO_DEBUG); }
"_NDEBUG" { RET(PRE_MACRO_NDEBUG); }
"_WIN32" { RET(PRE_MACRO_WIN32); }
"_WIN64" { RET(PRE_MACRO_WIN64); }
L (L|D)* { RET(ID); }
("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
(['] (ESC|any\[\n\\'])* ['])
{ RET(ICON); }
(D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
{ RET(FCON); }
(["] (ESC|any\[\n\\"])* ["])
{ RET(SCON); }
"..." { RET(ELLIPSIS); }
">>=" { RET(RSHIFTEQ); }
"<<=" { RET(LSHIFTEQ); }
"+=" { RET(ADDEQ); }
"-=" { RET(SUBEQ); }
"*=" { RET(MULEQ); }
"/=" { RET(DIVEQ); }
"%=" { RET(MODEQ); }
"&=" { RET(ANDEQ); }
"^=" { RET(XOREQ); }
"|=" { RET(OREQ); }
">>" { RET(RSHIFT); }
"<<" { RET(LSHIFT); }
"++" { RET(INCR); }
"--" { RET(DECR); }
"->" { RET(DEREF); }
"&&" { RET(ANDAND); }
"||" { RET(OROR); }
"<=" { RET(LEQ); }
">=" { RET(GEQ); }
"==" { RET(EQL); }
"!=" { RET(NEQ); }
";" { RET(SEMICOLON_SYM); }
"{" { RET(LBRACE_SYM); }
"}" { RET(RBRACE_SYM); }
"," { RET(COMMA_SYM); }
":" { RET(COLON_SYM); }
"=" { RET(SETEQ_SYM); }
"(" { RET(LPARN_SYM); }
")" { RET(RPARN_SYM); }
"[" { RET(LBRACK_SYM); }
"]" { RET(RBRACK_SYM); }
"." { RET(DOT_SYM); }
"&" { RET(AND_SYM); }
"!" { RET(EXCLA_SYM); }
"~" { RET(TILDE_SYM); }
"-" { RET(MINUS_SYM); }
"+" { RET(PLUS_SYM); }
"*" { RET(MULTI_SYM); }
"/" { RET(DIVIDE_SYM); }
"%" { RET(PRCNT_SYM); }
"<" { RET(LT_SYM); }
">" { RET(RT_SYM); }
"^" { RET(PWROF_SYM); }
"|" { RET(OR_SYM); }
"?" { RET(QUMRK); }
[ \t\v\f]+ { goto preprocessor; }
[\\] "\n"
{
if(cursor == s->eof) RET(EOI);
s->pos = cursor; s->line++;
goto preprocessor;
}
"\n"
{
if(cursor == s->eof) RET(EOI);
s->pos = cursor; s->line++;
goto std;
}
any { goto preprocessor; }
*/
}
main(){
Scanner in;
int t;
FILE* outf;
memset((char*) &in, 0, sizeof(in));
in.fd = 0;
in.line = 0;
outf = fopen("out.bc", "wb");
while((t = scan(&in)) != EOI) {
fputc(t, outf);
/*printf("%0u10 [%d]\t\"%.*s\"\n", in.line, t, in.cur - in.tok, in.tok);*/
}
fclose(outf);
_close(in.fd);
}
/* (GitHub page footer: sign-in prompt for commenting on the gist.) */