Skip to content

Instantly share code, notes, and snippets.

@od0x0
Created May 21, 2011 23:20
Show Gist options
  • Save od0x0/984994 to your computer and use it in GitHub Desktop.
Save od0x0/984994 to your computer and use it in GitHub Desktop.
Lexer (not sure if this is my broken one)
#pragma once
#include <stdbool.h>
#include <iso646.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Status codes reported by the lexer (stored in LX1Lexer.lastError and
// returned by LX1LexerLex).
typedef enum {
    LX1LexerErrorNone = 0,  // no error; also the state left by a zeroing init
    LX1LexerErrorUnknown    // any failure; no finer-grained codes exist
} LX1LexerError;
// Kinds of token the lexer can report (stored in LX1Lexer.lastTokenType).
typedef enum {
    LX1LexerTokenTypeNone = 0,            // no token was produced
    LX1LexerTokenTypeComma,               // ','
    LX1LexerTokenTypeIdentifier,          // identifier text is left in the buffer
    LX1LexerTokenTypeOperator,            // operator text is left in the buffer
    LX1LexerTokenTypeBar,                 // '|'
    LX1LexerTokenTypeNumber,              // number text is left in the buffer
    LX1LexerTokenTypeString,              // string contents are left in the buffer
    LX1LexerTokenTypeAssign,              // a single '='
    LX1LexerTokenTypeOpeningParenthesis,  // '('
    LX1LexerTokenTypeClosingParenthesis,  // ')'
    LX1LexerTokenTypeTerminal             // newline or ';'
} LX1LexerTokenType;
// Complete state of one lexer instance.
typedef struct LX1Lexer LX1Lexer;
struct LX1Lexer {
    // --- Position tracking and most recent result ---
    size_t currentLineNumber;             // advanced by LX1LexerPump on every '\n'
    long currentCharacterOfLineNumber;    // column counter maintained by LX1LexerPump
    LX1LexerTokenType lastTokenType;      // kind of the token found by the last LX1LexerLex call

    // --- Text of the current token ---
    char* buffer;                         // heap-allocated, NUL-terminated; NULL when empty
    size_t bufferUsed;                    // characters stored, excluding the terminator
    size_t bufferAllocated;               // bytes currently allocated for `buffer`

    // --- Error handling ---
    LX1LexerError lastError;              // result of the last LX1LexerLex call

    // --- Input callbacks (either may be NULL; the wrappers then yield 0) ---
    char (*pump)(LX1Lexer* self);         // expected to consume and return the next character
    char (*peek)(LX1Lexer* self);         // expected to return the next character without consuming it
    void* userdata;                       // not touched by the lexer; presumably for the callbacks' use

    // --- Lexer configuration (none yet) ---
};
// Consumes and returns the next input character through the `pump` callback,
// keeping the line and column counters up to date.
//
// Returns 0 when no callback is installed; callers treat 0 as end of input.
char LX1LexerPump(LX1Lexer* self){
    char current = 0;
    if(self->pump) current = self->pump(self);
    if(current == '\n'){
        self->currentLineNumber++;
        // Nothing has been consumed on the new line yet. Resetting to 0 (the
        // same value LX1LexerInit leaves) makes the first character of every
        // line column 1; resetting to 1 made later lines start at column 2.
        self->currentCharacterOfLineNumber = 0;
    }
    else self->currentCharacterOfLineNumber++;
    return current;
}
// Returns whatever the `peek` callback reports as the upcoming character,
// or 0 when no callback is installed. Lexer state is not modified here.
char LX1LexerPeek(LX1Lexer* self){
    return self->peek ? self->peek(self) : 0;
}
// Appends `string` to the token buffer, growing the allocation as needed and
// keeping the buffer NUL-terminated.
//
// A NULL or empty `string` is a no-op (the buffer is not even allocated).
// If memory cannot be obtained the existing buffer is left untouched and
// lastError is set to LX1LexerErrorUnknown. Note that LX1LexerLex overwrites
// lastError when it finishes.
void LX1LexerAddStringToBuffer(LX1Lexer* self, const char* string){
    if(string == NULL or string[0] == 0) return;
    size_t length = strlen(string);
    if(self->bufferUsed + length + 1 > self->bufferAllocated){
        // A little slack so that appending a few more characters does not
        // immediately reallocate again.
        size_t grownSize = self->bufferUsed + length + 15;
        // Keep the old pointer until realloc succeeds: on failure the original
        // block is still allocated and must neither be leaked nor replaced by NULL.
        char* grown = realloc(self->buffer, grownSize);
        if(grown == NULL){
            self->lastError = LX1LexerErrorUnknown;
            return;
        }
        self->buffer = grown;
        self->bufferAllocated = grownSize;
    }
    memcpy(self->buffer + self->bufferUsed, string, length);
    self->bufferUsed += length;
    self->buffer[self->bufferUsed] = '\0';
}
// Appends a single character to the token buffer. A NUL character is ignored.
void LX1LexerAddCharacterToBuffer(LX1Lexer* self, const char c){
    if(c not_eq 0){
        // Wrap the character in a one-character C string so the string
        // appender can do the actual work.
        const char asString[2] = {c, '\0'};
        LX1LexerAddStringToBuffer(self, asString);
    }
}
// Releases the token buffer and resets it to the empty state
// (NULL pointer, zero length, zero capacity).
void LX1LexerClearBuffer(LX1Lexer* self){
    char* released = self->buffer;
    self->buffer = NULL;
    self->bufferUsed = self->bufferAllocated = 0;
    free(released);  // free(NULL) is a no-op, so an already-empty buffer is fine
}
// Replaces the token buffer contents with `string`: the old contents are
// discarded first, then `string` is appended to the now-empty buffer.
static inline void LX1LexerSetBuffer(LX1Lexer* self, const char* string){
    LX1LexerClearBuffer(self);
    LX1LexerAddStringToBuffer(self, string);
}
// True when `c` lies in 'A'..'Z' or 'a'..'z'. Locale-independent, unlike isalpha().
static inline bool IsAlpha(char c){
    const bool isUpper = (c >= 'A' and c <= 'Z');
    const bool isLower = (c >= 'a' and c <= 'z');
    return isUpper or isLower;
}
// True when `c` is one of the decimal digits '0'..'9'.
static inline bool IsDigit(char c){
    return c >= '0' and c <= '9';
}
// Lexes a numeric literal whose first character `c` has already been pumped,
// appending its text to the token buffer.
//
// Accepted shape: one or more digits, optionally followed by a single '.' and
// further digits (a trailing '.' as in "1." is accepted because only one
// character of lookahead is available).
//
// Returns true when a number was read. Returns false when `c` does not start
// a number, or when the input ends unexpectedly in the middle of one.
//
// The previous version let the lookahead continue past a '.' but then
// rejected the '.' itself once pumped, so any literal with a decimal point
// was consumed and reported as a failure.
static bool LX1LexerLexNumber(LX1Lexer* self, char c){
    bool seenPoint = false;
    bool atStart = true;
    while(c){
        if(c == '.' and not atStart and not seenPoint) seenPoint = true;
        else if(not IsDigit(c)) break;
        LX1LexerAddCharacterToBuffer(self, c);
        atStart = false;
        // Stop before consuming anything that cannot belong to this number.
        char next = LX1LexerPeek(self);
        bool numberContinues = IsDigit(next) or (next == '.' and not seenPoint);
        if(not numberContinues) return true;
        c = LX1LexerPump(self);
    }
    return false;
}
// Lexes an identifier whose first character `c` has already been pumped,
// appending its text to the token buffer.
//
// The first character may be a letter, '_' or '#'; later characters may be
// letters, '_' or digits. Returns true once the identifier is complete, and
// false when `c` cannot be part of an identifier or the input ends early.
static bool LX1LexerLexIdentifier(LX1Lexer* self, char c){
    for(bool atStart = true; c; atStart = false, c = LX1LexerPump(self)){
        // '#' is only legal in the leading position, digits only after it.
        const bool positional = atStart ? (c == '#') : IsDigit(c);
        if(not (IsAlpha(c) or c == '_' or positional)) return false;

        LX1LexerAddCharacterToBuffer(self, c);

        // Look ahead so the character that ends the identifier stays unconsumed.
        const char upcoming = LX1LexerPeek(self);
        const bool continues = IsAlpha(upcoming) or upcoming == '_' or IsDigit(upcoming);
        if(not continues) return true;
    }
    return false;
}
// Lexes a double-quoted string literal. `c` must be the opening quote, which
// has already been pumped. The string contents, without the quotes and with
// escapes translated, are appended to the token buffer.
//
// Recognised escapes: \n, \t, \\ and \". A backslash followed by anything
// else is dropped and the following character is then read normally.
//
// Returns true when the closing quote is found, and false when `c` is not a
// quote or the input ends before the string is closed.
static bool LX1LexerLexString(LX1Lexer* self, char c){
    if(c not_eq '"') return false;
    for(;;){
        c = LX1LexerPump(self);
        if(c == 0) return false;   // input ended inside the literal
        if(c == '"') return true;  // closing quote
        if(c not_eq '\\'){
            LX1LexerAddCharacterToBuffer(self, c);
            continue;
        }

        // Escape sequence: decide from the lookahead which character it stands for.
        char translated = 0;
        switch(LX1LexerPeek(self)){
            case 'n':  translated = '\n'; break;
            case 't':  translated = '\t'; break;
            case '\\': translated = '\\'; break;
            case '"':  translated = '"';  break;
            default:   break;  // unknown escape: nothing is added or consumed
        }
        if(translated){
            LX1LexerAddCharacterToBuffer(self, translated);
            LX1LexerPump(self);  // consume the escape's second character
        }
    }
}
// Discards input up to, but not including, the next newline, or until the
// input ends. `c` is the character most recently pumped by the caller.
static void LX1LexerEatSingleLineComment(LX1Lexer* self, char c){
    while(c not_eq 0 and LX1LexerPeek(self) not_eq '\n'){
        c = LX1LexerPump(self);
    }
}
// Scans the next token from the input.
//
// The token buffer is cleared first. Spaces, tabs and `//` comments are
// skipped. On return:
//   - self->lastTokenType holds the kind of token found, or
//     LX1LexerTokenTypeNone when the input ended or an error occurred;
//   - self->buffer holds the text of identifier, number, string and operator
//     tokens (it stays NULL for the other kinds and for an empty string);
//   - self->lastError holds the same value that is returned.
//
// Returns LX1LexerErrorNone on success, or LX1LexerErrorUnknown for an
// unterminated string or a character that starts no known token.
LX1LexerError LX1LexerLex(LX1Lexer* self){
    char c = 0;
    LX1LexerTokenType tokenType = LX1LexerTokenTypeNone;
    LX1LexerError error = LX1LexerErrorNone;
    LX1LexerClearBuffer(self);
    while((c = LX1LexerPump(self)) not_eq 0){
        switch(c){
            case '\n':
            case ';':
                tokenType = LX1LexerTokenTypeTerminal;
                goto end;
            case '=':
                if(LX1LexerPeek(self) == '='){
                    LX1LexerPump(self);
                    tokenType = LX1LexerTokenTypeOperator;
                    LX1LexerSetBuffer(self, "==");
                }
                else tokenType = LX1LexerTokenTypeAssign;
                goto end;
            case '<':
                tokenType = LX1LexerTokenTypeOperator;
                if(LX1LexerPeek(self) == '='){
                    LX1LexerPump(self);
                    LX1LexerSetBuffer(self, "<=");
                }
                else LX1LexerSetBuffer(self, "<");
                goto end;
            case '>':
                tokenType = LX1LexerTokenTypeOperator;
                // These used to store "<=" / "<", copied from the case above.
                if(LX1LexerPeek(self) == '='){
                    LX1LexerPump(self);
                    LX1LexerSetBuffer(self, ">=");
                }
                else LX1LexerSetBuffer(self, ">");
                goto end;
            case ',':
                tokenType = LX1LexerTokenTypeComma;
                goto end;
            case '|':
                tokenType = LX1LexerTokenTypeBar;
                goto end;
            case '+':
                tokenType = LX1LexerTokenTypeOperator;
                LX1LexerSetBuffer(self, "+");
                goto end;
            case '-':
                tokenType = LX1LexerTokenTypeOperator;
                LX1LexerSetBuffer(self, "-");
                goto end;
            case '*':
                tokenType = LX1LexerTokenTypeOperator;
                LX1LexerSetBuffer(self, "*");
                goto end;
            case '/':
                if(LX1LexerPeek(self) == '/'){
                    // A comment is not a token: skip it and keep scanning.
                    // Returning here would report LX1LexerTokenTypeNone, which
                    // is indistinguishable from end of input. The newline that
                    // ends the comment is left for the next iteration, which
                    // reports it as a terminal.
                    LX1LexerEatSingleLineComment(self, c);
                    break;
                }
                // Otherwise it is the division operator.
                tokenType = LX1LexerTokenTypeOperator;
                LX1LexerSetBuffer(self, "/");
                goto end;
            case '(':
                tokenType = LX1LexerTokenTypeOpeningParenthesis;
                goto end;
            case ')':
                tokenType = LX1LexerTokenTypeClosingParenthesis;
                goto end;
            case '\"':
                if(LX1LexerLexString(self, c)) tokenType = LX1LexerTokenTypeString;
                else error = LX1LexerErrorUnknown;
                goto end;
            case ' ':
            case '\t':
                // Insignificant whitespace: keep scanning.
                break;
            default:
                if(LX1LexerLexNumber(self, c)) tokenType = LX1LexerTokenTypeNumber;
                else if(LX1LexerLexIdentifier(self, c)) tokenType = LX1LexerTokenTypeIdentifier;
                else error = LX1LexerErrorUnknown;
                // Either a token or an error has been set at this point.
                goto end;
        }
    }
    // Every token-producing case jumps here; running out of input falls
    // through with tokenType still LX1LexerTokenTypeNone.
end:
    self->lastTokenType = tokenType;
    self->lastError = error;
    return error;
}
// Puts a lexer into its initial state: every field zeroed (no buffer, no
// callbacks, no error) and the line counter starting at 1. The caller is
// expected to install the `pump` / `peek` callbacks afterwards.
void LX1LexerInit(LX1Lexer* self){
    memset(self, 0, sizeof *self);
    self->currentLineNumber = 1;
}
// Releases the resources owned by the lexer, which is only the token buffer.
// The LX1Lexer object itself is not freed.
void LX1LexerDeinit(LX1Lexer* self){
    LX1LexerClearBuffer(self);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment