Created
August 28, 2017 20:15
-
-
Save decriptor/c4236ee7b17d3806909308bddeec4941 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* tokenizer.c | |
* | |
* Created on: June 17, 2011 | |
* Author: Stephen Shaw | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <ctype.h> | |
#include <string.h> | |
#include "logger.h" | |
#include "queue.h" | |
#include "tokenizer.h" | |
/* This holds the line currently read
 * in from the source code
 */
static char line[255];
/* FIFO of Token* produced by the scanner; created lazily by tokenizer_init */
static Queue* tokens;
/* Source file being tokenized; opened by tokenizer_init, closed by tokenizer_destroy.
 * NOTE(review): file-scope identifiers starting with '_' are reserved in C --
 * consider renaming to source_fp. */
static FILE *_source;
/* Newlines consumed so far; stamped onto each token as its line number */
static int line_count;
/* | |
* Name: tokenizer_init | |
* Description: Takes an open file and generates tokens | |
* Input: An open file | |
* Output: -1 error | |
*/ | |
int tokenizer_init(const char *source) { | |
_source = fopen(source, "r"); | |
if (_source == NULL) | |
LogError("Can't open source file"); | |
if (tokens == NULL) | |
tokens = queue_init(); | |
line_count = 0; | |
return 1; | |
} | |
/* | |
* Name: tokenizer_destroy | |
* Description: Clean up when done | |
* Input: none | |
* Output: none | |
*/ | |
void tokenizer_destroy() { | |
fclose(_source); | |
} | |
/* | |
* Name: create_tokens | |
* Description: This will read a line in and then create tokens from that line | |
* Input: none | |
* Output: none | |
*/ | |
int generate_tokens() { | |
if (fgets(line, sizeof(line), _source) != NULL) { | |
printf("%s", line); | |
build_tokens(); | |
} else { | |
create_token("eof", end_of_file); | |
return 0; | |
} | |
return 1; | |
} | |
static void build_tokens() { | |
LogEvent("Building Tokens"); | |
LogEvent(line); | |
char *p = line; | |
char tok_buf[255]; | |
int tok_buf_i = 0; | |
while (*p && *p != '\n' && *p != '\r') { | |
tok_buf_i = 0; | |
if (*p == ' ' || *p == '\t') { | |
} else if (*p == '/' && *(p + 1) == '/') { | |
printf("Found Comment: %s\n", p); | |
*p = '\n'; | |
break; | |
} else if (*p == '+' || *p == '-') { | |
tok_buf[tok_buf_i++] = *p; | |
if (isdigit(*(p+1))) { | |
p++; | |
tok_buf[tok_buf_i++] = *p; | |
while (isdigit(*(p+1))) { | |
tok_buf[tok_buf_i++] = *p++; | |
} | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, numeric_literal); | |
//printf("Found Signed Numeric Literal: %s\n", tok_buf); | |
} else { | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, math_exp); | |
//printf("Found Mathematical Expression: %s\n", tok_buf); | |
} | |
} else if (isdigit(*p)) { | |
/* Is it a number */ | |
tok_buf[tok_buf_i++] = *p++; | |
while (isdigit(*p)) { | |
tok_buf[tok_buf_i++] = *p++; | |
} | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, numeric_literal); | |
//printf("Found Numeric Literal: %s\n", tok_buf); | |
--p; | |
} else if (*p == '\'') { | |
tok_buf[tok_buf_i++] = *p++; | |
if (*p == '\\') { | |
tok_buf[tok_buf_i++] = *p++; | |
} | |
tok_buf[tok_buf_i++] = *p++; | |
tok_buf[tok_buf_i++] = *p; | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, character_literal); | |
//printf("Found Character: %s\n", tok_buf); | |
} else if (*p == '\"') { | |
tok_buf[tok_buf_i++] = *p; | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, doublequote); | |
//printf("Found Double Quote\n"); | |
} else if (is_punctuation(*p)) { | |
tok_buf[tok_buf_i++] = *p; | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, punctuation); | |
//printf("Found Punctuation: %s\n", tok_buf); | |
} else if (is_special_char(*p)) { | |
tok_buf[tok_buf_i++] = *p; | |
char *next = p + 1; | |
if ((*p == '&' && *next == '&') || (*p == '|' && *next == '|') | |
|| (*p == '=' && *next == '=') | |
|| (*p == '<' && *next == '<') | |
|| (*p == '>' && *next == '>')) { | |
tok_buf[tok_buf_i++] = *(++p); | |
} else if (*p == '!' || *p == '<' || *p == '>') { | |
if (*next == '=') | |
tok_buf[tok_buf_i++] = *(++p); | |
} | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, special); | |
//printf("Found Special Character: %s\n", tok_buf); | |
} else if (is_math_operator(*p)) { | |
tok_buf[tok_buf_i++] = *p; | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, math_exp); | |
//printf("Found Math Operator: %s\n", tok_buf); | |
} else if (isalpha(*p)) { | |
tok_buf[tok_buf_i++] = *p++; | |
while (isalpha(*p) || isdigit(*p)) { | |
tok_buf[tok_buf_i++] = *p++; | |
} | |
tok_buf[tok_buf_i] = '\0'; | |
create_alpha_token(tok_buf); | |
//printf("Found alpha string: %s\n", tok_buf); | |
p--; | |
} else if (*p == ';') { | |
tok_buf[tok_buf_i++] = *p; | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, end_of_rule); | |
//printf("Found End Of Rule: %s\n", tok_buf); | |
} else { | |
printf("***Unknown token*** :%x:\n", *p); | |
printf("***Line number: %d\n", line_count); | |
} | |
p++; | |
} | |
if (*p == '\n' || *p == '\r') { | |
//printf("Found newline character\n"); | |
line_count++; | |
} | |
} | |
/* | |
* Name: get_token | |
* Description: Gets the next token | |
* Input: none | |
* Output: char* to token | |
*/ | |
Token * | |
token_get() { | |
if (tokens->count == 0) | |
if (generate_tokens() == 0) | |
return NULL; | |
if (tokens->count > 0) | |
return (Token *) queue_dequeue(tokens); | |
LogError("token_get: Something has gone badly wrong"); | |
return NULL; | |
} | |
const Token * | |
token_peek() { | |
return (Token*) queue_peek(tokens); | |
} | |
void | |
token_free(const void *token) | |
{ | |
Token *t = (Token *)token; | |
free(t->name); | |
free(t); | |
} | |
void | |
token_free_all() | |
{ | |
queue_clear(tokens, token_free); | |
queue_free(tokens); | |
} | |
/* Report whether the character is a punctuation token.
 * Only '.' and ',' qualify. Returns 1 if so, 0 otherwise. */
int
is_punctuation(const char punct) {
    return (punct == '.' || punct == ',') ? 1 : 0;
}
/* Report whether the character is a "special" token character:
 * brackets, braces, parens, or the start of a comparison/logical
 * operator. Returns 1 if so, 0 otherwise. */
int
is_special_char(const char sp) {
    static const char specials[] = "{}()[]=+-<>&|!";
    /* The '\0' guard is required: strchr also matches the string's
     * terminating null character. */
    return (sp != '\0' && strchr(specials, sp) != NULL) ? 1 : 0;
}
/* Report whether the character is an arithmetic operator.
 * Returns 1 for + - / * %, 0 otherwise. */
int
is_math_operator(const char op) {
    switch (op) {
    case '+':
    case '-':
    case '/':
    case '*':
    case '%':
        return 1;
    default:
        return 0;
    }
}
static void | |
create_token(const char *name, enum ttypes type) { | |
LogNotice("Creating Token..."); | |
Token *t = (Token *) malloc(sizeof(Token)); | |
if (t == NULL) | |
LogError("Creating Token failed. Out of Memory"); | |
t->name = (char *) malloc(strlen(name) + 1); | |
strcpy(t->name, name); | |
t->type = type; | |
t->line = line_count; | |
queue_enqueue(tokens, t); | |
} | |
static void | |
create_alpha_token(const char * alpha) { | |
if(strcmp(alpha, "public") || strcmp(alpha, "private")){ | |
create_token(alpha, modifier); | |
} else if (strcmp(alpha, "int") || strcmp(alpha, "char") || | |
strcmp(alpha, "bool")|| strcmp(alpha, "void")) { | |
create_token(alpha, type); | |
} else if (strcmp(alpha, "atoi") || strcmp(alpha, "bool") || | |
strcmp(alpha, "class")|| strcmp(alpha, "char") || | |
strcmp(alpha, "cin")|| strcmp(alpha, "cout") || | |
strcmp(alpha, "else")|| strcmp(alpha, "false") || | |
strcmp(alpha, "if")|| strcmp(alpha, "int") || | |
strcmp(alpha, "itoa")|| strcmp(alpha, "main") || | |
strcmp(alpha, "new")|| strcmp(alpha, "null") || | |
strcmp(alpha, "object")|| strcmp(alpha, "public") || | |
strcmp(alpha, "private")|| strcmp(alpha, "return") || | |
strcmp(alpha, "string")|| strcmp(alpha, "this") || | |
strcmp(alpha, "true")|| strcmp(alpha, "void") || | |
strcmp(alpha, "while")) { | |
create_token(alpha, keyword); | |
} else { | |
create_token(alpha, identifier); | |
} | |
} | |
static void | |
print_tokens(const void *token) | |
{ | |
Token *t = (Token *)token; | |
printf("[TOKEN] Name: %s\n[TOKEN] Type: %d\n[TOKEN] Line: %d\n\n", t->name, t->type, t->line); | |
} | |
/* Print every token currently in the queue to stdout.
 * The queue is only traversed, not consumed. */
void
print_all_tokens()
{
    queue_print(tokens, print_tokens);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment