Skip to content

Instantly share code, notes, and snippets.

@decriptor
Created August 28, 2017 20:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save decriptor/c4236ee7b17d3806909308bddeec4941 to your computer and use it in GitHub Desktop.
Save decriptor/c4236ee7b17d3806909308bddeec4941 to your computer and use it in GitHub Desktop.
/*
* tokenizer.c
*
* Created on: June 17, 2011
* Author: Stephen Shaw
*/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "logger.h"
#include "queue.h"
#include "tokenizer.h"
/* This holds the line currently read
* in from the source code.
* It is filled by fgets() in generate_tokens() and scanned
* (and, for comments, modified in place) by build_tokens().
*/
static char line[255];
/* Queue of tokens produced so far. Created in tokenizer_init(),
* appended to by create_token() and drained by token_get().
*/
static Queue* tokens;
/* Stream for the source file opened in tokenizer_init(). */
static FILE *_source;
/* Count of line terminators seen by build_tokens(); reset to 0 in
* tokenizer_init() and copied into every token by create_token(),
* so tokens on the first line carry line number 0.
*/
static int line_count;
/*
* Name: tokenizer_init
* Description: Opens the source file for reading, creates the token queue
*              if it does not exist yet and resets the line counter.
* Input: source - path of the source file to tokenize
* Output: 1 on success, -1 on error (file could not be opened or the
*         token queue could not be created)
*/
int tokenizer_init(const char *source) {
    if (source == NULL) {
        LogError("Can't open source file");
        return -1;
    }

    _source = fopen(source, "r");
    if (_source == NULL) {
        LogError("Can't open source file");
        return -1;
    }

    if (tokens == NULL) {
        tokens = queue_init();
        /* NOTE(review): assumes queue_init() reports failure by returning
         * NULL - confirm against queue.h. */
        if (tokens == NULL) {
            LogError("Can't create token queue");
            fclose(_source);
            _source = NULL;
            return -1;
        }
    }

    line_count = 0;
    return 1;
}
/*
* Name: tokenizer_destroy
* Description: Clean up when done. Closes the source file if it is open;
*              safe to call when tokenizer_init() failed or when the
*              tokenizer has already been destroyed.
* Input: none
* Output: none
*/
void tokenizer_destroy() {
    if (_source != NULL) {
        fclose(_source);
        /* Forget the stream so a second call cannot close it twice. */
        _source = NULL;
    }
}
/*
* Name: generate_tokens
* Description: Reads the next line of the source file into `line`, echoes
*              it to stdout and tokenizes it. When no further line can be
*              read, an "eof" token is queued instead.
* Input: none
* Output: 1 if a line was read and tokenized, 0 if the end of the input
*         was reached
*/
int generate_tokens() {
    char *got = fgets(line, sizeof(line), _source);

    if (got == NULL) {
        /* Nothing left to read: mark the end of the token stream. */
        create_token("eof", end_of_file);
        return 0;
    }

    printf("%s", line);
    build_tokens();
    return 1;
}
/*
* Name: build_tokens
* Description: Scans the file-scope `line` buffer from left to right and
*              queues one token per lexeme through create_token() /
*              create_alpha_token(). Scanning stops at the end of the
*              string, at a '\n' or '\r', or at a "//" comment (the
*              comment start is overwritten with '\n' so the rest of the
*              line is ignored).
* Input: none
* Output: none; line_count is incremented when the scan stops on a line
*         terminator
*/
static void build_tokens() {
    LogEvent("Building Tokens");
    LogEvent(line);
    char *p = line;
    /* A token can never be longer than the line it came from. */
    char tok_buf[sizeof(line)];
    int tok_buf_i = 0;

    /* Invariant: every branch leaves p on the LAST character it consumed;
     * the shared p++ at the bottom of the loop steps to the next lexeme. */
    while (*p && *p != '\n' && *p != '\r') {
        tok_buf_i = 0;
        if (*p == ' ' || *p == '\t') {
            /* Whitespace separates tokens and produces none. */
        } else if (*p == '/' && *(p + 1) == '/') {
            printf("Found Comment: %s\n", p);
            /* Turn the comment into an end of line so it is counted below. */
            *p = '\n';
            break;
        } else if (*p == '+' || *p == '-') {
            tok_buf[tok_buf_i++] = *p;
            if (isdigit((unsigned char) p[1])) {
                /* Signed numeric literal: copy each digit exactly once. */
                while (isdigit((unsigned char) p[1])) {
                    tok_buf[tok_buf_i++] = *++p;
                }
                tok_buf[tok_buf_i] = '\0';
                create_token(tok_buf, numeric_literal);
            } else {
                /* A lone sign is a mathematical operator. */
                tok_buf[tok_buf_i] = '\0';
                create_token(tok_buf, math_exp);
            }
        } else if (isdigit((unsigned char) *p)) {
            /* Unsigned numeric literal */
            tok_buf[tok_buf_i++] = *p;
            while (isdigit((unsigned char) p[1])) {
                tok_buf[tok_buf_i++] = *++p;
            }
            tok_buf[tok_buf_i] = '\0';
            create_token(tok_buf, numeric_literal);
        } else if (*p == '\'') {
            /* Character literal: quote, optional backslash, character,
             * closing quote. */
            tok_buf[tok_buf_i++] = *p;
            if (p[1] == '\\') {
                tok_buf[tok_buf_i++] = *++p;
            }
            /* Copy the character and the closing quote, but never walk
             * over the end of the line when the literal is unterminated. */
            for (int k = 0;
                 k < 2 && p[1] != '\0' && p[1] != '\n' && p[1] != '\r';
                 k++) {
                tok_buf[tok_buf_i++] = *++p;
            }
            tok_buf[tok_buf_i] = '\0';
            create_token(tok_buf, character_literal);
        } else if (*p == '\"') {
            tok_buf[tok_buf_i++] = *p;
            tok_buf[tok_buf_i] = '\0';
            create_token(tok_buf, doublequote);
        } else if (is_punctuation(*p)) {
            tok_buf[tok_buf_i++] = *p;
            tok_buf[tok_buf_i] = '\0';
            create_token(tok_buf, punctuation);
        } else if (is_special_char(*p)) {
            tok_buf[tok_buf_i++] = *p;
            /* p is on a non-terminator character, so p + 1 is readable. */
            char *next = p + 1;
            if ((*p == '&' && *next == '&') || (*p == '|' && *next == '|')
                    || (*p == '=' && *next == '=')
                    || (*p == '<' && *next == '<')
                    || (*p == '>' && *next == '>')) {
                /* Doubled operators: && || == << >> */
                tok_buf[tok_buf_i++] = *(++p);
            } else if (*p == '!' || *p == '<' || *p == '>') {
                /* Comparison operators: != <= >= */
                if (*next == '=')
                    tok_buf[tok_buf_i++] = *(++p);
            }
            tok_buf[tok_buf_i] = '\0';
            create_token(tok_buf, special);
        } else if (is_math_operator(*p)) {
            tok_buf[tok_buf_i++] = *p;
            tok_buf[tok_buf_i] = '\0';
            create_token(tok_buf, math_exp);
        } else if (isalpha((unsigned char) *p)) {
            /* Word: a letter followed by letters and digits. */
            tok_buf[tok_buf_i++] = *p;
            while (isalnum((unsigned char) p[1])) {
                tok_buf[tok_buf_i++] = *++p;
            }
            tok_buf[tok_buf_i] = '\0';
            create_alpha_token(tok_buf);
        } else if (*p == ';') {
            tok_buf[tok_buf_i++] = *p;
            tok_buf[tok_buf_i] = '\0';
            create_token(tok_buf, end_of_rule);
        } else {
            /* Print the raw byte value, not a sign-extended int. */
            printf("***Unknown token*** :%x:\n", (unsigned int) (unsigned char) *p);
            printf("***Line number: %d\n", line_count);
        }
        p++;
    }

    if (*p == '\n' || *p == '\r') {
        line_count++;
    }
}
/*
* Name: token_get
* Description: Removes and returns the next token. When the queue is empty,
*              further lines are read and tokenized until at least one
*              token is available; lines that yield no tokens (blank or
*              comment-only lines) are skipped.
* Input: none
* Output: pointer to the next token, or NULL when generate_tokens() reports
*         the end of the input or the tokenizer was never initialised.
*         Release the token with token_free().
*
* NOTE(review): at end of input generate_tokens() queues an "eof" token
* before returning 0, so this function returns NULL first and hands out the
* "eof" token on the following call. That ordering is kept as it was;
* confirm it is what callers expect.
*/
Token *
token_get() {
    if (tokens == NULL) {
        LogError("token_get: tokenizer is not initialised");
        return NULL;
    }

    /* One line is not always enough: a blank line produces no tokens. */
    while (tokens->count == 0) {
        if (generate_tokens() == 0)
            return NULL;
    }

    return (Token *) queue_dequeue(tokens);
}
/*
* Name: token_peek
* Description: Returns whatever queue_peek() reports for the token queue,
*              viewed as a read-only token.
* Input: none
* Output: const pointer to the token returned by queue_peek()
*/
const Token *
token_peek() {
    const Token *head = (Token *) queue_peek(tokens);

    return head;
}
/*
* Name: token_free
* Description: Releases a token created by create_token(): first its name
*              string, then the token itself. A NULL token is ignored,
*              mirroring free(NULL).
* Input: token - the token to release (takes const void * so it can be
*        passed to queue_clear() as a callback)
* Output: none
*/
void
token_free(const void *token)
{
    if (token == NULL)
        return;

    Token *t = (Token *)token;
    free(t->name);
    free(t);
}
/*
* Name: token_free_all
* Description: Releases every queued token and then the queue itself.
*              Does nothing when there is no queue.
* Input: none
* Output: none
*/
void
token_free_all()
{
    if (tokens == NULL)
        return;

    queue_clear(tokens, token_free);
    queue_free(tokens);
    /* tokenizer_init() only builds a new queue when tokens is NULL, so the
     * released queue must not be left behind as a dangling pointer.
     * NOTE(review): assumes queue_free() releases the queue object itself -
     * confirm against queue.h. */
    tokens = NULL;
}
/*
* Name: is_punctuation
* Description: Tells whether a character is a punctuation mark,
*              i.e. a period or a comma.
* Input: punct - the character to test
* Output: 1 if it is punctuation, 0 otherwise
*/
int
is_punctuation(const char punct) {
    if (punct == '.' || punct == ',')
        return 1;
    return 0;
}
/*
* Name: is_special_char
* Description: Tells whether a character is one of the special characters
*              { } ( ) [ ] = + - < > & | !
* Input: sp - the character to test
* Output: 1 if it is a special character, 0 otherwise
*/
int
is_special_char(const char sp) {
    static const char special_chars[] = "{}()[]=+-<>&|!";

    /* strchr() also matches the terminating '\0', which is not special. */
    if (sp == '\0')
        return 0;
    return strchr(special_chars, sp) != NULL ? 1 : 0;
}
/*
* Name: is_math_operator
* Description: Tells whether a character is one of the arithmetic
*              operators + - / * %
* Input: op - the character to test
* Output: 1 if it is a math operator, 0 otherwise
*/
int
is_math_operator(const char op) {
    switch (op) {
    case '+':
    case '-':
    case '/':
    case '*':
    case '%':
        return 1;
    default:
        return 0;
    }
}
/*
* Name: create_token
* Description: Allocates a token holding a private copy of `name`, tags it
*              with `type` and the current line_count, and appends it to
*              the token queue. If memory runs out the error is logged and
*              no token is queued.
* Input: name - NUL-terminated token text (copied, the caller keeps its
*               buffer)
*        type - token category
* Output: none
*/
static void
create_token(const char *name, enum ttypes type) {
    LogNotice("Creating Token...");

    Token *t = malloc(sizeof *t);
    if (t == NULL) {
        LogError("Creating Token failed. Out of Memory");
        return;
    }

    size_t name_size = strlen(name) + 1;    /* text plus terminator */
    t->name = malloc(name_size);
    if (t->name == NULL) {
        LogError("Creating Token failed. Out of Memory");
        free(t);
        return;
    }
    memcpy(t->name, name, name_size);

    t->type = type;
    t->line = line_count;
    queue_enqueue(tokens, t);
}
/*
* Returns 1 when `word` equals one of the `count` strings in `list`,
* 0 otherwise.
*/
static int
alpha_in_list(const char *word, const char *const *list, size_t count) {
    for (size_t i = 0; i < count; i++) {
        /* strcmp() returns 0 - not a "true" value - when strings match. */
        if (strcmp(word, list[i]) == 0)
            return 1;
    }
    return 0;
}

/*
* Name: create_alpha_token
* Description: Classifies a word and queues the matching token. The
*              categories are tried in this order: modifier, type,
*              keyword; any other word is an identifier.
* Input: alpha - NUL-terminated word
* Output: none
*/
static void
create_alpha_token(const char * alpha) {
    static const char *const modifier_words[] = { "public", "private" };
    static const char *const type_words[] = { "int", "char", "bool", "void" };
    /* Words already covered by the two lists above are kept here as well;
     * they simply never reach this list. */
    static const char *const keyword_words[] = {
        "atoi", "bool", "class", "char", "cin", "cout", "else", "false",
        "if", "int", "itoa", "main", "new", "null", "object", "public",
        "private", "return", "string", "this", "true", "void", "while"
    };

    if (alpha_in_list(alpha, modifier_words,
            sizeof modifier_words / sizeof modifier_words[0])) {
        create_token(alpha, modifier);
    } else if (alpha_in_list(alpha, type_words,
            sizeof type_words / sizeof type_words[0])) {
        create_token(alpha, type);
    } else if (alpha_in_list(alpha, keyword_words,
            sizeof keyword_words / sizeof keyword_words[0])) {
        create_token(alpha, keyword);
    } else {
        create_token(alpha, identifier);
    }
}
/*
* Name: print_tokens
* Description: Prints the name, type and line of one token to stdout.
*              Has the callback shape handed to queue_print().
* Input: token - the token to print
* Output: none
*/
static void
print_tokens(const void *token)
{
    const Token *tok = token;

    printf("[TOKEN] Name: %s\n[TOKEN] Type: %d\n[TOKEN] Line: %d\n\n",
            tok->name, tok->type, tok->line);
}
/*
* Name: print_all_tokens
* Description: Hands the token queue to queue_print() with print_tokens()
*              as the per-token printer.
* Input: none
* Output: none
*/
void
print_all_tokens()
{
    queue_print(tokens, &print_tokens);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment