Created
August 28, 2017 20:15
-
-
Save decriptor/c4236ee7b17d3806909308bddeec4941 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* tokenizer.c | |
* | |
* Created on: June 17, 2011 | |
* Author: Stephen Shaw | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <ctype.h> | |
#include <string.h> | |
#include "logger.h" | |
#include "queue.h" | |
#include "tokenizer.h" | |
/* This holds the line currently read
 * in from the source code
 */
static char line[255];
/* FIFO of Token* produced by the scanner; created lazily by tokenizer_init */
static Queue* tokens;
/* Source file being tokenized; opened by tokenizer_init, closed by tokenizer_destroy.
 * NOTE(review): file-scope identifiers starting with '_' are reserved in C --
 * consider renaming to source_fp. */
static FILE *_source;
/* Newlines consumed so far; stamped onto each token as its line number */
static int line_count;
/* | |
* Name: tokenizer_init | |
* Description: Takes an open file and generates tokens | |
* Input: An open file | |
* Output: -1 error | |
*/ | |
int tokenizer_init(const char *source) { | |
_source = fopen(source, "r"); | |
if (_source == NULL) | |
LogError("Can't open source file"); | |
if (tokens == NULL) | |
tokens = queue_init(); | |
line_count = 0; | |
return 1; | |
} | |
/* | |
* Name: tokenizer_destroy | |
* Description: Clean up when done | |
* Input: none | |
* Output: none | |
*/ | |
void tokenizer_destroy() { | |
fclose(_source); | |
} | |
/* | |
* Name: create_tokens | |
* Description: This will read a line in and then create tokens from that line | |
* Input: none | |
* Output: none | |
*/ | |
int generate_tokens() { | |
if (fgets(line, sizeof(line), _source) != NULL) { | |
printf("%s", line); | |
build_tokens(); | |
} else { | |
create_token("eof", end_of_file); | |
return 0; | |
} | |
return 1; | |
} | |
static void build_tokens() { | |
LogEvent("Building Tokens"); | |
LogEvent(line); | |
char *p = line; | |
char tok_buf[255]; | |
int tok_buf_i = 0; | |
while (*p && *p != '\n' && *p != '\r') { | |
tok_buf_i = 0; | |
if (*p == ' ' || *p == '\t') { | |
} else if (*p == '/' && *(p + 1) == '/') { | |
printf("Found Comment: %s\n", p); | |
*p = '\n'; | |
break; | |
} else if (*p == '+' || *p == '-') { | |
tok_buf[tok_buf_i++] = *p; | |
if (isdigit(*(p+1))) { | |
p++; | |
tok_buf[tok_buf_i++] = *p; | |
while (isdigit(*(p+1))) { | |
tok_buf[tok_buf_i++] = *p++; | |
} | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, numeric_literal); | |
//printf("Found Signed Numeric Literal: %s\n", tok_buf); | |
} else { | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, math_exp); | |
//printf("Found Mathematical Expression: %s\n", tok_buf); | |
} | |
} else if (isdigit(*p)) { | |
/* Is it a number */ | |
tok_buf[tok_buf_i++] = *p++; | |
while (isdigit(*p)) { | |
tok_buf[tok_buf_i++] = *p++; | |
} | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, numeric_literal); | |
//printf("Found Numeric Literal: %s\n", tok_buf); | |
--p; | |
} else if (*p == '\'') { | |
tok_buf[tok_buf_i++] = *p++; | |
if (*p == '\\') { | |
tok_buf[tok_buf_i++] = *p++; | |
} | |
tok_buf[tok_buf_i++] = *p++; | |
tok_buf[tok_buf_i++] = *p; | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, character_literal); | |
//printf("Found Character: %s\n", tok_buf); | |
} else if (*p == '\"') { | |
tok_buf[tok_buf_i++] = *p; | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, doublequote); | |
//printf("Found Double Quote\n"); | |
} else if (is_punctuation(*p)) { | |
tok_buf[tok_buf_i++] = *p; | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, punctuation); | |
//printf("Found Punctuation: %s\n", tok_buf); | |
} else if (is_special_char(*p)) { | |
tok_buf[tok_buf_i++] = *p; | |
char *next = p + 1; | |
if ((*p == '&' && *next == '&') || (*p == '|' && *next == '|') | |
|| (*p == '=' && *next == '=') | |
|| (*p == '<' && *next == '<') | |
|| (*p == '>' && *next == '>')) { | |
tok_buf[tok_buf_i++] = *(++p); | |
} else if (*p == '!' || *p == '<' || *p == '>') { | |
if (*next == '=') | |
tok_buf[tok_buf_i++] = *(++p); | |
} | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, special); | |
//printf("Found Special Character: %s\n", tok_buf); | |
} else if (is_math_operator(*p)) { | |
tok_buf[tok_buf_i++] = *p; | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, math_exp); | |
//printf("Found Math Operator: %s\n", tok_buf); | |
} else if (isalpha(*p)) { | |
tok_buf[tok_buf_i++] = *p++; | |
while (isalpha(*p) || isdigit(*p)) { | |
tok_buf[tok_buf_i++] = *p++; | |
} | |
tok_buf[tok_buf_i] = '\0'; | |
create_alpha_token(tok_buf); | |
//printf("Found alpha string: %s\n", tok_buf); | |
p--; | |
} else if (*p == ';') { | |
tok_buf[tok_buf_i++] = *p; | |
tok_buf[tok_buf_i] = '\0'; | |
create_token(tok_buf, end_of_rule); | |
//printf("Found End Of Rule: %s\n", tok_buf); | |
} else { | |
printf("***Unknown token*** :%x:\n", *p); | |
printf("***Line number: %d\n", line_count); | |
} | |
p++; | |
} | |
if (*p == '\n' || *p == '\r') { | |
//printf("Found newline character\n"); | |
line_count++; | |
} | |
} | |
/* | |
* Name: get_token | |
* Description: Gets the next token | |
* Input: none | |
* Output: char* to token | |
*/ | |
Token * | |
token_get() { | |
if (tokens->count == 0) | |
if (generate_tokens() == 0) | |
return NULL; | |
if (tokens->count > 0) | |
return (Token *) queue_dequeue(tokens); | |
LogError("token_get: Something has gone badly wrong"); | |
return NULL; | |
} | |
const Token * | |
token_peek() { | |
return (Token*) queue_peek(tokens); | |
} | |
void | |
token_free(const void *token) | |
{ | |
Token *t = (Token *)token; | |
free(t->name); | |
free(t); | |
} | |
void | |
token_free_all() | |
{ | |
queue_clear(tokens, token_free); | |
queue_free(tokens); | |
} | |
/* Report whether the character is a punctuation token.
 * Only '.' and ',' qualify. Returns 1 if so, 0 otherwise. */
int
is_punctuation(const char punct) {
    return (punct == '.' || punct == ',') ? 1 : 0;
}
/* Report whether the character is a "special" token character:
 * brackets, braces, parens, or the start of a comparison/logical
 * operator. Returns 1 if so, 0 otherwise. */
int
is_special_char(const char sp) {
    static const char specials[] = "{}()[]=+-<>&|!";
    /* The '\0' guard is required: strchr also matches the string's
     * terminating null character. */
    return (sp != '\0' && strchr(specials, sp) != NULL) ? 1 : 0;
}
/* Report whether the character is an arithmetic operator.
 * Returns 1 for + - / * %, 0 otherwise. */
int
is_math_operator(const char op) {
    switch (op) {
    case '+':
    case '-':
    case '/':
    case '*':
    case '%':
        return 1;
    default:
        return 0;
    }
}
static void | |
create_token(const char *name, enum ttypes type) { | |
LogNotice("Creating Token..."); | |
Token *t = (Token *) malloc(sizeof(Token)); | |
if (t == NULL) | |
LogError("Creating Token failed. Out of Memory"); | |
t->name = (char *) malloc(strlen(name) + 1); | |
strcpy(t->name, name); | |
t->type = type; | |
t->line = line_count; | |
queue_enqueue(tokens, t); | |
} | |
static void | |
create_alpha_token(const char * alpha) { | |
if(strcmp(alpha, "public") || strcmp(alpha, "private")){ | |
create_token(alpha, modifier); | |
} else if (strcmp(alpha, "int") || strcmp(alpha, "char") || | |
strcmp(alpha, "bool")|| strcmp(alpha, "void")) { | |
create_token(alpha, type); | |
} else if (strcmp(alpha, "atoi") || strcmp(alpha, "bool") || | |
strcmp(alpha, "class")|| strcmp(alpha, "char") || | |
strcmp(alpha, "cin")|| strcmp(alpha, "cout") || | |
strcmp(alpha, "else")|| strcmp(alpha, "false") || | |
strcmp(alpha, "if")|| strcmp(alpha, "int") || | |
strcmp(alpha, "itoa")|| strcmp(alpha, "main") || | |
strcmp(alpha, "new")|| strcmp(alpha, "null") || | |
strcmp(alpha, "object")|| strcmp(alpha, "public") || | |
strcmp(alpha, "private")|| strcmp(alpha, "return") || | |
strcmp(alpha, "string")|| strcmp(alpha, "this") || | |
strcmp(alpha, "true")|| strcmp(alpha, "void") || | |
strcmp(alpha, "while")) { | |
create_token(alpha, keyword); | |
} else { | |
create_token(alpha, identifier); | |
} | |
} | |
static void | |
print_tokens(const void *token) | |
{ | |
Token *t = (Token *)token; | |
printf("[TOKEN] Name: %s\n[TOKEN] Type: %d\n[TOKEN] Line: %d\n\n", t->name, t->type, t->line); | |
} | |
/* Print every token currently in the queue to stdout.
 * The queue is only traversed, not consumed. */
void
print_all_tokens()
{
    queue_print(tokens, print_tokens);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment