Skip to content

Instantly share code, notes, and snippets.

@jonenzl
Created June 21, 2017 04:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonenzl/9eeb9306f8f8416331ac6d6503921a18 to your computer and use it in GitHub Desktop.
Save jonenzl/9eeb9306f8f8416331ac6d6503921a18 to your computer and use it in GitHub Desktop.
My implementation of CS50x Pset5 - Speller
/****************************************************************************
* dictionary.c
*
* Computer Science 50
* Problem Set 5
*
* Implements a dictionary's functionality.
***************************************************************************/
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include "dictionary.h"
// Create our root node
node *root;
// Define a global variable to track the number of words in the dictionary
unsigned int word_count = 0;
// Define a global variable to track whether the dictionary has loaded properly
bool loaded = false;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
// Set current_node to root
node *current_node = root;
// For each character in the input word
for (int i = 0; word[i] != '\0'; i++)
{
// Find the index of the current character
int index = arrayIndex(word[i]);
// If the character at the current node is NULL, the world is misspelled
if (current_node->children[index] == NULL)
{
return false;
}
// If not NULL, move to the next character
current_node = current_node->children[index];
}
// At the end of the word, check if is_word is true
return current_node->is_word;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
// Read the dictionary
FILE *fp = fopen(dictionary, "r");
if (fp == NULL)
{
printf("Could not open %s.\n", dictionary);
unload();
return false;
}
// Create space for root
root = malloc(sizeof(node));
// Check to make sure the pointer to the node does not return NULL
if (root == NULL)
{
unload();
return false;
}
// Set current_node to root
node *current_node = root;
for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
{
// Find the index of the current character
int index = arrayIndex(c);
// Check whether it is a new line
if(c != '\n')
{
// Check whether a node already exists for the character
if(current_node->children[index] == NULL)
{
// Create a new node
current_node->children[index] = malloc(sizeof(node));
// Check to make sure the pointer to the node does not return NULL
if (current_node->children[index] == NULL)
{
unload();
return false;
}
// Move to the next node
current_node = current_node->children[index];
}
else
{
// Move to the next node
current_node = current_node->children[index];
}
}
else
{
// Mark as a word
current_node->is_word = true;
// Increment the word count
word_count++;
// Reset the current_node to root to traverse the trie again
current_node = root;
}
}
//Close the dictionary
fclose(fp);
//The dictionary has load successfully
loaded = true;
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
if (loaded)
{
return word_count;
}
else
{
return 0;
}
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
freeNode(root);
return true;
}
/**
* Returns the index of the array for each character (ie. a = 0, b = 1...).
*/
int arrayIndex(const char c)
{
// If the character is an apostrophe, return the index for the last element in the array
if (c == '\'')
{
return 26;
}
else
{
/* Convert all characters to lower case. The character's index is the remainder of the character's ascii value
after dividing by the ascii value of "a" (ie. g = 103 % 97 = 6) */
return tolower(c) % 'a';
}
}
/**
* Check to see whether node children can be freed.
*/
void freeNode(node* current_node)
{
// Iterate through the nodes children
for(int i = 0; i < 27; i ++)
{
// If child is a pointer, iterate recursively through it as well
if(current_node->children[i] != NULL)
{
freeNode(current_node->children[i]);
}
}
// If all children are NULL, free the node
free(current_node);
}
/**
* Declares a dictionary's functionality.
*/
#ifndef DICTIONARY_H
#define DICTIONARY_H
#include <stdbool.h>
// maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45
// Define our node for the trie
typedef struct node
{
// Boolean value to indicate whether this node is the end of a valid word
bool is_word;
// Array of 27 pointers to other nodes (26 letters and an apostrophe)
struct node *children[27];
}
node;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word);
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary);
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void);
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void);
/**
* Returns the index of the array for each character (ie. a = 0, b = 1...).
*/
int arrayIndex(const char c);
/**
* Check to see whether node children can be freed.
*/
void freeNode(node *current_node);
#endif // DICTIONARY_H
/****************************************************************************
* speller.c
*
* Computer Science 50
* Problem Set 5
*
* Checks a text file against a dictionary for spelling mistakes.
***************************************************************************/
#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>
#include "dictionary.h"
#undef calculate
#undef getrusage
// default dictionary
#define DICTIONARY "dictionaries/large"
// prototype
double calculate(const struct rusage *b, const struct rusage *a);
int main(int argc, char *argv[])
{
// check for correct number of args
if (argc != 2 && argc != 3)
{
printf("Usage: speller [dictionary] text\n");
return 1;
}
// structs for timing data
struct rusage before, after;
// benchmarks
double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;
// determine dictionary to use
char* dictionary = (argc == 3) ? argv[1] : DICTIONARY;
// load dictionary
getrusage(RUSAGE_SELF, &before);
bool loaded = load(dictionary);
getrusage(RUSAGE_SELF, &after);
// abort if dictionary not loaded
if (!loaded)
{
printf("Could not load %s.\n", dictionary);
return 1;
}
// calculate time to load dictionary
time_load = calculate(&before, &after);
// try to open text
char *text = (argc == 3) ? argv[2] : argv[1];
FILE *fp = fopen(text, "r");
if (fp == NULL)
{
printf("Could not open %s.\n", text);
unload();
return 1;
}
// prepare to report misspellings
printf("\nMISSPELLED WORDS\n\n");
// prepare to spell-check
int index = 0, misspellings = 0, words = 0;
char word[LENGTH+1];
// spell-check each word in text
for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
{
// allow only alphabetical characters and apostrophes
if (isalpha(c) || (c == '\'' && index > 0))
{
// append character to word
word[index] = c;
index++;
// ignore alphabetical strings too long to be words
if (index > LENGTH)
{
// consume remainder of alphabetical string
while ((c = fgetc(fp)) != EOF && isalpha(c));
// prepare for new word
index = 0;
}
}
// ignore words with numbers (like MS Word can)
else if (isdigit(c))
{
// consume remainder of alphanumeric string
while ((c = fgetc(fp)) != EOF && isalnum(c));
// prepare for new word
index = 0;
}
// we must have found a whole word
else if (index > 0)
{
// terminate current word
word[index] = '\0';
// update counter
words++;
// check word's spelling
getrusage(RUSAGE_SELF, &before);
bool misspelled = !check(word);
getrusage(RUSAGE_SELF, &after);
// update benchmark
time_check += calculate(&before, &after);
// print word if misspelled
if (misspelled)
{
printf("%s\n", word);
misspellings++;
}
// prepare for next word
index = 0;
}
}
// check whether there was an error
if (ferror(fp))
{
fclose(fp);
printf("Error reading %s.\n", text);
unload();
return 1;
}
// close text
fclose(fp);
// determine dictionary's size
getrusage(RUSAGE_SELF, &before);
unsigned int n = size();
getrusage(RUSAGE_SELF, &after);
// calculate time to determine dictionary's size
time_size = calculate(&before, &after);
// unload dictionary
getrusage(RUSAGE_SELF, &before);
bool unloaded = unload();
getrusage(RUSAGE_SELF, &after);
// abort if dictionary not unloaded
if (!unloaded)
{
printf("Could not unload %s.\n", dictionary);
return 1;
}
// calculate time to unload dictionary
time_unload = calculate(&before, &after);
// report benchmarks
printf("\nWORDS MISSPELLED: %d\n", misspellings);
printf("WORDS IN DICTIONARY: %d\n", n);
printf("WORDS IN TEXT: %d\n", words);
printf("TIME IN load: %.2f\n", time_load);
printf("TIME IN check: %.2f\n", time_check);
printf("TIME IN size: %.2f\n", time_size);
printf("TIME IN unload: %.2f\n", time_unload);
printf("TIME IN TOTAL: %.2f\n\n",
time_load + time_check + time_size + time_unload);
// that's all folks
return 0;
}
/**
* Returns number of seconds between b and a.
*/
double calculate(const struct rusage *b, const struct rusage *a)
{
if (b == NULL || a == NULL)
{
return 0.0;
}
else
{
return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
(b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
(b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
/ 1000000.0);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment