Created
June 21, 2017 04:59
-
-
Save jonenzl/9eeb9306f8f8416331ac6d6503921a18 to your computer and use it in GitHub Desktop.
My implementation of CS50x Pset5 - Speller
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**************************************************************************** | |
* dictionary.c | |
* | |
* Computer Science 50 | |
* Problem Set 5 | |
* | |
* Implements a dictionary's functionality. | |
***************************************************************************/ | |
#include <ctype.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include "dictionary.h" | |
// Create our root node | |
node *root; | |
// Define a global variable to track the number of words in the dictionary | |
unsigned int word_count = 0; | |
// Define a global variable to track whether the dictionary has loaded properly | |
bool loaded = false; | |
/** | |
* Returns true if word is in dictionary else false. | |
*/ | |
bool check(const char* word) | |
{ | |
// Set current_node to root | |
node *current_node = root; | |
// For each character in the input word | |
for (int i = 0; word[i] != '\0'; i++) | |
{ | |
// Find the index of the current character | |
int index = arrayIndex(word[i]); | |
// If the character at the current node is NULL, the world is misspelled | |
if (current_node->children[index] == NULL) | |
{ | |
return false; | |
} | |
// If not NULL, move to the next character | |
current_node = current_node->children[index]; | |
} | |
// At the end of the word, check if is_word is true | |
return current_node->is_word; | |
} | |
/** | |
* Loads dictionary into memory. Returns true if successful else false. | |
*/ | |
bool load(const char* dictionary) | |
{ | |
// Read the dictionary | |
FILE *fp = fopen(dictionary, "r"); | |
if (fp == NULL) | |
{ | |
printf("Could not open %s.\n", dictionary); | |
unload(); | |
return false; | |
} | |
// Create space for root | |
root = malloc(sizeof(node)); | |
// Check to make sure the pointer to the node does not return NULL | |
if (root == NULL) | |
{ | |
unload(); | |
return false; | |
} | |
// Set current_node to root | |
node *current_node = root; | |
for (int c = fgetc(fp); c != EOF; c = fgetc(fp)) | |
{ | |
// Find the index of the current character | |
int index = arrayIndex(c); | |
// Check whether it is a new line | |
if(c != '\n') | |
{ | |
// Check whether a node already exists for the character | |
if(current_node->children[index] == NULL) | |
{ | |
// Create a new node | |
current_node->children[index] = malloc(sizeof(node)); | |
// Check to make sure the pointer to the node does not return NULL | |
if (current_node->children[index] == NULL) | |
{ | |
unload(); | |
return false; | |
} | |
// Move to the next node | |
current_node = current_node->children[index]; | |
} | |
else | |
{ | |
// Move to the next node | |
current_node = current_node->children[index]; | |
} | |
} | |
else | |
{ | |
// Mark as a word | |
current_node->is_word = true; | |
// Increment the word count | |
word_count++; | |
// Reset the current_node to root to traverse the trie again | |
current_node = root; | |
} | |
} | |
//Close the dictionary | |
fclose(fp); | |
//The dictionary has load successfully | |
loaded = true; | |
return true; | |
} | |
/** | |
* Returns number of words in dictionary if loaded else 0 if not yet loaded. | |
*/ | |
unsigned int size(void) | |
{ | |
if (loaded) | |
{ | |
return word_count; | |
} | |
else | |
{ | |
return 0; | |
} | |
} | |
/** | |
* Unloads dictionary from memory. Returns true if successful else false. | |
*/ | |
bool unload(void) | |
{ | |
freeNode(root); | |
return true; | |
} | |
/** | |
* Returns the index of the array for each character (ie. a = 0, b = 1...). | |
*/ | |
int arrayIndex(const char c) | |
{ | |
// If the character is an apostrophe, return the index for the last element in the array | |
if (c == '\'') | |
{ | |
return 26; | |
} | |
else | |
{ | |
/* Convert all characters to lower case. The character's index is the remainder of the character's ascii value | |
after dividing by the ascii value of "a" (ie. g = 103 % 97 = 6) */ | |
return tolower(c) % 'a'; | |
} | |
} | |
/** | |
* Check to see whether node children can be freed. | |
*/ | |
void freeNode(node* current_node) | |
{ | |
// Iterate through the nodes children | |
for(int i = 0; i < 27; i ++) | |
{ | |
// If child is a pointer, iterate recursively through it as well | |
if(current_node->children[i] != NULL) | |
{ | |
freeNode(current_node->children[i]); | |
} | |
} | |
// If all children are NULL, free the node | |
free(current_node); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Declares a dictionary's functionality. | |
*/ | |
#ifndef DICTIONARY_H | |
#define DICTIONARY_H | |
#include <stdbool.h> | |
// maximum length for a word | |
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis) | |
#define LENGTH 45 | |
// Define our node for the trie | |
typedef struct node | |
{ | |
// Boolean value to indicate whether this node is the end of a valid word | |
bool is_word; | |
// Array of 27 pointers to other nodes (26 letters and an apostrophe) | |
struct node *children[27]; | |
} | |
node; | |
/** | |
* Returns true if word is in dictionary else false. | |
*/ | |
bool check(const char *word); | |
/** | |
* Loads dictionary into memory. Returns true if successful else false. | |
*/ | |
bool load(const char *dictionary); | |
/** | |
* Returns number of words in dictionary if loaded else 0 if not yet loaded. | |
*/ | |
unsigned int size(void); | |
/** | |
* Unloads dictionary from memory. Returns true if successful else false. | |
*/ | |
bool unload(void); | |
/** | |
* Returns the index of the array for each character (ie. a = 0, b = 1...). | |
*/ | |
int arrayIndex(const char c); | |
/** | |
* Check to see whether node children can be freed. | |
*/ | |
void freeNode(node *current_node); | |
#endif // DICTIONARY_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**************************************************************************** | |
* speller.c | |
* | |
* Computer Science 50 | |
* Problem Set 5 | |
* | |
* Checks a text file against a dictionary for spelling mistakes. | |
***************************************************************************/ | |
#include <ctype.h> | |
#include <stdio.h> | |
#include <sys/resource.h> | |
#include <sys/time.h> | |
#include "dictionary.h" | |
#undef calculate | |
#undef getrusage | |
// default dictionary | |
#define DICTIONARY "dictionaries/large" | |
// prototype | |
double calculate(const struct rusage *b, const struct rusage *a); | |
int main(int argc, char *argv[]) | |
{ | |
// check for correct number of args | |
if (argc != 2 && argc != 3) | |
{ | |
printf("Usage: speller [dictionary] text\n"); | |
return 1; | |
} | |
// structs for timing data | |
struct rusage before, after; | |
// benchmarks | |
double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0; | |
// determine dictionary to use | |
char* dictionary = (argc == 3) ? argv[1] : DICTIONARY; | |
// load dictionary | |
getrusage(RUSAGE_SELF, &before); | |
bool loaded = load(dictionary); | |
getrusage(RUSAGE_SELF, &after); | |
// abort if dictionary not loaded | |
if (!loaded) | |
{ | |
printf("Could not load %s.\n", dictionary); | |
return 1; | |
} | |
// calculate time to load dictionary | |
time_load = calculate(&before, &after); | |
// try to open text | |
char *text = (argc == 3) ? argv[2] : argv[1]; | |
FILE *fp = fopen(text, "r"); | |
if (fp == NULL) | |
{ | |
printf("Could not open %s.\n", text); | |
unload(); | |
return 1; | |
} | |
// prepare to report misspellings | |
printf("\nMISSPELLED WORDS\n\n"); | |
// prepare to spell-check | |
int index = 0, misspellings = 0, words = 0; | |
char word[LENGTH+1]; | |
// spell-check each word in text | |
for (int c = fgetc(fp); c != EOF; c = fgetc(fp)) | |
{ | |
// allow only alphabetical characters and apostrophes | |
if (isalpha(c) || (c == '\'' && index > 0)) | |
{ | |
// append character to word | |
word[index] = c; | |
index++; | |
// ignore alphabetical strings too long to be words | |
if (index > LENGTH) | |
{ | |
// consume remainder of alphabetical string | |
while ((c = fgetc(fp)) != EOF && isalpha(c)); | |
// prepare for new word | |
index = 0; | |
} | |
} | |
// ignore words with numbers (like MS Word can) | |
else if (isdigit(c)) | |
{ | |
// consume remainder of alphanumeric string | |
while ((c = fgetc(fp)) != EOF && isalnum(c)); | |
// prepare for new word | |
index = 0; | |
} | |
// we must have found a whole word | |
else if (index > 0) | |
{ | |
// terminate current word | |
word[index] = '\0'; | |
// update counter | |
words++; | |
// check word's spelling | |
getrusage(RUSAGE_SELF, &before); | |
bool misspelled = !check(word); | |
getrusage(RUSAGE_SELF, &after); | |
// update benchmark | |
time_check += calculate(&before, &after); | |
// print word if misspelled | |
if (misspelled) | |
{ | |
printf("%s\n", word); | |
misspellings++; | |
} | |
// prepare for next word | |
index = 0; | |
} | |
} | |
// check whether there was an error | |
if (ferror(fp)) | |
{ | |
fclose(fp); | |
printf("Error reading %s.\n", text); | |
unload(); | |
return 1; | |
} | |
// close text | |
fclose(fp); | |
// determine dictionary's size | |
getrusage(RUSAGE_SELF, &before); | |
unsigned int n = size(); | |
getrusage(RUSAGE_SELF, &after); | |
// calculate time to determine dictionary's size | |
time_size = calculate(&before, &after); | |
// unload dictionary | |
getrusage(RUSAGE_SELF, &before); | |
bool unloaded = unload(); | |
getrusage(RUSAGE_SELF, &after); | |
// abort if dictionary not unloaded | |
if (!unloaded) | |
{ | |
printf("Could not unload %s.\n", dictionary); | |
return 1; | |
} | |
// calculate time to unload dictionary | |
time_unload = calculate(&before, &after); | |
// report benchmarks | |
printf("\nWORDS MISSPELLED: %d\n", misspellings); | |
printf("WORDS IN DICTIONARY: %d\n", n); | |
printf("WORDS IN TEXT: %d\n", words); | |
printf("TIME IN load: %.2f\n", time_load); | |
printf("TIME IN check: %.2f\n", time_check); | |
printf("TIME IN size: %.2f\n", time_size); | |
printf("TIME IN unload: %.2f\n", time_unload); | |
printf("TIME IN TOTAL: %.2f\n\n", | |
time_load + time_check + time_size + time_unload); | |
// that's all folks | |
return 0; | |
} | |
/** | |
* Returns number of seconds between b and a. | |
*/ | |
double calculate(const struct rusage *b, const struct rusage *a) | |
{ | |
if (b == NULL || a == NULL) | |
{ | |
return 0.0; | |
} | |
else | |
{ | |
return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) - | |
(b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) + | |
((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) - | |
(b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec))) | |
/ 1000000.0); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment