jonenzl/dictionary.c

## dictionary.c
/****************************************************************************
 * dictionary.c
 *
 * Computer Science 50
 * Problem Set 5
 *
 * Implements a dictionary's functionality.
 ***************************************************************************/

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include "dictionary.h"

// Create our root node
node *root;

// Define a global variable to track the number of words in the dictionary
unsigned int word_count = 0;

// Define a global variable to track whether the dictionary has loaded properly
bool loaded = false;

/**
 * Returns true if word is in dictionary else false.
 */
bool check(const char* word)
{
    // Set current_node to root
    node *current_node = root;

    // For each character in the input word
    for (int i = 0; word[i] != '\0'; i++)
    {
        // Find the index of the current character
        int index = arrayIndex(word[i]);

        // If the character at the current node is NULL, the world is misspelled
        if (current_node->children[index] == NULL)
        {
            return false;
        }

        // If not NULL, move to the next character
        current_node = current_node->children[index];
    }

    // At the end of the word, check if is_word is true
    return current_node->is_word;
}

/**
 * Loads dictionary into memory.  Returns true if successful else false.
 */
bool load(const char* dictionary)
{
    // Read the dictionary
    FILE *fp = fopen(dictionary, "r");
    if (fp == NULL)
    {
        printf("Could not open %s.\n", dictionary);
        unload();
        return false;
    }

    // Create space for root
    root = malloc(sizeof(node));
    // Check to make sure the pointer to the node does not return NULL
    if (root == NULL)
    {
        unload();
        return false;
    }

    // Set current_node to root
    node *current_node = root;

    for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
    {
        // Find the index of the current character
        int index = arrayIndex(c);

        // Check whether it is a new line
        if(c != '\n')
        {
            // Check whether a node already exists for the character
            if(current_node->children[index] == NULL)
            {
                // Create a new node
                current_node->children[index] = malloc(sizeof(node));
                // Check to make sure the pointer to the node does not return NULL
                if (current_node->children[index] == NULL)
                {
                    unload();
                    return false;
                }
                // Move to the next node
                current_node = current_node->children[index];
            }
            else
            {
                // Move to the next node
                current_node = current_node->children[index];
            }
        }
        else
        {
            // Mark as a word
            current_node->is_word = true;

            // Increment the word count
            word_count++;

            // Reset the current_node to root to traverse the trie again
            current_node = root;
        }
    }
    //Close the dictionary
    fclose(fp);

    //The dictionary has load successfully
    loaded = true;

    return true;
}

/**
 * Returns number of words in dictionary if loaded else 0 if not yet loaded.
 */
unsigned int size(void)
{
    if (loaded)
    {
        return word_count;
    }
    else
    {
        return 0;
    }
}

/**
 * Unloads dictionary from memory. Returns true if successful else false.
 */
bool unload(void)
{
    freeNode(root);
    return true;
}

/**
 * Returns the index of the array for each character (ie. a = 0, b = 1...).
 */
int arrayIndex(const char c)
{
    // If the character is an apostrophe, return the index for the last element in the array
    if (c == '\'')
    {
        return 26;
    }
    else
    {
        /* Convert all characters to lower case. The character's index is the remainder of the character's ascii value
        after dividing by the ascii value of "a" (ie. g = 103 % 97 = 6) */
        return tolower(c) % 'a';
    }
}

/**
 * Check to see whether node children can be freed.
 */
void freeNode(node* current_node)
{
    // Iterate through the nodes children
    for(int i = 0; i < 27; i ++)
    {
        // If child is a pointer, iterate recursively through it as well
        if(current_node->children[i] != NULL)
        {
            freeNode(current_node->children[i]);
        }
    }

    // If all children are NULL, free the node
    free(current_node);
}

## dictionary.h
/**
 * Declares a dictionary's functionality.
 */

#ifndef DICTIONARY_H
#define DICTIONARY_H

#include <stdbool.h>

// maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45

// Define our node for the trie
typedef struct node
{
    // Boolean value to indicate whether this node is the end of a valid word
    bool is_word;
    // Array of 27 pointers to other nodes (26 letters and an apostrophe)
    struct node *children[27];
}
node;

/**
 * Returns true if word is in dictionary else false.
 */
bool check(const char *word);

/**
 * Loads dictionary into memory. Returns true if successful else false.
 */
bool load(const char *dictionary);

/**
 * Returns number of words in dictionary if loaded else 0 if not yet loaded.
 */
unsigned int size(void);

/**
 * Unloads dictionary from memory.  Returns true if successful else false.
 */
bool unload(void);

/**
 * Returns the index of the array for each character (ie. a = 0, b = 1...).
 */
int arrayIndex(const char c);

/**
 * Check to see whether node children can be freed.
 */
void freeNode(node *current_node);

#endif // DICTIONARY_H

## speller.c
/****************************************************************************
 * speller.c
 *
 * Computer Science 50
 * Problem Set 5
 *
 * Checks a text file against a dictionary for spelling mistakes.
 ***************************************************************************/

#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>

#include "dictionary.h"
#undef calculate
#undef getrusage

// default dictionary
#define DICTIONARY "dictionaries/large"

// prototype
double calculate(const struct rusage *b, const struct rusage *a);

int main(int argc, char *argv[])
{
    // check for correct number of args
    if (argc != 2 && argc != 3)
    {
        printf("Usage: speller [dictionary] text\n");
        return 1;
    }

    // structs for timing data
    struct rusage before, after;

    // benchmarks
    double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;

    // determine dictionary to use
    char* dictionary = (argc == 3) ? argv[1] : DICTIONARY;

    // load dictionary
    getrusage(RUSAGE_SELF, &before);
    bool loaded = load(dictionary);
    getrusage(RUSAGE_SELF, &after);

    // abort if dictionary not loaded
    if (!loaded)
    {
        printf("Could not load %s.\n", dictionary);
        return 1;
    }

    // calculate time to load dictionary
    time_load = calculate(&before, &after);

    // try to open text
    char *text = (argc == 3) ? argv[2] : argv[1];
    FILE *fp = fopen(text, "r");
    if (fp == NULL)
    {
        printf("Could not open %s.\n", text);
        unload();
        return 1;
    }

    // prepare to report misspellings
    printf("\nMISSPELLED WORDS\n\n");

    // prepare to spell-check
    int index = 0, misspellings = 0, words = 0;
    char word[LENGTH+1];

    // spell-check each word in text
    for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
    {
        // allow only alphabetical characters and apostrophes
        if (isalpha(c) || (c == '\'' && index > 0))
        {
            // append character to word
            word[index] = c;
            index++;

            // ignore alphabetical strings too long to be words
            if (index > LENGTH)
            {
                // consume remainder of alphabetical string
                while ((c = fgetc(fp)) != EOF && isalpha(c));

                // prepare for new word
                index = 0;
            }
        }

        // ignore words with numbers (like MS Word can)
        else if (isdigit(c))
        {
            // consume remainder of alphanumeric string
            while ((c = fgetc(fp)) != EOF && isalnum(c));

            // prepare for new word
            index = 0;
        }

        // we must have found a whole word
        else if (index > 0)
        {
            // terminate current word
            word[index] = '\0';

            // update counter
            words++;

            // check word's spelling
            getrusage(RUSAGE_SELF, &before);
            bool misspelled = !check(word);
            getrusage(RUSAGE_SELF, &after);

            // update benchmark
            time_check += calculate(&before, &after);

            // print word if misspelled
            if (misspelled)
            {
                printf("%s\n", word);
                misspellings++;
            }

            // prepare for next word
            index = 0;
        }
    }

    // check whether there was an error
    if (ferror(fp))
    {
        fclose(fp);
        printf("Error reading %s.\n", text);
        unload();
        return 1;
    }

    // close text
    fclose(fp);

    // determine dictionary's size
    getrusage(RUSAGE_SELF, &before);
    unsigned int n = size();
    getrusage(RUSAGE_SELF, &after);

    // calculate time to determine dictionary's size
    time_size = calculate(&before, &after);

    // unload dictionary
    getrusage(RUSAGE_SELF, &before);
    bool unloaded = unload();
    getrusage(RUSAGE_SELF, &after);

    // abort if dictionary not unloaded
    if (!unloaded)
    {
        printf("Could not unload %s.\n", dictionary);
        return 1;
    }

    // calculate time to unload dictionary
    time_unload = calculate(&before, &after);

    // report benchmarks
    printf("\nWORDS MISSPELLED:     %d\n", misspellings);
    printf("WORDS IN DICTIONARY:  %d\n", n);
    printf("WORDS IN TEXT:        %d\n", words);
    printf("TIME IN load:         %.2f\n", time_load);
    printf("TIME IN check:        %.2f\n", time_check);
    printf("TIME IN size:         %.2f\n", time_size);
    printf("TIME IN unload:       %.2f\n", time_unload);
    printf("TIME IN TOTAL:        %.2f\n\n",
     time_load + time_check + time_size + time_unload);

    // that's all folks
    return 0;
}

/**
 * Returns number of seconds between b and a.
 */
double calculate(const struct rusage *b, const struct rusage *a)
{
    if (b == NULL || a == NULL)
    {
        return 0.0;
    }
    else
    {
        return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
                 (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
                ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
                 (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
                / 1000000.0);
    }
}
	/****************************************************************************
	* dictionary.c
	*
	* Computer Science 50
	* Problem Set 5
	*
	* Implements a dictionary's functionality.
	***************************************************************************/

	#include <ctype.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	#include "dictionary.h"

	// Create our root node
	node *root;

	// Define a global variable to track the number of words in the dictionary
	unsigned int word_count = 0;

	// Define a global variable to track whether the dictionary has loaded properly
	bool loaded = false;

	/**
	* Returns true if word is in dictionary else false.
	*/
	bool check(const char* word)
	{
	// Set current_node to root
	node *current_node = root;

	// For each character in the input word
	for (int i = 0; word[i] != '\0'; i++)
	{
	// Find the index of the current character
	int index = arrayIndex(word[i]);

	// If the character at the current node is NULL, the world is misspelled
	if (current_node->children[index] == NULL)
	{
	return false;
	}

	// If not NULL, move to the next character
	current_node = current_node->children[index];
	}

	// At the end of the word, check if is_word is true
	return current_node->is_word;
	}

	/**
	* Loads dictionary into memory. Returns true if successful else false.
	*/
	bool load(const char* dictionary)
	{
	// Read the dictionary
	FILE *fp = fopen(dictionary, "r");
	if (fp == NULL)
	{
	printf("Could not open %s.\n", dictionary);
	unload();
	return false;
	}

	// Create space for root
	root = malloc(sizeof(node));
	// Check to make sure the pointer to the node does not return NULL
	if (root == NULL)
	{
	unload();
	return false;
	}

	// Set current_node to root
	node *current_node = root;

	for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
	{
	// Find the index of the current character
	int index = arrayIndex(c);

	// Check whether it is a new line
	if(c != '\n')
	{
	// Check whether a node already exists for the character
	if(current_node->children[index] == NULL)
	{
	// Create a new node
	current_node->children[index] = malloc(sizeof(node));
	// Check to make sure the pointer to the node does not return NULL
	if (current_node->children[index] == NULL)
	{
	unload();
	return false;
	}
	// Move to the next node
	current_node = current_node->children[index];
	}
	else
	{
	// Move to the next node
	current_node = current_node->children[index];
	}
	}
	else
	{
	// Mark as a word
	current_node->is_word = true;

	// Increment the word count
	word_count++;

	// Reset the current_node to root to traverse the trie again
	current_node = root;
	}
	}
	//Close the dictionary
	fclose(fp);

	//The dictionary has load successfully
	loaded = true;

	return true;
	}

	/**
	* Returns number of words in dictionary if loaded else 0 if not yet loaded.
	*/
	unsigned int size(void)
	{
	if (loaded)
	{
	return word_count;
	}
	else
	{
	return 0;
	}
	}

	/**
	* Unloads dictionary from memory. Returns true if successful else false.
	*/
	bool unload(void)
	{
	freeNode(root);
	return true;
	}

	/**
	* Returns the index of the array for each character (ie. a = 0, b = 1...).
	*/
	int arrayIndex(const char c)
	{
	// If the character is an apostrophe, return the index for the last element in the array
	if (c == '\'')
	{
	return 26;
	}
	else
	{
	/* Convert all characters to lower case. The character's index is the remainder of the character's ascii value
	after dividing by the ascii value of "a" (ie. g = 103 % 97 = 6) */
	return tolower(c) % 'a';
	}
	}

	/**
	* Check to see whether node children can be freed.
	*/
	void freeNode(node* current_node)
	{
	// Iterate through the nodes children
	for(int i = 0; i < 27; i ++)
	{
	// If child is a pointer, iterate recursively through it as well
	if(current_node->children[i] != NULL)
	{
	freeNode(current_node->children[i]);
	}
	}

	// If all children are NULL, free the node
	free(current_node);
	}
	/**
	* Declares a dictionary's functionality.
	*/

	#ifndef DICTIONARY_H
	#define DICTIONARY_H

	#include <stdbool.h>

	// maximum length for a word
	// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
	#define LENGTH 45

	// Define our node for the trie
	typedef struct node
	{
	// Boolean value to indicate whether this node is the end of a valid word
	bool is_word;
	// Array of 27 pointers to other nodes (26 letters and an apostrophe)
	struct node *children[27];
	}
	node;

	/**
	* Returns true if word is in dictionary else false.
	*/
	bool check(const char *word);

	/**
	* Loads dictionary into memory. Returns true if successful else false.
	*/
	bool load(const char *dictionary);

	/**
	* Returns number of words in dictionary if loaded else 0 if not yet loaded.
	*/
	unsigned int size(void);

	/**
	* Unloads dictionary from memory. Returns true if successful else false.
	*/
	bool unload(void);

	/**
	* Returns the index of the array for each character (ie. a = 0, b = 1...).
	*/
	int arrayIndex(const char c);

	/**
	* Check to see whether node children can be freed.
	*/
	void freeNode(node *current_node);

	#endif // DICTIONARY_H
	/****************************************************************************
	* speller.c
	*
	* Computer Science 50
	* Problem Set 5
	*
	* Checks a text file against a dictionary for spelling mistakes.
	***************************************************************************/

	#include <ctype.h>
	#include <stdio.h>
	#include <sys/resource.h>
	#include <sys/time.h>

	#include "dictionary.h"
	#undef calculate
	#undef getrusage

	// default dictionary
	#define DICTIONARY "dictionaries/large"

	// prototype
	double calculate(const struct rusage b, const struct rusage a);

	int main(int argc, char *argv[])
	{
	// check for correct number of args
	if (argc != 2 && argc != 3)
	{
	printf("Usage: speller [dictionary] text\n");
	return 1;
	}

	// structs for timing data
	struct rusage before, after;

	// benchmarks
	double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;

	// determine dictionary to use
	char* dictionary = (argc == 3) ? argv[1] : DICTIONARY;

	// load dictionary
	getrusage(RUSAGE_SELF, &before);
	bool loaded = load(dictionary);
	getrusage(RUSAGE_SELF, &after);

	// abort if dictionary not loaded
	if (!loaded)
	{
	printf("Could not load %s.\n", dictionary);
	return 1;
	}

	// calculate time to load dictionary
	time_load = calculate(&before, &after);

	// try to open text
	char *text = (argc == 3) ? argv[2] : argv[1];
	FILE *fp = fopen(text, "r");
	if (fp == NULL)
	{
	printf("Could not open %s.\n", text);
	unload();
	return 1;
	}

	// prepare to report misspellings
	printf("\nMISSPELLED WORDS\n\n");

	// prepare to spell-check
	int index = 0, misspellings = 0, words = 0;
	char word[LENGTH+1];

	// spell-check each word in text
	for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
	{
	// allow only alphabetical characters and apostrophes
	if (isalpha(c) \|\| (c == '\'' && index > 0))
	{
	// append character to word
	word[index] = c;
	index++;

	// ignore alphabetical strings too long to be words
	if (index > LENGTH)
	{
	// consume remainder of alphabetical string
	while ((c = fgetc(fp)) != EOF && isalpha(c));

	// prepare for new word
	index = 0;
	}
	}

	// ignore words with numbers (like MS Word can)
	else if (isdigit(c))
	{
	// consume remainder of alphanumeric string
	while ((c = fgetc(fp)) != EOF && isalnum(c));

	// prepare for new word
	index = 0;
	}

	// we must have found a whole word
	else if (index > 0)
	{
	// terminate current word
	word[index] = '\0';

	// update counter
	words++;

	// check word's spelling
	getrusage(RUSAGE_SELF, &before);
	bool misspelled = !check(word);
	getrusage(RUSAGE_SELF, &after);

	// update benchmark
	time_check += calculate(&before, &after);

	// print word if misspelled
	if (misspelled)
	{
	printf("%s\n", word);
	misspellings++;
	}

	// prepare for next word
	index = 0;
	}
	}

	// check whether there was an error
	if (ferror(fp))
	{
	fclose(fp);
	printf("Error reading %s.\n", text);
	unload();
	return 1;
	}

	// close text
	fclose(fp);

	// determine dictionary's size
	getrusage(RUSAGE_SELF, &before);
	unsigned int n = size();
	getrusage(RUSAGE_SELF, &after);

	// calculate time to determine dictionary's size
	time_size = calculate(&before, &after);

	// unload dictionary
	getrusage(RUSAGE_SELF, &before);
	bool unloaded = unload();
	getrusage(RUSAGE_SELF, &after);

	// abort if dictionary not unloaded
	if (!unloaded)
	{
	printf("Could not unload %s.\n", dictionary);
	return 1;
	}

	// calculate time to unload dictionary
	time_unload = calculate(&before, &after);

	// report benchmarks
	printf("\nWORDS MISSPELLED: %d\n", misspellings);
	printf("WORDS IN DICTIONARY: %d\n", n);
	printf("WORDS IN TEXT: %d\n", words);
	printf("TIME IN load: %.2f\n", time_load);
	printf("TIME IN check: %.2f\n", time_check);
	printf("TIME IN size: %.2f\n", time_size);
	printf("TIME IN unload: %.2f\n", time_unload);
	printf("TIME IN TOTAL: %.2f\n\n",
	time_load + time_check + time_size + time_unload);

	// that's all folks
	return 0;
	}

	/**
	* Returns number of seconds between b and a.
	*/
	double calculate(const struct rusage b, const struct rusage a)
	{
	if (b == NULL \|\| a == NULL)
	{
	return 0.0;
	}
	else
	{
	return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
	(b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
	((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
	(b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
	/ 1000000.0);
	}
	}