Skip to content

Instantly share code, notes, and snippets.

@OxiBo
Created August 6, 2017 16:09
Show Gist options
  • Save OxiBo/91abfc3d9ff7c8fcfa281895111314db to your computer and use it in GitHub Desktop.
Save OxiBo/91abfc3d9ff7c8fcfa281895111314db to your computer and use it in GitHub Desktop.
CS50 pset5/speller_implemented with hash table
/**
* Implements a dictionary's functionality using hashtable
*/
#include <stdbool.h>
#include <stdio.h>
#include <strings.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include "dictionary.h"
//declare hashtable (array of pointers)
node *hashtable[SIZE]={NULL};
//declare dictionary words counter
unsigned int *ptr_counter;
unsigned int word_counter;
/**
*Returnes hash value.
* this hash function was found on the internet but there was no author mentioned
*/
unsigned int hash_func(const char* word)
{
unsigned int hash = 0;
for (int i=0; word[i]!= '\0'; i++)
{
hash = 31*hash + word[i];
}
return hash % SIZE;
}
/**
*Returns a new node
*/
struct node *getNewNode(const char* word)
{
struct node *newNode=(struct node*)malloc(sizeof(struct node));
if(newNode==NULL)
{
printf("Could not access memory\n");
unload();
return false;
}
newNode->next=NULL;
strcpy(newNode->word, word);
return newNode;
}
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
//declares a string to hold a word converted to lowercase for hashing
char tword[LENGTH+1];
int n=strlen(word);
for(int i=0; i<n+1; i++)
{
tword[i]=tolower(word[i]);
}
//creates a node pointer pointing to the first node in hashtable hash_func-th element
node *cursor=hashtable[hash_func(tword)];
//search for a word in a linked list linked to hashtable element
while(cursor!=NULL)
{
if(strcasecmp(cursor->word, word)==0)
{
return true;
}
else
{
cursor=cursor->next;
}
}
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
// try to open dictionary to load
FILE *fpd=fopen(dictionary, "r");
if(fpd==NULL)
{
printf("Could not open %s\n.", dictionary);
return false;
}
//initialize word counter to 0
word_counter=0;
//declare char array (word) to be read from dictionary
char dword[47];
//declare variable for hash value
unsigned int hash=0;
//scan dictionary word by word until the EOF is reached
while(fscanf(fpd, "%s", dword)!=EOF)
{
hash=hash_func(dword);
if(hashtable[hash]==NULL)
{
hashtable[hash]=getNewNode(dword);
word_counter++;
}
else
{
node *newNode=getNewNode(dword);
newNode->next=hashtable[hash];
hashtable[hash]=newNode;
word_counter++;
}
}
//close dictionary
fclose(fpd);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
ptr_counter=&word_counter;
return word_counter;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
for(int i=0; i<SIZE; i++)
{
node *cursor=hashtable[i];
while(cursor!=NULL)
{
node *temp=cursor;
cursor=cursor->next;
free(temp);
}
}
return true;
}
/**
* Declares a dictionary's functionality.
*/
#ifndef DICTIONARY_H
#define DICTIONARY_H
#include <stdbool.h>
// maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45
//maximum elements of hash table
#define SIZE 1000
//declare node struct
typedef struct node
{
char word[LENGTH+1];
struct node *next;
}
node;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word);
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary);
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void);
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void);
#endif // DICTIONARY_H
# compiler to use
CC = clang
# flags to pass compiler
CFLAGS = -ggdb3 -O0 -Qunused-arguments -std=c11 -Wall -Werror
# name for executable
EXE = speller
# space-separated list of header files
HDRS = dictionary.h
# space-separated list of libraries, if any,
# each of which should be prefixed with -l
LIBS =
# space-separated list of source files
SRCS = speller.c dictionary.c
# automatically generated list of object files
OBJS = $(SRCS:.c=.o)
# default target
$(EXE): $(OBJS) $(HDRS) Makefile
$(CC) $(CFLAGS) -o $@ $(OBJS) $(LIBS)
# dependencies
$(OBJS): $(HDRS) Makefile
# housekeeping
clean:
rm -f core $(EXE) *.o
0. What is pneumonoultramicroscopicsilicovolcanoconiosis?
The maximum length of a word is 45 characters, the longest word in English language. "Pneumonoultramicroscopicsilicovolcanoconiosis"
is the example of a long word in the program in dictionary.h.
1. According to its man page, what does getrusage do?
Getrusage finds(get) resource usage. Namely RUSAGE_SELF (which is used by slepper.c) returns resource usage statistics for the
calling process, which is the sum of resources used by all threads in the process. The resource usages are returned in the structure
pointed to by usage. In our program getrusage returns usages in structs rusage before, rusage after.
2. Per that same man page, how many members are in a variable of type struct rusage?
16 members.
3. Why do you think we pass before and after by reference (instead of by value) to calculate, even though we’re not changing their
contents?
In before and after we pass by reference "user CPU time used" which is the total amount of time spent executing in user mode,
expressed in struct rusage members: ru_utime.tv_sec and ru_utime.tv_usec) and "system CPU time used" which is the total amount
of time spent executing in kernel mode, expressed in struct rusage members: ru_stime.tv_sec and ru_stime.tv_usec). We don't change
values of before and after because we need to calculate the difference between after and before to find the total resouce usage
(time usage). Function calculate() always has access to the latest variable data which cannot be modified outside of the function.
4. Explain as precisely as possible, in a paragraph or more, how main goes about reading words from a file. In other words,
convince us that you indeed understand how that function’s for loop works.
For loop will be executed untill fgetc reaches EOF. Function fgetc reads a character from the given file (text). If the character
is alphabetical/apostroph the program appends such character/apostroph in an array called "word" untill fgetc reaches '\0' (end of
a word). When the whole word is found the program terminates current word appending '\0' to the array "word" and sets index of
the array to 0, which prerares for looking for a new word. Also the program updates word counter once the whole word is found.
if, while reading, fgetc finds alphanumeric string or string longer then allowed LENGTH (45 characters) the program consumes
(fgetc reads until reaches "\0") remainder of such string and sets index to 0 to skip the string.
5. Why do you think we used fgetc to read each word’s characters one at a time rather than use fscanf with a format string
like "%s" to read whole words at a time? Put another way, what problems might arise by relying on fscanf alone?
Fscanf reads the whole string regardless if it is numeric, alphabetic, alphanumeric or a word longer then 45 characters and not
exist at all, so the program would have to waste resources (namely time) checking for misspelling such a string.
Using fgetc lets the program find a word and check spelling it without wasting time.
6. Why do you think we declared the parameters for check and load as const (which means "constant")?
Functions bool and check are passed pointers to const chars dictionary and word to make sure words in dictionary as well as words
to spellcheck won't be modified while executing the program.
/**
* Implements a spell-checker.
*/
#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>
#include "dictionary.h"
#undef calculate
#undef getrusage
// default dictionary
#define DICTIONARY "dictionaries/large"
// prototype
double calculate(const struct rusage *b, const struct rusage *a);
int main(int argc, char *argv[])
{
// check for correct number of args
if (argc != 2 && argc != 3)
{
printf("Usage: speller [dictionary] text\n");
return 1;
}
// structs for timing data
struct rusage before, after;
// benchmarks
double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;
// determine dictionary to use
char* dictionary = (argc == 3) ? argv[1] : DICTIONARY;
// load dictionary
getrusage(RUSAGE_SELF, &before);
bool loaded = load(dictionary);
getrusage(RUSAGE_SELF, &after);
// abort if dictionary not loaded
if (!loaded)
{
printf("Could not load %s.\n", dictionary);
return 1;
}
// calculate time to load dictionary
time_load = calculate(&before, &after);
// try to open text
char *text = (argc == 3) ? argv[2] : argv[1];
FILE *fp = fopen(text, "r");
if (fp == NULL)
{
printf("Could not open %s.\n", text);
unload();
return 1;
}
// prepare to report misspellings
printf("\nMISSPELLED WORDS\n\n");
// prepare to spell-check
int index = 0, misspellings = 0, words = 0;
char word[LENGTH+1];
// spell-check each word in text
for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
{
// allow only alphabetical characters and apostrophes
if (isalpha(c) || (c == '\'' && index > 0))
{
// append character to word
word[index] = c;
index++;
// ignore alphabetical strings too long to be words
if (index > LENGTH)
{
// consume remainder of alphabetical string
while ((c = fgetc(fp)) != EOF && isalpha(c));
// prepare for new word
index = 0;
}
}
// ignore words with numbers (like MS Word can)
else if (isdigit(c))
{
// consume remainder of alphanumeric string
while ((c = fgetc(fp)) != EOF && isalnum(c));
// prepare for new word
index = 0;
}
// we must have found a whole word
else if (index > 0)
{
// terminate current word
word[index] = '\0';
// update counter
words++;
// check word's spelling
getrusage(RUSAGE_SELF, &before);
bool misspelled = !check(word);
getrusage(RUSAGE_SELF, &after);
// update benchmark
time_check += calculate(&before, &after);
// print word if misspelled
if (misspelled)
{
printf("%s\n", word);
misspellings++;
}
// prepare for next word
index = 0;
}
}
// check whether there was an error
if (ferror(fp))
{
fclose(fp);
printf("Error reading %s.\n", text);
unload();
return 1;
}
// close text
fclose(fp);
// determine dictionary's size
getrusage(RUSAGE_SELF, &before);
unsigned int n = size();
getrusage(RUSAGE_SELF, &after);
// calculate time to determine dictionary's size
time_size = calculate(&before, &after);
// unload dictionary
getrusage(RUSAGE_SELF, &before);
bool unloaded = unload();
getrusage(RUSAGE_SELF, &after);
// abort if dictionary not unloaded
if (!unloaded)
{
printf("Could not unload %s.\n", dictionary);
return 1;
}
// calculate time to unload dictionary
time_unload = calculate(&before, &after);
// report benchmarks
printf("\nWORDS MISSPELLED: %d\n", misspellings);
printf("WORDS IN DICTIONARY: %d\n", n);
printf("WORDS IN TEXT: %d\n", words);
printf("TIME IN load: %.2f\n", time_load);
printf("TIME IN check: %.2f\n", time_check);
printf("TIME IN size: %.2f\n", time_size);
printf("TIME IN unload: %.2f\n", time_unload);
printf("TIME IN TOTAL: %.2f\n\n",
time_load + time_check + time_size + time_unload);
// that's all folks
return 0;
}
/**
* Returns number of seconds between b and a.
*/
double calculate(const struct rusage *b, const struct rusage *a)
{
if (b == NULL || a == NULL)
{
return 0.0;
}
else
{
return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
(b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
(b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
/ 1000000.0);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment