Created
August 6, 2017 16:09
-
-
Save OxiBo/91abfc3d9ff7c8fcfa281895111314db to your computer and use it in GitHub Desktop.
CS50 pset5/speller_implemented with hash table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Implements a dictionary's functionality using hashtable | |
*/ | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <strings.h> | |
#include <string.h> | |
#include <ctype.h> | |
#include <stdlib.h> | |
#include "dictionary.h" | |
//declare hashtable (array of pointers) | |
node *hashtable[SIZE]={NULL}; | |
//declare dictionary words counter | |
unsigned int *ptr_counter; | |
unsigned int word_counter; | |
/** | |
*Returnes hash value. | |
* this hash function was found on the internet but there was no author mentioned | |
*/ | |
unsigned int hash_func(const char* word) | |
{ | |
unsigned int hash = 0; | |
for (int i=0; word[i]!= '\0'; i++) | |
{ | |
hash = 31*hash + word[i]; | |
} | |
return hash % SIZE; | |
} | |
/** | |
*Returns a new node | |
*/ | |
struct node *getNewNode(const char* word) | |
{ | |
struct node *newNode=(struct node*)malloc(sizeof(struct node)); | |
if(newNode==NULL) | |
{ | |
printf("Could not access memory\n"); | |
unload(); | |
return false; | |
} | |
newNode->next=NULL; | |
strcpy(newNode->word, word); | |
return newNode; | |
} | |
/** | |
* Returns true if word is in dictionary else false. | |
*/ | |
bool check(const char *word) | |
{ | |
//declares a string to hold a word converted to lowercase for hashing | |
char tword[LENGTH+1]; | |
int n=strlen(word); | |
for(int i=0; i<n+1; i++) | |
{ | |
tword[i]=tolower(word[i]); | |
} | |
//creates a node pointer pointing to the first node in hashtable hash_func-th element | |
node *cursor=hashtable[hash_func(tword)]; | |
//search for a word in a linked list linked to hashtable element | |
while(cursor!=NULL) | |
{ | |
if(strcasecmp(cursor->word, word)==0) | |
{ | |
return true; | |
} | |
else | |
{ | |
cursor=cursor->next; | |
} | |
} | |
return false; | |
} | |
/** | |
* Loads dictionary into memory. Returns true if successful else false. | |
*/ | |
bool load(const char *dictionary) | |
{ | |
// try to open dictionary to load | |
FILE *fpd=fopen(dictionary, "r"); | |
if(fpd==NULL) | |
{ | |
printf("Could not open %s\n.", dictionary); | |
return false; | |
} | |
//initialize word counter to 0 | |
word_counter=0; | |
//declare char array (word) to be read from dictionary | |
char dword[47]; | |
//declare variable for hash value | |
unsigned int hash=0; | |
//scan dictionary word by word until the EOF is reached | |
while(fscanf(fpd, "%s", dword)!=EOF) | |
{ | |
hash=hash_func(dword); | |
if(hashtable[hash]==NULL) | |
{ | |
hashtable[hash]=getNewNode(dword); | |
word_counter++; | |
} | |
else | |
{ | |
node *newNode=getNewNode(dword); | |
newNode->next=hashtable[hash]; | |
hashtable[hash]=newNode; | |
word_counter++; | |
} | |
} | |
//close dictionary | |
fclose(fpd); | |
return true; | |
} | |
/** | |
* Returns number of words in dictionary if loaded else 0 if not yet loaded. | |
*/ | |
unsigned int size(void) | |
{ | |
ptr_counter=&word_counter; | |
return word_counter; | |
} | |
/** | |
* Unloads dictionary from memory. Returns true if successful else false. | |
*/ | |
bool unload(void) | |
{ | |
for(int i=0; i<SIZE; i++) | |
{ | |
node *cursor=hashtable[i]; | |
while(cursor!=NULL) | |
{ | |
node *temp=cursor; | |
cursor=cursor->next; | |
free(temp); | |
} | |
} | |
return true; | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Declares a dictionary's functionality. | |
*/ | |
#ifndef DICTIONARY_H | |
#define DICTIONARY_H | |
#include <stdbool.h> | |
// maximum length for a word | |
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis) | |
#define LENGTH 45 | |
//maximum elements of hash table | |
#define SIZE 1000 | |
//declare node struct | |
typedef struct node | |
{ | |
char word[LENGTH+1]; | |
struct node *next; | |
} | |
node; | |
/** | |
* Returns true if word is in dictionary else false. | |
*/ | |
bool check(const char *word); | |
/** | |
* Loads dictionary into memory. Returns true if successful else false. | |
*/ | |
bool load(const char *dictionary); | |
/** | |
* Returns number of words in dictionary if loaded else 0 if not yet loaded. | |
*/ | |
unsigned int size(void); | |
/** | |
* Unloads dictionary from memory. Returns true if successful else false. | |
*/ | |
bool unload(void); | |
#endif // DICTIONARY_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# compiler to use | |
CC = clang | |
# flags to pass compiler | |
CFLAGS = -ggdb3 -O0 -Qunused-arguments -std=c11 -Wall -Werror | |
# name for executable | |
EXE = speller | |
# space-separated list of header files | |
HDRS = dictionary.h | |
# space-separated list of libraries, if any, | |
# each of which should be prefixed with -l | |
LIBS = | |
# space-separated list of source files | |
SRCS = speller.c dictionary.c | |
# automatically generated list of object files | |
OBJS = $(SRCS:.c=.o) | |
# default target | |
$(EXE): $(OBJS) $(HDRS) Makefile | |
$(CC) $(CFLAGS) -o $@ $(OBJS) $(LIBS) | |
# dependencies | |
$(OBJS): $(HDRS) Makefile | |
# housekeeping | |
clean: | |
rm -f core $(EXE) *.o |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
0. What is pneumonoultramicroscopicsilicovolcanoconiosis? | |
The maximum length of a word is 45 characters, the longest word in English language. "Pneumonoultramicroscopicsilicovolcanoconiosis" | |
is the example of a long word in the program in dictionary.h. | |
1. According to its man page, what does getrusage do? | |
Getrusage finds(get) resource usage. Namely RUSAGE_SELF (which is used by slepper.c) returns resource usage statistics for the | |
calling process, which is the sum of resources used by all threads in the process. The resource usages are returned in the structure | |
pointed to by usage. In our program getrusage returns usages in structs rusage before, rusage after. | |
2. Per that same man page, how many members are in a variable of type struct rusage? | |
16 members. | |
3. Why do you think we pass before and after by reference (instead of by value) to calculate, even though we’re not changing their | |
contents? | |
In before and after we pass by reference "user CPU time used" which is the total amount of time spent executing in user mode, | |
expressed in struct rusage members: ru_utime.tv_sec and ru_utime.tv_usec) and "system CPU time used" which is the total amount | |
of time spent executing in kernel mode, expressed in struct rusage members: ru_stime.tv_sec and ru_stime.tv_usec). We don't change | |
values of before and after because we need to calculate the difference between after and before to find the total resouce usage | |
(time usage). Function calculate() always has access to the latest variable data which cannot be modified outside of the function. | |
4. Explain as precisely as possible, in a paragraph or more, how main goes about reading words from a file. In other words, | |
convince us that you indeed understand how that function’s for loop works. | |
For loop will be executed untill fgetc reaches EOF. Function fgetc reads a character from the given file (text). If the character | |
is alphabetical/apostroph the program appends such character/apostroph in an array called "word" untill fgetc reaches '\0' (end of | |
a word). When the whole word is found the program terminates current word appending '\0' to the array "word" and sets index of | |
the array to 0, which prerares for looking for a new word. Also the program updates word counter once the whole word is found. | |
if, while reading, fgetc finds alphanumeric string or string longer then allowed LENGTH (45 characters) the program consumes | |
(fgetc reads until reaches "\0") remainder of such string and sets index to 0 to skip the string. | |
5. Why do you think we used fgetc to read each word’s characters one at a time rather than use fscanf with a format string | |
like "%s" to read whole words at a time? Put another way, what problems might arise by relying on fscanf alone? | |
Fscanf reads the whole string regardless if it is numeric, alphabetic, alphanumeric or a word longer then 45 characters and not | |
exist at all, so the program would have to waste resources (namely time) checking for misspelling such a string. | |
Using fgetc lets the program find a word and check spelling it without wasting time. | |
6. Why do you think we declared the parameters for check and load as const (which means "constant")? | |
Functions bool and check are passed pointers to const chars dictionary and word to make sure words in dictionary as well as words | |
to spellcheck won't be modified while executing the program. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Implements a spell-checker. | |
*/ | |
#include <ctype.h> | |
#include <stdio.h> | |
#include <sys/resource.h> | |
#include <sys/time.h> | |
#include "dictionary.h" | |
#undef calculate | |
#undef getrusage | |
// default dictionary | |
#define DICTIONARY "dictionaries/large" | |
// prototype | |
double calculate(const struct rusage *b, const struct rusage *a); | |
int main(int argc, char *argv[]) | |
{ | |
// check for correct number of args | |
if (argc != 2 && argc != 3) | |
{ | |
printf("Usage: speller [dictionary] text\n"); | |
return 1; | |
} | |
// structs for timing data | |
struct rusage before, after; | |
// benchmarks | |
double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0; | |
// determine dictionary to use | |
char* dictionary = (argc == 3) ? argv[1] : DICTIONARY; | |
// load dictionary | |
getrusage(RUSAGE_SELF, &before); | |
bool loaded = load(dictionary); | |
getrusage(RUSAGE_SELF, &after); | |
// abort if dictionary not loaded | |
if (!loaded) | |
{ | |
printf("Could not load %s.\n", dictionary); | |
return 1; | |
} | |
// calculate time to load dictionary | |
time_load = calculate(&before, &after); | |
// try to open text | |
char *text = (argc == 3) ? argv[2] : argv[1]; | |
FILE *fp = fopen(text, "r"); | |
if (fp == NULL) | |
{ | |
printf("Could not open %s.\n", text); | |
unload(); | |
return 1; | |
} | |
// prepare to report misspellings | |
printf("\nMISSPELLED WORDS\n\n"); | |
// prepare to spell-check | |
int index = 0, misspellings = 0, words = 0; | |
char word[LENGTH+1]; | |
// spell-check each word in text | |
for (int c = fgetc(fp); c != EOF; c = fgetc(fp)) | |
{ | |
// allow only alphabetical characters and apostrophes | |
if (isalpha(c) || (c == '\'' && index > 0)) | |
{ | |
// append character to word | |
word[index] = c; | |
index++; | |
// ignore alphabetical strings too long to be words | |
if (index > LENGTH) | |
{ | |
// consume remainder of alphabetical string | |
while ((c = fgetc(fp)) != EOF && isalpha(c)); | |
// prepare for new word | |
index = 0; | |
} | |
} | |
// ignore words with numbers (like MS Word can) | |
else if (isdigit(c)) | |
{ | |
// consume remainder of alphanumeric string | |
while ((c = fgetc(fp)) != EOF && isalnum(c)); | |
// prepare for new word | |
index = 0; | |
} | |
// we must have found a whole word | |
else if (index > 0) | |
{ | |
// terminate current word | |
word[index] = '\0'; | |
// update counter | |
words++; | |
// check word's spelling | |
getrusage(RUSAGE_SELF, &before); | |
bool misspelled = !check(word); | |
getrusage(RUSAGE_SELF, &after); | |
// update benchmark | |
time_check += calculate(&before, &after); | |
// print word if misspelled | |
if (misspelled) | |
{ | |
printf("%s\n", word); | |
misspellings++; | |
} | |
// prepare for next word | |
index = 0; | |
} | |
} | |
// check whether there was an error | |
if (ferror(fp)) | |
{ | |
fclose(fp); | |
printf("Error reading %s.\n", text); | |
unload(); | |
return 1; | |
} | |
// close text | |
fclose(fp); | |
// determine dictionary's size | |
getrusage(RUSAGE_SELF, &before); | |
unsigned int n = size(); | |
getrusage(RUSAGE_SELF, &after); | |
// calculate time to determine dictionary's size | |
time_size = calculate(&before, &after); | |
// unload dictionary | |
getrusage(RUSAGE_SELF, &before); | |
bool unloaded = unload(); | |
getrusage(RUSAGE_SELF, &after); | |
// abort if dictionary not unloaded | |
if (!unloaded) | |
{ | |
printf("Could not unload %s.\n", dictionary); | |
return 1; | |
} | |
// calculate time to unload dictionary | |
time_unload = calculate(&before, &after); | |
// report benchmarks | |
printf("\nWORDS MISSPELLED: %d\n", misspellings); | |
printf("WORDS IN DICTIONARY: %d\n", n); | |
printf("WORDS IN TEXT: %d\n", words); | |
printf("TIME IN load: %.2f\n", time_load); | |
printf("TIME IN check: %.2f\n", time_check); | |
printf("TIME IN size: %.2f\n", time_size); | |
printf("TIME IN unload: %.2f\n", time_unload); | |
printf("TIME IN TOTAL: %.2f\n\n", | |
time_load + time_check + time_size + time_unload); | |
// that's all folks | |
return 0; | |
} | |
/** | |
* Returns number of seconds between b and a. | |
*/ | |
double calculate(const struct rusage *b, const struct rusage *a) | |
{ | |
if (b == NULL || a == NULL) | |
{ | |
return 0.0; | |
} | |
else | |
{ | |
return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) - | |
(b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) + | |
((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) - | |
(b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec))) | |
/ 1000000.0); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment