-
-
Save anonymous/be20f7ccb77e911ec40a1e4d7aa24109 to your computer and use it in GitHub Desktop.
dictionary.c - shared from CS50 IDE
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* dictionary.c | |
* | |
* Computer Science 50 | |
* Problem Set 5 | |
* | |
* Implements a dictionary's functionality. | |
*/ | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <ctype.h> | |
#include <string.h> | |
#include "dictionary.h" | |
#define HASH_MAP_BIN_SIZE 143091 | |
unsigned sax_hash(void *key, int len); | |
typedef struct node | |
{ | |
long val; | |
char node_word[LENGTH + 1]; | |
struct node* next; | |
} | |
node; | |
//define variable that tracks the number of words in the dictionary hashmap | |
int outof_hashmap = 0; | |
int clear_tracker = 0; | |
node hash_map[HASH_MAP_BIN_SIZE]; | |
/** | |
* Returns true if word is in dictionary else false. | |
*/ | |
bool check(const char* word) | |
{ | |
// TODO | |
//if any letter isupper(), need to convert to lower to compare. | |
char buffer[LENGTH + 1]; | |
int je = 0; | |
while (true) | |
{ | |
if (word[je] == '\0') | |
{ | |
buffer[je] = '\0'; | |
break; | |
} | |
else | |
{ | |
buffer[je] = tolower(word[je]); | |
je++; | |
} | |
} | |
unsigned long bin_to_search = sax_hash(buffer, strlen(buffer)) ; | |
for(int yi = 0 ; yi < 143091 ; yi++) | |
{ | |
if (hash_map[yi].val == bin_to_search && strcmp(buffer, hash_map[yi].node_word) == 0 ) | |
{ | |
return true; | |
} | |
} | |
return false; | |
} | |
/** | |
* Loads dictionary into memory. Returns true if successful else false. | |
*/ | |
bool load(const char* dictionary) | |
{ | |
//NOTE, /dictionaries/large is 143,091 words....sqrt of this ~378. optimal bins ~378 with 378 items per bin? | |
//open pointer to dictionary | |
FILE* dict_ptr = fopen(dictionary, "r"); | |
//initialize hashmap with all pointers to NULL | |
for(int i = 0 ; i < HASH_MAP_BIN_SIZE ; i++) | |
{ | |
hash_map[i].next = NULL; | |
} | |
int index2 = 0; //index2 counts letters in word | |
char dict_word[LENGTH + 1]; //buffer storage of a dictionary word | |
int into_hashmap = 0; //tells the number of words that have gone thru and are ready to be added to dictionary to search. | |
// stack up words char by char to go into the hashmap | |
for (int ltr = fgetc(dict_ptr); ltr != EOF; ltr = fgetc(dict_ptr)) | |
{ | |
// allow only alphabetical characters and apostrophes | |
if (isalpha(ltr) || (ltr == '\'' && index2 > 0)) | |
{ | |
// append character to word | |
dict_word[index2] = ltr; | |
index2++; | |
// ignore alphabetical strings too long to be words | |
if (index2 > LENGTH) | |
{ | |
// consume remainder of alphabetical string | |
while ((ltr = fgetc(dict_ptr)) != EOF && isalpha(ltr)); | |
// prepare for new word | |
index2 = 0; | |
} | |
} | |
// we must have found a whole word | |
else if (index2 > 0) | |
{ | |
dict_word[index2] = '\0'; //terminate current word | |
into_hashmap++; | |
//add word to the dictionary hashmap | |
//copy dict_word to new word node | |
strcpy(hash_map[into_hashmap - 1].node_word , dict_word); | |
hash_map[into_hashmap - 1].val= sax_hash(dict_word , strlen(dict_word)); | |
index2 = 0; | |
outof_hashmap++; | |
//printf("%ld\n",hash_map[into_hashmap - 1].val); | |
} | |
} | |
fclose(dict_ptr); | |
//check that all words passed successfully to report true | |
if (into_hashmap == outof_hashmap) | |
return true; | |
else | |
return false; | |
} | |
/** | |
* Returns number of words in dictionary if loaded else 0 if not yet loaded. | |
*/ | |
unsigned int size(void) | |
{ | |
if(outof_hashmap != 0) | |
return outof_hashmap; | |
else | |
return 0; | |
} | |
/** | |
* Unloads dictionary from memory. Returns true if successful else false. | |
*/ | |
bool unload(void) | |
{ | |
//nothing to unload, no heap allocation used | |
return true; | |
} | |
//http://www.eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx | |
//Shift-Add-XOR Hash | |
unsigned sax_hash(void *key, int len) | |
{ | |
unsigned char *p = key; | |
unsigned h = 0; | |
int i; | |
for (i = 0; i < len; i++) | |
{ | |
h ^= (h << 5) + (h >> 2) + p[i]; | |
} | |
return h; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment