Skip to content

Instantly share code, notes, and snippets.

/dictionary.c Secret

Created August 24, 2016 13:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/be20f7ccb77e911ec40a1e4d7aa24109 to your computer and use it in GitHub Desktop.
Save anonymous/be20f7ccb77e911ec40a1e4d7aa24109 to your computer and use it in GitHub Desktop.
dictionary.c - shared from CS50 IDE
/**
* dictionary.c
*
* Computer Science 50
* Problem Set 5
*
* Implements a dictionary's functionality.
*/
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
#define HASH_MAP_BIN_SIZE 143091
unsigned sax_hash(void *key, int len);
typedef struct node
{
long val;
char node_word[LENGTH + 1];
struct node* next;
}
node;
//define variable that tracks the number of words in the dictionary hashmap
int outof_hashmap = 0;
int clear_tracker = 0;
node hash_map[HASH_MAP_BIN_SIZE];
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
// TODO
//if any letter isupper(), need to convert to lower to compare.
char buffer[LENGTH + 1];
int je = 0;
while (true)
{
if (word[je] == '\0')
{
buffer[je] = '\0';
break;
}
else
{
buffer[je] = tolower(word[je]);
je++;
}
}
unsigned long bin_to_search = sax_hash(buffer, strlen(buffer)) ;
for(int yi = 0 ; yi < 143091 ; yi++)
{
if (hash_map[yi].val == bin_to_search && strcmp(buffer, hash_map[yi].node_word) == 0 )
{
return true;
}
}
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
//NOTE, /dictionaries/large is 143,091 words....sqrt of this ~378. optimal bins ~378 with 378 items per bin?
//open pointer to dictionary
FILE* dict_ptr = fopen(dictionary, "r");
//initialize hashmap with all pointers to NULL
for(int i = 0 ; i < HASH_MAP_BIN_SIZE ; i++)
{
hash_map[i].next = NULL;
}
int index2 = 0; //index2 counts letters in word
char dict_word[LENGTH + 1]; //buffer storage of a dictionary word
int into_hashmap = 0; //tells the number of words that have gone thru and are ready to be added to dictionary to search.
// stack up words char by char to go into the hashmap
for (int ltr = fgetc(dict_ptr); ltr != EOF; ltr = fgetc(dict_ptr))
{
// allow only alphabetical characters and apostrophes
if (isalpha(ltr) || (ltr == '\'' && index2 > 0))
{
// append character to word
dict_word[index2] = ltr;
index2++;
// ignore alphabetical strings too long to be words
if (index2 > LENGTH)
{
// consume remainder of alphabetical string
while ((ltr = fgetc(dict_ptr)) != EOF && isalpha(ltr));
// prepare for new word
index2 = 0;
}
}
// we must have found a whole word
else if (index2 > 0)
{
dict_word[index2] = '\0'; //terminate current word
into_hashmap++;
//add word to the dictionary hashmap
//copy dict_word to new word node
strcpy(hash_map[into_hashmap - 1].node_word , dict_word);
hash_map[into_hashmap - 1].val= sax_hash(dict_word , strlen(dict_word));
index2 = 0;
outof_hashmap++;
//printf("%ld\n",hash_map[into_hashmap - 1].val);
}
}
fclose(dict_ptr);
//check that all words passed successfully to report true
if (into_hashmap == outof_hashmap)
return true;
else
return false;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
if(outof_hashmap != 0)
return outof_hashmap;
else
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
//nothing to unload, no heap allocation used
return true;
}
//http://www.eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
//Shift-Add-XOR Hash
unsigned sax_hash(void *key, int len)
{
unsigned char *p = key;
unsigned h = 0;
int i;
for (i = 0; i < len; i++)
{
h ^= (h << 5) + (h >> 2) + p[i];
}
return h;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment