Skip to content

Instantly share code, notes, and snippets.

@FlorianMerkle
Created August 13, 2018 08:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FlorianMerkle/3648fc0af44f3de24f594d429c949546 to your computer and use it in GitHub Desktop.
Save FlorianMerkle/3648fc0af44f3de24f594d429c949546 to your computer and use it in GitHub Desktop.
Implementation of a spell checker with mmap instead of malloc
// Implements a dictionary's functionality
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include "dictionary.h"
// Definitions
#define hashmod buckets
#define buckets 350000
#define seedx 0
// Global declarations
node *hashtable [buckets] = {NULL};
unsigned int wcounter = 0;
int arraynr = 0;
// Returns true if word is in dictionary else false
bool check(const char *word)
{
char lword[LENGTH + 1];
strncpy(lword, word, strlen(word) + 1);
for (int i = 0; i < strlen(lword); i++) // Convert input to non-const, all-lower case string
{
lword[i] = tolower(lword[i]);
}
arraynr = hash((unsigned const char *) lword, strlen(word), seedx) % buckets; // Assign hash-based index
node *travers = hashtable[arraynr]; // Create travers node
if (travers == NULL)
{
return false;
}
while (travers->next != NULL) // Loop through SLL and compare every element
{
if (strcmp(travers->value, lword) == 0)
{
return true;
}
else
{
travers = travers->next;
}
}
if (strcmp(travers->value, lword) == 0) // Check the last element
{
return true;
}
else
{
return false;
}
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
char input [LENGTH + 1]; // Input buffer
FILE *dictptr = fopen(dictionary, "r");
while (fscanf(dictptr, "%s", input) != EOF) // Scan whole document for strings and write each into SLL
{
wcounter++;
int len = strlen(input);
arraynr = hash((unsigned const char *)input, len, seedx) % buckets; // Assign hash-based index
if (hashtable[arraynr] == NULL) // If it is the first element to be written into this SLL, create SLL
{
//hashtable[arraynr] = malloc(sizeof(node)); // Allocate memory for (struct) node
hashtable[arraynr] = mmap(NULL, sizeof(node), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
/*if (hashtable[arraynr] == NULL) // Check if mem allocation was successfull
{
fprintf(stderr, "could not allocate memory\n");
return false;
}*/
strncpy(hashtable[arraynr]->value, input, len + 1); // Write value into node
hashtable[arraynr]->next = NULL; // Set adress to NULL
}
else // If the SLL already exist, insert a new node
{
//node *newnode = malloc(sizeof(node)); // Allocate memory for buffer (struct) node
node *newnode = mmap(NULL, sizeof(node), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
/*if (newnode == NULL) // Check if mem allocation was successfull
{
fprintf(stderr, "could not allocate memory\n");
return false;
}*/
strncpy(newnode->value, input, len + 1); // Write value to the node
newnode->next = hashtable[arraynr]; // Connect the new node with (in front of) the old head of the list
hashtable[arraynr] = newnode; // Hashtable points now to the new node as the first node of the SLL
}
}
fclose(dictptr);
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
return wcounter;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
/* for (int i = 0; i < buckets; i++)
{
if (hashtable[i] != NULL)
{
delete_sll(hashtable[i]);
}
}*/
return true;
}
// Delete a whole SLL
void delete_sll (node *head)
{
if (head->next == NULL) // If current element is the last element of the SLL
{
free(head); // Free memory
}
else
{
node *travers = head->next; // Else move to next element
delete_sll(travers); // Stop here on this stack and do it over again for the next element
free(head); // After all other elements are freeed, free memory of this element
}
}
// 32-bit Murmur 3 hash algoritm, code from Wikipedia @ https://en.wikipedia.org/wiki/MurmurHash
unsigned int hash(const unsigned char *key, int len, unsigned int seed)
{
uint32_t h = seed;
if (len > 3)
{
const uint32_t *key_x4 = (const uint32_t*) key;
size_t i = len >> 2;
do
{
uint32_t k = *key_x4++;
k *= 0xcc9e2d51;
k = (k << 15) | (k >> 17);
k *= 0x1b873593;
h ^= k;
h = (h << 13) | (h >> 19);
h = (h * 5) + 0xe6546b64;
}
while (--i);
key = (const uint8_t*) key_x4;
}
if (len & 3)
{
size_t i = len & 3;
uint32_t k = 0;
key = &key[i - 1];
do
{
k <<= 8;
k |= *key--;
}
while (--i);
k *= 0xcc9e2d51;
k = (k << 15) | (k >> 17);
k *= 0x1b873593;
h ^= k;
}
h ^= len;
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment