Created
October 18, 2020 21:49
-
-
Save saagarjha/00faa1963023206a8ccd9877983b4546 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _XOPEN_SOURCE 700 | |
#include <ctype.h> | |
#include <ftw.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
// djb2 because it's short | |
unsigned int hash(char *string) { | |
unsigned int hash = 5381; | |
for (hash = 0; *string; hash = 33 * hash ^ *string++) | |
; | |
return hash; | |
} | |
#define SKETCHY_LOAD 5 | |
struct entry { | |
char *key; | |
int count; | |
}; | |
struct table { | |
struct entry **buckets; | |
int size; | |
}; | |
void table_set(struct table *table) { | |
for (int i = 0; i < table->size; ++i) { | |
int size = SKETCHY_LOAD + 1; | |
table->buckets[i] = malloc(sizeof(struct entry) * size); | |
for (int j = 0; j < size; ++j) { | |
table->buckets[i][j] = (struct entry){0}; | |
} | |
} | |
} | |
void table_init(struct table *table) { | |
table->buckets = malloc(sizeof(struct entry *)); | |
table->size = 1; | |
table_set(table); | |
} | |
void table_free(struct table *table) { | |
for (int i = 0; i < table->size; ++i) { | |
for (struct entry *entry = table->buckets[i]; entry->key; ++entry) { | |
free(entry->key); | |
} | |
free(table->buckets[i]); | |
} | |
free(table->buckets); | |
} | |
struct entry *table_find_in_bucket(struct entry *bucket, char *word) { | |
while (bucket->key && strcmp(bucket->key, word) && ++bucket) | |
; | |
return bucket; | |
} | |
struct entry *table_register(struct table *table, char *word); | |
void table_expand(struct table *table) { | |
struct table old = *table; | |
table->size *= 2; | |
table->buckets = malloc(sizeof(struct entry *) * ++table->size); | |
table_set(table); | |
for (int i = 0; i < old.size; ++i) { | |
for (struct entry *entry = old.buckets[i]; entry->key; ++entry) { | |
table_register(table, entry->key)->count = entry->count; | |
free(entry->key); | |
} | |
free(old.buckets[i]); | |
} | |
free(old.buckets); | |
} | |
struct entry *table_register(struct table *table, char *word) { | |
struct entry *bucket = table->buckets[hash(word) % table->size]; | |
struct entry *entry = table_find_in_bucket(bucket, word); | |
if (!entry->key) { | |
if (entry - bucket >= SKETCHY_LOAD) { | |
table_expand(table); | |
return table_register(table, word); | |
} else { | |
entry->key = strdup(word); | |
} | |
} | |
return entry; | |
} | |
struct table table; | |
int process(const char *path, const struct stat *stat, int flag, struct FTW *ftw) { | |
{ | |
size_t length = strlen(path); | |
if (length < 4 || strcmp(path + length - 4, ".txt")) { | |
return 0; | |
} | |
} | |
FILE *file = fopen(path, "r"); | |
char *word = NULL; | |
int length = 0; | |
int capacity = 0; | |
int c; | |
while ((c = getc(file)) != EOF) { | |
if (length + 1 >= capacity) { | |
word = realloc(word, capacity = capacity * 2 + 2); | |
} | |
if (isalpha(c)) { | |
word[length++] = c; | |
word[length] = '\0'; | |
} else { | |
if (length >= 2) { | |
++table_register(&table, word)->count; | |
} | |
length = 0; | |
*word = '\0'; | |
} | |
} | |
fclose(file); | |
return 0; | |
} | |
int compare(const void *lhs, const void *rhs) { | |
return (*(struct entry **)rhs)->count - (*(struct entry **)lhs)->count; | |
} | |
int main() { | |
table_init(&table); | |
nftw(".", process, 10, 0); | |
struct entry **entries = NULL; | |
int count = 0; | |
int capacity = 0; | |
for (int i = 0; i < table.size; ++i) { | |
for (struct entry *entry = table.buckets[i]; entry->key; ++entry) { | |
if (capacity <= count) { | |
entries = realloc(entries, sizeof(struct entry *) * (capacity = capacity * 2 + 1)); | |
} | |
entries[count++] = entry; | |
} | |
} | |
qsort(entries, count, sizeof(struct entry *), compare); | |
for (int i = 0; i < 10; ++i) { | |
printf("%d %s\n", entries[i]->count, entries[i]->key); | |
} | |
free(entries); | |
table_free(&table); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment