Skip to content

Instantly share code, notes, and snippets.

@saagarjha
Created October 18, 2020 21:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save saagarjha/00faa1963023206a8ccd9877983b4546 to your computer and use it in GitHub Desktop.
Save saagarjha/00faa1963023206a8ccd9877983b4546 to your computer and use it in GitHub Desktop.
#define _XOPEN_SOURCE 700
#include <ctype.h>
#include <ftw.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// djb2 because it's short
unsigned int hash(char *string) {
unsigned int hash = 5381;
for (hash = 0; *string; hash = 33 * hash ^ *string++)
;
return hash;
}
#define SKETCHY_LOAD 5
struct entry {
char *key;
int count;
};
struct table {
struct entry **buckets;
int size;
};
void table_set(struct table *table) {
for (int i = 0; i < table->size; ++i) {
int size = SKETCHY_LOAD + 1;
table->buckets[i] = malloc(sizeof(struct entry) * size);
for (int j = 0; j < size; ++j) {
table->buckets[i][j] = (struct entry){0};
}
}
}
void table_init(struct table *table) {
table->buckets = malloc(sizeof(struct entry *));
table->size = 1;
table_set(table);
}
void table_free(struct table *table) {
for (int i = 0; i < table->size; ++i) {
for (struct entry *entry = table->buckets[i]; entry->key; ++entry) {
free(entry->key);
}
free(table->buckets[i]);
}
free(table->buckets);
}
struct entry *table_find_in_bucket(struct entry *bucket, char *word) {
while (bucket->key && strcmp(bucket->key, word) && ++bucket)
;
return bucket;
}
struct entry *table_register(struct table *table, char *word);
void table_expand(struct table *table) {
struct table old = *table;
table->size *= 2;
table->buckets = malloc(sizeof(struct entry *) * ++table->size);
table_set(table);
for (int i = 0; i < old.size; ++i) {
for (struct entry *entry = old.buckets[i]; entry->key; ++entry) {
table_register(table, entry->key)->count = entry->count;
free(entry->key);
}
free(old.buckets[i]);
}
free(old.buckets);
}
struct entry *table_register(struct table *table, char *word) {
struct entry *bucket = table->buckets[hash(word) % table->size];
struct entry *entry = table_find_in_bucket(bucket, word);
if (!entry->key) {
if (entry - bucket >= SKETCHY_LOAD) {
table_expand(table);
return table_register(table, word);
} else {
entry->key = strdup(word);
}
}
return entry;
}
struct table table;
int process(const char *path, const struct stat *stat, int flag, struct FTW *ftw) {
{
size_t length = strlen(path);
if (length < 4 || strcmp(path + length - 4, ".txt")) {
return 0;
}
}
FILE *file = fopen(path, "r");
char *word = NULL;
int length = 0;
int capacity = 0;
int c;
while ((c = getc(file)) != EOF) {
if (length + 1 >= capacity) {
word = realloc(word, capacity = capacity * 2 + 2);
}
if (isalpha(c)) {
word[length++] = c;
word[length] = '\0';
} else {
if (length >= 2) {
++table_register(&table, word)->count;
}
length = 0;
*word = '\0';
}
}
fclose(file);
return 0;
}
int compare(const void *lhs, const void *rhs) {
return (*(struct entry **)rhs)->count - (*(struct entry **)lhs)->count;
}
int main() {
table_init(&table);
nftw(".", process, 10, 0);
struct entry **entries = NULL;
int count = 0;
int capacity = 0;
for (int i = 0; i < table.size; ++i) {
for (struct entry *entry = table.buckets[i]; entry->key; ++entry) {
if (capacity <= count) {
entries = realloc(entries, sizeof(struct entry *) * (capacity = capacity * 2 + 1));
}
entries[count++] = entry;
}
}
qsort(entries, count, sizeof(struct entry *), compare);
for (int i = 0; i < 10; ++i) {
printf("%d %s\n", entries[i]->count, entries[i]->key);
}
free(entries);
table_free(&table);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment