Skip to content

Instantly share code, notes, and snippets.

@tqbf
Last active October 18, 2020 23:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tqbf/4de61a3e34d2e4664044666c107abe77 to your computer and use it in GitHub Desktop.
Save tqbf/4de61a3e34d2e4664044666c107abe77 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <limits.h>
#include <ctype.h>
/* Number of buckets in the word hash table. */
#define TSZ 18199

/*
 * One word together with the number of times it has been seen.  Entries
 * that land in the same bucket are chained through `next`.
 */
struct wordcount {
    char *word;
    size_t count;
    struct wordcount *next;
};

/* The bucket array.  It has static storage, so every slot starts zeroed. */
struct wordcount table[TSZ];

/* Running total of distinct words stored in `table`. */
int table_entries = 0;
/*
 * qsort() comparator for struct wordcount that orders entries by descending
 * count: returns -1 when `a` has the larger count, 1 when `b` has the larger
 * count, and 0 when the counts are equal.
 */
static int wcompare(const void *a, const void *b) {
    const struct wordcount *lhs = a;
    const struct wordcount *rhs = b;

    if (lhs->count > rhs->count) {
        return -1;
    }
    return lhs->count < rhs->count ? 1 : 0;
}
/*
 * Hash a NUL-terminated string with the djb2 recurrence: start from 5381
 * and, for each character, multiply the running value by 33 and add the
 * character.  The arithmetic is unsigned, so overflow simply wraps.
 */
static unsigned hash(char *s) {
    unsigned acc = 5381;

    for (const char *p = s; *p != '\0'; p++) {
        acc = acc * 33u + (unsigned)*p;
    }
    return acc;
}
/*
 * Record one occurrence of `word` in the global hash table.
 *
 * The bucket is selected with hash(word) % TSZ and its chain is searched for
 * an exact (case-sensitive, strcmp) match.  On a match the entry's count is
 * incremented.  Otherwise the word is copied with strdup() into the first
 * unused node -- appending a fresh node to the chain if every existing node
 * is occupied -- and table_entries is incremented.
 *
 * Allocation failure is fatal: a diagnostic is printed and the process
 * exits, instead of dereferencing NULL or leaving table_entries out of step
 * with the entries actually stored.
 */
static void count(char *word) {
    struct wordcount *w = &table[hash(word) % TSZ];

    while (w->word) {
        if (strcmp(word, w->word) == 0) {
            w->count += 1;
            return;
        }
        if (!w->next) {
            w->next = calloc(1, sizeof *w->next);
            if (!w->next) {
                perror("calloc");
                exit(EXIT_FAILURE);
            }
        }
        w = w->next;
    }

    /* w is an unused node here: an empty bucket head or a fresh chain tail.
     * Copy first and commit only on success so the table stays consistent. */
    char *copy = strdup(word);
    if (!copy) {
        perror("strdup");
        exit(EXIT_FAILURE);
    }
    w->word = copy;
    w->count = 1;
    table_entries += 1;
}
/*
 * Count the words of one file into the global table.
 *
 * `path` is opened for reading; a path that cannot be opened, or that is not
 * a regular file, is skipped silently.  Every line is split on spaces, tabs
 * and newlines, and a token is handed to count() when it is at least three
 * characters long and its first character is a letter a-z or A-Z.
 */
static void count_file(char *path) {
    FILE *fp = fopen(path, "r");
    if (!fp) {
        return;
    }

    struct stat sb;
    /* S_ISREG compares the whole file-type field; masking with S_IFREG
     * alone also accepts other file types that happen to share that bit. */
    if (fstat(fileno(fp), &sb) < 0 || !S_ISREG(sb.st_mode)) {
        fclose(fp);
        return;
    }

    /* getline() grows its buffer to fit the line, so a word can no longer
     * be cut in two at a fixed-size read boundary. */
    char *line = NULL;
    size_t cap = 0;
    while (getline(&line, &cap, fp) != -1) {
        char *rest = line;
        char *tok;
        while ((tok = strsep(&rest, " \t\n")) != NULL) {
            if (!tok[0] || !tok[1] || !tok[2]) {
                continue; /* shorter than three characters */
            }
            /* Cast before calling into <ctype.h>: passing a negative char
             * value to tolower() is undefined behaviour. */
            if (strchr("abcdefghijklmnopqrstuvwxyz",
                       tolower((unsigned char)tok[0]))) {
                count(tok);
            }
        }
    }
    free(line);
    fclose(fp);
}
void walk(char *path) {
struct dirent ent, *ep = NULL;
DIR *d = opendir(path);
if(!d) {
return;
}
while(readdir_r(d, &ent, &ep) == 0 && ep != NULL) {
if(!strcmp(ep->d_name, ".") || !strcmp(ep->d_name, "..")) {
continue;
}
char pathbuf[PATH_MAX];
snprintf(pathbuf, PATH_MAX, "%s/%s", path, ep->d_name);
if(ep->d_type == DT_DIR) {
walk(pathbuf);
} else if(ep->d_type == DT_REG) {
size_t len = strlen(ep->d_name);
if(len > 3 && !strcmp(&ep->d_name[len-4], ".txt")) {
count_file(pathbuf);
}
}
}
closedir(d);
}
/*
 * Count the words in every ".txt" file under argv[1] (or under the current
 * directory when no argument is given) and print the most frequent ones,
 * one per line, formatted as "<rank>. <word> / <count>".
 *
 * Returns 0 on success and EXIT_FAILURE if the result array cannot be
 * allocated.
 */
int main(int argc, char **argv) {
    /* Decide from argc: argv[1] is only guaranteed to be readable when
     * argc is at least 1. */
    walk(argc > 1 ? argv[1] : ".");

    if (table_entries <= 0) {
        return 0; /* nothing was counted, nothing to print */
    }

    /* Flatten the hash table into one contiguous array so it can be sorted. */
    struct wordcount *counts = calloc((size_t)table_entries, sizeof *counts);
    if (!counts) {
        perror("calloc");
        return EXIT_FAILURE;
    }

    size_t cents = 0;
    for (size_t i = 0; i < TSZ; i++) {
        for (struct wordcount *w = &table[i]; w && w->word; w = w->next) {
            counts[cents++] = *w;
        }
    }

    qsort(counts, cents, sizeof *counts, wcompare);

    /* NOTE(review): ranks run 0..10, so up to eleven lines are printed.
     * Kept as-is -- confirm whether a top-10 list was intended. */
    for (size_t i = 0; i <= 10 && i < cents; i++) {
        /* %zu is the matching conversion for size_t. */
        printf("%zu. %s / %zu\n", i, counts[i].word, counts[i].count);
    }

    free(counts);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment