-
-
Save tqbf/4de61a3e34d2e4664044666c107abe77 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Ask glibc to expose the BSD/POSIX extensions this file relies on
 * (strsep, strdup, fileno, lstat, d_type/DT_*) even when the compiler
 * is run in strict ISO C mode.  Must come before the first #include.
 */
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <limits.h>
#include <ctype.h>
/* Number of buckets in the word hash table. */
#define TSZ 18199

/*
 * One counted word.
 *
 * table[] holds the bucket heads inline; words that land in the same
 * bucket are chained through `next`.  A node whose `word` is NULL holds
 * no word yet.  Being a file-scope object, table[] starts out all-zero.
 */
struct wordcount {
    char *word;             /* the word text, or NULL if the node is empty */
    size_t count;           /* number of occurrences recorded */
    struct wordcount *next; /* next node in the same bucket, or NULL */
} table[TSZ];

/* Running total of distinct words stored in table[] and its chains. */
int table_entries = 0;
static int wcompare(const void *a, const void *b) { | |
const struct wordcount *wa = a, *wb = b; | |
if(wb->count < wa->count) { | |
return -1; | |
} | |
return(wb->count != wa->count); | |
} | |
/*
 * String hash: start from 5381 and fold in each character as
 * h = h * 33 + c (written as a shift and an add).  The arithmetic is
 * unsigned, so overflow wraps instead of being undefined.
 *
 * s: NUL-terminated string; it is only read, never modified.
 * Returns the hash of every character before the terminator
 * (5381 for the empty string).
 */
static unsigned hash(const char *s) {
    unsigned h = 5381;
    int c;

    while ((c = *s++) != '\0') {
        h = ((h << 5) + h) + c;
    }
    return h;
}
static void count(char *word) { | |
struct wordcount *w = &table[hash(word) % TSZ]; | |
while(w->word) { | |
if(!strcmp(word, w->word)) { | |
w->count += 1; | |
return; | |
} | |
if(!w->next) | |
w->next = calloc(1, sizeof(*w)); | |
w = w->next; | |
} | |
table_entries += 1; | |
w->count = 1; | |
w->word = strdup(word); | |
} | |
/*
 * Count the words of one file.
 *
 * The file is opened for reading and skipped unless fstat() reports a
 * regular file.  Each line is split on spaces, tabs and newlines; a token
 * is passed to count() when it is at least three characters long and its
 * first character is an ASCII letter (either case).
 *
 * path: file to read.  Files that cannot be opened are silently ignored.
 */
static void count_file(char *path) {
    FILE *fp = fopen(path, "r");
    if (!fp) {
        return;
    }

    struct stat sb;
    /* S_ISREG compares the whole file-type field; testing a single
     * S_IFREG bit would also accept other types that share that bit. */
    if (fstat(fileno(fp), &sb) < 0 || !S_ISREG(sb.st_mode)) {
        fclose(fp);
        return;
    }

    char buf[1024];
    while (fgets(buf, sizeof buf, fp)) {
        /* XXX known limitation: a line longer than the buffer is read in
         * several pieces, so a word straddling a piece boundary is split
         * in two. */
        char *rest = buf;
        char *tok;

        while ((tok = strsep(&rest, " \t\n")) != NULL) {
            /* Need three non-NUL characters; the checks short-circuit so
             * nothing past the terminator is read. */
            if (!tok[0] || !tok[1] || !tok[2]) {
                continue;
            }
            /* <ctype.h> functions require an unsigned char value. */
            if (strchr("abcdefghijklmnopqrstuvwxyz",
                       tolower((unsigned char)tok[0]))) {
                count(tok);
            }
        }
    }
    fclose(fp);
}
void walk(char *path) { | |
struct dirent ent, *ep = NULL; | |
DIR *d = opendir(path); | |
if(!d) { | |
return; | |
} | |
while(readdir_r(d, &ent, &ep) == 0 && ep != NULL) { | |
if(!strcmp(ep->d_name, ".") || !strcmp(ep->d_name, "..")) { | |
continue; | |
} | |
char pathbuf[PATH_MAX]; | |
snprintf(pathbuf, PATH_MAX, "%s/%s", path, ep->d_name); | |
if(ep->d_type == DT_DIR) { | |
walk(pathbuf); | |
} else if(ep->d_type == DT_REG) { | |
size_t len = strlen(ep->d_name); | |
if(len > 3 && !strcmp(&ep->d_name[len-4], ".txt")) { | |
count_file(pathbuf); | |
} | |
} | |
} | |
closedir(d); | |
} | |
int main(int argc, char **argv) { | |
walk(argv[1] ? argv[1] : "."); | |
size_t cents = 0; | |
struct wordcount *counts = calloc(table_entries, sizeof(struct wordcount)); | |
for(int i = 0; i < TSZ; i++) { | |
struct wordcount *w = &table[i]; | |
while(w && w->word) { | |
counts[cents++] = *w; | |
w = w->next; | |
} | |
} | |
if(cents) { | |
qsort(counts, cents, sizeof(*counts), wcompare); | |
for(int i = 0; i <= 10 && i < cents; i++) | |
printf("%d. %s / %lu\n", i, counts[i].word, counts[i].count); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment