Skip to content

Instantly share code, notes, and snippets.

@kirugan
Created March 3, 2020 08:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kirugan/405a7d908e6b097ffc0962aec37ef3bc to your computer and use it in GitHub Desktop.
Save kirugan/405a7d908e6b097ffc0962aec37ef3bc to your computer and use it in GitHub Desktop.
Fast frequency dictionary
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
/*
 * One dictionary entry pairing a word with a counter.
 * NOTE(review): nothing in the visible code uses this type yet (only a
 * commented-out allocation refers to it), so the field meanings below are
 * inferred from their names -- confirm once the dictionary is implemented.
 */
struct freq {
    int freq;   /* presumably the number of occurrences of `word` */
    char *word; /* presumably points at a NUL-terminated word */
};
typedef struct freq freq;
/*
 * Returns true when `c` is an upper-case ASCII letter ('A'..'Z').
 *
 * Lower-case letters are deliberately NOT accepted: the caller upper-cases
 * each byte before testing it.
 *
 * Declared `static inline` rather than plain `inline`: in C99/C11 a plain
 * `inline` definition does not provide an external definition, so any call
 * the compiler chooses not to inline (e.g. at -O0) fails at link time.
 */
static inline bool is_letter(char c) {
    return 'A' <= c && c <= 'Z';
}
/*
 * Maps the file named by argv[1] into memory (private, writable copy),
 * upper-cases every ASCII lower-case letter in place and splits the text
 * into words by overwriting the first non-letter byte after each word
 * with '\0'. Finally prints the address one past the last processed byte.
 *
 * The mapping is MAP_PRIVATE, so the file on disk is never modified.
 *
 * Returns 0 on success (including an empty input file) and 1 on a usage
 * error or when the file cannot be opened, inspected or mapped.
 */
int main(int argc, char** argv) {
    if (argc < 2) {
        fprintf(stderr, "Wrong usage");
        return 1;
    }

    int fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    struct stat st;
    if (fstat(fd, &st) != 0) {
        perror("fstat");
        close(fd);
        return 1;
    }

    /* mmap() rejects a zero-length mapping; an empty file has no words. */
    if (st.st_size <= 0) {
        close(fd);
        return 0;
    }
    size_t length = (size_t)st.st_size;

    char *first_addr = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (first_addr == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return 1;
    }
    /* The mapping stays valid after the descriptor is closed. */
    close(fd);

    /* One past the last mapped byte of file data; never dereferenced. */
    char *last_addr = first_addr + length;

    /* Purely advisory hint, so a failure here is intentionally ignored. */
    (void)madvise(first_addr, length, MADV_SEQUENTIAL);

    //void* words = malloc(sizeof(freq) * 1000);
    char *addr = first_addr;
    char *word_ptr = NULL; /* start of the word being scanned, or NULL */
    for (; addr < last_addr; ++addr) {
        char c = *addr;

        /* Fold only real lower-case letters; leave every other byte alone. */
        if ('a' <= c && c <= 'z') {
            c = (char)(c - ('a' - 'A'));
            *addr = c;
        }

        if (is_letter(c)) {
            if (word_ptr == NULL) {
                word_ptr = addr;
            }
        } else if (word_ptr != NULL) {
            /* First non-letter after a word: terminate the word in place. */
            *addr = '\0';
            word_ptr = NULL;
        }
    }
    /*
     * NOTE: a word that runs up to the very last byte of the file is not
     * explicitly NUL-terminated by the loop above; any future consumer of
     * word_ptr must bound such a word by last_addr.
     */

    printf("last addr: %p\n", (void *)addr);

    munmap(first_addr, length);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment