Created
June 6, 2015 10:37
-
-
Save vmxdev/6c563ddabcad9a38fe0e to your computer and use it in GitHub Desktop.
calculate entropy of file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Shannon entropy calculation
$ cc -Wall shent.c -o shent -lm
$ ./shent shent.c
*/
#include <stdio.h> | |
#include <stdint.h> | |
#include <math.h> | |
/*
 * Print the Shannon entropy, in bits per byte, of the file named by argv[1].
 *
 * The file is read in fixed-size chunks, a histogram of the 256 possible
 * byte values is built, and the sum of -p * log2(p) over the byte values
 * that occur is written to stdout as a single "%f" line. An empty file
 * yields 0.
 *
 * Returns 0 on success and 1 on a usage error, when the file cannot be
 * opened, or when reading it fails.
 */
int
main(int argc, char *argv[])
{
    uint64_t map[256] = {0};   /* occurrences of each byte value */
    uint64_t total = 0;        /* number of bytes actually read */
    double info = 0.0;
    size_t i;
    FILE *f;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s file\n", argv[0]);
        return 1;
    }

    /* Binary mode: the statistic is over raw bytes, so no newline
       translation or text-mode end-of-file handling may be applied. */
    f = fopen(argv[1], "rb");
    if (!f) {
        fprintf(stderr, "Can't open %s\n", argv[1]);
        return 1;
    }

    for (;;) {
        /* unsigned char keeps every byte in 0..255; a plain (possibly
           signed) char would turn bytes >= 0x80 into out-of-range
           indices. */
        unsigned char buf[1024*8];
        size_t r = fread(buf, 1, sizeof(buf), f);

        if (r == 0) {
            break;
        }
        for (i = 0; i < r; i++) {
            map[buf[i]]++;
        }
        /* Count what was read instead of relying on ftell(), which can
           fail or disagree with the number of bytes delivered. */
        total += r;
    }

    /* fread() returns 0 for both end-of-file and error; tell them apart. */
    if (ferror(f)) {
        fprintf(stderr, "Error reading %s\n", argv[1]);
        fclose(f);
        return 1;
    }
    fclose(f);

    for (i = 0; i < (sizeof(map) / sizeof(map[0])); i++) {
        double freq;

        if (map[i] == 0) {
            continue;   /* p == 0 contributes nothing (and log2(0) is -inf) */
        }
        freq = (double)map[i] / (double)total;
        /* Subtracting (rather than negating the sum at the end) keeps the
           result at +0.0 instead of -0.0 when the entropy is zero. */
        info -= freq * log2(freq);
    }

    printf("%f\n", info);
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment