Skip to content

Instantly share code, notes, and snippets.

@mikdiet
Created November 28, 2015 20:17
Show Gist options
  • Save mikdiet/242d6c05205a734de8f5 to your computer and use it in GitHub Desktop.
Save mikdiet/242d6c05205a734de8f5 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <math.h>
// File-scope state shared between analize() and the qsort comparator
// (a qsort callback receives no user context, hence globals):
//   stats[b]   - number of occurrences of byte value b in the current file
//   symbols[i] - byte values 0..255, reordered by qsort according to stats[]
int symbols[256], stats[256];
// Analyzes the file ./input/<input_name> and writes the result
// to ./output/<output_name>
void analize(char *input_name, char *output_name);
// Converts an input filename to the output filename, stored in result
void convert_filename(char* name, char* result);
// qsort comparator: orders byte values by descending count in stats[]
int compare_frequency(const void * a, const void * b);
// Main program
// Main program: analyzes every non-hidden entry of the ./input directory.
// Returns 0 on success, EXIT_FAILURE if ./input cannot be opened.
int main() {
    DIR *dp = opendir("./input");
    if (dp == NULL) {
        perror("./input");
        return EXIT_FAILURE;
    }

    struct dirent *ep;
    while ((ep = readdir(dp)) != NULL) {
        // skips ".", ".." and hidden files
        if (ep->d_name[0] == '.') {
            continue;
        }

        char input[100], output[100];
        // d_name can be longer than our buffer: skip such entries
        // instead of overflowing the stack
        int len = snprintf(input, sizeof input, "%s", ep->d_name);
        if (len < 0 || (size_t)len >= sizeof input) {
            fprintf(stderr, "skipping %s: file name too long\n", ep->d_name);
            continue;
        }

        // analyzes each file in input directory
        convert_filename(input, output);
        analize(input, output);
    }

    closedir(dp);
    return 0;
}
// Analyzes ./input/<input_name> and writes a report to ./output/<output_name>.
// The report contains, in order:
//   - the input name
//   - the total number of bytes read, followed by a blank line
//   - one "<byte value>\t<probability>" line per byte value that occurs,
//     in the order produced by sorting with compare_frequency
//   - a blank line and the entropy in bits per byte (0 for an empty file)
// If a path is too long or a file cannot be opened, a message is printed to
// stderr and the function returns without analyzing anything.
void analize(char *input_name, char *output_name) {
    enum { PATH_CAP = 512 };
    char input_path[PATH_CAP], output_path[PATH_CAP];
    int i, c, count = 0, written;
    double entropy = 0;

    // builds both paths with bounds checking
    written = snprintf(input_path, sizeof input_path, "./input/%s", input_name);
    if (written < 0 || (size_t)written >= sizeof input_path) {
        fprintf(stderr, "input path too long: %s\n", input_name);
        return;
    }
    written = snprintf(output_path, sizeof output_path, "./output/%s", output_name);
    if (written < 0 || (size_t)written >= sizeof output_path) {
        fprintf(stderr, "output path too long: %s\n", output_name);
        return;
    }

    // opens input file for binary reading first, so that no output file is
    // created for an unreadable input
    FILE *input = fopen(input_path, "rb");
    if (input == NULL) {
        perror(input_path);
        return;
    }
    FILE *output = fopen(output_path, "w");
    if (output == NULL) {
        perror(output_path);
        fclose(input);
        return;
    }

    // resets the shared tables left over from the previous file
    for (i = 0; i < 256; i++) {
        stats[i] = 0;
        symbols[i] = i;
    }

    // collects data from input file; fgetc yields 0..255 or EOF
    while ((c = fgetc(input)) != EOF) {
        stats[c]++;
        count++;
    }
    if (ferror(input)) {
        fprintf(stderr, "read error on %s, report may be incomplete\n", input_path);
    }
    fclose(input);

    // sorts symbols by frequency
    qsort(symbols, 256, sizeof(int), compare_frequency);

    // writes results into output file
    fprintf(output, "%s\n", input_name);
    fprintf(output, "%i\n\n", count);
    for (i = 0; i < 256; i++) {
        int occurrences = stats[symbols[i]];
        // ignore absent symbols; testing the integer count (not the ratio)
        // also avoids dividing 0 by 0 when the file is empty
        if (occurrences == 0) {
            continue;
        }
        double p = (double) occurrences / count;
        entropy -= p * log2(p);
        fprintf(output, "%i\t%e\n", symbols[i], p);
    }
    fprintf(output, "\n%e\n", entropy);

    // fclose flushes buffered data, so a failure here means a lost report
    if (fclose(output) != 0) {
        perror(output_path);
    }
}
// Builds the output filename for an input filename.
// The name is split at its first decimal digit and at the first '.' after it:
//   <prefix><variant>.<extension>  ->  <extension><variant>.tab
// e.g. "file12.txt" -> "txt12.tab". The prefix may be empty.
// Names that do not fit this pattern (no digit, or no ".<extension>" after
// the digits) fall back to "<name>.tab".
//   name   - NUL-terminated input filename, only read
//   result - receives the NUL-terminated output name; at most 100 bytes
//            (terminator included) are written, longer results are truncated
void convert_filename(char* name, char* result) {
    enum { FIELD_CAP = 100 };
    char variant[FIELD_CAP], extension[FIELD_CAP];

    // length of the prefix: everything before the first digit 0-9
    size_t prefix_len = strcspn(name, "0123456789");

    // field widths keep sscanf inside the 100-byte buffers; both fields must
    // be converted, otherwise the arrays would be used uninitialized
    if (sscanf(name + prefix_len, "%99[^.].%99s", variant, extension) == 2) {
        snprintf(result, FIELD_CAP, "%s%s.tab", extension, variant);
    } else {
        // precision leaves room for ".tab" and the terminator
        snprintf(result, FIELD_CAP, "%.94s.tab", name);
    }
}
// qsort comparator over byte values stored as int.
// Returns the count of *b minus the count of *a (both looked up in stats[]),
// so the more frequent byte value sorts first.
int compare_frequency(const void * a, const void * b) {
    const int *left = a;
    const int *right = b;
    int left_count = stats[*left];
    int right_count = stats[*right];
    return right_count - left_count;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment