-
-
Save mikdiet/242d6c05205a734de8f5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/types.h> | |
#include <dirent.h> | |
#include <math.h> | |
// File-scope tables. They are global because the qsort() comparator
// (compare_frequency) receives no user context and has to look counts up
// directly.
//   symbols[] - byte values 0..255; analize() reorders it by frequency.
//   stats[]   - stats[b] is the number of occurrences of byte value b.
int symbols[256];
int stats[256];

// Analyzes ./input/<input_name> and writes the report to ./output/<output_name>.
void analize(char *input_name, char *output_name);

// Derives the output file name from an input file name and stores it in result.
void convert_filename(char *name, char *result);

// qsort() comparator over byte values, ordering them by their count in stats[].
int compare_frequency(const void *a, const void *b);
// Main program.
// Walks ./input and, for every entry whose name does not start with '.',
// derives an output name with convert_filename() and runs analize() on it.
// Returns 0 on success, EXIT_FAILURE if ./input cannot be opened.
int main() {
    DIR *dp = opendir("./input");
    if (dp == NULL) {
        perror("./input");
        return EXIT_FAILURE;
    }

    struct dirent *ep;
    while ((ep = readdir(dp)) != NULL) {
        // Skips ".", ".." and any other dot-prefixed entry.
        if (ep->d_name[0] == '.') {
            continue;
        }

        // The derived name can be a few characters longer than the input
        // name (a ".tab" suffix is appended), so give it some headroom.
        char input[100], output[sizeof input + 4];

        // d_name may be longer than our buffer; copy with a bound and skip
        // the entry instead of overflowing.
        int len = snprintf(input, sizeof input, "%s", ep->d_name);
        if (len < 0 || (size_t)len >= sizeof input) {
            fprintf(stderr, "skipping %s: file name too long\n", ep->d_name);
            continue;
        }

        // Pre-clear the output name so that a conversion which writes
        // nothing is detected rather than passing uninitialized bytes on.
        output[0] = '\0';
        convert_filename(input, output);
        if (output[0] == '\0') {
            fprintf(stderr, "skipping %s: cannot derive output name\n", input);
            continue;
        }

        analize(input, output);
    }

    closedir(dp);
    return 0;
}
void analize(char *input_name, char *output_name) { | |
// Initializing | |
FILE *input, *output; | |
int i, count = 0, c; | |
double entropy = 0; | |
for(i = 0; i < 256; i++) { | |
stats[i] = 0; | |
symbols[i] = i; | |
} | |
char input_path[100], output_path[100]; | |
strcpy(input_path, "./input/"); | |
strcat(input_path, input_name); | |
strcpy(output_path, "./output/"); | |
strcat(output_path, output_name); | |
// opens input file for binary reading, and output for writing | |
input = fopen(input_path, "rb"); | |
output = fopen(output_path, "w"); | |
if(input == NULL) printf("!!!!\n"); | |
// collects data from input file | |
while((c = fgetc(input)) != EOF) { | |
stats[c]++; | |
count++; | |
} | |
// sorts symbols by frequency | |
qsort(symbols, 256, sizeof(int), compare_frequency); | |
// writes results into output file | |
fprintf(output, "%s\n", input_name); | |
fprintf(output, "%i\n\n", count); | |
for(i = 0; i < 256; i++) { | |
double p = (double) stats[symbols[i]] / count; | |
// ignore absent symbols | |
if(p != 0) { | |
entropy -= p * log2(p); | |
fprintf(output, "%i\t%e\n", symbols[i], p); | |
} | |
} | |
fprintf(output, "\n%e\n", entropy); | |
// closes files | |
fclose(input); | |
fclose(output); | |
} | |
// Converts an input file name to an output file name.
//
// The name is read as <prefix><variant>.<extension>, where <variant> starts
// at the first decimal digit and runs up to the first '.' after it, and
// <extension> is the run of non-whitespace characters after that '.'.
// The result is "<extension><variant>.tab", e.g. "var12.dat" -> "dat12.tab".
//
// name:   NUL-terminated input file name (not modified).
// result: receives the converted name; must have room for at least
//         strlen(name) + 4 bytes. It is set to the empty string when name
//         has no digit or no '.' after the variant.
void convert_filename(char* name, char* result) {
    char variant[100], extension[100];

    result[0] = '\0';

    // Skip everything before the first digit. Unlike a leading "%*[^0-9]"
    // scanset this also accepts names that begin with a digit.
    const char *tail = name + strcspn(name, "0123456789");

    // Field widths keep both conversions inside their buffers.
    if (sscanf(tail, "%99[^.].%99s", variant, extension) != 2) {
        return;
    }
    sprintf(result, "%s%s.tab", extension, variant);
}
int compare_frequency(const void * a, const void * b) { | |
return ( stats[*(int*)b] - stats[*(int*)a] ); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment