Skip to content

Instantly share code, notes, and snippets.

@ScratchyCode
Last active June 14, 2018 21:48
Show Gist options
  • Save ScratchyCode/0895c83b414e6834499412cfad1dc523 to your computer and use it in GitHub Desktop.
Save ScratchyCode/0895c83b414e6834499412cfad1dc523 to your computer and use it in GitHub Desktop.
Evaluate the chi-square of an observed dataset with respect to an expected dataset.
// Coded by ScratchyCode
// Compile in gcc with option -lm
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
// Size in bytes of the buffers that hold the file names typed by the user
// (this includes the room taken by the newline read by fgets and the
// terminating NUL).
#define LEN 100
// Sum over the first dim elements of (expected[i] - observed[i])^2 / expected[i].
double chiSquare(long long int dim, double expected[], double observed[]);
// Sum of the squares of the first k elements of array.
double distChiSquare(long long int k, double array[]);
// Arithmetic mean of the first N elements of array.
double average(long long int N, double array[]);
// Square root of the summed squared deviations of the first N elements of
// array from the supplied average, divided by N-1.
double sigma(long long int N, double average, double array[]);
// Counts the lines of the named text file; terminates the program if the
// file cannot be opened.
int fileLines(char file[]);
// Print prompt, then read one line from stdin into buf (capacity size bytes)
// and strip its trailing newline, if present.
// Terminates the program when no input can be read at all.
static void readFileName(const char *prompt, char *buf, size_t size){
    printf("%s", prompt);
    if(fgets(buf, (int)size, stdin) == NULL){
        fprintf(stderr, "\nError: unable to read the file name\n");
        exit(1);
    }
    // strcspn locates the newline wherever it is, so a line that ends at EOF
    // or fills the whole buffer does not lose its last real character
    buf[strcspn(buf, "\n")] = '\0';
}

// Load the second column of the first n rows of a two-column text file.
// The first column is parsed but discarded, since only the y values are used
// by the test. Returns a newly allocated array of n doubles that the caller
// must free(). Terminates the program on allocation, open or parse failure.
static double *loadSecondColumn(const char *path, long long int n){
    double *values = calloc((size_t)n, sizeof *values);
    if(values == NULL){
        perror("\nError");
        exit(1);
    }
    FILE *input = fopen(path, "r");
    if(input == NULL){
        perror("\nError");
        exit(1);
    }
    for(long long int i = 0; i < n; i++){
        // "%*lf" consumes the x value without storing it, so a complete row
        // yields exactly one assignment
        if(fscanf(input, "%*lf %lf", &values[i]) != 1){
            fprintf(stderr, "\nError: %s: cannot read data row %lld of %lld\n", path, i + 1, n);
            exit(1);
        }
    }
    fclose(input);
    return values;
}

// Ask for the expected and observed data files (two numeric columns per row),
// then print the chi-square statistic of their second columns.
// Returns 0 on success and 1 when the expected file contains no rows; every
// other failure terminates the program through exit(1).
int main(void){
    char expected[LEN], observed[LEN];

    readFileName("\nEnter the file name (or path) with the expected data: ", expected, sizeof expected);
    readFileName("\nEnter the file name (or path) with the observed data: ", observed, sizeof observed);

    // The row count is taken from the expected file only; the observed file
    // must provide at least as many rows (extra rows are ignored).
    long long int dim = fileLines(expected);
    if(dim <= 5){
        printf("\nUnreliable test. Insufficient variables number.\n");
    }
    if(dim <= 0){
        // nothing to compare, and the result of calloc(0, ...) is
        // implementation-defined, so stop here
        return 1;
    }

    double *expectedY = loadSecondColumn(expected, dim);
    double *observedY = loadSecondColumn(observed, dim);

    printf("Computing...\n");
    double chi = chiSquare(dim, expectedY, observedY);
    printf("Chi square: %lf\n", chi);

    free(expectedY);
    free(observedY);
    return 0;
}
// Pearson chi-square statistic of two equally sized samples.
//
// dim      - number of elements read from each array
// expected - expected values; each one is used as a divisor
// observed - observed values
//
// Returns the sum over i in [0, dim) of
// (expected[i] - observed[i])^2 / expected[i], or 0 when dim <= 0.
// An expected value of 0 makes its term a floating-point division by zero,
// so callers should supply non-zero expectations.
double chiSquare(long long int dim, double expected[], double observed[]){
    double sum = 0;
    for(long long int i = 0; i < dim; i++){
        const double diff = expected[i] - observed[i];
        sum += (diff * diff) / expected[i];
    }
    return sum;
}
double distChiSquare(long long int k, double array[]){
long long int i;
double sumSquare=0;
for(i=0; i<k; i++){
sumSquare += pow(array[i],2);
}
return sumSquare;
}
// Arithmetic mean of the first N elements of array.
//
// N     - number of elements to read; also the divisor
// array - input values
//
// Returns (array[0] + ... + array[N-1]) / N. The division is performed in
// floating point, so N == 0 does not trap but yields a non-finite result.
double average(long long int N, double array[]){
    double total = 0;
    long long int idx = 0;
    while(idx < N){
        total += array[idx];
        idx++;
    }
    return total / N;
}
double sigma(long long int N, double average, double array[]){
long long int i;
double sigma, sum = 0;
for(i=0; i<N; i++){
sum += pow((array[i] - average),2);
}
sigma = sqrt(sum/(N-1));
return sigma;
}
// Count the lines of a text file.
//
// file - name (or path) of the file to open for reading
//
// Returns the number of newline characters in the file, plus one when the
// file ends with a non-empty line that has no trailing newline, so that a
// last data row written without a final newline is still counted.
// An empty file yields 0. Terminates the program if the file cannot be
// opened.
int fileLines(char file[]){
    FILE *input = fopen(file, "r");
    if(input == NULL){
        perror("\nError");
        exit(1);
    }
    int lines = 0;
    // getc returns an int: storing it in a char would either never compare
    // equal to EOF (unsigned char) or mistake the byte 0xFF for EOF (signed char)
    int c;
    // starts as '\n' so that an empty file is not reported as one line
    int last = '\n';
    while((c = getc(input)) != EOF){
        if(c == '\n'){
            lines++;
        }
        last = c;
    }
    if(last != '\n'){
        lines++;
    }
    fclose(input);
    return lines;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment