Skip to content

Instantly share code, notes, and snippets.

@akay-10
Created September 30, 2022 08:06
Show Gist options
  • Save akay-10/1c0c26f98447180e690fbd0be8f545b8 to your computer and use it in GitHub Desktop.
Save akay-10/1c0c26f98447180e690fbd0be8f545b8 to your computer and use it in GitHub Desktop.
// task3.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <stdio.h>
#include <cstring>
#include <limits>
#include <cfloat>
#include <stdlib.h>
#include <math.h>
#include <ctype.h>
#include <conio.h>
// Samples per analysis frame (20 millisecs * 16000 samples/sec = 320 samples)
#define N 320
// Number of past samples (prediction order); rows of R, A and C hold P+1 entries
#define P 12
// +-2 frames around the highest-energy frame, hence 5 frames in the steady part
#define STEADY_FRAMES 5
// Number of training utterances per vowel
#define UTT 10
// Number of vowels
#define NVOW 5
// pi. The previous value 3.142857142857 was the 22/7 approximation, which is
// off by about 1.3e-3 and skews both the Hamming and the raised-sine windows.
#define PI 3.14159265358979323846
// Target peak amplitude: samples are scaled into the range [-5000, 5000]
const long double NORM = 5000.0;
// Largest absolute sample value of the file being processed
long double MAX;
// By def. = NORM / MAX
long double NORMALISATION_FACTOR;
// Mean sample value of the current file (sum of samples / number of samples)
long double dcShift;
// Index of the next free slot in combinedCi[]
int files = 0;
// Vowel labels, in the order used for file names and for tokhuraDist[]
const char VOWELS[NVOW] = {'a', 'e', 'i', 'o', 'u'};
// Per-coefficient weights of the Tokhura distance (one per c[1]..c[P])
const long double tokhuraWeights[]={1.0, 3.0, 7.0, 13.0, 19.0, 22.0, 25.0, 33.0, 42.0, 50.0, 56.0, 61.0};
// Number of samples stored in x[]
long long sz;
// Number of frames stored in energy[]
long long EnergySz;
// First sample (inclusive) and last sample (exclusive) of the steady part in x[]
long long steadyStart, steadyEnd;
// Accuracy counters: over all test files / over the vowel currently tested
int totalCorrect = 0, individualCorrect= 0;
// Normalised samples of the current file
long double x[100000];
// Short-term energy of every frame of the current file
long double energy[100000];
// Samples of the steady part only, one row per frame
long double steadyFrames[STEADY_FRAMES][N];
// Tokhura distance of the current test file to each vowel, indexed like VOWELS
long double tokhuraDist[NVOW];
// Per steady frame: autocorrelation values (R), LPC coefficients (A) and
// cepstral coefficients (C). Index 0..P; A and the stored C values use 1..P.
long double R[STEADY_FRAMES][P+1];
long double A[STEADY_FRAMES][P+1];
long double C[STEADY_FRAMES][P+1];
// Averaged training Cis: STEADY_FRAMES consecutive rows per vowel
long double avgCi[STEADY_FRAMES*NVOW][P+1];
// Not referenced by the functions in this file section
long double Ci[STEADY_FRAMES][P+1];
// Reference Cis read back from a gen/ref_ci_<vowel>.txt file
long double restoreCi[STEADY_FRAMES][P+1];
// Cis of every processed file: [file][frame][coefficient]
long double combinedCi[NVOW*UTT][STEADY_FRAMES][P+1];
// Storing the avg ci values to file
void calculateAvgCis(){
FILE *filePtr;
char fileName[] = "gen/ref_ci_$.txt";
int index = 0;
for(int ithVowel=0; ithVowel<5; ithVowel++){
printf("ref_ci_%c.txt generated \n", VOWELS[ithVowel]);
fileName[11] = VOWELS[ithVowel];
filePtr = fopen(fileName, "w");
for(int ithFrame=0; ithFrame<STEADY_FRAMES; ithFrame++){
for(int p=0; p<P; p++){
long double sum = 0;
for(int file=ithVowel*10; file<(ithVowel+1)*10; file++){
sum += combinedCi[file][ithFrame][p+1];
}
sum /= 10.0;
avgCi[index][p+1] = sum;
fprintf(filePtr, "%lf ", sum);
}
index++;
fprintf(filePtr, "\n");
}
fclose(filePtr);
}
}
// Function for calulating the cepstral coeff Ci's
void calculateCis(){
long double sum=0;
for(int ithFrame = 0; ithFrame<STEADY_FRAMES; ithFrame++){
C[ithFrame][0]=log(R[ithFrame][0]*R[ithFrame][0]);
for(int m=1;m<=P;m++){
sum=0;
for(int k=1;k<m;k++){
sum += (k*C[ithFrame][k]*A[ithFrame][m-k])/(m*1.0);
}
C[ithFrame][m] = A[ithFrame][m]+sum;
}
}
// Applying raised sine window
sum=0;
for(int ithFrame = 0; ithFrame<STEADY_FRAMES; ithFrame++){
for(int m=1;m<=P;m++){
sum = (P/2)*sin((PI*m)/P);
C[ithFrame][m] *= sum;
}
}
for(int ithFrame=0; ithFrame<STEADY_FRAMES; ithFrame++){
for(int i=0;i<P;i++){
combinedCi[files][ithFrame][i+1]=C[ithFrame][i+1];
}
}
files++;
}
// Executing Durbins Algorithm and finding the value of ais
void execDurbinAlgo(){
long double alpha[13][13];
long double E[13];
long double K[13];
long double sum=0;
for(int ithFrame = 0; ithFrame<STEADY_FRAMES; ithFrame++){
E[0] = R[ithFrame][0];
for(int i=1;i<=P;i++){
sum=0;
for(int j=1;j<=i-1;j++){
sum += alpha[i-1][j]*R[ithFrame][i-j];
}
K[i] = R[ithFrame][i]-sum;
K[i] /= E[i-1];
alpha[i][i]=K[i];
for(int j=1;j<=i-1;j++){
alpha[i][j] = alpha[i-1][j] - K[i]*alpha[i-1][i-j];
}
E[i]=(1-(K[i]*K[i]))*E[i-1];
}
for(int i=1;i<=P;i++){
A[ithFrame][i] = alpha[P][i];
}
}
//finding cepstral constants
calculateCis();
}
// Computes the mean sample value of the given file and stores it in the global
// dcShift. fileName is the path of a text file of whitespace-separated numbers.
// dcShift is left at 0 when the file cannot be opened or holds no samples.
void handleDCShift(char *fileName){
    long int sampleCnt = 0;
    long double total = 0;
    long double amp;
    dcShift = 0;
    FILE *filePtr = fopen(fileName, "r");
    if(filePtr == NULL){
        perror(fileName);
        return;
    }
    // Loop on the conversion count instead of feof(): this stops on an empty
    // file, at end of data and on a malformed token alike, and never adds a
    // value that was not actually read.
    while(fscanf(filePtr, "%Lf", &amp) == 1){
        total += amp;
        sampleCnt++;
    }
    fclose(filePtr);
    if(sampleCnt > 0){
        dcShift = total / sampleCnt;
    }
}
//marking the stable frames using STE
void storeSteadyFrames(){
long long ithSample = 0, maxAt = 0, n = 0;
long double E = 0, maxE = 0;
EnergySz = 0;
for(;ithSample < sz;ithSample++, n++){
if(n == N){
E /= N;
if(maxE < E){
maxE = E;
maxAt = EnergySz;
}
energy[EnergySz++] = E;
E = 0, n = 0;
}
E += x[ithSample] * x[ithSample];
}
steadyStart = (maxAt-2)*N;
if(maxAt <= 2) steadyStart = 0;
steadyEnd = (maxAt+3)*N;
if(maxAt >= EnergySz-3) steadyEnd = EnergySz*N;
int ithFrame = 0;
for(int i = steadyStart, j=0; i<steadyEnd; i++){
steadyFrames[ithFrame][j++] = x[i];
if(j == N) ithFrame++, j=0;
}
}
// Extracts the features of one recording. fileName is the path of a text file
// of whitespace-separated sample values. Steps:
//   1. find the peak absolute amplitude (MAX) and derive NORMALISATION_FACTOR
//   2. compute the DC shift via handleDCShift()
//   3. store the DC-corrected, normalised samples in x[] (count in sz)
//   4. select the steady frames, apply a Hamming window, compute R[frame][0..P]
//   5. run execDurbinAlgo(), which also computes and stores the Cis
// Terminates the program if the file cannot be opened.
void init(char *fileName){
    FILE *filePtr = fopen(fileName, "r");
    if(filePtr == NULL){
        perror(fileName);
        exit(EXIT_FAILURE);
    }
    long double amp;
    // Pass 1: peak absolute amplitude. Reading stops on the first failed
    // conversion, so end of file and malformed input both end the loop.
    MAX = 0;
    while(fscanf(filePtr, "%Lf", &amp) == 1){
        long double magnitude = (amp < 0) ? -amp : amp;
        if(MAX < magnitude)
            MAX = magnitude;
    }
    // An empty or all-zero file has no peak to scale by
    NORMALISATION_FACTOR = (MAX > 0) ? NORM/MAX : 1.0L;
    handleDCShift(fileName);
    // Pass 2 reuses the same handle; it used to be reopened, which leaked the
    // first handle on every call
    rewind(filePtr);
    sz = 0;
    const long long capacity = (long long)(sizeof x / sizeof x[0]);
    // Reading the values from the input file, normalizing them and storing in x[]
    while(sz < capacity && fscanf(filePtr, "%Lf", &amp) == 1){
        x[sz++] = floor((amp-dcShift)*NORMALISATION_FACTOR);
    }
    fclose(filePtr);
    storeSteadyFrames();
    // Applying hamming window: the weight depends on the position j inside the
    // frame (the sample value was used as the cosine argument before)
    for(int i=0; i<STEADY_FRAMES; ++i){
        for(int j=0; j<N; ++j){
            steadyFrames[i][j] *= (0.54-0.46*cos((2*PI*j)/(N-1)));
        }
    }
    // Calculating Ris: R[m] = sum_k s[k]*s[k+m]
    for(int ithFrame = 0; ithFrame < STEADY_FRAMES; ithFrame++){
        for(int m = 0; m <= P; m++){
            long double acc = 0;
            for(int k=0; k<N-m; k++){
                acc += steadyFrames[ithFrame][k]*steadyFrames[ithFrame][k+m];
            }
            R[ithFrame][m] = acc;
        }
    }
    // Calling execDurbinAlgo to find the ai values (and, from them, the Cis)
    execDurbinAlgo();
}
// Training driver: runs init() on utterances 0..9 of each of the five vowels
// (files "<vowel>/190101002_<vowel>_<digit>.txt") and then writes the averaged
// reference coefficients with calculateAvgCis().
void train(){
    printf("Training of the data ...\n\n");
    for(int v = 0; v < 5; v++){
        const char vowel = VOWELS[v];
        for(int utt = 0; utt < 10; utt++){
            // Placeholders: directory at [0], vowel at [12], utterance digit at [14]
            char path[] = "$/190101002_$_$.txt";
            path[0] = vowel;
            path[12] = vowel;
            path[14] = (char)('0' + utt);
            init(path);
        }
    }
    calculateAvgCis();
}
// Reads the reference Cis of one vowel (STEADY_FRAMES lines of P values) from
// filePtr into restoreCi[] and returns the Tokhura distance between them and
// the Cis of the current file in C[]: the weighted squared differences are
// averaged over the P coefficients and then over the steady frames.
// Returns DBL_MAX when filePtr is NULL or the file holds fewer values than
// expected, so that an unusable reference never wins the comparison.
// The stream is not closed here.
double tokhuraDistance(FILE *filePtr){
    if(filePtr == NULL){
        return DBL_MAX;
    }
    for(int ithFrame = 0; ithFrame<STEADY_FRAMES; ithFrame++){
        for(int i=1;i<=P;i++){
            // restoreCi holds long double, so scanning straight into it with
            // "%lf" is a type mismatch; read a double and convert. "%lf" skips
            // the separating blanks and newlines by itself.
            double value;
            if(fscanf(filePtr, "%lf", &value) != 1){
                return DBL_MAX;
            }
            restoreCi[ithFrame][i] = value;
        }
    }
    double finalDist = 0;
    for(int i=0; i<STEADY_FRAMES; i++){
        double dist = 0;
        for(int p=1; p<=P; p++){
            long double diff = C[i][p] - restoreCi[i][p];
            dist += tokhuraWeights[p-1]*diff*diff;
        }
        finalDist += dist/(P*1.0);
    }
    return finalDist/(STEADY_FRAMES*1.0);
}
//function to calculate the distance and making prediction
char calculateTokhura(){
char fileName[] = "gen/ref_ci_$.txt";
FILE *filePtr;
long double minDist = DBL_MAX;
char predictedVowel;
for(int i=0; i<5; i++){
fileName[11] = VOWELS[i];
filePtr = fopen(fileName, "r");
long double distance = tokhuraDistance(filePtr);
tokhuraDist[i] = distance;
if(minDist > distance){
minDist = distance;
predictedVowel = VOWELS[i];
}
}
return predictedVowel;
}
// Testing driver: runs init() and calculateTokhura() on utterances 10..19 of
// each of the five vowels (files "<vowel>/190101002_<vowel>_1<digit>.txt"),
// prints every prediction, the accuracy per vowel and the overall accuracy.
// Resets the global `files` before the first file is processed.
void test(){
    files = 0;
    printf("\n\nTesting phase started\n");
    for(int v = 0; v < 5; v++){
        const char actual = VOWELS[v];
        individualCorrect = 0;
        for(int k = 0; k < 10; k++){
            // Placeholders: directory at [0], vowel at [12], last digit at [15]
            char fileName[] = "$/190101002_$_1$.txt";
            fileName[0] = actual;
            fileName[12] = actual;
            fileName[15] = (char)('0' + k);
            init(fileName);
            const char pred = calculateTokhura();
            printf("\nVowel %c is predicted as %c\n", fileName[0], pred);
            // The individual distances are available in tokhuraDist[0..4] if
            // they need to be printed for debugging.
            if(pred == actual){
                totalCorrect++;
                individualCorrect++;
            }
        }
        printf("======================= Accuracy of %c is : %.2lf %% \n\n", actual, (individualCorrect/10.0)*100);
    }
    printf("======================= Overall Accuracy is : %.2lf %% \n", (totalCorrect/50.0)*100);
}
// Program entry point: trains on the recorded samples (which generates the
// reference files in ./gen/), tests the remaining samples, then waits for a
// key press before returning 0.
int _tmain(int argc, _TCHAR* argv[]){
    // Command-line arguments are not used
    (void)argc;
    (void)argv;

    train();
    test();

    // Wait for a key press
    getch();
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment