Skip to content

Instantly share code, notes, and snippets.

@stormxuwz
Created February 2, 2015 05:17
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save stormxuwz/f90cca402996c492120e to your computer and use it in GitHub Desktop.
Save stormxuwz/f90cca402996c492120e to your computer and use it in GitHub Desktop.
MFCC for signal
from __future__ import division
import numpy as np;
import matplotlib.pyplot as plt
import warnings
# Ref: http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/
# Ref2:http://python-speech-features.readthedocs.org/en/latest/. However, I checked the library code and found there might be a mistake when using rfft
def preemphasis(signal,coeff=0.95):
return numpy.append(signal[0],signal[1:]-coeff*signal[:-1])
def create_frames(sig,rate,frameSize,overlap=0.5):
sig_size=len(sig)
num_of_overlap=int(frameSize*overlap)
frames=[sig[i:i+frameSize] for i in np.arange(0,sig_size-frameSize,frameSize-num_of_overlap)]
return frames; # Frames is a list
def spectrogram(frames,nfft=None,returnType="mfcc"):
if nfft is None:
nfft=len(frames[0])
window=np.hamming(nfft)
energy_for_frames=[]
for sub_sig in frames:
fft_result=np.fft.rfft(sub_sig*window,nfft)
energy_for_frames.append(np.abs(fft_result))
energy_for_frames=np.array(energy_for_frames)
if returnType=="log":
return np.log(energy_for_frames)
elif returnType=="mfcc":
return energy_for_frames**2/nfft
else:
return energy_for_frames
def mel_filterbank(nfft,rate,nbanks=26,lowfreq=0,highfreq=None):
if highfreq is None:
highfreq=rate/2
def hz2mel(f):
return 1125*np.log(1+f/700)
def mel2hz(m):
return 700*(np.exp(m/1125)-1)
highMel=hz2mel(highfreq)
lowMel=hz2mel(lowfreq)
melPoints=np.linspace(lowMel,highMel,num=nbanks+2)
HzPoints=mel2hz(melPoints)
# print HzPoints
# print rate/nfft
fbins=np.floor(nfft*HzPoints/rate)
#floor([HzPoints/(rate/nfft)]+1)
fbanks=np.zeros((nbanks,nfft/2+1))
# print fbins
for j in range(0,nbanks):
# print j
for i in range(int(fbins[j]),int(fbins[j+1])):
fbanks[j,i]=(i-fbins[j])/(fbins[j+1]-fbins[j])
for i in range(int(fbins[j+1]),int(fbins[j+2])):
fbanks[j,i]=(fbins[j+2]-i)/(fbins[j+2]-fbins[j+1])
return fbanks;
def apply_melBanks(spectrogram_array,fbanks):
print "filterbank size",spectrogram_array.shape,fbanks.shape;
return np.dot(spectrogram_array,np.transpose(fbanks)) # return (num_frame,n_banks)
def myMFCC(sig,rate,nfft):
from scipy.fftpack import dct
frames=create_frames(sig,rate,nfft)
# print frames[0]
spec=spectrogram(frames,nfft)
fbanks=mel_filterbank(nfft,rate)
melBank_energy=apply_melBanks(spec,fbanks)
# print melBank_coeff.shape
with warnings.catch_warnings():
warnings.filterwarnings('error')
try:
log_melBank_energy=np.log(melBank_energy)
except:
print melBank_energy
return dct(log_melBank_energy,type=2,axis=1)[:,:13] # return (n_frame,13) as features
if __name__ == '__main__':
import os
from scipy.io.wavfile import read,write
musicFileRoot="../hw3/cs598ps_3/SpeechMusic/music/"
rate,sig=read(musicFileRoot+"1.wav")
print len(sig)/1024
a=myMFCC(sig,rate,1024)
print a.shape
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment