
# kastnerkyle/gmmhmm.py Last active May 20, 2018

GMM-HMM (hidden Markov model with Gaussian mixture emissions) implementation for speech recognition and other uses
```python
# (C) Kyle Kastner, June 2014
# License: BSD 3 clause

import scipy.stats as st
import numpy as np


class gmmhmm:
    #This class converted with modifications from https://code.google.com/p/hmm-speech-recognition/source/browse/Word.m
    def __init__(self, n_states):
        self.n_states = n_states
        self.random_state = np.random.RandomState(0)

        #Normalize random initial state
        self.prior = self._normalize(self.random_state.rand(self.n_states, 1))
        self.A = self._stochasticize(self.random_state.rand(self.n_states, self.n_states))

        self.mu = None
        self.covs = None
        self.n_dims = None

    def _forward(self, B):
        log_likelihood = 0.
        T = B.shape[1]
        alpha = np.zeros(B.shape)
        for t in range(T):
            if t == 0:
                alpha[:, t] = B[:, t] * self.prior.ravel()
            else:
                alpha[:, t] = B[:, t] * np.dot(self.A.T, alpha[:, t - 1])

            alpha_sum = np.sum(alpha[:, t])
            alpha[:, t] /= alpha_sum
            log_likelihood = log_likelihood + np.log(alpha_sum)
        return log_likelihood, alpha

    def _backward(self, B):
        T = B.shape[1]
        beta = np.zeros(B.shape)

        beta[:, -1] = np.ones(B.shape[0])

        for t in range(T - 1)[::-1]:
            beta[:, t] = np.dot(self.A, (B[:, t + 1] * beta[:, t + 1]))
            beta[:, t] /= np.sum(beta[:, t])
        return beta

    def _state_likelihood(self, obs):
        obs = np.atleast_2d(obs)
        B = np.zeros((self.n_states, obs.shape[1]))
        for s in range(self.n_states):
            #Needs scipy 0.14
            B[s, :] = st.multivariate_normal.pdf(obs.T, mean=self.mu[:, s].T, cov=self.covs[:, :, s].T)
            #This function can (and will!) return values >> 1
            #See the discussion here for the equivalent matlab function
            #https://groups.google.com/forum/#!topic/comp.soft-sys.matlab/YksWK0T74Ak
            #Key line: "Probabilities have to be less than 1,
            #Densities can be anything, even infinite (at individual points)."
            #This is evaluating the density at individual points...
        return B

    def _normalize(self, x):
        return (x + (x == 0)) / np.sum(x)

    def _stochasticize(self, x):
        return (x + (x == 0)) / np.sum(x, axis=1)

    def _em_init(self, obs):
        #Using this _em_init function allows for less required constructor args
        if self.n_dims is None:
            self.n_dims = obs.shape[0]
        if self.mu is None:
            subset = self.random_state.choice(np.arange(self.n_dims), size=self.n_states, replace=False)
            self.mu = obs[:, subset]
        if self.covs is None:
            self.covs = np.zeros((self.n_dims, self.n_dims, self.n_states))
            self.covs += np.diag(np.diag(np.cov(obs)))[:, :, None]
        return self

    def _em_step(self, obs):
        obs = np.atleast_2d(obs)
        B = self._state_likelihood(obs)
        T = obs.shape[1]

        log_likelihood, alpha = self._forward(B)
        beta = self._backward(B)

        xi_sum = np.zeros((self.n_states, self.n_states))
        gamma = np.zeros((self.n_states, T))

        for t in range(T - 1):
            partial_sum = self.A * np.dot(alpha[:, t], (beta[:, t] * B[:, t + 1]).T)
            xi_sum += self._normalize(partial_sum)
            partial_g = alpha[:, t] * beta[:, t]
            gamma[:, t] = self._normalize(partial_g)

        partial_g = alpha[:, -1] * beta[:, -1]
        gamma[:, -1] = self._normalize(partial_g)

        expected_prior = gamma[:, 0]
        expected_A = self._stochasticize(xi_sum)

        expected_mu = np.zeros((self.n_dims, self.n_states))
        expected_covs = np.zeros((self.n_dims, self.n_dims, self.n_states))

        gamma_state_sum = np.sum(gamma, axis=1)
        #Set zeros to 1 before dividing
        gamma_state_sum = gamma_state_sum + (gamma_state_sum == 0)

        for s in range(self.n_states):
            gamma_obs = obs * gamma[s, :]
            expected_mu[:, s] = np.sum(gamma_obs, axis=1) / gamma_state_sum[s]
            partial_covs = np.dot(gamma_obs, obs.T) / gamma_state_sum[s] - np.dot(expected_mu[:, s], expected_mu[:, s].T)
            #Symmetrize
            partial_covs = np.triu(partial_covs) + np.triu(partial_covs).T - np.diag(partial_covs)

        #Ensure positive semidefinite by adding diagonal loading
        expected_covs += .01 * np.eye(self.n_dims)[:, :, None]

        self.prior = expected_prior
        self.mu = expected_mu
        self.covs = expected_covs
        self.A = expected_A

        return log_likelihood

    def fit(self, obs, n_iter=15):
        #Support for 2D and 3D arrays
        #2D should be n_features, n_dims
        #3D should be n_examples, n_features, n_dims
        #For example, with 6 features per speech segment, 105 different words
        #this array should be size
        #(105, 6, X) where X is the number of frames with features extracted
        #For a single example file, the array should be size (6, X)
        if len(obs.shape) == 2:
            for i in range(n_iter):
                self._em_init(obs)
                log_likelihood = self._em_step(obs)
        elif len(obs.shape) == 3:
            count = obs.shape[0]
            for n in range(count):
                for i in range(n_iter):
                    self._em_init(obs[n, :, :])
                    log_likelihood = self._em_step(obs[n, :, :])
        return self

    def transform(self, obs):
        #Support for 2D and 3D arrays
        #2D should be n_features, n_dims
        #3D should be n_examples, n_features, n_dims
        #For example, with 6 features per speech segment, 105 different words
        #this array should be size
        #(105, 6, X) where X is the number of frames with features extracted
        #For a single example file, the array should be size (6, X)
        if len(obs.shape) == 2:
            B = self._state_likelihood(obs)
            log_likelihood, _ = self._forward(B)
            return log_likelihood
        elif len(obs.shape) == 3:
            count = obs.shape[0]
            out = np.zeros((count,))
            for n in range(count):
                B = self._state_likelihood(obs[n, :, :])
                log_likelihood, _ = self._forward(B)
                out[n] = log_likelihood
            return out


if __name__ == "__main__":
    rstate = np.random.RandomState(0)
    t1 = np.ones((4, 40)) + .001 * rstate.rand(4, 40)
    t1 /= t1.sum(axis=0)
    t2 = rstate.rand(*t1.shape)
    t2 /= t2.sum(axis=0)

    m1 = gmmhmm(2)
    m1.fit(t1)
    m2 = gmmhmm(2)
    m2.fit(t2)

    m1t1 = m1.transform(t1)
    m2t1 = m2.transform(t1)
    print "Likelihoods for test set 1"
    print "M1:", m1t1
    print "M2:", m2t1
    print "Prediction for test set 1"
    print "Model", np.argmax([m1t1, m2t1]) + 1
    print

    m1t2 = m1.transform(t2)
    m2t2 = m2.transform(t2)
    print "Likelihoods for test set 2"
    print "M1:", m1t2
    print "M2:", m2t2
    print "Prediction for test set 2"
    print "Model", np.argmax([m1t2, m2t2]) + 1
```

### aspats commented Feb 17, 2015

Hi kastnerkyle, I am really interested in finding a good example of a GMM-HMM for speech recognition. I wanted to use your code to test the MATLAB example https://code.google.com/p/hmm-speech-recognition/source/browse/Word.m from which you made the Python code, so I tried it with the audio files from that MATLAB project. But I found what looks like a mistake in your Python code: `partial_covs = np.triu(partial_covs) + np.triu(partial_covs).T - np.diag(partial_covs)` should be `expected_covs[:, :, s] = np.triu(partial_covs) + np.triu(partial_covs).T - np.diag(partial_covs)`. Your example works with generated numbers, but I could not make it work with the real audio files from the MATLAB project. After applying the fix described above, I get a new error coming from the `_state_likelihood(self, obs)` function: `ValueError: the input matrix must be positive semidefinite`.
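For readers following along, a minimal sketch of how `_em_step`'s per-state loop might look with that fix applied (the switch to `np.outer` and `np.diag(np.diag(...))` is my adjustment for correct 1-D/2-D array handling, not part of the original gist):

```python
for s in range(self.n_states):
    gamma_obs = obs * gamma[s, :]
    expected_mu[:, s] = np.sum(gamma_obs, axis=1) / gamma_state_sum[s]
    # Weighted second moment minus the outer product of the mean
    partial_covs = (np.dot(gamma_obs, obs.T) / gamma_state_sum[s]
                    - np.outer(expected_mu[:, s], expected_mu[:, s]))
    # Symmetrize without double-counting the diagonal
    partial_covs = (np.triu(partial_covs) + np.triu(partial_covs).T
                    - np.diag(np.diag(partial_covs)))
    # aspats' fix: store the result in the per-state covariance slice
    expected_covs[:, :, s] = partial_covs
```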

### kastnerkyle commented Apr 14, 2016

Hi, I didn't have issues with this code here: http://kastnerkyle.github.io/posts/single-speaker-word-recognition-with-hidden-markov-models/ and it was using the same files from that MATLAB demo. Can you describe a bit more what you did? One thing you can try is adding a small positive constant to the diagonal of the covariance in `_state_likelihood`; that often helps with these small numerical issues.
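A minimal sketch of that suggestion (the `1e-2` constant is an arbitrary illustrative choice, not a tuned value):

```python
def _state_likelihood(self, obs):
    obs = np.atleast_2d(obs)
    B = np.zeros((self.n_states, obs.shape[1]))
    for s in range(self.n_states):
        # Diagonal loading keeps the covariance positive definite
        cov = self.covs[:, :, s] + 1e-2 * np.eye(self.n_dims)
        B[s, :] = st.multivariate_normal.pdf(obs.T, mean=self.mu[:, s], cov=cov)
    return B
```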

### kbagalo commented Oct 20, 2016 (edited)

Hello kastnerkyle, I am trying to implement the example you have given (apple-banana-pineapple...) using the hmmlearn Python module. I am unable to use the `model.fit(X)` command properly, as I can't make sense of what X should look like (I have understood what it is in your implementation). Could you please guide me here? Any response would be greatly appreciated.

### yvanlucas commented Oct 27, 2016

@kbagalo

You might want to look at the help of HMMLEARN for this purpose: https://github.com/hmmlearn/hmmlearn/blob/master/doc/tutorial.rst

## Here is a copy-paste of the part you need (if I understood correctly):

Working with multiple sequences

All of the examples so far were using a single sequence of observations. The input format in the case of multiple sequences is a bit involved and is best understood by example.

Consider two 1D sequences:

```python
X1 = [[0.5], [1.0], [-1.0], [0.42], [0.24]]
X2 = [[2.4], [4.2], [0.5], [-0.24]]
```

To pass both sequences to `fit` or `predict`, first concatenate them into a single array and then compute an array of sequence lengths:

```python
X = np.concatenate([X1, X2])
lengths = [len(X1), len(X2)]
```

Finally just call the desired method with X and lengths:

```python
>>> hmm.GaussianHMM(n_components=3).fit(X, lengths)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
GaussianHMM(algorithm='viterbi', ...
```

### iichangle commented Apr 28, 2017 (edited)

Hi kastnerkyle, I found that the Python code above is a GaussianHMM rather than a GMM-HMM, since the emission distribution for each state has only one Gaussian component, so there is no "mixture". I wonder if that is right. Thank you.

### veydan commented Jul 9, 2017

Hi, I think this is NOT a GMM-HMM: the code has no "mixture" of multiple Gaussian components for each state, but rather a single multivariate Gaussian for each state. Thanks.
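For contrast, a hypothetical sketch of what a true per-state GMM emission density would look like (a weighted sum of component Gaussians; the parameter names `weights`, `mus`, and `covs` are illustrative, not from the gist):

```python
import numpy as np
import scipy.stats as st

def mixture_state_likelihood(obs, weights, mus, covs):
    # obs: (n_dims, T); weights: (n_mix,);
    # mus: (n_dims, n_mix); covs: (n_dims, n_dims, n_mix)
    b = np.zeros(obs.shape[1])
    for m in range(len(weights)):
        # Each component contributes weight_m * N(obs; mu_m, cov_m)
        b += weights[m] * st.multivariate_normal.pdf(
            obs.T, mean=mus[:, m], cov=covs[:, :, m])
    return b
```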

### Rapternmn commented Nov 23, 2017

I think there is something wrong with this line: `partial_sum = self.A * np.dot(alpha[:, t], (beta[:, t] * B[:, t + 1]).T)`. I think it should be `partial_sum = self.A * np.dot(alpha[:, t], (beta[:, t + 1] * B[:, t + 1]).T)`. Please tell me what I am missing if I am wrong about this.
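For reference, the textbook Baum-Welch update is xi_t(i, j) ∝ alpha_t(i) A_ij b_j(o_{t+1}) beta_{t+1}(j), which does use beta at t + 1, as Rapternmn says. A sketch of how the loop in `_em_step` would read under that formula (using `np.outer`, my adjustment, since `np.dot` of two 1-D vectors returns a scalar):

```python
for t in range(T - 1):
    # (n_states, n_states) matrix of alpha_t(i) * b_j(o_{t+1}) * beta_{t+1}(j)
    partial_sum = self.A * np.outer(alpha[:, t], B[:, t + 1] * beta[:, t + 1])
    xi_sum += self._normalize(partial_sum)
```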

### huangteng1220 commented Feb 2, 2018

I am new to statistical analysis, so I will describe my problem in detail. I have a 4D data set as follows:

| ObjectID | Number of features per object | Length of each feature | Type |
| --- | --- | --- | --- |
| 1 | 100 | 64 | 1 |
| 2 | 100 | 64 | 1 |
| 3 | 100 | 64 | 2 |
| 4 | 100 | 64 | 3 |
| ··· | ··· | ··· | ··· |

The shape is (objID, num_features per object, len_feature, type), e.g. (274500, 100, 64, 10). How should I use hmmlearn or pomegranate HMM for recognition on this 4D data?
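The thread does not answer this, but building on the hmmlearn snippet quoted above, one hedged sketch of an approach: treat each object's (100, 64) feature matrix as one sequence, fit one model per type on the concatenated sequences of that type, and classify new objects by comparing per-model scores. All names here (`fit_class_model`, `data`, `labels`) are hypothetical:

```python
import numpy as np
from hmmlearn import hmm

def fit_class_model(data, labels, target_type, n_states=3):
    # data: (n_objects, 100, 64) feature sequences; labels: (n_objects,) type ids
    seqs = [data[i] for i in range(len(labels)) if labels[i] == target_type]
    X = np.concatenate(seqs)          # stacked frames, shape (sum of 100s, 64)
    lengths = [len(s) for s in seqs]  # one length per object's sequence
    model = hmm.GaussianHMM(n_components=n_states)
    model.fit(X, lengths)
    return model

# Classification sketch: pick the type whose model scores a sequence highest,
# e.g. model.score(new_obj) where new_obj has shape (100, 64).
```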