Skip to content

Instantly share code, notes, and snippets.

@joshdorrington
Created March 23, 2018 15:00
Show Gist options
  • Save joshdorrington/e5a1fc3c651ee6b2d5812f31e13903c4 to your computer and use it in GitHub Desktop.
Save joshdorrington/e5a1fc3c651ee6b2d5812f31e13903c4 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from pomegranate import *
# Load what is meant to be the same data set from two files in two different
# on-disk formats, using two different readers, and give both the same shape.
_COLUMN_NAMES = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6']
_DATA_SHAPE = (1, 200001, 6)

# Reader 1: pandas, from a text file. The two-character separator
# (backslash + "t") is handed to the python parsing engine as given.
pd_data_df = pd.read_csv(
    "pd_input_file.txt",
    engine="python",
    delimiter="\\t",
    names=_COLUMN_NAMES,
)
pd_data = pd_data_df.values.reshape(_DATA_SHAPE)

# Reader 2: numpy. fromfile is called without `sep` or `dtype`, so numpy's
# defaults for those arguments decide how the file is interpreted.
_np_flat = np.fromfile("np_input_file.txt")
np_data = _np_flat.reshape(_DATA_SHAPE)
# Sanity checks. If all three messages are printed, the two arrays hold equal
# values, are the same container type, and have the same element type.
# NOTE(review): none of these checks compares memory layout (strides /
# contiguity). Two arrays can pass all three and still be laid out
# differently in memory -- worth confirming if the two model fits disagree.

# No element differs between the two arrays. NaN != NaN, so any NaN counts
# as a difference. np.any avoids building the boolean-indexed copy that the
# mask-then-.size form needs.
if not np.any(pd_data != np_data):
    print("the datasets are the same")

# Both objects are the same array class.
if type(pd_data) is type(np_data):
    print("array type is the same")

# The first element of each array has the same scalar type.
if type(pd_data[0][0][0]) is type(np_data[0][0][0]):
    print("data type is the same")
# Number of hidden states requested from each fit. The same constant bounds
# the reporting loop so the two can never drift apart.
_N_COMPONENTS = 3


def _print_state_means(model, n_states=_N_COMPONENTS):
    """Print ``distribution.mu`` for the first ``n_states`` states of ``model``."""
    for i in range(n_states):
        print(model.states[i].distribution.mu)


## Fit the model on the data loaded through pandas (the .csv-style file).
pd_gaussmodel = HiddenMarkovModel.from_samples(
    MultivariateGaussianDistribution,
    n_components=_N_COMPONENTS,
    X=pd_data,
    verbose=True,
)
_print_state_means(pd_gaussmodel)

# Visual separator between the two reports.
print("\n\n\n")

## Fit the same model on the data loaded through numpy; the printed values
## can then be compared by eye with those above.
np_gaussmodel = HiddenMarkovModel.from_samples(
    MultivariateGaussianDistribution,
    n_components=_N_COMPONENTS,
    X=np_data,
    verbose=True,
)
_print_state_means(np_gaussmodel)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment