# joshdorrington/data_type_test.py

## data_type_test.py
import numpy as np
import pandas as pd
from pomegranate import *

# Load the same data from two input files with two different readers, check
# that the resulting arrays look identical, then fit the same hidden Markov
# model to each array and print the fitted state means for comparison.

N_SAMPLES = 200001   # number of rows each input file is reshaped to
N_FEATURES = 6       # number of columns per row (x1..x6)
N_COMPONENTS = 3     # number of hidden states requested from each model
# presumably a single sequence of N_SAMPLES observations with N_FEATURES
# dimensions each -- TODO confirm against the from_samples documentation
DATA_SHAPE = [1, N_SAMPLES, N_FEATURES]


def _fit_and_print_means(X):
    """Fit a Gaussian HMM to X, print the mean of each component, return the model."""
    model = HiddenMarkovModel.from_samples(
        MultivariateGaussianDistribution,
        n_components=N_COMPONENTS,
        X=X,
        verbose=True,
    )
    # Index only the first N_COMPONENTS states rather than iterating over
    # model.states: the list may hold extra states without a distribution
    # (NOTE(review): presumably silent start/end states -- confirm).
    for state_index in range(N_COMPONENTS):
        print(model.states[state_index].distribution.mu)
    return model


# IMPORT THE SAME DATA FROM 2 FILE FORMATS USING 2 METHODS
# With engine="python" the two-character separator (backslash + "t") is
# treated as a regular expression, so it matches a tab character.
pd_data_df = pd.read_csv(
    "pd_input_file.txt",
    engine="python",
    delimiter="\\t",
    names=['x1', 'x2', 'x3', 'x4', 'x5', 'x6'],
)
pd_data = pd_data_df.values.reshape(DATA_SHAPE)
# NOTE(review): np.fromfile with its default sep reads raw binary floats, not
# text, despite the ".txt" suffix -- confirm the file was written accordingly.
np_data = np.fromfile("np_input_file.txt").reshape(DATA_SHAPE)

# IF ALL 3 OF THESE STATEMENTS ARE TRUE THEN THE DATA SHOULD BE TOTALLY
# INDISTINGUISHABLE FROM EACH OTHER.
# NOTE: these checks compare values, container type and element type only;
# they do not compare memory layout (ndarray.flags / strides).
if np.array_equal(pd_data, np_data):
    print("the datasets are the same")

if type(pd_data) is type(np_data):
    print("array type is the same")

if type(pd_data[0][0][0]) is type(np_data[0][0][0]):
    print("data type is the same")

# RUN THE MODEL ON THE DATA EXTRACTED FROM THE .CSV
pd_gaussmodel = _fit_and_print_means(pd_data)

print("\n\n\n")

# RUN THE MODEL ON THE DATA EXTRACTED FROM THE .NP
np_gaussmodel = _fit_and_print_means(np_data)