# Skip to content

# Instantly share code, notes, and snippets.

# @sherbold
# Created August 23, 2018 10:21
# scikit-learn smoke and morph test examples
import unittest
import xmlrunner
import pandas as pd
import numpy as np
import threading
import functools
from scipy.io.arff import loadarff
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
class test_DecisionTree_Default_Scikit(unittest.TestCase):
    """Morph and smoke tests for ``DecisionTreeClassifier`` with default parameters.

    The tests read ARFF files from the ``morphdata/`` and ``smokedata/``
    directories, resolved relative to the current working directory.
    """

    @staticmethod
    def _load_frame(path, sparse=False):
        """Load the ARFF file at *path* into an encoded DataFrame.

        The ``classAtt`` column is label-encoded and the frame is then
        passed through ``pd.get_dummies``; *sparse* is forwarded to that
        call unchanged.
        """
        records, _ = loadarff(path)
        frame = pd.DataFrame(records)
        frame["classAtt"] = LabelEncoder().fit_transform(frame["classAtt"])
        return pd.get_dummies(frame, sparse=sparse)

    @staticmethod
    def _class_index(frame):
        """Return the index of the last column whose name contains ``classAtt``.

        Returns -1 when no column matches.
        """
        index = -1
        for position, name in enumerate(frame.columns):
            if 'classAtt' in name:
                index = position
        return index

    @staticmethod
    def _split(frame, class_index):
        """Return ``(features, labels)`` arrays of *frame* split at *class_index*."""
        values = frame.values
        return np.delete(values, class_index, axis=1), values[:, class_index]

    def test_Const_UNIFORM(self):
        """Compare predictions on ``UNIFORM_<n>`` and its ``_Const`` variant.

        For each of the five data sets, one classifier is fitted on the
        original file and one on the morphed file. Each classifier predicts
        its own training data, and the two prediction vectors must be equal.
        """
        for iteration in range(1, 5 + 1):
            original = self._load_frame('morphdata/UNIFORM_%i.arff' % iteration)
            morphed = self._load_frame('morphdata/UNIFORM_%i_Const.arff' % iteration)

            features_original, labels_original = self._split(
                original, self._class_index(original))
            features_morph, labels_morph = self._split(
                morphed, self._class_index(morphed))

            # Re-seed NumPy's global RNG before every fit so both classifiers
            # start from the same random state.
            classifier_original = DecisionTreeClassifier()
            np.random.seed(42)
            classifier_original.fit(features_original, labels_original)

            classifier_morph = DecisionTreeClassifier()
            np.random.seed(42)
            classifier_morph.fit(features_morph, labels_morph)

            prediction_original = classifier_original.predict(features_original)
            prediction_morph = classifier_morph.predict(features_morph)

            # array_equal also reports a shape mismatch as inequality.
            self.assertTrue(
                np.array_equal(prediction_original, prediction_morph),
                msg='predictions differ between original and morphed data '
                    '(iteration %i)' % iteration)

    def test_MaxDouble(self):
        """Smoke test: fit on ``MaxDouble_<n>_training`` and predict on ``MaxDouble_<n>_test``.

        The test has no assertion; it fails only if fitting or predicting
        raises.
        """
        for iteration in range(1, 5 + 1):
            data_df = self._load_frame(
                'smokedata/MaxDouble_%i_training.arff' % iteration)
            # Predictions are made on the separate test file, encoded with
            # sparse one-hot columns.
            testdata_df = self._load_frame(
                'smokedata/MaxDouble_%i_test.arff' % iteration, sparse=True)
            # One-hot encoding may yield different columns per file; align the
            # test frame to the training layout so the class index and the
            # feature count match what the classifier was fitted on.
            testdata_df = testdata_df.reindex(columns=data_df.columns, fill_value=0)

            class_index = self._class_index(data_df)
            features, labels = self._split(data_df, class_index)
            test_features, _ = self._split(testdata_df, class_index)

            classifier = DecisionTreeClassifier()
            np.random.seed(42)
            classifier.fit(features, labels)
            classifier.predict(test_features)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment