Skip to content

Instantly share code, notes, and snippets.

@sherbold
Created August 23, 2018 10:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sherbold/570c9399e9bc39dd980d6c2bdbf3b64a to your computer and use it in GitHub Desktop.
Save sherbold/570c9399e9bc39dd980d6c2bdbf3b64a to your computer and use it in GitHub Desktop.
scikit-learn smoke and morph test examples
import unittest
import xmlrunner
import pandas as pd
import numpy as np
import threading
import functools
from scipy.io.arff import loadarff
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
class test_DecisionTree_Default_Scikit(unittest.TestCase):
    """Metamorphic and smoke tests for ``DecisionTreeClassifier`` with default parameters.

    Both tests iterate over five numbered ARFF data sets that are read
    relative to the current working directory (``morphdata/`` and
    ``smokedata/``).  Every data set is expected to contain a ``classAtt``
    attribute holding the class label.
    """

    @staticmethod
    def _encode(records, sparse=False):
        """Turn ARFF records into an all-numeric ``DataFrame``.

        The ``classAtt`` column is label-encoded to integers and every
        remaining non-numeric column is one-hot encoded.

        Args:
            records: Record array as returned by ``loadarff``.
            sparse: Forwarded to ``pandas.get_dummies``.

        Returns:
            The encoded ``DataFrame``.
        """
        frame = pd.DataFrame(records)
        # A fresh encoder per frame is equivalent to re-fitting a shared one.
        frame["classAtt"] = LabelEncoder().fit_transform(frame["classAtt"])
        return pd.get_dummies(frame, sparse=sparse)

    @staticmethod
    def _class_index(frame):
        """Return the position of the last column whose name contains 'classAtt'.

        Returns -1 when no column matches.
        """
        index = -1
        for position, column in enumerate(frame.columns):
            if 'classAtt' in column:
                index = position
        return index

    @staticmethod
    def _split(frame, class_index):
        """Split ``frame`` into a (features, labels) pair of NumPy arrays."""
        values = frame.values
        return np.delete(values, class_index, axis=1), values[:, class_index]

    @staticmethod
    def _fit_default_tree(features, labels):
        """Fit a default ``DecisionTreeClassifier`` with a fixed global NumPy seed."""
        classifier = DecisionTreeClassifier()
        # Seed right before fitting so that every fit starts from the same
        # global random state and results are comparable between runs.
        np.random.seed(42)
        classifier.fit(features, labels)
        return classifier

    def test_Const_UNIFORM(self):
        """Predictions on the original and the ``Const``-morphed data must be identical.

        For each data set, one classifier is trained on the original data and
        one on the morphed data; each predicts its own training instances and
        the two prediction vectors are compared element-wise.
        """
        for iteration in range(1, 5 + 1):
            # subTest reports which data set failed and keeps checking the rest.
            with self.subTest(iteration=iteration):
                data_original, _ = loadarff('morphdata/UNIFORM_%i.arff' % iteration)
                data_morphed, _ = loadarff('morphdata/UNIFORM_%i_Const.arff' % iteration)

                original_df = self._encode(data_original)
                morph_df = self._encode(data_morphed)

                features_original, labels_original = self._split(
                    original_df, self._class_index(original_df))
                features_morph, labels_morph = self._split(
                    morph_df, self._class_index(morph_df))

                classifier_original = self._fit_default_tree(features_original, labels_original)
                classifier_morph = self._fit_default_tree(features_morph, labels_morph)

                prediction_original = classifier_original.predict(features_original)
                prediction_morph = classifier_morph.predict(features_morph)

                # array_equal also copes with differing shapes, where
                # ``(a == b).all()`` would not yield a usable result.
                self.assertTrue(
                    np.array_equal(prediction_original, prediction_morph),
                    'predictions differ between original and morphed data '
                    '(iteration %i)' % iteration)

    def test_MaxDouble(self):
        """Smoke test: training and predicting on the ``MaxDouble`` data must not raise.

        The classifier is trained on the training split and asked to predict
        the separate test split; the predictions themselves are not checked.
        """
        for iteration in range(1, 5 + 1):
            with self.subTest(iteration=iteration):
                data, _ = loadarff('smokedata/MaxDouble_%i_training.arff' % iteration)
                testdata, _ = loadarff('smokedata/MaxDouble_%i_test.arff' % iteration)

                data_df = self._encode(data)
                # Build the test frame from the *test* records (previously the
                # training records were used here by mistake).
                # NOTE(review): sparse=True is kept from the original code;
                # confirm whether it is intentional.
                testdata_df = self._encode(testdata, sparse=True)
                # Align the test columns with the training columns so that the
                # class index and feature layout match even if one-hot
                # encoding produced different columns for the two splits.
                testdata_df = testdata_df.reindex(columns=data_df.columns, fill_value=0)

                class_index = self._class_index(data_df)
                train_features, train_labels = self._split(data_df, class_index)
                test_features, _ = self._split(testdata_df, class_index)

                classifier = self._fit_default_tree(train_features, train_labels)
                classifier.predict(test_features)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment