Skip to content

Instantly share code, notes, and snippets.

@iprocheta
Created August 28, 2017 06:01
Show Gist options
  • Save iprocheta/4cdcf35fe39224047bf1f1a6c3c3e14a to your computer and use it in GitHub Desktop.
Save iprocheta/4cdcf35fe39224047bf1f1a6c3c3e14a to your computer and use it in GitHub Desktop.
python code
# Decision-tree experiment: fit an entropy-criterion tree on the ranking data
# set, report accuracy and ROC AUC on a held-out 30% split, export the fitted
# tree in Graphviz DOT format and print the estimator's parameters.
import numpy as np
import pandas as pd
try:
    # `sklearn.cross_validation` was removed in scikit-learn 0.20; the same
    # function has lived in `sklearn.model_selection` since 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for scikit-learn < 0.18, which only ships the legacy module.
    from sklearn.cross_validation import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc
from sklearn import tree
from sklearn.metrics import roc_auc_score

# Input data set and output location for the exported tree.
DATA_PATH = r"E:\Ranking-test.csv"
TREE_DOT_PATH = r"E:\clf_entropy.txt"

# Column labels applied to the CSV, which is read without a header row.
COLUMN_NAMES = [
    'class', 'age', 'gender', 'hypertension', 'hypertension medicine years',
    'diabetes', 'diabetes medicine years', 'cholesterol',
    'cholesterol medicine years', 'smoking', 'family history', 'chest pain',
    'symptoms', 'chest pain location', 'chest pain mark', 'pain going',
    'association', 'persistence', 'subsided', 'pain type',
    'similiar pain before', 'after drinking water', 'after having food',
    'by chest movement',
]

balance_data = pd.read_csv(DATA_PATH, sep=',', header=None)
balance_data.columns = COLUMN_NAMES

# Features are columns 1-4 ('age' .. 'hypertension medicine years'); the
# target is column 0 ('class').
# NOTE(review): the remaining 19 columns are not used as features - confirm
# that this is intentional.
X = balance_data.values[:, 1:5]
Y = balance_data.values[:, 0]

# Fixed random_state keeps the 70/30 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=100)

clf_entropy = DecisionTreeClassifier(
    criterion="entropy", random_state=100, max_depth=3, min_samples_leaf=5)
clf_entropy.fit(X_train, y_train)

# Hard class predictions drive the accuracy figure.
y_pred_en = clf_entropy.predict(X_test)
print ("Accuracy is ", accuracy_score(y_test,y_pred_en)*100)

# ROC AUC needs a continuous score: feeding it thresholded labels collapses
# the curve to a single operating point.  Column 1 of predict_proba is the
# probability of classes_[1] (the greater label), which is the class
# roc_auc_score treats as positive in the binary case.
y_score_en = clf_entropy.predict_proba(X_test)[:, 1]
print ("roc is ",roc_auc_score(y_test, y_score_en, average=None))

# export_graphviz writes straight into the open handle and returns None, so
# its result is deliberately not bound to anything.
with open(TREE_DOT_PATH, "w") as f:
    tree.export_graphviz(clf_entropy, out_file=f)

print(clf_entropy.get_params(deep=True))
@manashmandal
Copy link

# Decision-tree experiment: fit an entropy-criterion tree on the ranking data
# set using every non-target column as a feature, then report accuracy and
# ROC AUC on a held-out 30% split and print the estimator's parameters.
import numpy as np
import pandas as pd
try:
    # `sklearn.cross_validation` was removed in scikit-learn 0.20; the same
    # function has lived in `sklearn.model_selection` since 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for scikit-learn < 0.18, which only ships the legacy module.
    from sklearn.cross_validation import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc
from sklearn import tree
from sklearn.metrics import roc_auc_score

balance_data = pd.read_csv("Ranking-test (1).csv",sep=',')

# Recode the target to 1/0.  The result is assigned back to the column
# because an inplace replace on `balance_data['class']` acts on a temporary
# Series: pandas warns about it and, with Copy-on-Write enabled, leaves the
# frame unchanged.
balance_data['class'] = balance_data['class'].replace({99: 1, 100: 0})

# Every column after the first is a feature.
# NOTE(review): this assumes 'class' is the first column of the CSV;
# otherwise the target would leak into X - confirm against the file.
X = balance_data[balance_data.columns[1:]].values
Y = balance_data['class'].values

# Fixed random_state keeps the 70/30 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=100)

clf_entropy = DecisionTreeClassifier(
    criterion="entropy", random_state=100, max_depth=3, min_samples_leaf=5)

clf_entropy.fit(X_train, y_train)

# Hard class predictions drive the accuracy figure.
y_pred_en = clf_entropy.predict(X_test)

print ("Accuracy is ", accuracy_score(y_test, y_pred_en)*100)

# ROC AUC needs a continuous score: feeding it thresholded labels collapses
# the curve to a single operating point.  Column 1 of predict_proba is the
# probability of classes_[1] (the greater label), which is the class
# roc_auc_score treats as positive in the binary case.
y_score_en = clf_entropy.predict_proba(X_test)[:, 1]
print ("roc is ",roc_auc_score(y_test, y_score_en, average=None))

print(clf_entropy.get_params(deep=True))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment