Skip to content

Instantly share code, notes, and snippets.

View rohanjoseph93's full-sized avatar

Rohan Joseph rohanjoseph93

View GitHub Profile
# Grid search over the penalty type and regularisation strength C of a
# logistic regression, scored by recall.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# 'liblinear' supports both the 'l1' and the 'l2' penalty.  The default
# solver rejects 'l1', which would make every 'l1' candidate fail to fit.
clf = LogisticRegression(solver='liblinear')
grid_values = {
    'penalty': ['l1', 'l2'],
    'C': [0.001, .009, 0.01, .09, 1, 5, 10, 25],
}
grid_clf_acc = GridSearchCV(clf, param_grid=grid_values, scoring='recall')
grid_clf_acc.fit(X_train, y_train)

# Predict values based on the parameters selected by the search
y_pred_acc = grid_clf_acc.predict(X_test)
# Logistic regression with default settings: fit on the training split,
# then predict the test split.
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# Model evaluation metrics: compare the predictions in y_pred with y_test
from sklearn.metrics import (
    accuracy_score,
    recall_score,
    precision_score,
    f1_score,
)

print('Accuracy Score : ', accuracy_score(y_test, y_pred), sep='')
print('Precision Score : ', precision_score(y_test, y_pred), sep='')
# Model evaluation metrics: compare the predictions in y_pred with y_test
from sklearn.metrics import (
    accuracy_score,
    recall_score,
    precision_score,
    f1_score,
)

print('Accuracy Score : ', accuracy_score(y_test, y_pred), sep='')
print('Precision Score : ', precision_score(y_test, y_pred), sep='')
print('Recall Score : ', recall_score(y_test, y_pred), sep='')
print('F1 Score : ', f1_score(y_test, y_pred), sep='')
# Dummy Classifier confusion matrix: print the matrix comparing y_test with
# the predictions currently held in y_pred.
from sklearn.metrics import confusion_matrix

print('Confusion Matrix : \n', confusion_matrix(y_test, y_pred), sep='')
# Separate the target column ('Class') from the remaining attribute columns
y = data['Class']
X = data.drop(columns=['Class'])

# Training / test split with a fixed seed so the split is reproducible
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
#Dummy Classifier
from sklearn.dummy import DummyClassifier
# Clean the table: drop the identifier column, discard rows with a missing
# 'Bare Nuclei' value and recode the target column.
import numpy as np

data = data.drop(['Sample Code Number'], axis=1)  # Drop 1st column
# '?' marks a missing value.  .copy() makes the filtered frame independent,
# so the column assignment below does not write to a slice of the previous
# frame (avoids pandas' SettingWithCopyWarning).
data = data[data['Bare Nuclei'] != '?'].copy()  # Remove rows with missing data
# Change the Class representation: 2 becomes 0, every other value becomes 1
data['Class'] = np.where(data['Class'] == 2, 0, 1)
data['Class'].value_counts()  # Class distribution
# Column labels for the data file, which is read without a header row
column_names = [
    'Sample Code Number',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
    'Class',
]

# Read the raw file, then attach the labels
data = pd.read_csv('breast-cancer-wisconsin.csv', header=None)
data.columns = column_names

# View the top 10 rows
data.head(10)
#import libraries
import pandas as pd
from scipy import stats
import os
# Work from the directory that holds the input workbook
os.chdir('C:\\Users\\rohan\\Documents\\Analytics\\Data')

# Load the workbook and run a one-sample t test against a population mean of 14
a = pd.read_excel('onesamplet.xlsx')
stats.ttest_1samp(a, popmean=14)
# Write `a` out as a GIF at 10 frames per second.
# NOTE(review): `a` is presumably the matplotlib animation object created
# elsewhere, not the DataFrame of the same name - confirm.
a.save(
    'C:/Users/rohan/Documents/clt2.gif',
    writer='imagemagick',
    fps=10,
)
# Function that will plot the histogram, where current is the latest figure
def clt(current):
    """Redraw the histogram for animation frame ``current``.

    Clears the current axes, plots ``avg[0:current]`` as a histogram and sets
    the axes title.  When ``current`` equals 1000, the animation ``a`` is
    stopped through its event source before the final histogram is drawn.

    NOTE(review): ``plt``, ``avg`` and ``a`` are module-level names defined
    outside this function; presumably ``plt`` is ``matplotlib.pyplot`` and
    ``a`` is the animation that calls this function - confirm.
    """
    plt.cla()
    # if animation is at the last frame, stop it
    if current == 1000:
        a.event_source.stop()
    plt.hist(avg[0:current])
    plt.gca().set_title('Expected value of die rolls')
# Function that will plot the histogram, where current is the latest figure
def clt(current):
    """Redraw the histogram for animation frame ``current``.

    Clears the current axes, plots ``avg[0:current]`` as a histogram and sets
    the axes title.  When ``current`` equals 1000, the animation ``a`` is
    stopped through its event source before the final histogram is drawn.

    NOTE(review): ``plt``, ``avg`` and ``a`` are module-level names defined
    outside this function; presumably ``plt`` is ``matplotlib.pyplot`` and
    ``a`` is the animation that calls this function - confirm.
    """
    plt.cla()
    # if animation is at the last frame, stop it
    if current == 1000:
        a.event_source.stop()
    plt.hist(avg[0:current])
    plt.gca().set_title('Expected value of die rolls')