Skip to content

Instantly share code, notes, and snippets.

@socratesk
Last active November 12, 2018 02:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save socratesk/b9f6124276bca737b0069fc69b26c5ce to your computer and use it in GitHub Desktop.
Save socratesk/b9f6124276bca737b0069fc69b26c5ce to your computer and use it in GitHub Desktop.
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Load the breast cancer dataset object bundled with scikit-learn.
breast_cancer = datasets.load_breast_cancer()

# Column labels for the feature matrix. They are taken from the dataset
# object itself rather than from a hand-typed copy, so the labels cannot
# drift out of sync with the columns of ``breast_cancer.data``.
feature_names = list(breast_cancer.feature_names)

# Wrap the raw feature matrix in a DataFrame, one labelled column per feature.
fulldata = pd.DataFrame(breast_cancer.data, columns=feature_names)

# Print the shape of the DataFrame as (rows, columns).
print(fulldata.shape)
# Output recorded by the original author:
# >> (569, 30)
# Extract the classification target from the dataset object.
target = breast_cancer.target

# Create training and test (hold-out) datasets with a 75:25 split ratio.
# random_state is fixed so that repeated runs produce the same split.
X_train, X_test, y_train, y_test = train_test_split(
    fulldata,
    target,
    test_size=0.25,
    random_state=111,
)

# Report the shapes of both partitions (features, then labels).
print("Train Data Shape: ", X_train.shape, y_train.shape)
print("Test Data Shape: ", X_test.shape, y_test.shape)
# Output recorded by the original author:
# >> Train Data Shape: (426, 30) (426,)
# >> Test Data Shape: (143, 30) (143,)
# Create a Logistic Regression estimator.
# The solver is pinned to 'liblinear' (the library default when this script
# was written) instead of relying on the default, which later changed to
# 'lbfgs'. On these unscaled features 'lbfgs' may hit its iteration limit
# and emit a ConvergenceWarning.
logistic_regression = LogisticRegression(solver="liblinear")

# Train the model on the training partition.
logistic_regression.fit(X_train, y_train)

# Compute the mean accuracy on the hold-out partition and print it.
accuracy_score = logistic_regression.score(X_test, y_test)
print(accuracy_score)
# Output recorded by the original author (exact digits may vary with the
# installed library versions):
# >> 0.951048951049
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment