Skip to content

Instantly share code, notes, and snippets.

@jmsword
Created February 11, 2017 21:41
Show Gist options
  • Save jmsword/661995b81a31a5b6a561d97b43c577fa to your computer and use it in GitHub Desktop.
Save jmsword/661995b81a31a5b6a561d97b43c577fa to your computer and use it in GitHub Desktop.
Naive Bayes
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
# Reading the data straight from GitHub kept failing with
# 'pandas.io.common.CParserError: Error tokenizing data. C error: Expected 1 fields in line 104, saw 3',
# so the data was copied into a csv file saved locally and is loaded from there.
df = pd.read_csv('ideal_weight.csv')
# Strip the single quotes out of every column name
df.columns = [name.replace("'", "") for name in df.columns]
# Strip the single quotes out of every value in the sex column
df['sex'] = [value.replace("'", "") for value in df['sex']]
# Plot histograms of actual & ideal weight on the same axes.
# Both series are drawn semi-transparent and labelled so the overlapping
# distributions stay visible and can be told apart via the legend.
plt.figure()
df['actual'].hist(alpha=0.5, label='actual')
df['ideal'].hist(alpha=0.5, label='ideal')
plt.legend()
plt.show()
# Plot histogram of the difference in weight on its own figure
plt.figure()
df['diff'].hist()
plt.show()
# Replace the sex labels with their integer category codes
df['sex'] = df['sex'].astype('category').cat.codes
# Print summary statistics per sex; the counts show whether there are
# more females than males in the data
by_sex = df.groupby('sex')
print(by_sex.describe())
# Create training & testing data: the first 70% of the rows are used for
# training and the remaining rows are held out for testing.
# NOTE(review): the split follows file order with no shuffling -- confirm the
# rows are not ordered by sex, otherwise the two sets will be unbalanced.
train_set = int(len(df) * 0.7)
train = df.iloc[:train_set]
test = df.iloc[train_set:]
# Create variables to fit into classifier model
train_target = train['sex']
# Features are every column from the third one onward. `DataFrame.ix` was
# removed in pandas 1.0, so the columns are selected by position with `.iloc`.
# NOTE(review): presumably these columns are actual, ideal, diff -- confirm
# against the CSV header.
train_data = train.iloc[:, 2:]
#Classifier
clf = GaussianNB()
#Fit into model
clf.fit(train_data, train_target)
#Create variables to predict: 0 = female, 1 = male
test_target = test['sex']
# `DataFrame.ix` was removed in pandas 1.0; `.iloc` selects the same feature
# columns (third column onward) by position.
test_data = test.iloc[:, 2:]
pred = clf.predict(test_data)
# predict() expects 2-D input with one row per sample, so each single sample
# is wrapped in a one-row DataFrame. Reusing the training column names keeps
# the features in the order the model was fitted on.
#Make first prediction
first_sample = pd.DataFrame([[145, 160, -15]], columns=train_data.columns)
print(clf.predict(first_sample))
#Make second prediction
second_sample = pd.DataFrame([[160, 145, 15]], columns=train_data.columns)
print(clf.predict(second_sample))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment