Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

try:
    # scikit-learn >= 0.18 (the only location from 0.20 onwards)
    from sklearn.model_selection import cross_val_score
except ImportError:
    # very old scikit-learn releases only ship the legacy module
    from sklearn.cross_validation import cross_val_score
# Predict whether a wine is red or white from its physico-chemical
# measurements.
#
# Steps:
#   1. load the red and white wine CSV files (';'-separated),
#   2. plot the fixed acidity of the red wines as a quick visual check,
#   3. tag every row with its kind and stack both tables,
#   4. cross-validate a logistic regression that predicts the kind.

reds = pd.read_csv('winequality-red.csv', sep=';')
whites = pd.read_csv('winequality-white.csv', sep=';')

# Exploratory plot: one red dot per red wine. Direct column indexing is used
# so a missing column raises KeyError instead of silently plotting None.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(reds.index, reds["fixed acidity"], 'ro')
ax.set_title('Wines vs fixed acidity')
ax.set_xlabel('red wine index')
ax.set_ylabel('Fixed Acidity')
plt.show()

# Label each table, then stack them into one frame with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which newer pandas no longer provides.
reds['kind'] = 'red'
whites['kind'] = 'white'
wines = pd.concat([reds, whites], ignore_index=True)

# Features are every column except the target. Dropping by name replaces the
# removed `.ix` indexer and does not depend on 'kind' being the last column.
X = wines.drop('kind', axis=1)

# Binarize the labels: white -> 0, red -> 1.
y = (wines['kind'] != 'white').astype(int)

clf = LogisticRegression()
scores = cross_val_score(clf, X, y, cv=5)

# Mean and standard deviation of the 5 fold scores, separated by a space.
# A single formatted string prints identically on Python 2 and Python 3.
print("%s %s" % (scores.mean(), scores.std()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.