Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

try:
    from sklearn.model_selection import cross_val_score
except ImportError:  # scikit-learn < 0.18 only ships the legacy module
    from sklearn.cross_validation import cross_val_score
# Here we solve the problem of predicting if a wine is white or red.
#
# Steps performed by this script:
#   1. load the red and white wine CSV files (';'-separated),
#   2. show a scatter plot of fixed acidity for the red wines,
#   3. tag every row with its kind and stack both tables,
#   4. cross-validate a logistic regression that predicts the kind.

reds = pd.read_csv('winequality-red.csv', sep=';')
whites = pd.read_csv('winequality-white.csv', sep=';')

# Exploratory plot: fixed acidity of each red wine against its row index.
# Drawing through `ax` (instead of the implicit pyplot state) guarantees the
# points land on the axes that receive the title and labels below.
fig, ax = plt.subplots(figsize=(10, 5))
# Direct indexing raises KeyError if the column is missing, rather than
# passing None to the plotting call as DataFrame.get() would.
ax.plot(reds.index, reds['fixed acidity'], 'ro')
ax.set_title('Wines vs fixed acidity')
ax.set_xlabel('red wine index')
ax.set_ylabel('Fixed Acidity')
plt.show()

# Label every sample with its kind before merging the two tables.
reds['kind'] = 'red'
whites['kind'] = 'white'
# pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
wines = pd.concat([reds, whites], ignore_index=True)

# Features are every column except the target. Dropping by name replaces the
# removed `.ix` positional slice and does not depend on 'kind' being last.
X = wines.drop('kind', axis=1)

# Binarise the labels: 0 for 'white', 1 for anything else (i.e. 'red').
y = (wines['kind'] != 'white').astype(int)

clf = LogisticRegression()
scores = cross_val_score(clf, X, y, cv=5)

# Mean and standard deviation of the 5 fold scores, space-separated. The
# %-formatting produces the same output on Python 2 and Python 3.
print('%s %s' % (scores.mean(), scores.std()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.