Skip to content

Instantly share code, notes, and snippets.

@digorithm
Created April 2, 2015 00:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save digorithm/ad742f9314f76e732888 to your computer and use it in GitHub Desktop.
Save digorithm/ad742f9314f76e732888 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
try:
    # scikit-learn >= 0.18 exposes cross-validation helpers here.
    from sklearn.model_selection import cross_val_score
except ImportError:
    # Fallback for older scikit-learn releases.
    from sklearn.cross_validation import cross_val_score
from sklearn.linear_model import LogisticRegression
"""
Predict whether a wine is red or white from the measurements in the
wine-quality CSV files, using logistic regression evaluated with 5-fold
cross-validation.
"""
# Load the two wine-quality datasets; both files are semicolon-delimited.
reds = pd.read_csv('winequality-red.csv', sep=';')
whites = pd.read_csv('winequality-white.csv', sep=';')

# Quick visual check: fixed acidity of every red wine, plotted by row index.
# Indexing with [] (rather than .get) fails loudly if the column is missing.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(reds.index, reds['fixed acidity'], 'ro')
ax.set_title('Wines vs fixed acidity')
ax.set_xlabel('red wine index')
ax.set_ylabel('Fixed Acidity')
plt.show()

# Tag each row with its colour, then stack both frames into one dataset.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
reds['kind'] = 'red'
whites['kind'] = 'white'
wines = pd.concat([reds, whites], ignore_index=True)

# Features are every column except the target. Dropping 'kind' by name
# replaces the removed .ix indexer and does not depend on column order.
X = wines.drop('kind', axis=1)

# Binarize the labels: white -> 0, anything else (i.e. red) -> 1.
y = (wines['kind'] != 'white').astype(int)

# Score a default logistic regression with 5-fold cross-validation.
clf = LogisticRegression()
scores = cross_val_score(clf, X, y, cv=5)

# Mean and standard deviation of the per-fold scores, space separated.
# The %-formatted call prints the same text on Python 2 and Python 3.
print('%s %s' % (scores.mean(), scores.std()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment