Skip to content

Instantly share code, notes, and snippets.

@braz
Last active February 27, 2019 16:47
Python plotting of "Vinho Verde" red wine dataset for linear regression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
# Fit and plot a one-variable linear regression (quality ~ alcohol) on the
# "Vinho Verde" red wine dataset.

# URL for the Wine Quality Portuguese "Vinho Verde" red wine dataset (UCI Machine Learning Repository)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"

# Resolve urlopen for Python 3, falling back to its Python 2 location.
# Only the import lives in the try body: it is the only statement that can
# raise ImportError, so network errors are never mistaken for a missing module.
try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen

data_names = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
    'quality (0-10)',
]
# Columns used as the regression feature and target.
x_column = 'alcohol'
y_column = 'quality (0-10)'

# Download the file and load the CSV into a pandas DataFrame.
# The file's own header row is skipped (skiprows=1) and replaced by data_names.
raw_data = urlopen(url)
try:
    data = pd.read_csv(raw_data, sep=";", header=None, skiprows=1)
finally:
    # Always release the HTTP connection, even if parsing fails.
    raw_data.close()
data.columns = data_names

# Create linear regression object
regr = linear_model.LinearRegression()
# scikit-learn expects a 2-D (n_samples, n_features) array, hence the reshape.
x_data = data[x_column].values.reshape(-1, 1)
y_data = data[y_column].values.reshape(-1, 1)
# once the data is reshaped, running the fit is simple
regr.fit(x_data, y_data)

# Open exactly one figure. A plt.clf() before this call would implicitly
# create (and later show) an extra blank figure.
plt.figure(figsize=(10, 6))
plt.title('Alcohol vs Quality')
plt.xlabel(x_column)
plt.ylabel(y_column)
plt.scatter(data[x_column].values, data[y_column].values, label='observations')
# Plot the fit for the linear regression on top of the data
plt.plot(x_data, regr.predict(x_data), color='black', linewidth=3, label='linear fit')
# The legend is drawn last so that it picks up both labelled artists.
plt.legend()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment