Skip to content

Instantly share code, notes, and snippets.

@gurimusan
Last active July 29, 2018 05:13
Show Gist options
  • Save gurimusan/d7fb4a5c50a7fe553918c3f9a51ef97c to your computer and use it in GitHub Desktop.
Save gurimusan/d7fb4a5c50a7fe553918c3f9a51ef97c to your computer and use it in GitHub Desktop.
ワインの品質を予想する
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import train_test_split
def outlier_iqr(df, whisker=1.5):
    """Drop every row that holds an IQR outlier in at least one column.

    For each column summarised by ``df.describe()`` the lower and upper
    fences are ``Q1 - whisker * IQR`` and ``Q3 + whisker * IQR``, where
    ``IQR = Q3 - Q1``.

    Args:
        df: DataFrame to filter.
        whisker: Multiplier applied to the IQR when building the fences.
            The default of 1.5 reproduces the previous hard-coded value.

    Returns:
        A new DataFrame containing only the rows whose values all lie
        within the fences (bounds inclusive). Rows that already contain
        NaN are dropped as well, because a NaN never satisfies the
        comparison.
    """
    # describe() is computed once; both quartiles come from the same table.
    summary = df.describe()
    q1 = summary.loc['25%']
    q3 = summary.loc['75%']
    iqr = q3 - q1
    outlier_min = q1 - iqr * whisker
    outlier_max = q3 + iqr * whisker
    within_fences = (df >= outlier_min) & (df <= outlier_max)
    # Boolean-frame indexing turns out-of-range cells into NaN; dropna then
    # removes every row that has at least one such cell.
    return df[within_fences].dropna(how='any', axis=0)
def feature_normalize(df):
    """Standardise each column to zero mean and unit standard deviation.

    Args:
        df: DataFrame (or Series) of numeric values.

    Returns:
        An object of the same shape holding ``(df - mean) / std``, where
        ``mean`` and ``std`` come from ``df.mean()`` and ``df.std()``.
        A column whose standard deviation is exactly 0 (a constant column)
        is returned as all zeros instead of NaN.
    """
    centered = df - df.mean()
    spread = df.std()
    # A constant column has std == 0, which would make the division 0/0 = NaN
    # for the whole column. Dividing by 1 instead leaves the centered zeros.
    if isinstance(spread, pd.Series):
        spread = spread.mask(spread == 0, 1.0)
    elif spread == 0:
        spread = 1.0
    return centered / spread
if __name__ == '__main__':
    # Fetch the red-wine quality CSV (semicolon separated) and discard rows
    # flagged by the IQR filter. The filter runs over every column of the
    # file, including 'quality'.
    csv_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
    raw_wine = pd.read_csv(csv_url, sep=';')
    wine = outlier_iqr(raw_wine)

    # Explanatory variables; 'quality' is the regression target.
    feature_columns = [
        'fixed acidity', 'volatile acidity', 'citric acid',
        'residual sugar', 'chlorides', 'free sulfur dioxide',
        'total sulfur dioxide', 'density', 'pH', 'sulphates',
        'alcohol',
    ]
    # NOTE(review): the features are normalized before the split, so the
    # mean/std used here also include the rows that end up in the test set.
    X = feature_normalize(wine[feature_columns]).values
    Y = wine['quality'].values

    # Hold out 33% of the rows for evaluation (no fixed seed, so the split
    # and the printed score vary from run to run).
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.33)

    # Fit ordinary least squares on the training portion.
    model = linear_model.LinearRegression()
    model.fit(X_train, Y_train)
    predictions = model.predict(X_test)

    # Report the model's score on the held-out rows.
    print("Score:", model.score(X_test, Y_test))

    # Scatter of true quality against predicted quality.
    plt.scatter(Y_test, predictions)
    plt.xlabel("True Values")
    plt.ylabel("Predictions")
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment