This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# THIS SCRIPT USES THE LIBRARY AT: | |
# https://github.com/hzeller/rpi-rgb-led-matrix | |
# BE SURE TO CLONE IT AND READ THE README, as highlighted in the video :) | |
import os, time, threading, random | |
import feedparser | |
from PIL import Image, ImageFont, ImageDraw | |
from random import shuffle |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
%matplotlib inline | |
# Import the data using the file path
data = pd.read_csv('Ames_Housing_Sales.csv')
# Keep an untouched copy so later transformations of `data` do not affect it.
df = data.copy()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect the names of all object-dtype (string categorical) columns.
is_object = data.dtypes == object  # filtering by string categoricals
one_hot_encode_cols = data.dtypes[is_object].index.tolist()  # list of categorical fields

# Encode these columns as categoricals so one hot encoding works on split data (if desired)
for col in one_hot_encode_cols:
    data[col] = pd.Categorical(data[col])

# Do the one hot encoding; drop_first=True drops the first level of each column.
data = pd.get_dummies(data, columns=one_hot_encode_cols, drop_first=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import LinearRegression | |
from sklearn.preprocessing import StandardScaler | |
lr = LinearRegression()

# Separate the target column from the feature matrix.
y_s_col = "SalePrice"
X_s = data.drop(y_s_col, axis=1)
y_s = data[y_s_col]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a list of float columns to check for skewing
mask = df.dtypes == float
float_cols = df.columns[mask]

skew_limit = 0.75  # define a limit above which we will log transform
skew_vals = df[float_cols].skew()
# Showing the skewed columns | |
skew_cols = (skew_vals | |
.sort_values(ascending=False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split | |
y_col = 'SalePrice'

# Split the data that is one-hot encoded
# NOTE(review): `data_ohc` is not defined in the visible snippet; it is
# presumably the one-hot encoded frame built earlier -- confirm.
feature_cols = [col for col in data_ohc.columns if col != y_col]
X_data_ohc = data_ohc[feature_cols]
y_data_ohc = data_ohc[y_col]

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train_ohc, X_test_ohc, y_train_ohc, y_test_ohc = train_test_split(
    X_data_ohc,
    y_data_ohc,
    test_size=0.3,
    random_state=42,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import LinearRegression | |
from sklearn.metrics import mean_squared_error | |
LR = LinearRegression()

# Data that have been one-hot encoded
LR = LR.fit(X_train_ohc, y_train_ohc)

# Predict on both splits so train and test results can be compared.
y_train_ohc_pred = LR.predict(X_train_ohc)
y_test_ohc_pred = LR.predict(X_test_ohc)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
plt.figure(figsize=(12, 5))

# Configure seaborn's global look before drawing.
sns.set_context('talk')
sns.set_style('ticks')
sns.set_palette('dark')

ax = plt.axes()
# we are going to use y_test_ohc, y_test_ohc_pred
# (actual test targets on the x axis, predictions on the y axis)
ax.scatter(y_test_ohc, y_test_ohc_pred, alpha=.5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.preprocessing import StandardScaler, PolynomialFeatures | |
from sklearn.model_selection import KFold, cross_val_predict | |
from sklearn.linear_model import LinearRegression, Lasso, Ridge | |
from sklearn.metrics import r2_score | |
from sklearn.pipeline import Pipeline | |
# 3-fold cross-validation splitter; shuffling with a fixed seed keeps folds reproducible.
kf = KFold(shuffle=True, random_state=72018, n_splits=3)

# Feature matrix and target vector.
X = data.drop('SalePrice', axis=1)
y = data.SalePrice
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import RidgeCV | |
from sklearn.metrics import mean_squared_error | |
# root-mean-squared error function
def rmse(ytrue, ypredicted):
    """Return the root-mean-squared error between *ytrue* and *ypredicted*.

    Computed as the square root of ``mean_squared_error(ytrue, ypredicted)``.
    """
    return np.sqrt(mean_squared_error(ytrue, ypredicted))
# Candidate values handed to RidgeCV through its `alphas` argument.
alphas = [0.005, 0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 80]
ridgeCV = RidgeCV(alphas=alphas, |
OlderNewer