Skip to content

Instantly share code, notes, and snippets.

@betterdatascience
betterdatascience / ridgelines.py
Created December 1, 2020 08:36
004_ridgeline_plots
# Ridgeline plot: one density curve of 'MaxTemp' per 'Month' group.
# joyplot() allocates its own figure (sized via `figsize`), so no prior
# plt.figure() call is made here -- it would only leave an empty extra figure.
joyplot(
    data=sydney[['MaxTemp', 'Month']],
    by='Month',
    figsize=(12, 8),
)
# The title is applied to the current axes, i.e. the figure joyplot just drew.
plt.title('Ridgeline Plot of Max Temperatures in Sydney', fontsize=20)
plt.show()
@betterdatascience
betterdatascience / ridgelines.py
Created December 1, 2020 08:26
003_ridgeline_plots
from pandas.api.types import CategoricalDtype
# Categorical dtype whose categories are the month names in calendar order
# (presumably so month groups sort chronologically rather than alphabetically).
cat_month = CategoricalDtype(
    categories=[
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    ]
)
# Re-type the existing 'Month' column with that dtype, then inspect the dtypes.
sydney['Month'] = sydney['Month'].astype(cat_month)
sydney.dtypes
@betterdatascience
betterdatascience / ridgelines.py
Created December 1, 2020 08:21
002_ridgeline_plots
# Keep only the rows whose 'Location' is Sydney, then drop the now-constant column.
sydney = df.query("Location == 'Sydney'")
sydney = sydney.drop('Location', axis=1)
# Convert 'Date' to datetimes. The unit is spelled out because pandas 2.x
# raises a TypeError for the unit-less 'datetime64' dtype in astype().
sydney['Date'] = sydney['Date'].astype('datetime64[ns]')
# Derive the month name (e.g. 'January') from each date.
sydney['Month'] = sydney['Date'].dt.month_name()
sydney.head()
@betterdatascience
betterdatascience / ridgelines.py
Created December 1, 2020 08:13
001_ridgeline_plots
import pandas as pd
import matplotlib.pyplot as plt
from joypy import joyplot
# Load only the four columns used below from the weather CSV.
df = pd.read_csv(
    'weatherAUS.csv',
    usecols=['Date', 'Location', 'MinTemp', 'MaxTemp'],
)
# Preview the first rows.
df.head()
# Explain the model's predicted probabilities for the test row at position 4
# and render the explanation (with the feature-value table) in the notebook.
exp = explainer.explain_instance(X_test.iloc[4], model.predict_proba)
exp.show_in_notebook(show_table=True)
# Explain the model's predicted probabilities for the test row at position 1
# and render the explanation (with the feature-value table) in the notebook.
exp = explainer.explain_instance(X_test.iloc[1], model.predict_proba)
exp.show_in_notebook(show_table=True)
import lime
from lime import lime_tabular
# Build a LIME explainer for tabular classification from the training features.
# The training frame is passed as a NumPy array; column labels are supplied
# separately as feature names, and the two classes are labelled 'bad'/'good'.
explainer = lime_tabular.LimeTabularExplainer(
    np.array(X_train),
    mode='classification',
    feature_names=X_train.columns,
    class_names=['bad', 'good'],
)
from sklearn.ensemble import RandomForestClassifier
# Fit a random forest with a fixed seed; fit() returns the estimator itself,
# so construction and training are chained.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
# Evaluate the fitted model on the held-out split.
score = model.score(X_test, y_test)
from sklearn.model_selection import train_test_split
# Features are every column except 'quality'; 'quality' is the target.
X, y = wine.drop(columns='quality'), wine['quality']
# Hold out 20% of the rows for testing, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
import numpy as np
import pandas as pd
# Load the wine dataset from the working directory and preview the first five rows.
wine = pd.read_csv('wine.csv')
wine.head(5)