Skip to content

Instantly share code, notes, and snippets.

@JarrydWannenburg
Last active August 18, 2022 16:10
Show Gist options
  • Save JarrydWannenburg/10db702c011aaaa572938678186dff55 to your computer and use it in GitHub Desktop.
Save JarrydWannenburg/10db702c011aaaa572938678186dff55 to your computer and use it in GitHub Desktop.
House_Prices_Adv_Reg_EDA
import altair as alt
import pandas as pd
# Ignore size limits: re-enable the 'default' data transformer with
# max_rows=None so no row cap is applied to the datasets passed to alt.Chart.
alt.data_transformers.enable('default', max_rows=None)
# Create a function that takes the dataset, a column name and the target
# variable and returns an interactive histogram next to a column-vs-target plot
def chart(dataset, column_name, target_var):
    """Build an interactive two-panel Altair view of a single column.

    The left panel counts the values of ``column_name``: one bar per category
    when the column has ``object`` dtype, a binned histogram otherwise. The
    right panel plots the column against ``target_var``: a boxplot per
    category for ``object`` columns, a scatterplot otherwise. Both panels use
    the same single-click selection; marks outside the selection are drawn in
    light gray.

    Args:
        dataset: DataFrame containing ``column_name`` and ``target_var``.
        column_name: Name of the column to explore (used on the x-axis).
        target_var: Name of the target column (y-axis of the right panel).

    Returns:
        The two panels concatenated horizontally (``a | b``). If the right
        panel cannot be constructed, a warning is issued and only the count
        chart is returned.
    """
    import warnings

    width = 500
    single = alt.selection_single()

    def _highlight():
        # Fresh condition per chart: selected marks blue, the rest gray.
        return alt.condition(single, alt.value('#4c78a8'), alt.value('lightgray'))

    # Object-dtype columns are treated as categorical (nominal, unbinned);
    # everything else is treated as quantitative and binned for the histogram.
    is_categorical = column_name in dataset.select_dtypes(include='object').columns
    type_suffix = ':N' if is_categorical else ':Q'
    binned = not is_categorical

    # Histogram / bar chart of the column's value counts.
    a = alt.Chart(dataset).mark_bar().encode(
        alt.X(column_name + type_suffix, bin=binned),
        alt.Y('count()'),
        color=_highlight(),
        tooltip=['count()', alt.Tooltip(column_name, bin=binned)],
    ).add_selection(single).properties(width=width)

    # Where possible, add a boxplot (categorical) or scatterplot (numeric)
    # of the column against the target. This panel is best-effort: a failure
    # must not discard the count chart that was already built.
    try:
        base = alt.Chart(dataset)
        marked = base.mark_boxplot() if is_categorical else base.mark_point()
        b = marked.encode(
            alt.X(column_name + type_suffix),
            alt.Y(target_var),
            color=_highlight(),
            tooltip=[target_var],
        ).add_selection(single).properties(width=width)
    except Exception as exc:
        warnings.warn(
            f"Could not build the {column_name!r} vs {target_var!r} chart "
            f"({exc}); returning the count chart only.",
            stacklevel=2,
        )
        return a

    return a | b
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment