# Plot initial data
# Bucket ages into ranges; raw ages are too granular to plot as categories.
# Intervals are right-closed: (0,30], (30,50], (50,70], (70,Inf).
# The top break is Inf so the open-ended "71+" label really covers every age
# above 70; a finite top break would silently turn older ages into NA.
# NOTE(review): the first bucket is (0,30] although it is labelled "11-30" --
# presumably no customer is 10 or younger; confirm against the data.
ds$AgeRanges <- cut(
  ds$Age,
  breaks = c(0, 30, 50, 70, Inf),
  labels = c("11-30", "31-50", "51-70", "71+")
)

# Bin yearly income into ordered categories.
# include.lowest = TRUE keeps an income of exactly 0 in "Low" instead of NA,
# and the Inf top break keeps incomes above 250000 in "Wealthy".
ds$IncomeCategories <- cut(
  x = ds$YearlyIncome,
  breaks = c(0, 20000, 50000, 70000, 100000, Inf),
  labels = c("Low", "Lower", "Middle", "Upper", "Wealthy"),
  include.lowest = TRUE
)

# Constant 1 per row: the plotting code sums this column to get bar heights,
# i.e. the number of rows per category.
ds$Count <- 1

# Columns of ds that get one chart each.
features <- c(
  "AgeRanges", "MaritalStatus", "Gender",
  "IncomeCategories", "TotalChildren", "NumberChildrenAtHome",
  "Education", "Occupation", "IsHomeOwner",
  "NumberCarsOwned", "YearsSinceFirstPurchase", "CommuteDistance",
  "CountryRegionCode"
)
# Build one horizontal bar chart per entry of `features`, split into two
# facets by IsCardUser. Returns a list of ggplot objects in `features` order.
plts <- lapply(features, FUN = function(f) {
  # Take only the columns used for the plot
  df <- ds[, c(f, "Count", "IsCardUser")]
  # Standardize the name of the feature column so the same aes() mapping
  # works for every feature
  colnames(df)[1] <- "Feature"
  ggplot(df) +
    aes(y = Count, x = Feature, fill = Feature) +
    # Count is 1 on every row, so the stacked columns add up to row counts
    geom_col() +
    # Restore the real feature name on the axis label
    xlab(f) +
    coord_flip() +
    facet_wrap(~IsCardUser, ncol = 2) +
    theme(legend.position = "none")
})

# Pass the list through `plotlist`; given positionally it would land in `...`
# as a single element and the plots would not be laid out together.
# NOTE(review): assumes multiplot() is the Cookbook-for-R / Rmisc helper with
# signature multiplot(..., plotlist = NULL, cols = 1, layout = NULL) -- confirm.
multiplot(plotlist = plts)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment