Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Plot initial Data
# Bucket ages into ranges: the distribution is easier to read per range than
# per individual year. Intervals are right-closed, i.e. (0,30], (30,50],
# (50,70] and (70,Inf].
# The top break is Inf so the "71+" bucket really is open-ended; with a finite
# top break every age above it would silently turn into NA.
ds$AgeRanges <- cut(
  ds$Age,
  breaks = c(0, 30, 50, 70, Inf),
  labels = c("11-30", "31-50", "51-70", "71+")
)

# Bin yearly income into ordered categories.
# - Inf as the top break keeps incomes above 100000 in "Wealthy" no matter how
#   large they are, instead of dropping the largest ones to NA.
# - include.lowest = TRUE closes the first interval on the left, so an income
#   of exactly 0 lands in "Low" rather than NA.
ds$IncomeCategories <- cut(
  x = ds$YearlyIncome,
  breaks = c(0, 20000, 50000, 70000, 100000, Inf),
  labels = c("Low", "Lower", "Middle", "Upper", "Wealthy"),
  include.lowest = TRUE
)

# Every row counts as one observation; summing Count over a group therefore
# yields the number of rows in that group, which helps with building the plots.
ds$Count <- 1
# Columns of `ds` to plot, one plot per column.
features <- c(
  "AgeRanges",
  "MaritalStatus",
  "Gender",
  "IncomeCategories",
  "TotalChildren",
  "NumberChildrenAtHome",
  "Education",
  "Occupation",
  "IsHomeOwner",
  "NumberCarsOwned",
  "YearsSinceFirstPurchase",
  "CommuteDistance",
  "CountryRegionCode"
)

# Build a list of ggplot objects, one per entry in `features`. Each plot is a
# horizontal column chart of Count per feature value, split into one panel per
# value of IsCardUser.
plts <- lapply(features, function(feature_name) {
  # Keep only the three columns this plot needs.
  plot_data <- ds[, c(feature_name, "Count", "IsCardUser")]

  # The feature is always the first selected column; give it a fixed name so
  # the same aesthetic mapping works for every feature.
  names(plot_data)[1] <- "Feature"

  ggplot(plot_data, aes(x = Feature, y = Count, fill = Feature)) +
    geom_col() +
    coord_flip() +
    facet_wrap(~IsCardUser, ncol = 2) +
    # Label the axis with the real column name instead of "Feature".
    labs(x = feature_name) +
    # Fill only repeats what the axis already shows, so hide the legend.
    theme(legend.position = "none")
})

# NOTE(review): multiplot() is not defined in this snippet. If it is the
# widely used Cookbook-for-R / Rmisc helper, a list of plots has to be passed
# as `plotlist = plts` -- confirm against the definition actually in use.
multiplot(plts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.