Skip to content

Instantly share code, notes, and snippets.

@gibsramen
Created January 5, 2017 19:08
Show Gist options
  • Save gibsramen/fea6ade81e587cc910bf72aee1d19e48 to your computer and use it in GitHub Desktop.
Save gibsramen/fea6ade81e587cc910bf72aee1d19e48 to your computer and use it in GitHub Desktop.
library(dplyr)
# Load the credit-card default data set.
# The file is read with header = FALSE, so the header rows come in as data:
# row 2 supplies the column names and rows 1-2 are then dropped.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
d <- read.csv("default of credit card clients.csv",
              header = FALSE, stringsAsFactors = FALSE)
colnames(d) <- unlist(d[2, ])
d <- d[-c(1, 2), ]
# Give column 25 a short name; it is compared against "1" later in the
# script as the default flag.
colnames(d)[25] <- "dflt"
# LIMIT_BAL has to be numeric for the medians computed further down
# (presumably the column is character here because the header rows were
# read as data -- confirm against the CSV).
d$LIMIT_BAL <- as.numeric(d$LIMIT_BAL)
# MALE VS. FEMALE
# ----------------
# 1 = male, 2 = female
by_sex <- group_by(d, SEX)
# Median credit limit and share of defaulters for each SEX group.
# default_rate = (rows with dflt == "1") / (rows in the group).
# sum(..., na.rm = TRUE) is used instead of length(dflt[dflt == "1"]):
# subsetting with a logical index that contains NA yields NA elements, so
# missing dflt values would have been counted as defaults.
summarize(
  by_sex,
  median_limit_bal = median(LIMIT_BAL),
  default_rate = sum(dflt == "1", na.rm = TRUE) / n()
)
# HIGH SCHOOL VS. HIGHER
# ----------------------
# EDUCATION codes: 1 = grad school, 2 = univ, 3 = high school
# HE coding:
#    1 = higher education (EDUCATION 1 or 2)
#    0 = high school      (EDUCATION 3)
#   -1 = any other code (e.g. 4, 5, 6); it still appears as its own group
#        in the output and is meant to be ignored
# A missing EDUCATION value stays NA: the first test is FALSE, and the
# `== "3"` test then yields NA.
d$HE <- ifelse(
  d$EDUCATION %in% c("1", "2"), 1,
  ifelse(d$EDUCATION == "3", 0, -1)
)
by_he <- group_by(d, HE)
# Median credit limit and share of defaulters for each HE group.
# default_rate = (rows with dflt == "1") / (rows in the group).
# sum(..., na.rm = TRUE) keeps missing dflt values out of the numerator;
# length(dflt[dflt == "1"]) would have counted them as defaults.
summarize(
  by_he,
  median_limit_bal = median(LIMIT_BAL),
  default_rate = sum(dflt == "1", na.rm = TRUE) / n()
)
# 30-39 VS. 45-55
# ---------------
# AGE2 coding:
#    0 = age 30-39
#    1 = age 45-55
#   -1 = any other age; it still appears as its own group in the output
# AGE is converted to numeric before the range tests. Comparing a character
# column with a number coerces the number to a string, so the test would be
# a locale-dependent string comparison rather than a numeric one.
age_num <- as.numeric(d$AGE)
d$AGE2 <- ifelse(
  age_num >= 30 & age_num <= 39, 0,
  ifelse(age_num >= 45 & age_num <= 55, 1, -1)
)
by_age <- group_by(d, AGE2)
# Median credit limit and share of defaulters for each AGE2 group.
# default_rate = (rows with dflt == "1") / (rows in the group).
# sum(..., na.rm = TRUE) keeps missing dflt values out of the numerator;
# length(dflt[dflt == "1"]) would have counted them as defaults.
summarize(
  by_age,
  median_limit_bal = median(LIMIT_BAL),
  default_rate = sum(dflt == "1", na.rm = TRUE) / n()
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment