Skip to content

Instantly share code, notes, and snippets.

@greimel
Created April 22, 2020 09:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save greimel/76ce72f6c95153b88dfc329f55dfec9c to your computer and use it in GitHub Desktop.
Save greimel/76ce72f6c95153b88dfc329f55dfec9c to your computer and use it in GitHub Desktop.
cleaning SCF
library("haven")
library("data.table")
#library("ggplot2")
wd <- getwd()
#setwd("scf-data")
#setwd(wd)
### SCF 1989--2016
scf89 <- read_dta("raw-data-2018/rscfp1989.dta")
scf92 <- read_dta("raw-data-2018/rscfp1992.dta")
scf95 <- read_dta("raw-data-2018/rscfp1995.dta")
scf98 <- read_dta("raw-data-2018/rscfp1998.dta")
scf01 <- read_dta("raw-data-2018/rscfp2001.dta")
scf04 <- read_dta("raw-data-2018/rscfp2004.dta")
scf07 <- read_dta("raw-data-2018/rscfp2007.dta")
scf10 <- read_dta("raw-data-2018/rscfp2010.dta")
scf13 <- read_dta("raw-data-2018/rscfp2013.dta")
scf16 <- read_dta("raw-data-2018/rscfp2016.dta")
### SCF 1983
scf83b <- data.table(read_dta("raw-data-2018/scf83b.dta"))
old = c("b3305", "b3320", "v1", "b3005", "b3201")
new = c("asset", "debt", "Y1", "wgt", "income")
old2 = c("b3705", "b3708", "b4001", "b3802", "b3602")
new2 = c("monthly rent", "houses", "NH_MORT", "other mortgages", "land contracts/notes")
old3 <- c("b3814", "b3831", "b3848", "b3815", "b3832", "b3849")
new3 <- c("paym1", "paym2", "paym3", "freq1", "freq2", "freq3")
## original mortgage, year purchased, purchase price, available
old4 <- c("b3101", "b3102", "b3104", "b3109", "b3112", "b3108", "b3107")
new4 <- c("fam_size", "no_18_plus", "no_18_minus", "family_type", "marital", "kids_total", "kids_away")
setnames(scf83b, old, new)
setnames(scf83b, old2, new2)
setnames(scf83b, old3, new3)
setnames(scf83b, old4, new4)
scf83b[, resdbt := `other mortgages` + `land contracts/notes`]
## frequency: 5 ... monthly, 6 ... annually
scf83b[, PAYMORT1 := 0]
scf83b[freq1 == 5, PAYMORT1 := paym1]
scf83b[freq1 == 6, PAYMORT1 := paym1 / 12]
scf83b[, PAYMORT2 := 0]
scf83b[freq2 == 5, PAYMORT2 := paym2]
scf83b[freq2 == 6, PAYMORT2 := paym2 / 12]
scf83b[, PAYMORT3 := 0]
scf83b[freq3 == 5, PAYMORT3 := paym3]
scf83b[freq3 == 6, PAYMORT3 := paym3 / 12]
scf83b[, paymorto := 0]
paynames = c(paste0("PAYMORT", 1:3), "paymorto")
scf83b[, kids := kids_total - kids_away]
scf83b[, adults := no_18_plus + no_18_minus - kids]
scf83 = scf83b[!is.na(asset) & !is.na(debt), .SD, .SDcols = c(new, new2,
"resdbt", paynames,
"kids", "adults")]
scf83[, networth := asset - debt]
scf83[, YY1 := Y1]
### SCF 1986
scf86b <- data.table(read_dta("raw-data-2018/scf86b.dta"))
old_86 <- c("c1449", "c1455", "c1457", "c1", "c1012", "b1", "c1009", "c1301")
new_86 <- c("asset", "debt", "networth", "Y1", "wgt", "Y1_83", "wgt_83", "income")
old_83 <- c("c1450", "c1456", "c1458")
new_83 <- paste0(new_86[1:3], "83")
old2_86 <- c("c1506", "c1512", "c1525", "c1423")
new2_86 <- c("monthly rent", "houses", "NH_MORT", "other mortgages")
old2_83 <- c( "c1513", "c1526", "c1424")
new2_83 <- paste0(new2_86[2:4], "83")
old3_86 <- c("c1535", "c1550", "c1536", "c1551")
new3_86 <- c("paym1", "paym2", "freq1", "freq2")
## original mortgage, year purchased, purchase price, available
#old4_83<- c("b3101", "b3102", "b3104", "b3109", "c1125", "b3108", "b3107")
old4_86 <- c("c1101", "c1102", "c1104", "c1112", "c1126", "c1111", "c1110")
new4_86 <- c("fam_size", "no_18_plus", "no_18_minus", "family_type", "marital", "kids_total", "kids_away")
## marital == 1 ... married, 6 and 7 ... life with spouse
## c1104 ... persons 18-
## c1107 ... (grand-)children 18+
setnames(scf86b, old_86, new_86)
setnames(scf86b, old2_86, new2_86)
setnames(scf86b, old3_86, new3_86)
setnames(scf86b, old4_86, new4_86)
setnames(scf86b, old_83, new_83)
setnames(scf86b, old2_83, new2_83)
scf86b[, resdbt := `other mortgages`]
## frequency: 5 ... monthly, 6 ... annually
scf86b[, PAYMORT1 := 0]
scf86b[freq1 == 5, PAYMORT1 := paym1]
scf86b[freq1 == 6, PAYMORT1 := paym1 / 12]
scf86b[, PAYMORT2 := 0]
scf86b[freq2 == 5, PAYMORT2 := paym2]
scf86b[freq2 == 6, PAYMORT2 := paym2 / 12]
scf86b[, PAYMORT3 := 0]
scf86b[, paymorto := 0]
paynames = c(paste0("PAYMORT", 1:3), "paymorto")
scf86b[, kids := no_18_minus + c1107]
scf86b[, adults := fam_size - kids]
scf86 = scf86b[!is.na(asset) & !is.na(debt), .SD, .SDcols = c(new_83, new_86, new2_83, new2_86,
"resdbt", paynames,
"kids", "adults")]
scf86[, YY1 := Y1]
### note: 1983 and 1986 are in original dollars, since 1989 in 2016 dollars
library(quantmod)
getSymbols('CPIAUCSL',src='FRED')
cpi_xts = apply.yearly(CPIAUCSL, colMeans)
cpi.dt = data.table(year = year(time(cpi_xts)), cpi = coredata(cpi_xts))
setnames(cpi.dt, "cpi.CPIAUCSL", "cpi")
cpi.dt[, cpi_2016 := cpi/cpi[year==2016][1L]]
cpi_factor_1983 = cpi.dt[year==1983, cpi_2016]
cpi_factor_1986 = cpi.dt[year==1986, cpi_2016]
## scale all numeric variables
cols <- colnames(scf83)[!colnames(scf83) %in% c("Y1", "wgt", "YY1", "kids", "adults")]
scf83[, (cols) := lapply(.SD, function(x) x/cpi_factor_1983), .SDcols=cols]
cols <- colnames(scf86)[!colnames(scf86) %in% c("Y1", "wgt", "YY1", "kids", "adults")]
scf86[, (cols) := lapply(.SD, function(x) x/cpi_factor_1986), .SDcols=cols]
### Combining the data sets
datasets = list(scf89, scf92, scf95, scf98, scf01, scf04, scf07, scf10, scf13, scf16, scf83, scf86)
years = c(seq(from=1989, to=2016, by=3), 1983, 1986)
dt = data.table(datasets[[1]])
dt[, year := years[1]]
## Rename 1989 version, it uses X instead of Y
setnames(dt, c("X1", "XX1"), c("Y1", "YY1"))
## add years to the identifiers
dt[, Z1 := paste0(Y1,"_",year)]
dt[, ZZ1 := paste0(YY1,"_",year)]
## set identifiers as keys
setkeyv(dt, c("Z1", "ZZ1"))
for (i in 2:length(years)) {
dt_next = data.table(datasets[[i]])
dt_next[, year := years[i]]
dt_next[, Z1 := paste0(Y1,"_",year)]
dt_next[, ZZ1 := paste0(YY1,"_",year)]
setkeyv(dt_next, c("Z1", "ZZ1"))
dt = rbind(dt, dt_next, fill=TRUE)
}
scf = dt
save(scf, file="cleaned-data/scf_1983_2016.RData")
#write_dta(scf, "cleaned-data/scf_1983_2016.dta")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment