Skip to content

Instantly share code, notes, and snippets.

@saveyak
Created December 21, 2021 21:00
Show Gist options
  • Save saveyak/37827796ae68043fe5f81a22698e8a17 to your computer and use it in GitHub Desktop.
Save saveyak/37827796ae68043fe5f81a22698e8a17 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(readxl)
#Notes:
#Excel file from here, "Child Tax Credit Table": https://www.census.gov/data/tables/2021/demo/hhp/hhp39.html
#Headers:
#Category = Spending category
#Total = total number of adults
#CTC_spent = total number of adults in households that received a child tax credit in the last four weeks and reported that they mostly spent it
#CTC_saved = received credit and mostly saved it
#CTC_debt = received credit and mostly used it to pay off debt
#CTC_dnr = received credit but did say how they mostly spent it
#No_CTC = did not receive credit
#DNR = did not respond (I guess this is the estimate of how many people would not respond if everyone in the US got this survey?)
xlsx_file <- "ctc1_week39.xlsx" #read Excel file
headers = c("category","total","ctc_spent","ctc_saved","ctc_debt","ctc_dnr","no_ctc","dnr","sheetname") #List of header names
mysheetlist <- excel_sheets(xlsx_file) #List of the names of each sheet in the Excel file
for (i in 1:length(mysheetlist[])){ #Loop through every sheet in the Excel file
df = read_excel(path=xlsx_file, sheet = i) #Read each sheet
df$sheetname = mysheetlist[i] #Add sheetname column
df = df[c(7,9:25),] #Select rows 7,9-25
colnames(df) = headers #Rename headers
df = df %>% mutate_at(c(2:8), as.numeric) #Convert columns to numeric
df[is.na(df)] = 0 #Replace NAs with 0s
total_ctc = rowSums(df[1, 3:6]) #Find total number of adults who are in a household that received tax credit in last four weeks
df$pct = df$total/total_ctc*100 #Find percent of adults with tax credit who reported spending it on this category
df[1,1] = "Total population 18+" #This cell just says "Total" so I'm clarifying total of what
df[1,10] = total_ctc/df[1,2]*100 #Replace row 1, column 10 with percent of adult population in household that received tax credit
assign(mysheetlist[[i]], df) #Return a dataframe with same name as the sheet
datalist[[i]] = df #create list of all dataframes
}
big_data = do.call(rbind, datalist) #bind all dataframes together in one giant dataframe
write_csv(big_data,"household_pulse_survey.csv") #save as CSV
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment