Created
December 21, 2021 21:00
-
-
Save saveyak/37827796ae68043fe5f81a22698e8a17 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(readxl) | |
#Notes: | |
#Excel file from here, "Child Tax Credit Table": https://www.census.gov/data/tables/2021/demo/hhp/hhp39.html | |
#Headers: | |
#Category = Spending category | |
#Total = total number of adults | |
#CTC_spent = total number of adults in households that received a child tax credit in the last four weeks and reported that they mostly spent it | |
#CTC_saved = received credit and mostly saved it | |
#CTC_debt = received credit and mostly used it to pay off debt | |
#CTC_dnr = received credit but did say how they mostly spent it | |
#No_CTC = did not receive credit | |
#DNR = did not respond (I guess this is the estimate of how many people would not respond if everyone in the US got this survey?) | |
xlsx_file <- "ctc1_week39.xlsx" #read Excel file | |
headers = c("category","total","ctc_spent","ctc_saved","ctc_debt","ctc_dnr","no_ctc","dnr","sheetname") #List of header names | |
mysheetlist <- excel_sheets(xlsx_file) #List of the names of each sheet in the Excel file | |
for (i in 1:length(mysheetlist[])){ #Loop through every sheet in the Excel file | |
df = read_excel(path=xlsx_file, sheet = i) #Read each sheet | |
df$sheetname = mysheetlist[i] #Add sheetname column | |
df = df[c(7,9:25),] #Select rows 7,9-25 | |
colnames(df) = headers #Rename headers | |
df = df %>% mutate_at(c(2:8), as.numeric) #Convert columns to numeric | |
df[is.na(df)] = 0 #Replace NAs with 0s | |
total_ctc = rowSums(df[1, 3:6]) #Find total number of adults who are in a household that received tax credit in last four weeks | |
df$pct = df$total/total_ctc*100 #Find percent of adults with tax credit who reported spending it on this category | |
df[1,1] = "Total population 18+" #This cell just says "Total" so I'm clarifying total of what | |
df[1,10] = total_ctc/df[1,2]*100 #Replace row 1, column 10 with percent of adult population in household that received tax credit | |
assign(mysheetlist[[i]], df) #Return a dataframe with same name as the sheet | |
datalist[[i]] = df #create list of all dataframes | |
} | |
big_data = do.call(rbind, datalist) #bind all dataframes together in one giant dataframe | |
write_csv(big_data,"household_pulse_survey.csv") #save as CSV |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment