Skip to content

Instantly share code, notes, and snippets.

@ashander
Last active January 16, 2017 21:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ashander/6f55b569df0eb933ec1c5d976ff7a849 to your computer and use it in GitHub Desktop.
Save ashander/6f55b569df0eb933ec1c5d976ff7a849 to your computer and use it in GitHub Desktop.
SNAP foods typically purchased
library(dplyr)
# Load the summary table. Column 1 is the commodity; columns 2-4 and 5-7
# each hold rank / raw / percent, first for SNAP households and then for
# non-SNAP households.
dat <- read.csv('snap_report_summary.csv', stringsAsFactors=FALSE)
snap_cols <- 2:4
non_snap_cols <- 5:7

# Split into one frame per household type (commodity column kept in both)
# and label each frame with the household type it describes.
dat_snap <- dat[, c(1, snap_cols)]
dat_non_snap <- dat[, c(1, non_snap_cols)]
dat_snap$household <- "SNAP"
dat_non_snap$household <- "non-SNAP"

# The two halves of the table come back from read.csv with different column
# names, so give the non-SNAP frame the SNAP frame's names before stacking
# (rbind matches data frame columns by name).
names(dat_non_snap) <- names(dat_snap)
dat_combined <- rbind(dat_snap, dat_non_snap)

# Drop the characters matched by `pattern` and parse what is left as a number.
strip_to_number <- function(x, pattern) {
  as.numeric(gsub(pattern, '', as.character(x)))
}

# `raw` is in millions of dollars ("$1,262.90"); convert it to plain dollars.
dat_combined$expenditures <- 1e6 * strip_to_number(dat_combined$raw, '(\\$|,)')
# `percent` arrives as text such as "19.2%"; keep only the numeric part.
dat_combined$percent <- strip_to_number(dat_combined$percent, '(\\%)')

d <- dat_combined # short alias, for brevity
# Compare SNAP vs non-SNAP expenditure shares
library(tidyr)

# Reshape to one row per commodity with the two household types' percentages
# side by side in columns `SNAP` and `non-SNAP`. `id` numbers the rows within
# each household type; the grouping is dropped again before spreading.
d_summary <- d %>%
  select(commodity, household, percent) %>%
  group_by(household) %>%
  mutate(id = row_number()) %>%
  ungroup() %>%
  spread(household, percent)

# Order commodities from the largest SNAP surplus to the largest SNAP deficit
# and freeze that order in a factor so plots keep it on the x axis.
share_gap <- d_summary$SNAP - d_summary$`non-SNAP`
d_summary <- d_summary[order(share_gap, decreasing=TRUE), ]
d_summary$`Broad Category` <- factor(d_summary$commodity, levels=d_summary$commodity)

# Does the distribution among categories differ between SNAP and non-SNAP
# households?
# chisq.test(x, y) with two numeric vectors coerces both to factors and tests
# independence of the distinct percentage values, which says nothing about
# whether the two budgets differ. Pass a 2 x k table instead (one row per
# household type, one column per commodity) so the test compares the two
# distributions across categories.
# NOTE(review): the cells are percentages used as pseudo-counts, so the
# p-value reflects an implied total of roughly 100 per household type --
# read it as a rough indication only.
d_chisq <- with(d_summary, chisq.test(rbind(SNAP, `non-SNAP`)))
library(ggplot2)

# Render `plot` into a PNG file at `path`.
# The plot is printed explicitly: ggplot objects are only auto-printed at top
# level, so without print() the file comes out blank when this script is run
# through source(). on.exit() closes the device even if rendering fails.
# Returns `path` invisibly.
save_png <- function(plot, path) {
  png(path)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(path)
}

# Pieces shared by both charts.
chart_title <- 'How food budgets really differ between SNAP and non-SNAP households*'
chart_source <- "\nBased on Table 1 of\nhttps://www.fns.usda.gov/sites/default/files/ops/SNAPFoodsTypicallyPurchased-Summary.pdf\nSee https://gist.github.com/ashander/6f55b569df0eb933ec1c5d976ff7a849"
chisq_p <- round(d_chisq$p.value, 2)
rotated_labels <- theme(axis.text.x = element_text(angle=90, vjust=0.5))

# Absolute difference in budget share per category, with a reference line at 0
# (no difference).
absolute_plot <- ggplot(d_summary) +
  geom_hline(yintercept=0) +
  geom_col(aes(x=`Broad Category`, y=SNAP - `non-SNAP`)) +
  ylab("difference in % expenses (SNAP - non-SNAP)") +
  labs(
    title=chart_title,
    caption=paste0("*They don't (p=", chisq_p, " in Chi Sq test).", chart_source)
  ) +
  rotated_labels
save_png(absolute_plot, 'absolute_differences.png')

# Ratio of budget shares per category, with a reference line at 1 (equal
# shares).
proportional_plot <- ggplot(d_summary) +
  geom_hline(yintercept=1) +
  geom_col(aes(x=`Broad Category`, y=SNAP / `non-SNAP`)) +
  ylab("proportional % expenses (SNAP / non-SNAP)") +
  labs(
    title=chart_title,
    caption=paste0("*They don't (p=", chisq_p, " in Chi Sq test) but that Baby Food.", chart_source)
  ) +
  rotated_labels
save_png(proportional_plot, 'proportional_differences.png')
# Data preparation: give the table extracted from the report short column
# names and save it as the CSV that the analysis script loads.
# The same three fields appear twice in the header, once per household type.
household_fields <- c("rank", "raw", "percent")
dat <- read.csv(file = 'tabula-SNAPFoodsTypicallyPurchased-Summary.csv')
names(dat) <- c("commodity", rep(household_fields, times = 2))
write.csv(dat, file = 'snap_report_summary.csv', row.names = FALSE)
commodity rank raw percent rank raw percent
Meat, Poultry and Seafood 1 $1,262.90 19.2% 1 $5,016.30 15.9%
Sweetened Beverages 2 $608.70 9.3% 5 $2,238.80 7.1%
Vegetables 3 $473.40 7.2% 2 $2,873.90 9.1%
Frozen Prepared Foods 4 $455.20 6.9% 8 $1,592.30 5.1%
Prepared Desserts 5 $453.80 6.9% 6 $2,021.20 6.4%
High Fat Dairy/Cheese 6 $427.80 6.5% 3 $2,483.20 7.9%
Bread and Crackers 7 $354.90 5.4% 7 $1,978.20 6.3%
Fruits 8 $308.20 4.7% 4 $2,271.20 7.2%
Milk 9 $232.70 3.5% 9 $1,211.00 3.8%
Salty Snacks 10 $225.60 3.4% 10 $969.70 3.1%
Prepared Foods 11 $202.20 3.1% 14 $707.00 2.2%
Cereal 12 $186.90 2.8% 11 $933.90 3.0%
Condiments and Seasoning 13 $174.60 2.7% 12 $878.90 2.8%
Fats and Oils 14 $155.10 2.4% 13 $766.90 2.4%
Candy 15 $138.20 2.1% 15 $701.40 2.2%
Baby Food 16 $126.80 1.9% 27 $198.20 0.6%
Juices 17 $110.40 1.7% 16 $605.40 1.9%
Coffee and Tea 18 $83.40 1.3% 17 $568.80 1.8%
Bottled Water 19 $78.10 1.2% 22 $377.40 1.2%
Eggs 20 $73.80 1.1% 21 $388.20 1.2%
Other Dairy Products 21 $69.80 1.1% 18 $549.50 1.7%
Pasta, Cornmeal, Other Cereal Products 22 $66.40 1.0% 23 $281.50 0.9%
Soups 23 $62.70 1.0% 20 $414.10 1.3%
Sugars 24 $60.90 0.9% 24 $260.30 0.8%
Nuts and Seeds 25 $53.20 0.8% 19 $445.90 1.4%
Beans 26 $38.30 0.6% 25 $234.50 0.7%
Rice 27 $30.10 0.5% 28 $131.00 0.4%
Jams, Jellies, Preserves and Other Sweets 28 $29.10 0.4% 29 $117.50 0.4%
Flour and Prepared Flour Mixes 29 $18.70 0.3% 30 $94.90 0.3%
Miscellaneous 30 $18.60 0.3% 26 $202.60 0.6%
com Rank $ (millions) % of total Rank $ (millions) % of total
Meat, Poultry and Seafood 1 $1,262.90 19.2% 1 $5,016.30 15.9%
Sweetened Beverages 2 $608.70 9.3% 5 $2,238.80 7.1%
Vegetables 3 $473.40 7.2% 2 $2,873.90 9.1%
Frozen Prepared Foods 4 $455.20 6.9% 8 $1,592.30 5.1%
Prepared Desserts 5 $453.80 6.9% 6 $2,021.20 6.4%
High Fat Dairy/Cheese 6 $427.80 6.5% 3 $2,483.20 7.9%
Bread and Crackers 7 $354.90 5.4% 7 $1,978.20 6.3%
Fruits 8 $308.20 4.7% 4 $2,271.20 7.2%
Milk 9 $232.70 3.5% 9 $1,211.00 3.8%
Salty Snacks 10 $225.60 3.4% 10 $969.70 3.1%
Prepared Foods 11 $202.20 3.1% 14 $707.00 2.2%
Cereal 12 $186.90 2.8% 11 $933.90 3.0%
Condiments and Seasoning 13 $174.60 2.7% 12 $878.90 2.8%
Fats and Oils 14 $155.10 2.4% 13 $766.90 2.4%
Candy 15 $138.20 2.1% 15 $701.40 2.2%
Baby Food 16 $126.80 1.9% 27 $198.20 0.6%
Juices 17 $110.40 1.7% 16 $605.40 1.9%
Coffee and Tea 18 $83.40 1.3% 17 $568.80 1.8%
Bottled Water 19 $78.10 1.2% 22 $377.40 1.2%
Eggs 20 $73.80 1.1% 21 $388.20 1.2%
Other Dairy Products 21 $69.80 1.1% 18 $549.50 1.7%
Pasta, Cornmeal, Other Cereal Products 22 $66.40 1.0% 23 $281.50 0.9%
Soups 23 $62.70 1.0% 20 $414.10 1.3%
Sugars 24 $60.90 0.9% 24 $260.30 0.8%
Nuts and Seeds 25 $53.20 0.8% 19 $445.90 1.4%
Beans 26 $38.30 0.6% 25 $234.50 0.7%
Rice 27 $30.10 0.5% 28 $131.00 0.4%
Jams, Jellies, Preserves and Other Sweets 28 $29.10 0.4% 29 $117.50 0.4%
Flour and Prepared Flour Mixes 29 $18.70 0.3% 30 $94.90 0.3%
Miscellaneous 30 $18.60 0.3% 26 $202.60 0.6%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment