Last active
May 3, 2021 23:44
-
-
Save zacharystansell/fd1ebd6e3cda8a175680b78a4b271497 to your computer and use it in GitHub Desktop.
USDA-NASS crop acreage by county 2015-2020
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "NASS" | |
author: "Zachary J. Stansell" | |
date: '`r format(Sys.Date(), "%d-%b-%Y")`' | |
output: | |
bookdown::html_document2: | |
highlight: pygments | |
theme: united | |
number_sections: no | |
toc: yes | |
toc_float: | |
collapsed: yes | |
smooth_scroll: yes | |
pdf_document: | |
toc: yes | |
--- | |
sources and credit: | |
https://github.com/ropensci/rnassqs | |
https://www.r-bloggers.com/2019/11/rnassqs-accessing-usda-agricultural-data-via-api/ | |
https://sheilasaia.rbind.io/post/2020-06-30-nass-api-part2/ | |
https://cran.r-project.org/web/packages/usdarnass/vignettes/usdarnass.html | |
```{r setup, include=FALSE} | |
knitr::opts_chunk$set(echo = TRUE) | |
#install_github('ropensci/rnassqs') | |
#devtools::install_github("hrbrmstr/hrbrthemes") | |
api_key <- readLines("NASS_KEY") | |
nassqs_auth(key = api_key) | |
######################## | |
# Packages | |
######################## | |
library("devtools") | |
library("png") | |
library("rnassqs") | |
library("tidyverse") | |
library("socviz") | |
library("classInt") | |
library("showtext") | |
#library("magick") | |
``` | |
```{r plotting fun} | |
CROPMAP <- | |
function(BREAKS = breaks, | |
HEIGHT = height, | |
SELECT_CROP_GROUP = select_crop_group, | |
SELECT_CROP_NAME = select_crop_name, | |
YEAR_START = year_start, | |
YEAR_END = year_end, | |
OVERWRITE = overwrite, | |
FILE = file) { | |
# Read spreadsheet of crops and metadata from NASS (had to handmade it) | |
#filter by | |
crop <- read.csv(file, stringsAsFactors = FALSE) %>% | |
filter(action %in% select_crop_group) | |
ifelse( | |
SELECT_CROP_NAME == "all", | |
crop <- crop, | |
crop <- crop %>% filter(crop %in% SELECT_CROP_NAME) | |
) | |
# Start loop over crops to make county-level chlorphleths | |
for (i in dim(crop)[1]:1) { | |
#check to see if I've already made the plot | |
if (OVERWRITE == "n" & | |
file.exists(paste("rst/crops/", crop$crop[i], ".png", sep = ""))) { | |
next | |
} else { | |
# purge all data in case of error to prevent spillover | |
# set parameters | |
params <- list( | |
commodity_desc = crop$crop[i], | |
domaincat_desc = "NOT SPECIFIED", | |
agg_level_desc = "county", | |
#state_alpha = "OR", | |
year = year_start:year_end | |
) | |
# count number of records... | |
# api won't provide > 50K | |
# i looped by year and glued data together | |
rec.count <- nassqs_record_count(params) | |
# get crop acerage | |
tryCatch({ | |
if (rec.count < 49999) { | |
dat1 <- | |
nassqs_acres(params) %>% | |
# drop these weird (D) and (Z).. not sure what they mean | |
mutate(Value = na_if(Value, " (Z)")) %>% | |
mutate(Value = na_if(Value, " (D)")) %>% | |
# trim whitespace | |
mutate(Value = as.numeric(gsub(",", "", Value))) %>% | |
# create county level fips code | |
mutate(id = paste(state_fips_code, county_code, sep = "")) | |
} else { | |
# glue together multiple years if dataset is too big | |
data_list <- lapply(year_start:year_end, function(yr) { | |
params[['year']] <- yr | |
tryCatch( | |
nassqs_acres(params), | |
error = function(e) | |
NULL | |
) | |
}) | |
# same munging as above, but with rbind | |
dat1 <- bind_rows(data_list) %>% | |
mutate(Value = na_if(Value, " (Z)")) %>% | |
mutate(Value = na_if(Value, " (D)")) %>% | |
mutate(Value = as.numeric(gsub(",", "", Value))) %>% | |
#mutate(Value = replace_na(Value, 0)) %>% | |
mutate(id = paste(state_fips_code, county_code, sep = "")) | |
} | |
# cleaning data | |
dat2 <- dat1 %>% | |
group_by(id) %>% | |
# average across years | |
summarize(Value = mean(Value, na.rm = TRUE)) %>% | |
mutate(Value = replace_na(Value, 0)) | |
# was having issue with warnings, so i temp turned them off | |
defaultW <- getOption("warn") | |
options(warn = -1) | |
BREAKS.SPLIT <- | |
ifelse(rec.count$count > 1250, | |
breaks, | |
ifelse(rec.count$count > 50, 5, 3)) | |
# set the number of breaks.. i thought the kmeans did the best job | |
breaks_qt <- | |
classIntervals(var = dat2$Value, | |
n = BREAKS.SPLIT, | |
style = "kmeans") | |
# hacky attempt to fix the classInt issue where it will drop levels | |
# esp w/ small data | |
ifelse(BREAKS.SPLIT == length(breaks_qt$brks), | |
BREAKS.SPLIT <- BREAKS.SPLIT, | |
BREAKS.SPLIT <- length(breaks_qt$brks) - 1) | |
# turn warnings back onn | |
options(warn = defaultW) | |
dat3 <- left_join(county_map, dat2, by = "id") %>% | |
mutate(Value = replace_na(Value, 0)) | |
# kind of hacky rounding function for breaks..."shave off 2 digits" | |
# rounding <- unlist(lapply(breaks_qt$brks, function (x) { | |
# round(x, (2 - nchar(as.integer(x)))) | |
# })) | |
rounding <- unlist(lapply(breaks_qt$brks, function (x) { | |
signif(x, 3) | |
})) | |
# configure breaks into human readable lines | |
plot.breaks <- | |
c(0, rounding[2:(BREAKS.SPLIT-1) ], | |
paste(rounding[BREAKS.SPLIT], | |
"-", | |
rounding[BREAKS.SPLIT + 1], sep = "")) | |
# join data with county map | |
dat3 <- left_join(county_map, dat2, by = "id") %>% | |
mutate(Value = replace_na(Value, 0)) | |
# chop data up by breaks | |
dat4 <- | |
mutate( | |
dat3, | |
Value = cut( | |
Value, | |
right = FALSE, | |
breaks_qt$brks, | |
include.lowest = TRUE, | |
dig.lab = 2 | |
) | |
) | |
# make a plot of crop i | |
ggplot( | |
data = dat4, | |
mapping = aes( | |
x = long, | |
y = lat, | |
fill = Value, | |
group = group | |
) | |
) + | |
# county lines | |
geom_polygon(color = alpha("black", 0.75), size = 0.02) + | |
coord_equal() + | |
# set color according to spreadsheet | |
scale_fill_brewer(palette = crop$color[i], | |
labels = plot.breaks) + | |
labs( | |
title = paste(crop$crop[i]), | |
subtitle = paste("mean acres harvested", year_start, "to", year_end) | |
) + | |
# map theme from | |
theme_map() + | |
guides(fill = guide_legend(nrow = 1)) | |
# save plot | |
ggsave( | |
plot = last_plot(), | |
file = paste("rst/crops/", crop$crop[i], ".png", sep = ""), | |
height = height, | |
width = height * 1.3355, | |
units = "in", | |
dpi = "retina" | |
) | |
# sloppy attempt to bypass api errors | |
}, error = function(e) { | |
print(paste("there was an error: beep-boop", crop$crop[i])) | |
}) | |
rm(list = c("params", "dat4", "breaks_qt")) | |
} | |
} | |
} | |
``` | |
```{r theme map} | |
# https://socviz.co/appendix.html#appendix | |
# modified from the delightful website https://socviz.co/maps.html | |
theme_map <- function(base_size = 9, | |
base_family = "") { | |
require(grid) | |
theme_bw(base_size = base_size, base_family = base_family) %+replace% | |
theme( | |
axis.line = element_blank(), | |
axis.text = element_blank(), | |
axis.ticks = element_blank(), | |
axis.title = element_blank(), | |
panel.background = element_blank(), | |
panel.border = element_blank(), | |
panel.grid = element_blank(), | |
panel.spacing = unit(0, "lines"), | |
plot.background = element_rect(fill = "black"), | |
#plot.margin=grid::unit(c(0,0,0,0), "mm"), | |
legend.justification = c(0, 0), | |
legend.text = element_text(color = "white",size=11,family="roboto"), | |
legend.position = c(-0.005, 0.94), | |
strip.background = element_rect(fill = "black"), | |
legend.background = element_rect(fill = "black"), | |
plot.title = element_text( | |
color = "white", | |
hjust = 0, | |
vjust = -0, | |
size = 24, | |
face = "bold", | |
family="roboto" | |
), | |
plot.subtitle = element_text( | |
color = "white", | |
hjust = 0, | |
vjust = -1.05, | |
size = 12, | |
#face = "bold", | |
family="roboto" | |
) | |
) | |
} | |
``` | |
```{r make individual plots} | |
##################################################################### | |
# Set batch parameters | |
breaks = 7 | |
height = 8 | |
select_crop_group = c("redo", "good") # can be c("drop","redo","good") | |
select_crop_name = "all" # can be "all" or list e.g. c("apples","pears","peaches","plums") | |
year_start = 2000 | |
#year_end = as.numeric(format(as.Date(Sys.Date(), format = "%d/%m/%Y"), "%Y")) | |
year_end = 2020 | |
overwrite = "y" | |
file = "raw/crops.csv" | |
##################################################################### | |
CROPMAP(SELECT_CROP_NAME = "all", OVERWRITE = "n") | |
CROPMAP(SELECT_CROP_NAME = "all", OVERWRITE = "n") | |
CROPMAP(SELECT_CROP_NAME = "all", OVERWRITE = "n") | |
CROPMAP(SELECT_CROP_NAME = "all", OVERWRITE = "n") | |
CROPMAP(SELECT_CROP_NAME = "all", OVERWRITE = "n") | |
``` | |
```{r make the plot, eval=FALSE} | |
### Read data | |
START <- Sys.time() | |
#CROPMAP(OVERWRITE = "n") | |
maps <- | |
list.files(path = "rst/crops", | |
pattern = ".png", | |
full.names = TRUE) #%>% | |
# sample(size=4, replace=FALSE) | |
######################## | |
# Set plotting parameters | |
######################## | |
# Number of images | |
imgNo <- length(maps) | |
resolution <- 300 | |
# set plot dimensions in inches ~ eg 4" x 4" cell | |
cellHeight <- height | |
cellWidth <- height * 1.263 | |
# Automatically set grid size (eg. 10 x 10 for 97 images) | |
#gridSize <- ceiling(sqrt(imgNo)) | |
gridWidth = 9 | |
gridHeight = 12 | |
# Or manually define grid | |
#gridSize <- 9 | |
######################## | |
# Plotting | |
######################## | |
png( | |
"rst/multi/crop_production.png", | |
width = cellWidth * gridWidth, | |
height = cellHeight * gridHeight, | |
units = "in", | |
res = resolution | |
) | |
# pdf( | |
# "rst/multi/crop_production.pdf", | |
# width = cellWidth * gridWidth, | |
# height = cellHeight * gridHeight, | |
# units = "in", | |
# #res = resolution | |
# ) | |
par(mfrow = c(gridHeight, gridWidth), | |
mar = c(0, 0, 0, 0)) | |
for (i in 1:imgNo) { | |
plot( | |
1:1, | |
ty = "n", | |
xlab = NULL, | |
ylab = NULL, | |
xaxt = 'n', | |
yaxt = 'n', | |
ann = FALSE | |
) | |
image <- readPNG(maps[[i]]) | |
# i don't know why i need this majick number, lol. | |
majick <- .435 | |
rasterImage( | |
image = image, | |
ybottom = 1 - majick, | |
ytop = 1 + majick, | |
xleft = 1 - majick, | |
xright = 1 + majick, | |
interpolate = TRUE | |
) | |
} | |
dev.off() | |
END <- Sys.time() | |
END - START | |
``` | |
```{bash, convert, engine.opts='-l'} | |
# convert to 6K width, keep ratio | |
convert rst/multi/crop_production.png -resize 5000 rst/multi/crop_production_5K.png | |
convert rst/multi/crop_production.png -resize 12500 rst/multi/crop_production_12K.png | |
``` | |
```{bash, gif, engine.opts='-l'} | |
convert -resize 10% -delay 2 -loop 0 rst/crops/*.png rst/gif/agriculture.gif | |
ffmpeg \ | |
-framerate 60 \ | |
-pattern_type glob \ | |
-i rst/crops/*.png \ | |
-r 15 \ | |
-vf scale=512:-1 \ | |
rst/gif/agriculture.gif \ ; | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment