Skip to content

Instantly share code, notes, and snippets.

@slarge
Created February 5, 2019 15:37
Show Gist options
  • Save slarge/3326deedfd3147726082881e8d59d984 to your computer and use it in GitHub Desktop.
Save slarge/3326deedfd3147726082881e8d59d984 to your computer and use it in GitHub Desktop.
Quick demo of the fedregs package to explore northeastern US fisheries regulations
## Install fedregs only when it is not already available, so that re-running
## the script does not reinstall the package (and hit the network) every time.
if (!requireNamespace("fedregs", quietly = TRUE)) {
  install.packages("fedregs")
}
library(fedregs)
library(dplyr)
library(ggplot2)
library(quanteda)
library(magick)
## For more info, check out the fedregs github repository:
## https://github.com/slarge/fedregs
## Fetch the 2017 text of Title 50, Chapter 6, Part 648 (northeastern US
## fisheries regulations). `return_tidytext = TRUE` asks fedregs for the
## tidytext form of the result.
regs <- fedregs::cfr_text(
  year = 2017,
  title_number = 50,
  chapter = 6,
  part = 648,
  # token = "ngrams", # uncomment for ngrams of length 2
  # n = 2,            # uncomment for ngrams of length 2
  return_tidytext = TRUE,
  verbose = FALSE
)
## "stop words" are common words that don't hold a lot of significance
# One-column lookup table of English stop words, used below in an anti-join.
# Built with tibble() because data_frame() is deprecated.
stop_words <- tibble(word = quanteda::stopwords("english"))
## Clean up words to try and find the most meaningful ones, then stem them.
## NOTE(review): tidyr >= 1.0 warns when unnest() is called without `cols`;
## name the list-column explicitly once its name has been confirmed.
clean_words <- regs %>%
  tidyr::unnest() %>%
  mutate(
    word = gsub("[[:punct:]]", "", word),  # remove any remaining punctuation
    word = gsub("^[[:digit:]]*", "", word) # strip leading digits (1st, 15th, ...)
  ) %>%
  anti_join(stop_words, by = "word") %>%   # remove "stop words"
  filter(
    # Drop anything that still parses as a number. Ordinary words coerce to NA
    # by design, so the "NAs introduced by coercion" warning is just noise.
    is.na(suppressWarnings(as.numeric(word))),
    # adios Roman numerals; every part of the pattern is optional, so it also
    # matches "" and removes words emptied by the gsub() calls above
    !grepl("^m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", word),
    !grepl("\\b[a-z]{1}\\b", word),        # get rid of one letter words
    # Web addresses: the dots are already gone, so match the literal "www"
    # prefix. The previous pattern "www*." also matched any word starting
    # with "ww".
    !grepl("\\bwww", word)
  ) %>%
  # Stem the character vector directly rather than round-tripping through a
  # quanteda tokens object, which only works while every row is one token.
  mutate(word = quanteda::char_wordstem(word))
## Tally each stem and keep the 50 most frequent ones (top_n() keeps ties at
## the cutoff). `word` becomes a factor ordered by count so the plot is sorted.
count_words <- local({
  per_word <- ungroup(summarise(group_by(clean_words, word), n = n()))
  ranked <- arrange(per_word, desc(n))
  leaders <- top_n(ranked, n = 50, wt = n)
  mutate(leaders, word = reorder(word, n))
})
## Plot: one thick horizontal segment per word, running from 0 to its count.
## NOTE(review): ggplot2 >= 3.4 prefers `linewidth` over `size` for lines;
## `size` is kept here for compatibility with older ggplot2 releases.
gg <- ggplot(count_words, aes(n, word)) +
  geom_segment(aes(xend = 0, yend = word), size = 3.5) +
  labs(
    y = NULL,
    x = "count",
    title = "Code of Federal Regulations",
    subtitle = "Title 50, Chapter VI, Part 648",
    caption = sprintf(
      "Data accessed on %s from:\n https://www.govinfo.gov/bulkdata/CFR/",
      format(Sys.Date(), "%d %B %Y")
    )
  ) +
  # theme_minimal() is a complete theme and replaces every theme element, so
  # it must come before the theme() tweaks; added afterwards it would discard
  # them.
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.direction = "horizontal",
    legend.position = "bottom",
    text = element_text(size = 6)
  )
## Save: open a magick graphics device, draw the plot on it, close the
## device, then write the captured image to disk.
fig <- magick::image_graph(width = 1016, height = 573, res = 72)
print(gg)
grDevices::dev.off()
magick::image_write(fig, path = "final.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment