@mbjoseph
Created March 29, 2017 20:24
Parse and read USDA AMS data
# Import USDA AMS report data into a single data frame --------------------
library(stringr)    # str_locate()
library(tidyverse)  # read_fwf(), fwf_positions(), bind_rows(), %>%

convert_to_df <- function(file) {
  # Read a raw AMS text report and parse it into a single data frame.
  # The report is a concatenation of fixed-width tables separated by blank lines.
  lines <- readLines(file)

  # blank lines mark the boundaries between sub-tables, so a cumulative count
  # of blank lines assigns each line to a chunk
  idx <- cumsum(lines == '')
  sub_file_names <- paste0("subfile", unique(idx), ".txt")

  # write a small temporary file for each chunk, dropping lines with no text
  keep <- logical(length(sub_file_names))
  for (i in seq_along(sub_file_names)) {
    to_write <- lines[idx == i]
    to_write <- to_write[grepl("[[:alpha:]]", to_write)]
    if (length(to_write) > 0) {
      cat(to_write, file = sub_file_names[i], sep = "\n")
      keep[i] <- TRUE
    }
  }
  # keep only the file names that were actually written (empty chunks are skipped)
  sub_file_names <- sub_file_names[keep]

  # read in the data
  column_names <- c("Location", "Report Date", "Class Description",
                    "Selling Basis Description", "Grade Description",
                    "Head Count", "Weight Range Low", "Weight Range High",
                    "Weighted Average", "Price Low", "Price High",
                    "Average Price", "Comments", "Pricing Point")
  read_subfile <- function(file) {
    file_lines <- readLines(file)
    # locate each column label in the header row; those offsets define the
    # fixed-width field boundaries
    header <- file_lines[1]
    matches <- str_locate(header, column_names)
    starts <- matches[, "start"]
    # each field ends just before the next one starts; the last field ends at
    # the end of its header label
    ends <- c(starts[-1] - 1, matches[nrow(matches), "end"])
    read_fwf(file, fwf_positions(starts, ends, column_names), skip = 1)
  }
  # combine all chunks, then remove the temporary files
  d <- lapply(sub_file_names, read_subfile) %>%
    bind_rows()
  unlink(sub_file_names)
  d
}
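
# Illustration of the column-position logic inside read_subfile(), using a toy
# header string rather than real AMS output: str_locate() returns the start and
# end offsets of each column label in the header row, and those offsets become
# the field boundaries handed to fwf_positions().
toy_header <- "Location   Head Count"
str_locate(toy_header, c("Location", "Head Count"))
#      start end
# [1,]     1   8
# [2,]    12  21
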
df <- convert_to_df("~/Downloads/AMS_Bulls_all.txt")
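
# A quick look at the parsed result; the csv file name below is hypothetical,
# and the input path above is specific to the original author's machine.
glimpse(df)
# write_csv(df, "ams_bulls_parsed.csv")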