Skip to content

Instantly share code, notes, and snippets.

@aleszu
Last active August 6, 2019 15:55
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aleszu/c13bb718a0b1d7de429900ab5f8004f3 to your computer and use it in GitHub Desktop.
Save aleszu/c13bb718a0b1d7de429900ab5f8004f3 to your computer and use it in GitHub Desktop.
library(data.table)
library(dplyr)
library(R.utils)
# JUST MASS
mass <- gunzip("arcos-ma-statewide-itemized.tsv.gz")
mass_opioids <- fread(file = 'arcos-ma-statewide-itemized.tsv')
glimpse(mass_opioids) # 2,574,240 observations
summary(mass_opioids)
mass_opioids$DOSAGE_UNIT <- as.numeric(mass_opioids$DOSAGE_UNIT) # convert into numeric
# First count total number of records by BUYER_DEA_NO
worst_pharmacies_counts <- mass_opioids %>%
count(BUYER_DEA_NO, sort=TRUE, name = "total_records") %>%
glimpse()
# Then group by individual pharmacies and calculate total DOSAGE_UNIT for each
worst_pharmacies <- mass_opioids %>%
group_by(BUYER_DEA_NO, BUYER_NAME, BUYER_CITY) %>%
summarise(total_dosage_unit = sum(DOSAGE_UNIT)) %>%
arrange(desc(total_dosage_unit)) %>%
left_join(worst_pharmacies_counts, by="BUYER_DEA_NO") %>% # Merge in total_records counts
glimpse()
worst_pharmacies
# Write CSV
write.csv(worst_pharmacies, "worst_pharmacies.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment