Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save Tadge-Analytics/052cd46b0fffbcc8d20c6203da5c49a5 to your computer and use it in GitHub Desktop.
Save Tadge-Analytics/052cd46b0fffbcc8d20c6203da5c49a5 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(httr)
# Build `latest_two_csv_files`: the combined rows of the two most recent
# daily-report CSV files in the CSSEGISandData/COVID-19 GitHub repository.
#
# Steps:
#   1. List every path in the repository via the GitHub git-trees API.
#   2. Keep the CSV files under csse_covid_19_daily_reports/ and parse the
#      month-day-year date out of each file name.
#   3. Keep the two newest files, download each with read_csv(), and unnest
#      the downloaded rows into one tibble.
#   4. Clean the column names and drop helper/rate columns.
#
# The result keeps `date` (parsed from the file name) and `full_url` next to
# the downloaded columns, so each row can be traced back to its source file.
latest_two_csv_files <-
  GET("https://api.github.com/repos/CSSEGISandData/COVID-19/git/trees/master?recursive=1") %>%
  # Turn an HTTP error response into an R error here, instead of letting an
  # error payload fail obscurely further down the pipeline.
  stop_for_status() %>%
  # parse_json() expects JSON text, so extract the response body first.
  content(as = "text", encoding = "UTF-8") %>%
  jsonlite::parse_json() %>%
  pluck("tree") %>%
  # One repository path per tree entry, as a character vector.
  map_chr("path") %>%
  # Single-column tibble; the column is named `value`.
  enframe(name = NULL) %>%
  filter(
    str_detect(value, fixed("csse_covid_19_data/csse_covid_19_daily_reports/")),
    # Escaped and anchored so that only a real ".csv" extension matches.
    str_detect(value, "\\.csv$")
  ) %>%
  mutate(
    date = word(value, -1, sep = "/"),  # file name, e.g. "<mm-dd-yyyy>.csv"
    date = word(date, 1, sep = "\\."),  # strip the extension
    date = lubridate::mdy(date)
  ) %>%
  arrange(desc(date)) %>%
  head(2) %>% # keep only the latest 2 csv paths
  mutate(full_url = paste0("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/", value)) %>%
  mutate(downloaded_data = map(full_url, ~read_csv(.x
    ## if you are using older files, sometimes it is better to manually specify the column datatype
    # , col_types = cols(.default = "c")
  ))) %>%
  unnest(downloaded_data) %>%
  ## if combining with older files (which used different columns)
  # unite(Latitude, Lat, na.rm = TRUE) %>%
  # unite(Longitude, Long_, na.rm = TRUE) %>%
  # unite(Last_Update, `Last Update`, na.rm = TRUE) %>%
  # unite(Province_State, `Province/State`, na.rm = TRUE) %>%
  # unite(Country_Region, `Country/Region`, na.rm = TRUE) %>%
  janitor::clean_names() %>%
  ## if using cols(.default = "c") above
  # mutate(last_update = lubridate::mdy_hm(last_update)) %>%
  # mutate_at(.vars = c("latitude", "longitude", "confirmed", "deaths", "recovered", "active"),
  # .funs = as.numeric) %>%
  # any_of() drops these columns when present and does not error when a
  # downloaded file lacks one of them (the column set differs between files).
  select(-any_of(c("value", "incidence_rate", "case_fatality_ratio")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment