Skip to content

Instantly share code, notes, and snippets.

@bgonzalezbustamante
Last active April 20, 2020 14:19
Show Gist options
  • Save bgonzalezbustamante/15363fb0d33ed373dcba16f6cf6ce17f to your computer and use it in GitHub Desktop.
Save bgonzalezbustamante/15363fb0d33ed373dcba16f6cf6ce17f to your computer and use it in GitHub Desktop.
Johns Hopkins University CSSE COVID-19 Dataset Scraper UPDATED
##############################################################
## Johns Hopkins University CSSE COVID-19 Dataset Scraper
## R version 3.6.1 (2019-07-05) -- "Action of the Toes"
## Date: April 2020
## Bastián González-Bustamante
## University of Oxford
## E-mail: bastian.gonzalezbustamante@politics.ox.ac.uk
## Website: http://users.ox.ac.uk/~shil5311/
## COVID-19 Pandemic in South America Project
## OSF-Project DOI: 10.17605/OSF.IO/6FM7X
## http://bgonzalezbustamante.github.io/COVID-19-South-America/
##############################################################
## Packages
library(tidyverse)
library(lubridate)
## Local Directory
## NOTE(review): setwd() ties the script to the caller's working directory;
## it is kept because the CSV export at the end of the script writes to a
## relative path inside this folder.
setwd("00.Data")
## Johns Hopkins University CSSE COVID-19 Dataset
## Deprecated
## JHU_data <- read.csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv", sep = ",")
## Current
JHU_data <- read.csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv", sep = ",")
## Period Coverage
## The dates are taken from the date columns actually present in the download
## instead of assuming that the series ends at today() - 1. If the upstream
## file is published late (or early), a calendar-based sequence has a
## different length than the data and data.frame(<country>, dates_JHU) fails
## or mislabels rows.
id_cols_JHU <- c("Province.State", "Country.Region", "Lat", "Long")
date_cols_JHU <- setdiff(names(JHU_data), id_cols_JHU)
## read.csv() (check.names = TRUE) turns a header such as "1/22/20" into
## "X1.22.20": strip the leading "X" and parse month.day.year.
dates_JHU <- as.Date(sub("^X", "", date_cols_JHU), format = "%m.%d.%y")
## Fail early if a header could not be parsed as a date
stopifnot(length(dates_JHU) > 0, !anyNA(dates_JHU))
## Kept for backward compatibility: first day covered and number of days
## between the first and the last day in the data
begin_JHU <- min(dates_JHU) ## 22 January
dd_JHU <- as.numeric(max(dates_JHU) - begin_JHU)
dates_JHU
## COUNTRY SERIES
## Build the daily incident-case series for a single country.
##
## data          Wide JHU data frame: Province.State, Country.Region, Lat,
##               Long, followed by one cumulative-count column per day.
## country       Value of Country.Region to select.
## code          Name given to the incident-case column (ISO3 code).
## national_only If TRUE, keep only the row whose Province.State is empty
##               (drops overseas territories and dependencies).
##
## Returns a data.frame with two columns: `code` (daily incident cases, the
## first day set to 0) and `dates_JHU` (Date). Stops unless exactly one row
## of `data` matches, so a country split into several province rows is never
## silently reduced to its first row.
extract_incident_cases <- function(data, country, code,
                                   national_only = FALSE) {
  matches <- data[["Country.Region"]] == country
  if (national_only) {
    matches <- matches & data[["Province.State"]] == ""
  }
  idx <- which(matches)
  if (length(idx) != 1L) {
    stop("Expected exactly one row for ", country, " but found ",
         length(idx), call. = FALSE)
  }
  long <- data[idx, , drop = FALSE] %>%
    pivot_longer(-c(Province.State, Country.Region, Lat, Long),
                 names_to = "Date", values_to = "cumulative_cases")
  ## read.csv() turns a header such as "1/22/20" into "X1.22.20"; parsing the
  ## header keeps every count attached to its own date, whatever the number
  ## of days contained in the download
  dates <- as.Date(sub("^X", "", long$Date), format = "%m.%d.%y")
  if (anyNA(dates)) {
    stop("Could not parse the date columns for ", country, call. = FALSE)
  }
  out <- data.frame(c(0, diff(long$cumulative_cases)), dates)
  names(out) <- c(code, "dates_JHU")
  out
}
## UNITED KINGDOM
## Cayman and Channel Islands, Gibraltar, Isle of Man, and Montserrat excluded
uk <- extract_incident_cases(JHU_data, "United Kingdom", "GBR",
                             national_only = TRUE)
## SPAIN
esp <- extract_incident_cases(JHU_data, "Spain", "ESP")
## ITALY
ita <- extract_incident_cases(JHU_data, "Italy", "ITA")
## ARGENTINA
arg <- extract_incident_cases(JHU_data, "Argentina", "ARG")
## BOLIVIA
bol <- extract_incident_cases(JHU_data, "Bolivia", "BOL")
## BRAZIL
bra <- extract_incident_cases(JHU_data, "Brazil", "BRA")
## CHILE
chl <- extract_incident_cases(JHU_data, "Chile", "CHL")
## COLOMBIA
col <- extract_incident_cases(JHU_data, "Colombia", "COL")
## ECUADOR
ecu <- extract_incident_cases(JHU_data, "Ecuador", "ECU")
## PARAGUAY
pry <- extract_incident_cases(JHU_data, "Paraguay", "PRY")
## PERU
per <- extract_incident_cases(JHU_data, "Peru", "PER")
## URUGUAY
ury <- extract_incident_cases(JHU_data, "Uruguay", "URY")
## VENEZUELA
ven <- extract_incident_cases(JHU_data, "Venezuela", "VEN")
## Sanity check: the incident cases of each country add up to its latest
## cumulative count
print(c(GBR = sum(uk$GBR), ESP = sum(esp$ESP), ITA = sum(ita$ITA),
        ARG = sum(arg$ARG), BOL = sum(bol$BOL), BRA = sum(bra$BRA),
        CHL = sum(chl$CHL), COL = sum(col$COL), ECU = sum(ecu$ECU),
        PRY = sum(pry$PRY), PER = sum(per$PER), URY = sum(ury$URY),
        VEN = sum(ven$VEN)))
# Combine Datasets
## Full outer join of every country series on the shared date column, folded
## left to right in the same order as a chain of pairwise merges
covid19 <- Reduce(
  function(merged, country) {
    merge(merged, country, by = "dates_JHU", all = TRUE)
  },
  list(uk, esp, ita, arg, bol, bra, chl, col, ecu, pry, per, ury, ven)
)
## Slice Data
## Keep the merged table from its tenth row onwards
covid19 <- covid19 %>%
  slice(10:nrow(covid19))
## Export CSV
write.csv(covid19, file = "data_covid19.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment