Skip to content

Instantly share code, notes, and snippets.

# Load and compile Julia packages for the current session
# This might take a while, needs to be done only once
# Recommended to list and compile all packages at once
using Distributions
using StatsBase
using CSV
using DataFrames
using StatsPlots
using Plots
# Download the confirmed-cases time series (CSSEGISandData/COVID-19 repository on GitHub)
# and store it in the working directory; `download` returns the local path it wrote to.
file = download(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
    "covid_19_global_data.csv",
); # source of the data
# `DataFrame!` was deprecated in DataFrames 0.22 and removed in 1.0. Passing
# `copycols = false` is its documented equivalent: the parsed columns are wrapped
# in a DataFrame without being copied.
data_df = DataFrame(CSV.File(file); copycols = false); # read data from file into a DataFrame
"""
    find_country(data_df, country::String)

Return the first row of `data_df` whose "Country/Region" entry equals `country`,
looking only at rows whose "Province/State" entry is `missing` (the rows from which
country-wide totals are read).

# Arguments
- `data_df`: table with "Province/State" and "Country/Region" columns.
- `country`: country name to look up, compared for exact equality.

# Throws
- `ArgumentError` if no row without a province/state matches `country`.
"""
function find_country(data_df, country::String)
    # keep only rows with a missing "Province/State" entry
    national = data_df[ismissing.(data_df[!, Symbol("Province/State")]), :]
    # `isequal` never returns `missing`, so a missing country name cannot break the search
    loc = findfirst(isequal(country), national[!, Symbol("Country/Region")])
    # `findfirst` returns `nothing` when there is no match; report that explicitly
    # instead of failing later with an opaque indexing error
    loc === nothing && throw(
        ArgumentError("no country-level row found for \"$country\" in \"Country/Region\""),
    )
    return national[loc, :] # select the matching row
end
# `Date`, `Year` and `Dates.DateFormat` come from the Dates standard library; it is loaded
# here because the package list at the top of the script does not include it.
using Dates
date_strings = names(data_df)[5:end]; # read dates from the column names (5th column onward)
format = Dates.DateFormat("m/d/y") # specify given format: month/day/year
# The column names carry two-digit years, which parse as e.g. year 0020;
# adding Year(2000) shifts them to 2020.
x = parse.(Date, date_strings, format) .+ Year(2000)
# Countries whose reported-case series are collected into `y`
countries = ["Italy", "Germany", "India", "United Kingdom"];
y = DataFrame() # starts empty; one column per country is added below
for country in countries
    # `find_country` returns a single row; wrap it in a one-row DataFrame so it can be stacked
    country_df = DataFrame(find_country(data_df, country));
    ncols = size(country_df, 2);
    # Stack columns 5 onward (the date columns) into long format, one row per date
    country_long = stack(country_df, 5:ncols);
    # The stacked values become the column named after the country
    y[!, Symbol(country)] = country_long[!, :value]
end
rows,cols = size(y)
gr(size=(900,600)) # select the GR backend with a 900 x 600 figure size
# Plot every column of `y` against the dates in `x`.
# NOTE(review): the original call was cut off after `marker` and never closed, which made
# the rest of the file unparsable. It is closed here as-is; any further keyword arguments
# that may have followed are not reconstructed.
@df y plot(x, cols(1:cols),
    label = permutedims(names(y)), # 1 x n row of column names, one legend label per series
    xlabel = "Time",
    ylabel = "Total number of reported cases",
    xticks = x[1:7:end], # one tick for every 7th date
    xrotation = 45,
    marker = (:diamond,4),
)
############################# Calculate number of daily reported cases ####################################
# Work on an independent copy so that changes to y_tmp never affect y
y_tmp = deepcopy(y);
rows, cols = nrow(y_tmp), ncol(y_tmp)
dfrows = size(y_tmp, 1);
name = names(y_tmp);
# Same columns as y_tmp, but with one row fewer (dfrows - 1 rows)
y_daily = similar(y_tmp, dfrows - 1);
# Use the last column of the table (the latest date) instead of a hard-coded date such as
# "7/24/20": the data is downloaded live, so a fixed date would no longer be the latest one.
latest_date = Symbol(last(names(data_df)))
sort!(data_df, latest_date, rev=true) # sort original DataFrame in descending order (mutates data_df)
# NOTE(review): data_df also holds province-level rows, so the first five rows are taken
# to be country totals only as long as the largest entries have no province split - confirm.
countries_sort = data_df[1:5,Symbol("Country/Region")] # list of top 5 countries
latest_reported_cases = data_df[1:5, latest_date] # number of reported cases in top 5 countries
gr(size=(700,400))
# NOTE(review): the original call was cut off after `legend` and never closed; it is closed
# here as-is, and any further keyword arguments that may have followed are not reconstructed.
display(bar(countries_sort, latest_reported_cases,
    orientation = :h, # horizontal bars
    xlabel = "Total number of confirmed cases",
    ylabel = "Countries",
    legend = false,
))
# Download the .toml files from the GitHub repository and activate the environment
using InstantiateFromURL
github_project(
    "vnegi10/covid19-notebooks-julia";
    path = "",
    version = "master",
    force = true,
)
# `DataFrame!` was deprecated in DataFrames 0.22 and removed in 1.0. Passing
# `copycols = false` is its documented equivalent: the parsed columns are wrapped
# in a DataFrame without being copied. `download` returns the local path it wrote
# to, so that path is reused when reading the file.

# Time series of recovered cases
file = download(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv",
    "covid_19_global_data_recovered.csv",
); # source of the data
data_df_recovered = DataFrame(CSV.File(file); copycols = false); # read data from file into a DataFrame

# Time series of deaths
file = download(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
    "covid_19_global_data_deaths.csv",
); # source of the data
data_df_deaths = DataFrame(CSV.File(file); copycols = false); # read data from file into a DataFrame