Skip to content

Instantly share code, notes, and snippets.

@vnegi10
Created July 26, 2020 15:38
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save vnegi10/962855cace3f4dbd86b30d7408068591 to your computer and use it in GitHub Desktop.
Save vnegi10/962855cace3f4dbd86b30d7408068591 to your computer and use it in GitHub Desktop.
# Load and compile Julia packages for the current session
# This might take a while, needs to be done only once
# Recommended to list and compile all packages at once
using Distributions
using StatsBase
using CSV
using DataFrames
using StatsPlots
using Plots
gr()
using Dates
using PlotThemes
"""
    find_country(data_df, country::AbstractString)

Return the row of `data_df` that holds the country-level entry for `country`.

Only rows whose "Province/State" entry is `missing` are considered; the first of
those whose "Country/Region" entry equals `country` is returned as a row of the
filtered table.

# Throws
- `ArgumentError` if no such row exists.
"""
function find_country(data_df, country::AbstractString)
    # Keep only rows with a missing "Province/State" entry; total numbers for
    # countries can be read this way.
    is_country_level = ismissing.(data_df[!, Symbol("Province/State")])
    data_df_new = data_df[is_country_level, :]

    # `isequal` always yields a Bool, so a missing "Country/Region" entry cannot make
    # `findfirst` fail on a non-boolean value the way a broadcast `.==` would.
    loc = findfirst(isequal(country), data_df_new[!, Symbol("Country/Region")])
    if loc === nothing
        throw(ArgumentError(
            "country \"$country\" not found among rows without a Province/State entry",
        ))
    end
    return data_df_new[loc, :] # select the matching row
end
# Download the global time series of confirmed cases from the CSSEGISandData/COVID-19
# GitHub repository into a local CSV file (source of the data).
file = download(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
    "covid_19_global_data.csv",
);
# `download` returns the path it wrote to, so read from `file` rather than repeating
# the name. Plain `DataFrame(...)` is used because `DataFrame!` was deprecated and
# later removed from DataFrames.jl.
data_df = DataFrame(CSV.File(file)); # read data from file into a DataFrame
############################# Plot time series data for multiple countries ####################################
# The column names from the 5th column onward are the reporting dates.
date_strings = names(data_df)[5:end];
# Dates are written as month/day/two-digit-year.
format = Dates.DateFormat("m/d/y")
# A two-digit year is read as e.g. 0020, so shift every date by 2000 years.
x = Date.(date_strings, format) .+ Year(2000)
# Countries whose cumulative case counts are collected.
countries = ["US", "Russia", "Italy", "Iran"];
# Starts empty; one column per country is added in the loop below.
y = DataFrame()
for country in countries
    country_row = find_country(data_df, country)
    # A single row is turned back into a one-row DataFrame so it can be stacked.
    country_df = DataFrame(country_row)
    # Wide -> long: every column from the 5th onward becomes one entry of `:value`.
    long_df = stack(country_df, 5:ncol(country_df))
    y[!, Symbol(country)] = long_df[!, :value]
end
# Correction for the number of cases reported on 10-06-2020; only applies when an
# "India" column is present.
if "India" in names(y)
    y[141, :India] = 286131
end
# Table dimensions; `cols` (number of country columns) is also read further down.
rows, cols = size(y)
gr(size = (900, 600))
# One series per column of `y`, each labelled with its column name
# (labels are passed as a 1-row matrix, one entry per series).
@df y plot(
    x,
    cols(1:cols),
    label = permutedims(names(y)),
    xlabel = "Time",
    ylabel = "Total number of reported cases",
    xticks = x[1:7:end],          # one tick every 7th date
    xrotation = 45,
    marker = (:diamond, 4),
    line = (:line, "gray"),
    legend = :topleft,
    grid = false,
    framestyle = :semi,
    legendfontsize = 9,
    tickfontsize = 9,
    formatter = :plain,
)
# Overlay a dotted reference line at one million cases on the current plot.
# The constant series is built directly instead of being stored as a temporary column
# of `y`, so `y` never needs to be cleaned up afterwards and stays intact even if
# plotting throws.
one_million = fill(10^6, nrow(y));
display(plot!(
    x,
    one_million,
    linestyle = :dot,
    linewidth = 5,
    color = :red,
    label = "One_million",
))
savefig("Time_series_1.png")
############################# Calculate number of daily reported cases ####################################
# Table dimensions; `cols` is read by the bar plot that follows.
rows, cols = size(y)
# Daily increase = difference between consecutive cumulative counts. `diff` computes
# this per column and returns a new vector, so `y` is left untouched and no defensive
# copy is required. The result has one row fewer than `y`.
y_daily = mapcols(diff, y);
# Remove the first date: a daily increase can only be reported from the next date on.
# Slicing copies, so `x` itself is not modified.
x_daily = x[2:end];
gr(size = (900, 600))
# Bar chart of the daily increases, one series per country column; labels are
# taken from the column names of `y` and passed as a 1-row matrix.
display(@df y_daily bar(
    x_daily,
    cols(1:cols),
    label = permutedims(names(y)),
    xlabel = "Time",
    ylabel = "Daily number of reported cases",
    xticks = x[1:7:end],          # one tick every 7th date
    xrotation = 45,
    legend = :topleft,
    grid = false,
    framestyle = :semi,
    legendfontsize = 9,
    formatter = :plain,
))
savefig("Daily_cases_1.png")
############################# Find top 5 countries with highest number of confirmed cases ####################################
# The source file is downloaded fresh, so the latest date is taken from the last
# column of the data instead of being hard-coded.
latest_date = Symbol(last(names(data_df)))
# Sort the original DataFrame in place, descending by the values of the latest date.
sort!(data_df, latest_date, rev = true)
# Guard against tables with fewer than five rows.
top_rows = 1:min(5, nrow(data_df))
# NOTE(review): rows are taken as-is, so an entry with a Province/State value would be
# ranked as its own row — confirm this is acceptable for the data in use.
countries_sort = data_df[top_rows, Symbol("Country/Region")] # list of top 5 countries
latest_reported_cases = data_df[top_rows, latest_date] # number of reported cases in top 5 countries
gr(size = (700, 400))
display(bar(
    countries_sort,
    latest_reported_cases,
    orientation = :h,
    xlabel = "Total number of confirmed cases",
    ylabel = "Countries",
    legend = false,
    color = collect(1:length(countries_sort)), # a different palette colour per bar
    bar_width = 0.50,
    linecolor = :match,
    yflip = true,
    grid = false,
    framestyle = :semi,
    formatter = :plain,
))
savefig("Top_five.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment