# Created July 26, 2020 15:38
# Save vnegi10/962855cace3f4dbd86b30d7408068591 to your computer and use it in GitHub Desktop.
# This file contains bidirectional Unicode text that may be interpreted or compiled
# differently than what appears below. To review, open the file in an editor that
# reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Load and compile the Julia packages needed for the current session.
# This might take a while, but it needs to be done only once per session.
# It is recommended to list and compile all packages at once.
using Distributions | |
using StatsBase | |
using CSV | |
using DataFrames | |
using StatsPlots | |
using Plots | |
gr() | |
using Dates | |
using PlotThemes | |
"""
    find_country(data_df, country)

Return the row of `data_df` that holds the country-level totals for `country`.

Only rows whose "Province/State" entry is `missing` are searched, since those rows
carry the total numbers for a country. The first row whose "Country/Region" entry
equals `country` is returned.

# Arguments
- `data_df`: table with "Province/State" and "Country/Region" columns.
- `country`: name to look up in the "Country/Region" column.

# Throws
- `ArgumentError` if no country-level row matches `country`.
"""
function find_country(data_df, country::AbstractString)
    # Keep only rows with a missing "Province/State" entry (country totals).
    is_national = ismissing.(data_df[!, Symbol("Province/State")])
    national_df = data_df[is_national, :]

    # `isequal` never returns `missing`, so a missing country name cannot break the search.
    loc = findfirst(isequal(country), national_df[!, Symbol("Country/Region")])
    if loc === nothing
        throw(ArgumentError("no country-level row found for \"$country\""))
    end

    return national_df[loc, :]  # select the matching row
end
# Download the global confirmed-cases time series from the CSSEGISandData/COVID-19
# repository and keep the local path returned by `download`.
file = download("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
                "covid_19_global_data.csv");

# Read the file into a DataFrame. `DataFrame!` was deprecated and later removed from
# DataFrames.jl; the plain constructor copies the parsed columns, so the table is
# safe to mutate later (e.g. with `sort!`).
data_df = DataFrame(CSV.File(file));
############################# Plot time series data for multiple countries ####################################

# The dates are stored as the names of columns 5:end of the raw table.
date_strings = names(data_df)[5:end];
format = Dates.DateFormat("m/d/y")  # format used by the column names

# Two-digit years parse as year 0020; adding Year(2000) moves them to 2020.
x = Date.(date_strings, format) .+ Year(2000)

countries = ["US", "Russia", "Italy", "Iran"];

# One column per country, filled below with the cumulative reported cases.
y = DataFrame()
for country in countries
    # `find_country` returns a single row; wrap it in a DataFrame so it can be stacked.
    country_df = DataFrame(find_country(data_df, country));
    # Long format: one row per date column, with the counts in `:value`.
    long_df = stack(country_df, 5:size(country_df, 2));
    y[!, Symbol(country)] = long_df[!, :value]
end

# Correction for the number of cases reported on 10-06-2020 (only if India is selected).
if "India" in names(y)
    y[!, :India][141] = 286131
end
# Number of country columns to plot. A distinct name avoids confusion with the
# `cols(...)` column selector understood by `@df`.
ncountries = size(y, 2)

gr(size=(900,600))

# Cumulative reported cases, one series per country column of `y`.
@df y plot(x, cols(1:ncountries),
    label = permutedims(names(y)),   # 1 x n matrix: one label per series
    xlabel = "Time",
    ylabel = "Total number of reported cases",
    xticks = x[1:7:end],             # one tick per week
    xrotation = 45,
    marker = (:diamond,4),
    line = (:line, "gray"),
    legend = :topleft,
    grid = false,
    framestyle = :semi,
    legendfontsize = 9,
    tickfontsize = 9,
    formatter = :plain
)

# Horizontal reference line at one million cases. It is drawn from a standalone
# vector so that `y` never has to carry (and later drop) a temporary column.
one_million = fill(10^6, size(y, 1))
display(plot!(x, one_million,
    linestyle = :dot,
    linewidth = 5,
    color = :red,
    label = "One_million"))

savefig("Time_series_1.png")
############################# Calculate number of daily reported cases ####################################

# Daily increase = difference between consecutive cumulative counts. `diff` returns a
# new vector per column, so `y` itself is left untouched and no copy is needed.
y_daily = DataFrame()
for country_name in names(y)
    y_daily[!, country_name] = diff(y[!, country_name])
end

# A daily increase can only be reported from the second date onwards.
x_daily = x[2:end];

ndaily = size(y_daily, 2)   # number of country columns to plot

gr(size=(900,600))
display(@df y_daily bar(x_daily, cols(1:ndaily),
    label = permutedims(names(y_daily)),   # 1 x n matrix: one label per series
    xlabel = "Time",
    ylabel = "Daily number of reported cases",
    xticks = x[1:7:end],                   # one tick per week
    xrotation = 45,
    legend = :topleft,
    grid = false,
    framestyle = :semi,
    legendfontsize = 9,
    formatter = :plain))

savefig("Daily_cases_1.png")
############################# Find top 5 countries with highest number of confirmed cases ####################################

# The data is downloaded fresh on every run, so the latest date is whatever the last
# column is called; a hard-coded date would go stale.
latest_column = Symbol(names(data_df)[end])

# Sort the original DataFrame in descending order of the latest reported values.
sort!(data_df, latest_column, rev=true)

countries_sort = data_df[1:5, Symbol("Country/Region")]   # list of top 5 countries
latest_reported_cases = data_df[1:5, latest_column]       # their reported case counts

gr(size=(700,400))
display(bar(countries_sort, latest_reported_cases,
    orientation = :h,
    xlabel = "Total number of confirmed cases",
    ylabel = "Countries",
    legend = false,
    color = collect(1:length(countries_sort)),   # a different color per bar
    bar_width = 0.50,
    linecolor = :match,
    yflip=true,                                  # largest value at the top
    grid = false,
    framestyle=:semi,
    formatter = :plain
))

savefig("Top_five.png")
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.