# Author: Vikas Negi (vnegi10)

## Julia_load_packages.jl
# Load and compile Julia packages for the current session
# This might take a while, needs to be done only once
# Recommended to list and compile all packages at once

using Distributions
using StatsBase
using CSV
using DataFrames
using StatsPlots
using Plots

## Import_data.jl
# Download the global confirmed-cases time series from the CSSEGISandData/COVID-19
# repository into the working directory. `download` returns the local path it wrote to.
file = download(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
    "covid_19_global_data.csv"
); # source of the data

# Read the downloaded file into a DataFrame. `DataFrame!` was deprecated and then removed
# from DataFrames.jl; `copycols = false` is its replacement and likewise reuses the
# columns parsed by CSV.File instead of copying them.
data_df = DataFrame(CSV.File(file); copycols = false);

## Find_country.jl
"""
    find_country(data_df, country)

Return the row of `data_df` that holds the totals for `country`.

Only rows whose "Province/State" entry is `missing` are considered, because those are
the rows from which the total numbers for a country can be read. The first such row
whose "Country/Region" equals `country` is returned.

# Arguments
- `data_df`: table with "Province/State" and "Country/Region" columns.
- `country::AbstractString`: name to look up in the "Country/Region" column.

# Throws
- `ArgumentError` if no row without a "Province/State" matches `country`.
"""
function find_country(data_df, country::AbstractString)
    # Keep only rows with a missing "Province/State" entry.
    is_country_total = ismissing.(data_df[!, Symbol("Province/State")])
    data_df_new = data_df[is_country_total, :]

    # `isequal` never yields `missing`, so a missing "Country/Region" entry cannot
    # break the search the way a broadcast `==` would.
    loc = findfirst(isequal(country), data_df_new[!, Symbol("Country/Region")])
    if loc === nothing
        throw(ArgumentError(
            "country \"$country\" not found among rows without a Province/State entry"))
    end
    return data_df_new[loc, :] # select the matching row
end

## Dates.jl
using Dates # standard library; provides Date, DateFormat and Year used below

# Read dates from the column names (column 5 onwards). `string.` keeps this working
# when `names` returns Symbols (older DataFrames releases) as well as Strings.
date_strings = string.(names(data_df)[5:end]);
format = Dates.DateFormat("m/d/y")    # specify given format
# Two-digit years parse as year 0020, so Year(2000) shifts them to 2020.
x = parse.(Date, date_strings, format) .+ Year(2000)

## Collect_data_for_countries.jl
# Countries to collect; each one becomes a column of `y`.
countries = ["Italy", "Germany", "India", "United Kingdom"];
y = DataFrame() # starts without columns, one is added per country below

for country in countries
    # find_country returns a single row; wrap it in a DataFrame so it can be stacked.
    country_df = DataFrame(find_country(data_df, country))
    last_col = size(country_df, 2)
    # Stack columns 5 through the last one into long format; the values end up in :value.
    long_df = stack(country_df, 5:last_col)
    y[!, Symbol(country)] = long_df[!, :value]
end

## Plot_countries_time_series.jl
# Plot the collected series for every country in `y` against the dates in `x`.
rows, cols = size(y)

gr(size = (900, 600))
# Inside `@df`, `cols(1:cols)` selects columns 1 through `cols` of `y`; the inner
# `cols` is the column count assigned above.
@df y plot(x, cols(1:cols),
    label = permutedims(names(y)), # 1 x n row of labels, one per series
    xlabel = "Time",
    ylabel = "Total number of reported cases",
    xticks = x[1:7:end],           # one tick every 7th date
    xrotation = 45,
    marker = (:diamond, 4),
) # the call was left unclosed, which made the rest of the file fail to parse

## Daily_reported_cases.jl
############################# Calculate number of daily reported cases ####################################


# Work on an independent copy so that changes to `y_tmp` do not affect `y`.
y_tmp = deepcopy(y)
name = names(y_tmp)
dfrows = nrow(y_tmp)
rows, cols = dfrows, ncol(y_tmp)

# DataFrame with the same structure as `y_tmp` but dfrows-1 rows.
y_daily = similar(y_tmp, dfrows - 1)

## Top_five.jl
# Rank by the last column (latest date) instead of a hard-coded date, so the result
# follows whatever data was downloaded. `Symbol(...)` accepts both String and Symbol names.
latest_date = Symbol(last(names(data_df)))

# Sort the original DataFrame in place, in descending order of the latest counts.
# NOTE(review): rows that carry a Province/State are ranked individually, not summed
# per country - confirm this is intended.
sort!(data_df, latest_date, rev = true)
countries_sort = data_df[1:5, Symbol("Country/Region")] # list of top 5 countries
latest_reported_cases = data_df[1:5, latest_date]       # reported cases in those rows

gr(size = (700, 400))
display(bar(countries_sort, latest_reported_cases,
    orientation = :h,
    xlabel = "Total number of confirmed cases",
    ylabel = "Countries",
    legend = false,
)) # both calls were left unclosed, which made the rest of the file fail to parse

## Julia_load_toml.jl
# Download .toml files from the GitHub repository and activate the environment
using InstantiateFromURL
# Set up the environment of the "vnegi10/covid19-notebooks-julia" project.
# NOTE(review): `version` presumably names the branch/tag and `force` a re-download -
# confirm against the InstantiateFromURL documentation.
github_project(
    "vnegi10/covid19-notebooks-julia";
    path = "",
    version = "master",
    force = true
)

## Import_more_data.jl
# Download the global recovered-cases time series; `download` returns the local path.
file = download(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv",
    "covid_19_global_data_recovered.csv"
); # source of the data

# `DataFrame!` was deprecated and then removed from DataFrames.jl; `copycols = false`
# is its replacement and likewise avoids copying the parsed columns.
data_df_recovered = DataFrame(CSV.File(file); copycols = false); # read data from file into a DataFrame

# Download the global deaths time series; `file` now refers to this second download.
file = download(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
    "covid_19_global_data_deaths.csv"
); # source of the data

data_df_deaths = DataFrame(CSV.File(file); copycols = false); # read data from file into a DataFrame
	# Load and compile Julia packages for the current session
	# This might take a while, needs to be done only once
	# Recommended to list and compile all packages at once

	using Distributions
	using StatsBase
	using CSV
	using DataFrames
	using StatsPlots
	using Plots
	# Download the global confirmed-cases time series from the CSSEGISandData/COVID-19
	# repository into the working directory. `download` returns the local path it wrote to.
	file = download(
	    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
	    "covid_19_global_data.csv"
	); # source of the data

	# Read the downloaded file into a DataFrame. `DataFrame!` was deprecated and then removed
	# from DataFrames.jl; `copycols = false` is its replacement and likewise reuses the
	# columns parsed by CSV.File instead of copying them.
	data_df = DataFrame(CSV.File(file); copycols = false);
	"""
	    find_country(data_df, country)

	Return the row of `data_df` that holds the totals for `country`.

	Only rows whose "Province/State" entry is `missing` are considered, because those are
	the rows from which the total numbers for a country can be read. The first such row
	whose "Country/Region" equals `country` is returned.

	# Arguments
	- `data_df`: table with "Province/State" and "Country/Region" columns.
	- `country::AbstractString`: name to look up in the "Country/Region" column.

	# Throws
	- `ArgumentError` if no row without a "Province/State" matches `country`.
	"""
	function find_country(data_df, country::AbstractString)
	    # Keep only rows with a missing "Province/State" entry.
	    is_country_total = ismissing.(data_df[!, Symbol("Province/State")])
	    data_df_new = data_df[is_country_total, :]

	    # `isequal` never yields `missing`, so a missing "Country/Region" entry cannot
	    # break the search the way a broadcast `==` would.
	    loc = findfirst(isequal(country), data_df_new[!, Symbol("Country/Region")])
	    if loc === nothing
	        throw(ArgumentError(
	            "country \"$country\" not found among rows without a Province/State entry"))
	    end
	    return data_df_new[loc, :] # select the matching row
	end
	using Dates # standard library; provides Date, DateFormat and Year used below

	# Read dates from the column names (column 5 onwards). `string.` keeps this working
	# when `names` returns Symbols (older DataFrames releases) as well as Strings.
	date_strings = string.(names(data_df)[5:end]);
	format = Dates.DateFormat("m/d/y") # specify given format
	# Two-digit years parse as year 0020, so Year(2000) shifts them to 2020.
	x = parse.(Date, date_strings, format) .+ Year(2000)
	# Countries to collect; each one becomes a column of `y`.
	countries = ["Italy", "Germany", "India", "United Kingdom"];
	y = DataFrame() # starts without columns, one is added per country below

	for country in countries
	    # find_country returns a single row; wrap it in a DataFrame so it can be stacked.
	    country_df = DataFrame(find_country(data_df, country))
	    last_col = size(country_df, 2)
	    # Stack columns 5 through the last one into long format; the values end up in :value.
	    long_df = stack(country_df, 5:last_col)
	    y[!, Symbol(country)] = long_df[!, :value]
	end
	# Plot the collected series for every country in `y` against the dates in `x`.
	rows, cols = size(y)

	gr(size = (900, 600))
	# Inside `@df`, `cols(1:cols)` selects columns 1 through `cols` of `y`; the inner
	# `cols` is the column count assigned above.
	@df y plot(x, cols(1:cols),
	    label = permutedims(names(y)), # 1 x n row of labels, one per series
	    xlabel = "Time",
	    ylabel = "Total number of reported cases",
	    xticks = x[1:7:end],           # one tick every 7th date
	    xrotation = 45,
	    marker = (:diamond, 4),
	) # the call was left unclosed, which made the rest of the file fail to parse
	############################# Calculate number of daily reported cases ####################################


	# Work on an independent copy so that changes to `y_tmp` do not affect `y`.
	y_tmp = deepcopy(y)
	name = names(y_tmp)
	dfrows = nrow(y_tmp)
	rows, cols = dfrows, ncol(y_tmp)

	# DataFrame with the same structure as `y_tmp` but dfrows-1 rows.
	y_daily = similar(y_tmp, dfrows - 1)
	# Rank by the last column (latest date) instead of a hard-coded date, so the result
	# follows whatever data was downloaded. `Symbol(...)` accepts both String and Symbol names.
	latest_date = Symbol(last(names(data_df)))

	# Sort the original DataFrame in place, in descending order of the latest counts.
	# NOTE(review): rows that carry a Province/State are ranked individually, not summed
	# per country - confirm this is intended.
	sort!(data_df, latest_date, rev = true)
	countries_sort = data_df[1:5, Symbol("Country/Region")] # list of top 5 countries
	latest_reported_cases = data_df[1:5, latest_date]       # reported cases in those rows

	gr(size = (700, 400))
	display(bar(countries_sort, latest_reported_cases,
	    orientation = :h,
	    xlabel = "Total number of confirmed cases",
	    ylabel = "Countries",
	    legend = false,
	)) # both calls were left unclosed, which made the rest of the file fail to parse
	# Download .toml files from the GitHub repository and activate the environment
	using InstantiateFromURL
	# Set up the environment of the "vnegi10/covid19-notebooks-julia" project.
	# NOTE(review): `version` presumably names the branch/tag and `force` a re-download -
	# confirm against the InstantiateFromURL documentation.
	github_project(
	    "vnegi10/covid19-notebooks-julia";
	    path = "",
	    version = "master",
	    force = true
	)
	# Download the global recovered-cases time series; `download` returns the local path.
	file = download(
	    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv",
	    "covid_19_global_data_recovered.csv"
	); # source of the data

	# `DataFrame!` was deprecated and then removed from DataFrames.jl; `copycols = false`
	# is its replacement and likewise avoids copying the parsed columns.
	data_df_recovered = DataFrame(CSV.File(file); copycols = false); # read data from file into a DataFrame

	# Download the global deaths time series; `file` now refers to this second download.
	file = download(
	    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
	    "covid_19_global_data_deaths.csv"
	); # source of the data

	data_df_deaths = DataFrame(CSV.File(file); copycols = false); # read data from file into a DataFrame