-
-
Save 3dgiordano/0c239dadba452a0ab453cf2a99d7cf87 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# Hand-maintained location-name pairs, grouped by the JHU dataset they were
# collected for. main() prints each pair as "<key> -> <value>" and uses both
# the keys and the values to filter its final listing.
_jhu_global_pairs = {
    "Réunion": "Reunion",
    "St Martin": "Saint Martin (French part)",
    "Sint Maarten": "Sint Maarten (Dutch part)",
    "Falkland Islands (Malvinas)": "Falkland Islands",
    "Saint Helena, Ascension and Tristan da Cunha": "Saint Helena",
    "Bonaire, Sint Eustatius and Saba": "Bonaire Sint Eustatius and Saba",
    "Macau": "Macao",
    "Saint Barthelemy": "Saint Barthélemy",
}
_jhu_us_pairs = {
    "United States Virgin Islands": "Virgin Islands",
}
# Global entries first, then US entries, so insertion order is unchanged.
csse_loc_need_rename = {**_jhu_global_pairs, **_jhu_us_pairs}
def jhu_country_standardized():
    """Load OWID's JHU country-name standardization table.

    Downloads ``jhu_country_standardized.csv`` from the OWID repository,
    renames the ``Country`` column to ``CSSE`` and ``Our World In Data Name``
    to ``location``, and keeps only the rows where the two names differ.

    Returns:
        A plain ``dict`` made by unpacking the filtered DataFrame (one entry
        per column, mapping the column name to its Series) and then adding
        the entries of ``csse_loc_need_rename``.
    """
    source_url = (
        "https://raw.githubusercontent.com/owid/covid-19-data/master/scripts/input/jhu/jhu_country_standardized.csv"
    )
    # keep_default_na=False keeps literal strings such as "NA" as text
    # instead of turning them into NaN.
    table = pd.read_csv(source_url, keep_default_na=False)
    table = table.rename(columns={"Country": "CSSE", "Our World In Data Name": "location"})
    names_differ = table["CSSE"] != table["location"]
    changed_only = table[names_differ]
    # NOTE(review): ``**changed_only`` unpacks the DataFrame column by column,
    # so the result mixes Series values ("CSSE", "location") with the plain
    # string pairs from csse_loc_need_rename. main() only reads the two
    # column entries; confirm whether the manual pairs were meant to be
    # appended as rows instead.
    return {**changed_only, **csse_loc_need_rename}
def get_owid_iso():
    """Download OWID's ``iso.csv`` input table and return it as a DataFrame."""
    iso_csv_url = "https://raw.githubusercontent.com/owid/covid-19-data/master/scripts/input/iso/iso.csv"
    return pd.read_csv(iso_csv_url)
def get_owid_cov_data():
    """Download the public ``owid-covid-data.csv`` file as a DataFrame."""
    covid_csv_url = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"
    return pd.read_csv(covid_csv_url)
def get_csses_confirmed():
    """Download the JHU CSSE global confirmed-cases time series.

    Rows whose ``Country/Region`` is "Diamond Princess" or "MS Zaandam" are
    relabelled as "International" before the DataFrame is returned.
    """
    confirmed_url = (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/"
        "csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
    )
    frame = pd.read_csv(confirmed_url)
    # Relabel as 'International'
    relabel_mask = frame["Country/Region"].isin(["Diamond Princess", "MS Zaandam"])
    frame.loc[relabel_mask, "Country/Region"] = "International"
    return frame
def get_csses_us_confirmed():
    """Download the JHU CSSE US confirmed-cases time series as a DataFrame."""
    us_confirmed_url = (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/"
        "csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"
    )
    return pd.read_csv(us_confirmed_url)
def get_owid_vax():
    """Download OWID's vaccination ``locations.csv`` file as a DataFrame."""
    vax_locations_url = (
        "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/locations.csv"
    )
    return pd.read_csv(vax_locations_url)
def main():
    """Cross-check JHU CSSE location names against OWID's location list.

    Downloads the OWID ISO table, OWID COVID data, OWID vaccination locations
    and the JHU CSSE global/US confirmed-cases files, applies the OWID
    standardization renames to the JHU name columns, and prints these
    sections to stdout:

    * JHU Province/State values that are OWID locations but have no
      ``total_cases > 0`` rows in the OWID data (with a vaccination flag);
    * the same for the JHU US file;
    * entries of ``csse_loc_need_rename`` whose target has no OWID data;
    * JHU Country/Region values missing from the OWID ISO table;
    * JHU Province/State values that already have OWID data ("Solved");
    * OWID locations that appear nowhere in the JHU global file, after
      filtering out known aliases, ``OWID_``-coded locations and US matches.
    """
    csse_loc_renames = jhu_country_standardized()
    ctry_region_replace = dict(zip(csse_loc_renames["CSSE"].tolist(), csse_loc_renames["location"].tolist()))

    owid_iso = get_owid_iso()
    owid_locations = set(owid_iso["location"].tolist())
    # Locations whose iso_code starts with "OWID_"; na=False keeps the mask
    # strictly boolean even if an iso_code is missing.
    owid_prefixed_mask = owid_iso["iso_code"].str.startswith("OWID_", na=False)
    owid_prefixed_locations = set(owid_iso.loc[owid_prefixed_mask, "location"].tolist())

    owid_vax = get_owid_vax()
    vax_locations = set(owid_vax["location"].tolist())

    # Assign the replaced column back instead of calling
    # ``df[col].replace(..., inplace=True)``: that chained in-place form acts
    # on a temporary under pandas Copy-on-Write and leaves the frame unchanged.
    csses_confirmed = get_csses_confirmed()
    for column in ("Province/State", "Country/Region"):
        csses_confirmed[column] = csses_confirmed[column].replace(ctry_region_replace)

    csses_us_confirmed = get_csses_us_confirmed()
    csses_us_confirmed["Province_State"] = csses_us_confirmed["Province_State"].replace(ctry_region_replace)

    owid_cov_data = get_owid_cov_data()

    csse_provinces = set(csses_confirmed["Province/State"].tolist())
    csse_countries = set(csses_confirmed["Country/Region"].tolist())
    csse_us_provinces = set(csses_us_confirmed["Province_State"].tolist())

    countries_in_csse = csse_provinces & owid_locations
    countries_in_csse_us = csse_us_provinces & owid_locations
    countries_with_vax_data = countries_in_csse & vax_locations
    countries_with_vax_data_us = countries_in_csse_us & vax_locations

    countries_without_data = owid_locations - csse_provinces - csse_countries
    countries_no_match = csse_countries - owid_locations
    has_cases = owid_cov_data["total_cases"] > 0
    countries_with_data = set(owid_cov_data.loc[has_cases, "location"].tolist())

    countries_renamed = list(csse_loc_need_rename.values())
    # Reverse lookup: the first key that maps to each rename target.
    rename_source = {}
    for source_name, target_name in csse_loc_need_rename.items():
        rename_source.setdefault(target_name, source_name)

    print("\nJHU Province/State that is a County and not is on OWID")
    for c in sorted(countries_in_csse):
        if c not in countries_with_data:
            print(f" {c} - VAX Data:{c in countries_with_vax_data}")

    print("\nJHU US Province/State that is a County and not is on OWID")
    for c in sorted(countries_in_csse_us):
        if c not in countries_with_data:
            print(f" {c} - VAX Data:{c in countries_with_vax_data_us}")

    print("\nJHU Countries that needs renames")
    for c in sorted(countries_renamed):
        if c not in countries_with_data:
            print(f" {rename_source[c]} -> {c}")

    print("\nCountries in JHU that not matches (move to international?)")
    for c in sorted(countries_no_match):
        print(f" {c}")

    print("\nSolved:")
    for c in sorted(countries_in_csse):
        if c in countries_with_data:
            print(f" {c}")

    print("\nOWID Countries without data in JHU (Verify if is needed to rename or add to location list)")
    # Names already explained elsewhere: alias keys and targets,
    # OWID_-coded locations, and matches found in the JHU US file.
    already_explained = (
        set(csse_loc_need_rename)
        | owid_prefixed_locations
        | set(countries_renamed)
        | countries_in_csse_us
    )
    for c in sorted(countries_without_data - already_explained):
        print(f" {c}")
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment