Skip to content

Instantly share code, notes, and snippets.

@3dgiordano
Last active December 17, 2021 06:31
Show Gist options
  • Save 3dgiordano/0c239dadba452a0ab453cf2a99d7cf87 to your computer and use it in GitHub Desktop.
Save 3dgiordano/0c239dadba452a0ab453cf2a99d7cf87 to your computer and use it in GitHub Desktop.
import pandas as pd
# Manually maintained name pairs: each key is a location name as written in
# the JHU CSSE data, each value is the name the report looks for on the OWID
# side. Insertion order is kept grouped by the JHU file the name comes from.
csse_loc_need_rename = {
    # JHU Global
    "Réunion": "Reunion",
    "St Martin": "Saint Martin (French part)",
    "Sint Maarten": "Sint Maarten (Dutch part)",
    "Falkland Islands (Malvinas)": "Falkland Islands",
    "Saint Helena, Ascension and Tristan da Cunha": "Saint Helena",
    "Bonaire, Sint Eustatius and Saba": "Bonaire Sint Eustatius and Saba",
    "Macau": "Macao",
    "Saint Barthelemy": "Saint Barthélemy",
    # JHU US
    "United States Virgin Islands": "Virgin Islands",
}
def jhu_country_standardized():
    """Fetch OWID's JHU country-name table and combine it with the manual renames.

    The CSV columns "Country" and "Our World In Data Name" are renamed to
    "CSSE" and "location", and rows whose two names already agree are dropped.

    Returns:
        A plain dict with one entry per column of the filtered table
        (column name -> Series, e.g. "CSSE" and "location"), followed by
        every key/value pair of ``csse_loc_need_rename``.
    """
    source_url = (
        "https://raw.githubusercontent.com/owid/covid-19-data/master/scripts/input/jhu/jhu_country_standardized.csv"
    )
    # keep_default_na=False stops pandas from turning its default NA markers
    # (for example the text "NA") into NaN while parsing the names.
    standardized = pd.read_csv(source_url, keep_default_na=False)
    standardized = standardized.rename(
        columns={"Country": "CSSE", "Our World In Data Name": "location"}
    )

    # Only rows where the JHU name differs from the OWID name are of interest.
    name_differs = standardized["CSSE"] != standardized["location"]
    standardized = standardized[name_differs]

    # NOTE(review): unpacking a DataFrame with ** iterates over its columns,
    # so the result mixes column Series with plain name-to-name strings.
    # Confirm this shape is what callers expect.
    merged = {**standardized, **csse_loc_need_rename}
    return merged
def get_owid_iso():
    """Download the OWID ``iso.csv`` input file and return it as a DataFrame."""
    iso_url = "https://raw.githubusercontent.com/owid/covid-19-data/master/scripts/input/iso/iso.csv"
    iso_table = pd.read_csv(iso_url)
    return iso_table
def get_owid_cov_data():
    """Download the public OWID ``owid-covid-data.csv`` and return it as a DataFrame."""
    data_url = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"
    covid_table = pd.read_csv(data_url)
    return covid_table
def get_csses_confirmed():
    """Download the JHU CSSE global confirmed-cases time series.

    Rows whose "Country/Region" is one of the ships "Diamond Princess" or
    "MS Zaandam" get that column rewritten to "International".

    Returns:
        The downloaded table as a DataFrame, with the relabel applied.
    """
    series_url = (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/"
        "csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
    )
    confirmed = pd.read_csv(series_url)

    # Relabel as 'International'
    region_column = "Country/Region"
    ship_rows = confirmed[region_column].isin(["Diamond Princess", "MS Zaandam"])
    confirmed.loc[ship_rows, region_column] = "International"
    return confirmed
def get_csses_us_confirmed():
    """Download the JHU CSSE US confirmed-cases time series as a DataFrame."""
    series_url = (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/"
        "csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"
    )
    return pd.read_csv(series_url)
def get_owid_vax():
    """Download the OWID vaccinations ``locations.csv`` and return it as a DataFrame."""
    locations_url = (
        "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/locations.csv"
    )
    vax_locations = pd.read_csv(locations_url)
    return vax_locations
def main():
    """Print a report comparing JHU CSSE location names with OWID locations.

    Downloads the OWID and JHU tables, renames JHU locations using the
    "CSSE" -> "location" pairs from ``jhu_country_standardized()``, and
    writes these sections to stdout:

    * JHU global Province/State values that are OWID locations but have no
      OWID rows with ``total_cases > 0`` (with a flag for vaccination data);
    * the same for JHU US Province_State values;
    * entries of ``csse_loc_need_rename`` whose target name has no OWID data;
    * JHU Country/Region values that are not OWID locations;
    * JHU Province/State values that are OWID locations with data ("Solved");
    * OWID locations that appear nowhere in the JHU global table, excluding
      the ``OWID_`` pseudo-locations, the manual renames and US matches.
    """
    csse_loc_renames = jhu_country_standardized()
    # NOTE(review): only the CSV-derived "CSSE"/"location" pairs feed this
    # mapping; the csse_loc_need_rename pairs are reported below, not applied.
    # Confirm that is intended.
    ctry_region_replace = dict(
        zip(csse_loc_renames["CSSE"].tolist(), csse_loc_renames["location"].tolist())
    )

    owid_iso = get_owid_iso()
    # na=False keeps the mask strictly boolean even if an iso_code is missing;
    # a mask containing NaN would make the row selection raise.
    is_owid_code = owid_iso["iso_code"].str.startswith("OWID_", na=False)
    owid_pseudo_locations = set(owid_iso.loc[is_owid_code, "location"].tolist())

    owid_vax = get_owid_vax()

    # Assign the replaced column back rather than calling
    # replace(inplace=True) on the column selection: under pandas
    # Copy-on-Write the in-place call only changes a temporary object and the
    # DataFrame would keep the original names.
    csses_confirmed = get_csses_confirmed()
    for column in ("Province/State", "Country/Region"):
        csses_confirmed[column] = csses_confirmed[column].replace(ctry_region_replace)

    csses_us_confirmed = get_csses_us_confirmed()
    csses_us_confirmed["Province_State"] = csses_us_confirmed["Province_State"].replace(
        ctry_region_replace
    )

    owid_cov_data = get_owid_cov_data()

    # Build every name set once; all comparisons below are set algebra.
    owid_locations = set(owid_iso["location"].tolist())
    vax_locations = set(owid_vax["location"].tolist())
    csse_provinces = set(csses_confirmed["Province/State"].tolist())
    csse_countries = set(csses_confirmed["Country/Region"].tolist())
    csse_us_states = set(csses_us_confirmed["Province_State"].tolist())

    countries_in_csse = csse_provinces & owid_locations
    countries_in_csse_us = csse_us_states & owid_locations
    countries_with_vax_data = countries_in_csse & vax_locations
    countries_with_vax_data_us = countries_in_csse_us & vax_locations
    countries_without_data = owid_locations - csse_provinces - csse_countries
    countries_no_match = csse_countries - owid_locations

    has_cases = owid_cov_data["total_cases"] > 0
    countries_with_data = set(owid_cov_data.loc[has_cases, "location"].tolist())

    countries_renamed = list(csse_loc_need_rename.values())
    # Reverse lookup (target name -> first JHU name that maps to it).
    rename_source = {}
    for jhu_name, owid_name in csse_loc_need_rename.items():
        rename_source.setdefault(owid_name, jhu_name)

    print("\nJHU Province/State that is a County and not is on OWID")
    for c in sorted(countries_in_csse - countries_with_data):
        print(f" {c} - VAX Data:{c in countries_with_vax_data}")

    print("\nJHU US Province/State that is a County and not is on OWID")
    for c in sorted(countries_in_csse_us - countries_with_data):
        print(f" {c} - VAX Data:{c in countries_with_vax_data_us}")

    print("\nJHU Countries that needs renames")
    for c in sorted(countries_renamed):
        if c not in countries_with_data:
            from_rename = rename_source[c]
            print(f" {from_rename} -> {c}")

    print("\nCountries in JHU that not matches (move to international?)")
    for c in sorted(countries_no_match):
        print(f" {c}")

    print("\nSolved:")
    for c in sorted(countries_in_csse & countries_with_data):
        print(f" {c}")

    print("\nOWID Countries without data in JHU (Verify if is needed to rename or add to location list)")
    # Names already accounted for elsewhere in the report are left out here.
    already_covered = (
        set(csse_loc_need_rename)
        | owid_pseudo_locations
        | set(countries_renamed)
        | countries_in_csse_us
    )
    for c in sorted(countries_without_data - already_covered):
        print(f" {c}")
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment