Last active
March 25, 2020 16:27
-
-
Save matteo-peltarion/18f530f526cf73ae322f9a513f8899cc to your computer and use it in GitHub Desktop.
COVID-19 analysis code 02 - data loading
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime | |
# Every CSV report found in DATA_DIR. glob returns matches in arbitrary
# (filesystem-dependent) order, so sort them to make the run reproducible.
csv_files = sorted(glob.glob(DATA_DIR + "/*.csv"))
# Auxiliary function to extract date from file name
def extract_date(file_name):
    """Return the date encoded in a report file name.

    The base name of ``file_name``, with its extension removed, is parsed
    as ``MM-DD-YYYY`` (for example ``"/data/03-23-2020.csv"``).

    Args:
        file_name: Path (or bare name) of the report file.

    Returns:
        A ``datetime.date`` for the day named by the file.

    Raises:
        ValueError: If the name without extension is not ``MM-DD-YYYY``.
    """
    # splitext removes whatever extension is present instead of assuming
    # that exactly four trailing characters (".csv") must be dropped.
    date_str = os.path.splitext(os.path.basename(file_name))[0]
    return datetime.strptime(date_str, '%m-%d-%Y').date()
# Create dataframe
world_df = None

# Legacy column name -> column name used by the newer report format.
fields_mapping_dic = {
    "Province/State": "Province_State",
    "Country/Region": "Country_Region",
    "Last Update": "Last_Update",
    "Confirmed": "Confirmed",
    "Deaths": "Deaths",
    "Recovered": "Recovered",
    "Latitude": "Lat",
    "Longitude": "Long_",
}

# First report date that uses the new column names (computed once, not on
# every loop iteration).
format_change_date = datetime(2020, 3, 23).date()

# Gather the per-file frames and concatenate once at the end: calling
# pd.concat inside the loop copies all accumulated rows on every iteration.
daily_frames = []

# glob gives no ordering guarantee, so walk the files chronologically.
for csv_file in sorted(csv_files, key=extract_date):
    df = pd.read_csv(csv_file)
    date = extract_date(csv_file)

    # Files dated 23/03/2020 or later use the new data format: copy each
    # new-style column under its legacy name so all frames share columns.
    if date >= format_change_date:
        for k, v in fields_mapping_dic.items():
            df[k] = df[v]

    df['Date'] = date
    daily_frames.append(df)

# world_df stays None when no CSV file was found, as before.
if daily_frames:
    world_df = pd.concat(daily_frames, ignore_index=True)

world_df.tail()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment