ThiagoFPMR/cleaning.py

## cleaning.py
# Defining the function we'll use to convert the columns to snakecase

def to_snakecase(cols):
  """Build a rename mapping from each column label to its snake_case form.

  Every label is lower-cased, stripped of surrounding whitespace, and has
  each remaining space replaced by an underscore.

  Args:
    cols: Iterable of string column labels.

  Returns:
    dict mapping each original label to its converted label.
  """
  return {label: label.lower().strip().replace(' ', '_') for label in cols}

# Defining the function we'll use to change the country names to the same format

def normalize_country(data):
  """Map a country identifier to the ``name`` of its ``pc.countries`` record.

  The value is tried, in order, as an official name, a name, an alpha-3
  code and an alpha-2 code; the first lookup that yields a record wins.

  NOTE(review): ``pc`` is presumably the pycountry module; confirm against
  the file's imports.

  Args:
    data: Country identifier to look up.

  Returns:
    The ``name`` attribute of the first matching record, or None when none
    of the four lookups yields a match.
  """
  # Precedence: official name, then name, then alpha-3, then alpha-2.
  for field in ('official_name', 'name', 'alpha_3', 'alpha_2'):
    # Query each field once and reuse the record instead of looking it up twice.
    match = pc.countries.get(**{field: data})
    if match:
      return match.name
  # Nothing matched: the caller receives None for this value.
  return None

# Applying both functions to the COVID-19 Dataset

# Normalise the column labels in place (the existing frame object is mutated).
covid_df.rename(columns=to_snakecase(covid_df.columns), inplace=True)
# Keep only the rows dated 2020-10-12. The explicit copy makes the result an
# independent frame, so the edits below do not operate on a slice of the
# original (which would raise SettingWithCopyWarning).
covid_df = covid_df[covid_df['date'] == '2020-10-12'].copy()
covid_df = covid_df.drop(columns='date')
# Bracket indexing always targets the column, unlike attribute-style assignment.
covid_df['country'] = covid_df['country'].apply(normalize_country)

# Applying both functions to the Human Capital Index Dataset

# Normalise the column labels in place.
hci_df.rename(columns=to_snakecase(hci_df.columns), inplace=True)
# Give the country column the same label the COVID-19 frame uses.
hci_df.rename(columns={'country_name': 'country'}, inplace=True)
# Bracket indexing always targets the column, unlike attribute-style assignment.
hci_df['country'] = hci_df['country'].apply(normalize_country)
	# Defining the function we'll use to convert the columns to snakecase

	# NOTE(review): this repeats the to_snakecase definition earlier in the file.
	def to_snakecase(cols):
		"""Build a rename mapping from each column label to its snake_case form.

		Every label is lower-cased, stripped of surrounding whitespace, and has
		each remaining space replaced by an underscore.

		Args:
			cols: Iterable of string column labels.

		Returns:
			dict mapping each original label to its converted label.
		"""
		return {label: label.lower().strip().replace(' ', '_') for label in cols}

	# Defining the function we'll use to change the country names to the same format

	# NOTE(review): this repeats the normalize_country definition earlier in the file.
	def normalize_country(data):
		"""Map a country identifier to the ``name`` of its ``pc.countries`` record.

		The value is tried, in order, as an official name, a name, an alpha-3
		code and an alpha-2 code; the first lookup that yields a record wins.

		NOTE(review): ``pc`` is presumably the pycountry module; confirm against
		the file's imports.

		Args:
			data: Country identifier to look up.

		Returns:
			The ``name`` attribute of the first matching record, or None when
			none of the four lookups yields a match.
		"""
		# Precedence: official name, then name, then alpha-3, then alpha-2.
		for field in ('official_name', 'name', 'alpha_3', 'alpha_2'):
			# Query each field once and reuse the record instead of looking it up twice.
			match = pc.countries.get(**{field: data})
			if match:
				return match.name
		# Nothing matched: the caller receives None for this value.
		return None

	# Applying both functions to the COVID-19 Dataset

	# NOTE(review): this repeats the COVID-19 cleaning earlier in the file, where
	# the `date` column is already dropped; the filter below would then fail.
	# Confirm whether this copy should be removed.
	covid_df.rename(columns=to_snakecase(covid_df.columns), inplace=True)
	# Keep only the rows dated 2020-10-12. The explicit copy makes the result an
	# independent frame, so the edits below do not operate on a slice.
	covid_df = covid_df[covid_df['date'] == '2020-10-12'].copy()
	covid_df = covid_df.drop(columns='date')
	# Bracket indexing always targets the column, unlike attribute-style assignment.
	covid_df['country'] = covid_df['country'].apply(normalize_country)

	# Applying both functions to the Human Capital Index Dataset

	# NOTE(review): this repeats the Human Capital Index cleaning earlier in the file.
	hci_df.rename(columns=to_snakecase(hci_df.columns), inplace=True)
	# Give the country column the same label the COVID-19 frame uses.
	hci_df.rename(columns={'country_name': 'country'}, inplace=True)
	# Bracket indexing always targets the column, unlike attribute-style assignment.
	hci_df['country'] = hci_df['country'].apply(normalize_country)