Skip to content

Instantly share code, notes, and snippets.

@granttremblay
Created January 22, 2020 03:20
Show Gist options
  • Save granttremblay/7fd4fc40a9e06ad668a1e0c8d38ccd2c to your computer and use it in GitHub Desktop.
Save granttremblay/7fd4fc40a9e06ad668a1e0c8d38ccd2c to your computer and use it in GitHub Desktop.
Make Elise's Excel Spreadsheets
#!/usr/bin/env python3
'''
A script for my cutie by her boy
'''
import numpy as np
import pandas as pd
import xlrd
savepath = '/Users/grant/Desktop/'
# Point to the Excel file here:
excel_file = 'data/Study_forG.xlsx'
# Read in the individual Excel sheets as Pandas dataframes
# We'll do this separately to be explicit, even though it's verbose
all_patients_sheet = pd.read_excel(excel_file, sheet_name='Demographic')
all_a1c_sheet = pd.read_excel(excel_file, sheet_name='A1C')
hispanic_patients_sheet = pd.read_excel(
excel_file, sheet_name='Hispanic Patients')
outpatient_visit_sheet = pd.read_excel(
excel_file, sheet_name='Outpatient visit')
providers_sheet = pd.read_excel(
excel_file, sheet_name='Endocrine Treating providers')
nutrition_sheet = pd.read_excel(excel_file, sheet_name='BCH nutrition visit')
ed_visits_sheet = pd.read_excel(excel_file, sheet_name='ED Visits')
inpatient_visit_sheet = pd.read_excel(excel_file, sheet_name='Inpatient visit')
hispanic_patient_mrns = hispanic_patients_sheet['MRN'].to_frame()
# Make average A1C for every MRN in the A1C sheet, then mask based on membership in Hispanic MRN list
all_average_a1cs = all_a1c_sheet.groupby('MRN').mean().reset_index()
hispanic_average_a1c = all_average_a1cs[all_average_a1cs['MRN'].isin(
hispanic_patient_mrns['MRN'])]
non_hispanic_average_a1c = all_average_a1cs[np.logical_not(
all_average_a1cs['MRN'].isin(hispanic_patient_mrns['MRN']))]
# Number of Outpatient visits
outpatient_visits = outpatient_visit_sheet.groupby(
'MRN').size().reset_index(name='Number of Outpatient Visits by MRN')
hispanic_outpatient_visits = outpatient_visits[outpatient_visits['MRN'].isin(hispanic_patient_mrns['MRN'])]
non_hispanic_outpatient_visits = outpatient_visits[np.logical_not(
outpatient_visits['MRN'].isin(hispanic_patient_mrns['MRN']))]
# Types of Providers
providers_all = providers_sheet.groupby(
['MDNE', 'NAME_SUFFIX']).size().reset_index(name='Number of Visits')
md_visits = providers_all[providers_all.values == "MD"]
dne_visits = providers_all[providers_all.values == "DNE"]
sw_visits = providers_all[providers_all.values == "SW"]
hispanic_md_visits = md_visits[md_visits['MDNE'].isin(
hispanic_patient_mrns['MRN'])]
hispanic_dne_visits = dne_visits[dne_visits['MDNE'].isin(
hispanic_patient_mrns['MRN'])]
hispanic_sw_visits = sw_visits[sw_visits['MDNE'].isin(
hispanic_patient_mrns['MRN'])]
non_hispanic_md_visits = md_visits[np.logical_not(
md_visits['MDNE'].isin(hispanic_patient_mrns['MRN']))]
non_hispanic_dne_visits = dne_visits[np.logical_not(
dne_visits['MDNE'].isin(hispanic_patient_mrns['MRN']))]
non_hispanic_sw_visits = sw_visits[np.logical_not(
sw_visits['MDNE'].isin(hispanic_patient_mrns['MRN']))]
# Nutrition visits
nutrition_visits = nutrition_sheet.groupby('MRN').size().reset_index(
name='Number of Nutrition Visits by MRN')
hispanic_nutrition_visits = nutrition_visits[nutrition_visits['MRN'].isin(
hispanic_patient_mrns['MRN'])]
non_hispanic_nutrition_visits = nutrition_visits[np.logical_not(
nutrition_visits['MRN'].isin(hispanic_patient_mrns['MRN']))]
# ED visits
ed_visits = ed_visits_sheet.groupby('MRN').size().reset_index(
name='Number of ED Visits by MRN')
hispanic_ed_visits = ed_visits[ed_visits['MRN'].isin(
hispanic_patient_mrns['MRN'])]
non_hispanic_ed_visits = ed_visits[np.logical_not(
ed_visits['MRN'].isin(hispanic_patient_mrns['MRN']))]
# Inpatient visits
inpatient_visits = inpatient_visit_sheet.groupby('MRN').size(
).reset_index(name='Number of Inpatient Visits by MRN')
hispanic_inpatient_visits = inpatient_visits[inpatient_visits['MRN'].isin(
hispanic_patient_mrns['MRN'])]
non_hispanic_inpatient_visits = inpatient_visits[np.logical_not(
inpatient_visits['MRN'].isin(hispanic_patient_mrns['MRN']))]
sheets_to_save = {'Hispanic Average A1C': hispanic_average_a1c.rename(columns={"MRN": "Hispanic MRN", "RESULT_VAL": "Hispanic Mean A1C"}),
'Non-Hispanic Average A1C': non_hispanic_average_a1c.rename(columns={"MRN": "Non-Hispanic MRN", "RESULT_VAL": "Non-Hispanic Mean A1C"}),
'Hispanic Outpatient Visits': hispanic_outpatient_visits.rename(columns={"MRN": "Hispanic MRN", "Number of Outpatient Visits by MRN": "Number of Outpatient Visits"}),
'Non-Hispanic Outpatient Visits': non_hispanic_outpatient_visits.rename(columns={"MRN": "Non-Hispanic MRN", "Number of Outpatient Visits by MRN": "Number of Outpatient Visits"}),
'Hispanic MD Visits': hispanic_md_visits.rename(columns={"MDNE": "Hispanic MRN", "NAME_SUFFIX": "Provider Type"}),
'Non-Hispanic MD Visits': non_hispanic_md_visits.rename(columns={"MDNE": "Non-Hispanic MRN", "NAME_SUFFIX": "Provider Type"}),
'Hispanic DNE Visits': hispanic_dne_visits.rename(columns={"MDNE": "Hispanic MRN", "NAME_SUFFIX": "Provider Type"}),
'Non-Hispanic DNE Visits': non_hispanic_dne_visits.rename(columns={"MDNE": "Non-Hispanic MRN", "NAME_SUFFIX": "Provider Type"}),
'Hispanic SW Visits': hispanic_sw_visits.rename(columns={"MDNE": "Hispanic MRN", "NAME_SUFFIX": "Provider Type"}),
'Non-Hispanic SW Visits': non_hispanic_sw_visits.rename(columns={"MDNE": "Non-Hispanic MRN", "NAME_SUFFIX": "Provider Type"}),
'Hispanic Nutrition Visits': hispanic_nutrition_visits.rename(columns={"MRN": "Hispanic MRN", "Number of Nutrition Visits by MRN": "Number of Nutrition Visits"}),
'Non-Hispanic Nutrition Visits': non_hispanic_nutrition_visits.rename(columns={"MRN": "Non-Hispanic MRN", "Number of Nutrition Visits by MRN": "Number of Nutrition Visits"}),
'Hispanic ED Visits': hispanic_ed_visits.rename(columns={"MRN": "Hispanic MRN", "Number of ED Visits by MRN": "Number of ED Visits"}),
'Non-Hispanic_Outpatient': non_hispanic_ed_visits.rename(columns={"MRN": "Non-Hispanic MRN", "Number of ED Visits by MRN": "Number of ED Visits"}),
'Hispanic Inpatient Visits': hispanic_inpatient_visits.rename(columns={"MRN": "Hispanic MRN", "Number of Inpatient Visits by MRN": "Number of Inpatient Visits"}),
'Non-Hispanic Inpatient Visits': non_hispanic_inpatient_visits.rename(columns={"MRN": "Non-Hispanic MRN", "Number of Inpatient Visits by MRN": "Number of Inpatient Visits"})}
with pd.ExcelWriter(savepath + 'data_for_E.xlsx') as writer: # doctest: +SKIP
for item in sheets_to_save:
sheets_to_save[item].to_excel(writer, sheet_name=item, index=False)
print("Saved {}".format(item))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment