Created
November 14, 2020 22:50
-
-
Save jalbertbowden/6e3d602ded904b9bac00ac97fd819f61 to your computer and use it in GitHub Desktop.
get-cms-nursing.py - automating accessing cms.gov's weekly nursing home dataset and chopping it up into digestable csvs: by state, and by state + date.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import os | |
import csv | |
nursing_home_dataset_csv_url = 'https://data.cms.gov/api/views/s2uc-8wxp/rows.csv?accessType=DOWNLOAD' | |
nursing_home_dataset_csv_file = 'COVID-19_Nursing_Home_Dataset.csv' | |
file_base = 'covid-19-nursing-home-dataset-week-ending-' | |
file_date = '2020-11-01' | |
file_format = '.csv' | |
file_name = file_date + '/' + file_base + file_date + file_format | |
directory_current = os.getcwd() | |
def get_cms_file_name(file_date, nursing_home_dataset_csv_file): | |
current_nursing_home_dataset_csv_file = nursing_home_dataset_csv_file.split('.') | |
print(current_nursing_home_dataset_csv_file[0]) | |
get_cms_file_name(file_date, nursing_home_dataset_csv_file) | |
def make_date_directory(directory_current, file_date): | |
mode = 0755 | |
path = os.path.join(directory_current, file_date) | |
try: | |
os.mkdir(path, mode) | |
except OSError as error: | |
print("Directory '% s' already exists!" % file_date) | |
print(error) | |
else: | |
print("Directory '% s' created" % file_date) | |
def get_latest_cms_data(nursing_home_dataset_csv_url, nursing_home_dataset_csv_file, nursing_home_dataset_csv_file_local): | |
print "Downloading file:%s" % nursing_home_dataset_csv_file | |
r = requests.get(nursing_home_dataset_csv_url) | |
with open(nursing_home_dataset_csv_file_local, 'wb') as code: | |
code.write(r.content) | |
print "File:%s" % nursing_home_dataset_csv_file_local + " download complete!" | |
def get_local_file_name(file_date, nursing_home_dataset_csv_file): | |
local_nursing_home_dataset_csv_file = file_date + '/' + nursing_home_dataset_csv_file | |
return local_nursing_home_dataset_csv_file | |
def make_cms_data_clone(file_date, nursing_home_dataset_csv_file, file_name): | |
print('cloning cms dataset!') | |
csv_rows = [] | |
csv_headers = [] | |
with open(nursing_home_dataset_csv_file_local) as fin: | |
csvin = csv.DictReader(fin) | |
for header in csvin.fieldnames: | |
csv_headers.append(header) | |
for row in csvin: | |
csv_rows.append(row) | |
with open(file_name, 'w') as fout: | |
dw = csv.DictWriter(fout, fieldnames=csv_headers) | |
dw.writeheader() | |
for csv_row in csv_rows: | |
dw.writerow(csv_row) | |
print('cms data clone successful. new file: ' + file_name) | |
states_array = [] | |
def make_cms_state_csvs(file_name): | |
file_name_states = file_name.split('.') | |
file_name_states_path = file_name_states[0] + '-' | |
with open(file_name) as fin: | |
csvin = csv.DictReader(fin) | |
outputs = {} | |
for row in csvin: | |
cat = row['Provider State'] | |
# print(cat) | |
if cat not in outputs: | |
print('new state: ', cat) | |
states_array.append(cat) | |
state_file_name = file_name_states_path + cat | |
print(state_file_name) | |
fout = open(file_name_states_path + '{}.csv'.format(cat), 'w') | |
dw = csv.DictWriter(fout, fieldnames=csvin.fieldnames) | |
dw.writeheader() | |
outputs[cat] = fout, dw | |
outputs[cat][1].writerow(row) | |
for fout, _ in outputs.values(): | |
fout.close() | |
print('cms data split by state complete!') | |
def make_cms_state_date_csvs(file_name, states_array): | |
print('make cms state date csvs: ', file_name) | |
state_file_name = file_name.split('.') | |
state_file_name_path = state_file_name[0] + '-' | |
print('state file name path: ', state_file_name_path) | |
counter = 0 | |
for state in states_array: | |
print('counter: ', counter) | |
print('state: ', state) | |
print(state_file_name_path) | |
print(state_file_name_path + state) | |
counter = counter + 1 | |
state_file_path = state_file_name_path + state + '.csv' | |
print('state file name: ', state_file_path) | |
with open(state_file_path) as fin: | |
csvin = csv.DictReader(fin) | |
outputs = {} | |
for row in csvin: | |
cat = row['Week Ending'] | |
cat = cat.replace('/', '-') | |
date_arr = cat.split('-') | |
file_text_date = date_arr[2] + '-' + date_arr[0] + '-' + date_arr[1] | |
print('corrected date: ', file_text_date) | |
if cat not in outputs: | |
state_file_name_out = state_file_name_path + state + '-' + cat | |
print(state_file_name_out) | |
fout = open(state_file_name_path + state + '-{}.csv'.format(file_text_date), 'w') | |
dw = csv.DictWriter(fout, fieldnames=csvin.fieldnames) | |
dw.writeheader() | |
outputs[cat] = fout, dw | |
outputs[cat][1].writerow(row) | |
for fout, _ in outputs.values(): | |
fout.close() | |
print('cms state data split by date complete!') | |
# make directory for latest dataset's date | |
make_date_directory(directory_current, file_date) | |
# get local cms data file name/path | |
nursing_home_dataset_csv_file_local = get_local_file_name(file_date, nursing_home_dataset_csv_file) | |
# download latest cms.gov dataset csv | |
get_latest_cms_data(nursing_home_dataset_csv_url, nursing_home_dataset_csv_file, nursing_home_dataset_csv_file_local) | |
# make clone of cms data | |
make_cms_data_clone(file_date, nursing_home_dataset_csv_file, file_name) | |
# chop cms csv into individual state csvs | |
make_cms_state_csvs(file_name) | |
# chop cms state csvs into individual state by date csvs | |
make_cms_state_date_csvs(file_name, states_array) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
data
The only maintenance this requires is updating the date in the script for the new week.
Note: the date is from the link above, and not the date on the dataset's landing page, because the landing page's date is the date published on the open data portal, not the date of the data collection.