# Source: GitHub gist erikgregorywebb/87b82b862e537b0998ae7ebbd2a8d4ec
# Created: March 10, 2019 21:33
# data source: https://health.data.ny.gov/Health/Hospital-Maternity-Information-Beginning-2008/net3-iygw
library(tidyverse)
library(scales)
library(ggplot2)
library(plotly)
library(RColorBrewer)

# import
# The CSV is read by full path instead of calling setwd(), so running the
# script does not change the session's working directory.
data_dir <- '~/Documents/Python/hospitals/Data'
hospitals <- read.csv(
  file.path(data_dir, 'Hospital_Maternity_Information__Beginning_2008.csv'),
  stringsAsFactors = FALSE
)
### EXPLORE ###

# categories
# Distinct (Category, Denominator, Measure) combinations, computed separately
# for rows whose Hospital.County is 'Statewide' and for all other rows.
categories <- hospitals %>%
  filter(Hospital.County != 'Statewide') %>%
  distinct(Category, Denominator, Measure.Name) %>%
  select(Category, Denominator, Measure = Measure.Name) %>%
  arrange(Category)

statewide_categories <- hospitals %>%
  filter(Hospital.County == 'Statewide') %>%
  distinct(Category, Denominator, Measure.Name) %>%
  select(Category, Denominator, Measure = Measure.Name) %>%
  arrange(Category)

# differences
# Measures present in the non-statewide rows but absent from the statewide rows.
dplyr::setdiff(categories$Measure, statewide_categories$Measure)
print(paste0(
  'categories: ', nrow(categories),
  ', statewide categories: ', nrow(statewide_categories)
))
# hospitals per county
# Count the distinct hospitals in each county and plot the ten largest counts.
hospitals %>%
  distinct(Hospital.County, Hospital.Name) %>%
  select(County = Hospital.County, Hospital = Hospital.Name) %>%
  # count(County, ...) returns an ungrouped, descending-sorted table
  count(County, sort = TRUE) %>%
  head(10) %>%
  # `text` is not used by ggplot2 itself; it is kept for plotly tooltips
  ggplot(aes(x = reorder(County, n), y = n, text = County)) +
  geom_col(show.legend = FALSE, fill = 'dodgerblue4') +
  coord_flip() +
  theme_minimal() +
  labs(
    x = '',
    y = 'Number of Hospitals',
    title = 'Number of Hospitals per County, Top 10'
  )
### VISUALIZE ###

# Where are the most babies born?
# Horizontal bars of the 2016 'Total Births' count for each Westchester
# hospital, ordered by count.
hospitals %>%
  filter(
    Hospital.County == 'WESTCHESTER',
    Measure.Name == 'Total Births',
    Year == 2016
  ) %>%
  ggplot(aes(x = reorder(Hospital.Name, Count), y = Count)) +
  geom_col(colour = 'dodgerblue4', fill = 'dodgerblue4') +
  scale_y_continuous(labels = comma) +
  coord_flip() +
  # theme_minimal() is a complete theme: it replaces any theme() settings
  # added before it, so per-plot theme() tweaks must come after this line.
  theme_minimal() +
  labs(
    title = 'Number of Births by Hospital in Westchester County, 2016',
    x = '',
    y = 'Number of Births'
  )
# Yearly 'Total Births' per Westchester hospital, one facet per hospital, with
# a dotted linear trend line drawn underneath the series.
hospitals %>%
  filter(
    Hospital.County == 'WESTCHESTER',
    Measure.Name == 'Total Births'
  ) %>%
  group_by(Hospital.Name, Year) %>%
  summarise(Total = sum(Count)) %>%
  # summarise() drops only the last grouping level; remove the rest explicitly
  ungroup() %>%
  ggplot(aes(x = Year, y = Total)) +
  geom_smooth(
    method = 'lm', color = 'black', size = .5, se = FALSE, linetype = 3
  ) +
  geom_line(color = 'dodgerblue4', size = 1) +
  scale_y_continuous(labels = comma) +
  theme_minimal() +
  facet_wrap(~Hospital.Name) +
  labs(title = 'Number of Births by Hospital in Westchester County, 2008-16')
# Where are births attended by a midwife?
# Horizontal bars of the 2016 'Attended by Licensed Midwife' percentage for
# each Westchester hospital, ordered by percentage.
hospitals %>%
  filter(
    Hospital.County == 'WESTCHESTER',
    Measure.Name == 'Attended by Licensed Midwife',
    Year == 2016
  ) %>%
  ggplot(aes(x = reorder(Hospital.Name, Percent), y = Percent)) +
  geom_col(colour = 'dodgerblue4', fill = 'dodgerblue4') +
  coord_flip() +
  theme_minimal() +
  labs(
    title = 'Percentage of Births Attended by Licensed Midwife by Hospital in 2016',
    subtitle = 'Westchester County',
    x = '',
    y = 'Percentage of Births'
  )
# Trend of midwife-assisted births over time
# Yearly 'Attended by Licensed Midwife' percentage for three selected
# Westchester hospitals, one facet each, with a dashed linear trend line.
hospitals %>%
  filter(
    Hospital.County == 'WESTCHESTER',
    Measure.Name == 'Attended by Licensed Midwife',
    Hospital.Name %in% c(
      'Hudson Valley Hospital Center',
      'Phelps Memorial Hospital Assn',
      'White Plains Hospital Center'
    )
  ) %>%
  ggplot(aes(x = Year, y = Percent)) +
  geom_line(size = 2, color = 'deepskyblue4') +
  geom_smooth(
    method = 'lm', se = FALSE, color = 'grey',
    size = 1, linetype = 2, alpha = .75
  ) +
  facet_wrap(~Hospital.Name) +
  theme_minimal() +
  labs(
    title = 'Percentage of Births Attended by Licensed Midwife by Hospital Over Time',
    subtitle = 'Westchester County, Selected Hospitals',
    x = '',
    y = 'Percentage of Births'
  )
# impact of midwives on labor outcomes
# Scatter of the 'Augmented Labor' percentage against the 'Attended by
# Licensed Midwife' percentage; one point per Westchester hospital and year.
hospitals %>%
  filter(
    Hospital.County == 'WESTCHESTER',
    Measure.Name == 'Attended by Licensed Midwife'
  ) %>%
  # each subset holds a single measure, so Percent is simply renamed after it
  select(Year, Hospital.Name, `Attended by Licensed Midwife` = Percent) %>%
  left_join(
    hospitals %>%
      filter(
        Hospital.County == 'WESTCHESTER',
        Measure.Name == 'Augmented Labor'
      ) %>%
      select(Year, Hospital.Name, `Augmented Labor` = Percent),
    by = c('Year', 'Hospital.Name')
  ) %>%
  ggplot(aes(
    x = `Attended by Licensed Midwife`,
    y = `Augmented Labor`,
    col = Hospital.Name
  )) +
  geom_jitter(size = 3) +
  # points outside 0-60 on either axis are dropped by these limits
  scale_y_continuous(limits = c(0, 60)) +
  scale_x_continuous(limits = c(0, 60)) +
  theme_minimal()
# Vaginal versus cesarean births
# Yearly percentage of each delivery type per Westchester hospital, one facet
# per hospital, one coloured line per measure.
hospitals %>%
  filter(
    Hospital.County == 'WESTCHESTER',
    Measure.Name %in% c('Vaginal Births', 'Cesarean Births')
  ) %>%
  ggplot(aes(x = Year, y = Percent, colour = Measure.Name)) +
  geom_line(size = 2) +
  # colours are named so the mapping does not depend on level order
  scale_color_manual(
    values = c('Cesarean Births' = 'dodgerblue4', 'Vaginal Births' = 'grey')
  ) +
  facet_wrap(~Hospital.Name) +
  # theme_minimal() is a complete theme, so the legend tweaks must follow it
  theme_minimal() +
  theme(legend.position = 'top', legend.title = element_blank()) +
  labs(
    title = 'Percentage of Vaginal versus Cesarean Births Over Time',
    subtitle = 'Westchester County',
    x = '',
    y = 'Percentage of Births'
  )
# function to visualize given measure over time
#
# Draws the Percent value of one measure against Year as a line, with one
# facet per hospital, for the hospitals of a single county.
#
# Arguments:
#   Measure  value of Measure.Name to plot, e.g. 'Fed Any Breast Milk'
#   County   value of Hospital.County to keep, e.g. 'WESTCHESTER'
#   data     data frame with Hospital.County, Measure.Name, Hospital.Name,
#            Year and Percent columns; defaults to the global `hospitals`
# Returns the ggplot object (printed automatically when called at top level).
visMeasure <- function(Measure, County, data = hospitals) {
  data %>%
    # `!!` forces the function arguments to be used even if `data` happens to
    # contain columns named County or Measure
    filter(
      Hospital.County == !!County,
      Measure.Name == !!Measure
    ) %>%
    ggplot(aes(x = Year, y = Percent)) +
    geom_line(size = 2, color = 'deepskyblue4') +
    facet_wrap(~Hospital.Name) +
    scale_y_continuous(limits = c(0, 100)) +
    # theme_minimal() is a complete theme: any theme() tweak must be added
    # after it to take effect
    theme_minimal() +
    labs(
      title = paste(Measure, '(Percentage) by Hospital Over Time'),
      x = '',
      y = 'Percentage of Births'
    )
}
# demo
visMeasure(Measure = 'Fed Any Breast Milk', County = 'WESTCHESTER')
visMeasure(Measure = 'Mid Forceps Delivery', County = 'WESTCHESTER')
# (GitHub page footer - sign-up / sign-in prompts - is not part of the script.)