# data source: https://health.data.ny.gov/Health/Hospital-Maternity-Information-Beginning-2008/net3-iygw
library(tidyverse)
library(scales)
library(ggplot2)
library(plotly)
library(RColorBrewer)

# import
# Build the full path to the CSV instead of calling setwd(): changing the
# working directory is a global side effect that leaks into whatever runs
# after this script. Edit `data_dir` to point at your local copy of the data.
data_dir <- '~/Documents/Python/hospitals/Data'

# One row per hospital / year / measure, as used by the filters below.
# stringsAsFactors is spelled out with FALSE (not the reassignable `F`) so
# text columns stay character on R versions older than 4.0 as well.
hospitals <- read.csv(
  file.path(data_dir, 'Hospital_Maternity_Information__Beginning_2008.csv'),
  stringsAsFactors = FALSE
)
### EXPLORE ###

# categories
# Distinct (Category, Denominator, Measure) combinations among the rows that
# belong to individual counties, i.e. every row not labelled 'Statewide'.
categories <- hospitals %>%
  filter(Hospital.County != 'Statewide') %>%
  distinct(Category, Denominator, Measure.Name) %>%
  select(Category, Denominator, Measure = Measure.Name) %>%
  arrange(Category)

# The same summary restricted to the 'Statewide' rows.
statewide_categories <- hospitals %>%
  filter(Hospital.County == 'Statewide') %>%
  distinct(Category, Denominator, Measure.Name) %>%
  select(Category, Denominator, Measure = Measure.Name) %>%
  arrange(Category)

# differences
# Measures present in the per-county rows but absent from the statewide rows.
dplyr::setdiff(categories$Measure, statewide_categories$Measure)

# Row counts of both summaries, side by side.
print(paste0('categories: ', nrow(categories),
             ', statewide categories: ', nrow(statewide_categories)))
# hospitals per county
# Horizontal bar chart of the ten counties with the most distinct hospitals.
hospitals %>%
  distinct(Hospital.County, Hospital.Name) %>%
  select(County = Hospital.County, Hospital = Hospital.Name) %>%
  # count(County, ...) groups and tallies in one step, so no separate
  # group_by() is needed and the result comes back ungrouped.
  count(County, sort = TRUE) %>%
  head(10) %>%
  # NOTE(review): `text` is not an aesthetic that geom_col() draws; it is
  # presumably kept for plotly::ggplotly() tooltips -- confirm before removing.
  ggplot(aes(x = reorder(County, n), y = n, text = County)) +
  geom_col(show.legend = FALSE, fill = 'dodgerblue4') +
  coord_flip() +
  theme_minimal() +
  labs(x = '',
       y = 'Number of Hospitals',
       title = 'Number of Hospitals per County, Top 10')
### VISUALIZE ###

# Where are the most babies born?
# Horizontal bars of the 'Total Births' count per Westchester hospital in
# 2016, ordered by count.
hospitals %>%
  filter(Hospital.County == 'WESTCHESTER',
         Measure.Name == 'Total Births',
         Year == 2016) %>%
  ggplot(aes(x = reorder(Hospital.Name, Count), y = Count)) +
  geom_col(colour = 'dodgerblue4', fill = 'dodgerblue4') +
  scale_y_continuous(labels = comma) +
  coord_flip() +
  labs(title = 'Number of Births by Hospital in Westchester County, 2016',
       x = '',
       y = 'Number of Births') +
  # The original added theme(axis.text.x / axis.ticks.x / axis.title.x =
  # element_blank()) *before* theme_minimal(). A complete theme added later
  # replaces all earlier theme settings, so that call never took effect; it is
  # omitted here to keep the rendered plot unchanged. To actually hide the
  # axis, add such a theme() call after theme_minimal().
  theme_minimal()
# Yearly 'Total Births' per Westchester hospital, one facet per hospital,
# with a dotted linear trend line drawn underneath the data line.
hospitals %>%
  filter(Hospital.County == 'WESTCHESTER',
         Measure.Name == 'Total Births') %>%
  group_by(Hospital.Name, Year) %>%
  summarise(Total = sum(Count)) %>%
  ggplot(aes(x = Year, y = Total)) +
  geom_smooth(method = 'lm', color = 'black', size = .5, se = FALSE,
              linetype = 3) +
  geom_line(color = 'dodgerblue4', size = 1) +
  scale_y_continuous(labels = comma) +
  theme_minimal() +
  facet_wrap(~Hospital.Name) +
  # NOTE(review): the year range in the title is hard-coded while the data is
  # not filtered by year -- confirm it matches the years present in the file.
  labs(title = 'Number of Births by Hospital in Westchester County, 2008-16')
# Where are births attended by a midwife?
# Horizontal bars of the 'Attended by Licensed Midwife' percentage per
# Westchester hospital in 2016, ordered by percentage.
hospitals %>%
  filter(Hospital.County == 'WESTCHESTER',
         Measure.Name == 'Attended by Licensed Midwife',
         Year == 2016) %>%
  ggplot(aes(x = reorder(Hospital.Name, Percent), y = Percent)) +
  geom_col(colour = 'dodgerblue4', fill = 'dodgerblue4') +
  coord_flip() +
  labs(title = 'Percentage of Births Attended by Licensed Midwife by Hospital in 2016',
       subtitle = 'Westchester County',
       x = '',
       y = 'Percentage of Births') +
  # The original added theme(axis.*.x = element_blank()) before
  # theme_minimal(); a complete theme added afterwards replaces earlier theme
  # settings, so that call had no effect. It is omitted to keep the rendered
  # plot unchanged.
  theme_minimal()
# Trend of midwife-assisted births over time
# Percentage of births attended by a licensed midwife per year for three
# selected Westchester hospitals, one facet each, with a dashed linear trend.
hospitals %>%
  filter(Hospital.County == 'WESTCHESTER',
         Measure.Name == 'Attended by Licensed Midwife',
         Hospital.Name %in% c('Hudson Valley Hospital Center',
                              'Phelps Memorial Hospital Assn',
                              'White Plains Hospital Center')) %>%
  # The original piped through group_by(Hospital.Name, Year) here; nothing
  # downstream summarised by group, so the grouping is dropped.
  ggplot(aes(x = Year, y = Percent)) +
  geom_line(size = 2, color = 'deepskyblue4') +
  geom_smooth(method = 'lm', se = FALSE, color = 'grey',
              size = 1, linetype = 2, alpha = .75) +
  facet_wrap(~Hospital.Name) +
  labs(title = 'Percentage of Births Attended by Licensed Midwife by Hospital Over Time',
       subtitle = 'Westchester County, Selected Hospitals',
       x = '',
       y = 'Percentage of Births') +
  # The original's theme(axis.*.x = element_blank()) preceded theme_minimal()
  # and was therefore overridden by it; omitted to keep the plot unchanged.
  theme_minimal()
# impact of midwives on labor outcomes
# Scatter plot, one point per Westchester hospital and year: percentage of
# births attended by a licensed midwife (x) against percentage of augmented
# labors (y), coloured by hospital.
#
# Each input is filtered to a single measure, so the original spread() call
# only renamed `Percent` to the measure name; a renaming select() does the
# same without the superseded spread().
hospitals %>%
  filter(Hospital.County == 'WESTCHESTER',
         Measure.Name == 'Attended by Licensed Midwife') %>%
  select(Year, Hospital.Name, `Attended by Licensed Midwife` = Percent) %>%
  left_join(
    hospitals %>%
      filter(Hospital.County == 'WESTCHESTER',
             Measure.Name == 'Augmented Labor') %>%
      select(Year, Hospital.Name, `Augmented Labor` = Percent),
    # Explicit keys: these are the two columns both sides share, which is
    # what the original natural join matched on (with a console message).
    by = c('Year', 'Hospital.Name')
  ) %>%
  # The original also computed a 'Low'/'High' `coloring` column for two named
  # hospitals, but no layer mapped it (colour follows Hospital.Name), so it is
  # dropped here.
  ggplot(aes(x = `Attended by Licensed Midwife`, y = `Augmented Labor`,
             col = Hospital.Name)) +
  # geom_jitter() adds random noise, so point positions differ between runs.
  geom_jitter(size = 3) +
  scale_y_continuous(limits = c(0, 60)) +
  scale_x_continuous(limits = c(0, 60)) +
  theme_minimal()
# Vaginal versus cesarean births
# Yearly percentage of vaginal and cesarean births per Westchester hospital,
# one facet per hospital and one coloured line per measure.
hospitals %>%
  filter(Hospital.County == 'WESTCHESTER',
         Measure.Name %in% c('Vaginal Births', 'Cesarean Births')) %>%
  ggplot(aes(x = Year, y = Percent, colour = Measure.Name)) +
  geom_line(size = 2) +
  # Unnamed values are matched to the measures in scale order.
  scale_color_manual(values = c('dodgerblue4', 'grey')) +
  facet_wrap(~Hospital.Name) +
  labs(title = 'Percentage of Vaginal versus Cesarean Births Over Time',
       subtitle = 'Westchester County',
       x = '',
       y = 'Percentage of Births') +
  # The original's theme(axis.*.x = element_blank()) came before
  # theme_minimal() and was overridden by it; it is omitted to keep the plot
  # unchanged. The legend tweaks below come after the complete theme, so they
  # do take effect.
  theme_minimal() +
  theme(legend.position = 'top', legend.title = element_blank())
# function to visualize given measure over time

#' Plot one percentage measure over time, faceted by hospital
#'
#' Filters `data` to a single county and measure and draws the `Percent`
#' column against `Year`, one facet per `Hospital.Name`, on a fixed 0-100
#' y axis.
#'
#' @param Measure String compared for equality with the `Measure.Name` column.
#' @param County String compared for equality with the `Hospital.County`
#'   column.
#' @param data Data frame providing the columns `Hospital.County`,
#'   `Measure.Name`, `Year`, `Percent` and `Hospital.Name`. Defaults to the
#'   script-level `hospitals` table, so existing two-argument calls behave as
#'   before.
#' @return A ggplot object; it is drawn when printed (e.g. auto-printed at
#'   top level).
visMeasure <- function(Measure, County, data = hospitals) {
  data %>%
    # `!!` injects the argument values, so a data column that happened to be
    # named `Measure` or `County` could not shadow the function arguments.
    filter(Hospital.County == !!County,
           Measure.Name == !!Measure) %>%
    ggplot(aes(x = Year, y = Percent)) +
    geom_line(size = 2, color = 'deepskyblue4') +
    facet_wrap(~Hospital.Name) +
    scale_y_continuous(limits = c(0, 100)) +
    labs(title = paste(Measure, '(Percentage) by Hospital Over Time'),
         x = '',
         y = 'Percentage of Births') +
    # The original's theme(axis.*.x = element_blank()) preceded
    # theme_minimal() and was overridden by it; omitted to keep the plot
    # unchanged.
    theme_minimal()
}

# demo
visMeasure('Fed Any Breast Milk', 'WESTCHESTER')
visMeasure('Mid Forceps Delivery', 'WESTCHESTER')
# (GitHub gist page footer removed here: it was web-page text, not part of the script.)