# Created March 10, 2019 21:33
# Packages used throughout this script: dplyr (pipes and data verbs),
# tidyr (spread), ggplot2 (plots) and scales (comma axis labels).
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)

# data source: not recorded here.
# NOTE(review): the file name suggests a "Hospital Maternity Information:
# Beginning 2008" open-data export -- confirm and add the URL.
# import
# Text columns are kept as character vectors (not factors) so they can be
# compared against string literals and reordered in the plots below.
hospitals <- read.csv('Hospital_Maternity_Information__Beginning_2008.csv', stringsAsFactors = FALSE)
### EXPLORE ###
# categories
# Distinct (Category, Denominator, Measure) combinations reported on the
# per-county rows, sorted by category.
categories <- hospitals %>%
  filter(Hospital.County != 'Statewide') %>%
  distinct(Category, Denominator, Measure.Name) %>%
  select(Category, Denominator, Measure = Measure.Name) %>%
  arrange(Category)

# The same combinations, restricted to the 'Statewide' rows.
statewide_categories <- hospitals %>%
  filter(Hospital.County == 'Statewide') %>%
  distinct(Category, Denominator, Measure.Name) %>%
  select(Category, Denominator, Measure = Measure.Name) %>%
  arrange(Category)

# differences
# Measures that appear on county rows but not on the statewide rows.
dplyr::setdiff(categories$Measure, statewide_categories$Measure)
print(paste0('categories: ', nrow(categories), ', statewide categories: ', nrow(statewide_categories)))
# hospitals per county
# Count the distinct hospitals in each county and draw the ten counties with
# the most hospitals as a horizontal bar chart, largest at the top.
hospitals %>%
  distinct(Hospital.County, Hospital.Name) %>%
  select(County = Hospital.County, Hospital = Hospital.Name) %>%
  group_by(County) %>%
  count(sort = TRUE) %>%
  head(n = 10) %>%
  ggplot(aes(x = reorder(County, n), y = n, text = County)) +
  geom_col(fill = 'dodgerblue4', show.legend = FALSE) +
  scale_x_discrete() +
  coord_flip() +
  theme_minimal() +
  labs(
    title = 'Number of Hospitals per County, Top 10',
    x = '',
    y = 'Number of Hospitals'
  )
# Where are the most babies born?
# Horizontal bar chart of 'Total Births' per Westchester hospital in 2016,
# ordered by the number of births.
hospitals %>%
  filter(Hospital.County == 'WESTCHESTER',
         Measure.Name == 'Total Births',
         Year == 2016) %>%
  ggplot(., aes(x = reorder(Hospital.Name, Count), y = Count)) +
  geom_bar(stat = 'identity', col = 'dodgerblue4', fill = 'dodgerblue4') +
  scale_y_continuous(labels = comma) +
  coord_flip() +
  labs(title = 'Number of Births by Hospital in Westchester County, 2016') +
  xlab('') + ylab('Number of Births') +
  # NOTE(review): after coord_flip() the horizontal axis carries the birth
  # counts, so this hides the count labels and the 'Number of Births' title --
  # confirm that is intended.
  # The chain ends here: a trailing `+` would try to add the next statement's
  # plot to this one and fail.
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
# Births per hospital and year in Westchester County, one facet per hospital,
# with a dotted linear trend line drawn underneath the yearly series.
hospitals %>%
  filter(
    Hospital.County == 'WESTCHESTER',
    Measure.Name == 'Total Births'
  ) %>%
  group_by(Hospital.Name, Year) %>%
  summarise(Total = sum(Count)) %>%
  ggplot(aes(x = Year, y = Total)) +
  geom_smooth(method = 'lm', se = FALSE, linetype = 3, size = .5, color = 'black') +
  geom_line(size = 1, color = 'dodgerblue4') +
  facet_wrap(~Hospital.Name) +
  scale_y_continuous(labels = comma) +
  theme_minimal() +
  labs(title = 'Number of Births by Hospital in Westchester County, 2008-16')
# Where are births attended by a midwife?
# Horizontal bar chart of the 'Attended by Licensed Midwife' percentage per
# Westchester hospital in 2016, ordered by that percentage.
hospitals %>%
  filter(Hospital.County == 'WESTCHESTER',
         Measure.Name == 'Attended by Licensed Midwife',
         Year == 2016) %>%
  ggplot(., aes(x = reorder(Hospital.Name, Percent), y = Percent)) +
  geom_bar(stat = 'identity', col = 'dodgerblue4', fill = 'dodgerblue4') +
  coord_flip() +
  labs(title = 'Percentage of Births Attended by Licensed Midwife by Hospital in 2016',
       subtitle = 'Westchester County') +
  xlab('') + ylab('Percentage of Births') +
  # NOTE(review): after coord_flip() the horizontal axis carries the
  # percentages, so this hides their labels and the axis title -- confirm
  # that is intended.
  # The chain ends here: a trailing `+` would chain the next statement into
  # this plot and fail.
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
# Trend of midwife-assisted births over time
# Yearly 'Attended by Licensed Midwife' percentage for three selected
# Westchester hospitals, one facet each, with a dashed linear trend line.
hospitals %>%
  filter(Hospital.County == 'WESTCHESTER',
         Measure.Name == 'Attended by Licensed Midwife') %>%
  filter(Hospital.Name %in% c('Hudson Valley Hospital Center',
                              'Phelps Memorial Hospital Assn',
                              'White Plains Hospital Center')) %>%
  # No group_by() here: ggplot2 ignores dplyr grouping, and the facets
  # already split the series per hospital.
  ggplot(., aes(x = Year)) +
  geom_line(aes(y = Percent), size = 2, color = 'deepskyblue4') +
  geom_smooth(aes(y = Percent), method = 'lm', se = FALSE, color = 'grey',
              size = 1, linetype = 2, alpha = .75) +
  facet_wrap(~Hospital.Name) +
  labs(title = 'Percentage of Births Attended by Licensed Midwife by Hospital Over Time',
       subtitle = 'Westchester County, Selected Hospitals') +
  xlab('') + ylab('Percentage of Births') +
  # NOTE(review): this hides the Year axis labels -- confirm that is intended.
  # The chain ends here: a trailing `+` would chain the next statement into
  # this plot and fail.
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
# impact of midwives on labor outcomes
# Pair each Westchester hospital-year's 'Attended by Licensed Midwife'
# percentage with its 'Augmented Labor' percentage and scatter one against
# the other, coloured by hospital.
hospitals %>%
  filter(Hospital.County == 'WESTCHESTER', Measure.Name == 'Attended by Licensed Midwife') %>%
  # After filtering to a single measure, Percent is that measure's value, so
  # it is renamed directly instead of being reshaped with spread().
  select(Year, Hospital.Name, `Attended by Licensed Midwife` = Percent) %>%
  # NOTE(review): the join call was missing from the script (only its closing
  # parenthesis was left); an inner join on Year and Hospital.Name is assumed
  # -- confirm.
  inner_join(
    hospitals %>%
      filter(Hospital.County == 'WESTCHESTER', Measure.Name == 'Augmented Labor') %>%
      select(Year, Hospital.Name, `Augmented Labor` = Percent),
    by = c('Year', 'Hospital.Name')
  ) %>%
  # `coloring` is computed but not mapped to any aesthetic below; it is kept
  # so the column stays available if the colour mapping is changed.
  mutate(coloring = ifelse(Hospital.Name == 'Phelps Memorial Hospital Assn' |
                             Hospital.Name == 'Hudson Valley Hospital Center', 'Low', 'High')) %>%
  ggplot(., aes(x = `Attended by Licensed Midwife`, y = `Augmented Labor`, col = Hospital.Name)) +
  geom_jitter(size = 3) +
  # Both axes share the 0-60 range so the two percentages are comparable;
  # points outside that range are dropped by the scale limits.
  scale_y_continuous(limits = c(0, 60)) +
  scale_x_continuous(limits = c(0, 60))
# Vaginal versus cesarean births
# Yearly percentage of vaginal and cesarean births for every Westchester
# hospital, one facet per hospital and one coloured line per measure.
hospitals %>%
  filter(Hospital.County == 'WESTCHESTER',
         Measure.Name %in% c('Vaginal Births', 'Cesarean Births')) %>%
  ggplot(., aes(x = Year, colour = Measure.Name)) +
  geom_line(aes(y = Percent), size = 2) +
  scale_color_manual(values = c('dodgerblue4', 'grey')) +
  facet_wrap(~Hospital.Name) +
  labs(title = 'Percentage of Vaginal versus Cesarean Births Over Time',
       subtitle = 'Westchester County') +
  xlab('') + ylab('Percentage of Births') +
  # The script previously blanked the x-axis text/ticks/title right before
  # theme_minimal(); a complete theme replaces earlier theme() settings, so
  # that call had no effect and is dropped. The Year axis stays visible,
  # exactly as before.
  theme_minimal() +
  theme(legend.position = 'top', legend.title = element_blank())
# function to visualize given measure over time
# Draws the Percent value of one measure over the years for every hospital in
# a county, one facet per hospital, with the y axis fixed to 0-100.
#   Measure: value compared against the Measure.Name column
#   County:  value compared against the Hospital.County column
# Returns the ggplot object. Reads the file-level `hospitals` data frame.
visMeasure <- function(Measure, County) {
  hospitals %>%
    filter(Hospital.County == County,
           Measure.Name == Measure) %>%
    ggplot(., aes(x = Year)) +
    geom_line(aes(y = Percent), size = 2, color = 'deepskyblue4') +
    facet_wrap(~Hospital.Name) +
    scale_y_continuous(limits = c(0, 100)) +
    labs(title = paste(Measure, '(Percentage) by Hospital Over Time', sep = ' ')) +
    xlab('') + ylab('Percentage of Births') +
    # NOTE(review): this hides the Year axis labels -- confirm that is intended.
    theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
}
# demo
# Two example calls for Westchester County; each returns a plot that is
# printed when the script runs at top level.
visMeasure(Measure = 'Fed Any Breast Milk', County = 'WESTCHESTER')
visMeasure(Measure = 'Mid Forceps Delivery', County = 'WESTCHESTER')
