Skip to content

Instantly share code, notes, and snippets.

@ilanman
Created March 19, 2014 11:54
Show Gist options
  • Save ilanman/830502496e6e17b2f133 to your computer and use it in GitHub Desktop.
Save ilanman/830502496e6e17b2f133 to your computer and use it in GitHub Desktop.
Analysis of healthcare data

The US government released Medicare data in early 2013. This is an analysis of that data across the US. It compares the cost of treatments per hospital and per state against total Medicare coverage and shows where the discrepancy is greatest.

## Ilan Man
## 05/17/2013
## U.S Medicare Provider Charge data
library(ggplot2)
# NOTE(review): hard-coded, machine-specific working directory. It is kept
# because the later read.csv() calls use a bare file name and depend on it.
setwd('D:/Users/iman/Desktop')
hospital <- read.csv('healthcaredata.csv')

# Shorten the names in column 3 by removing the first " HOSPITAL" and then the
# first " CENTER" from each value (sub() replaces one match per string).
hospital[, 3] <- sub(
  " CENTER", '',
  sub(" HOSPITAL", '', as.character(hospital[, 3]))
)

## Narrow the data
# Exploratory listing of the distinct values in column 5.
unique(hospital[, 5])

# Keep only the rows whose column 6 is 'NY' (only one state is selected here).
data <- hospital[hospital[, 6] %in% 'NY', ]

# Keep a single condition: the 52nd distinct value of column 1 within the NY
# rows (labelled "diabetes" by the original author).
conditions <- unique(data[, 1])
data <- data[data[, 1] == conditions[52], ]
### Bar chart of column 11 (labelled "Average cost") for every row in `data`,
### with bars ordered from lowest to highest value.
sort.payment <- data[order(data[, 11]), ]
ggplot(
  data = sort.payment,
  aes(
    x = reorder(sort.payment[[3]], sort.payment[[11]]),
    y = sort.payment[[11]]
  )
) +
  geom_bar(stat = "identity", width = .5, colour = "black", fill = "#DE1887") +
  # breaks = NULL hides the x-axis ticks and labels (one bar per hospital).
  scale_x_discrete(breaks = NULL) +
  labs(
    x = "Hospital",
    y = "Average cost",
    title = "Average cost for treating patients diagnosed with diabetes"
  )
### Bar chart of column 10 (labelled "Average covered charge") for every row
### in `data`, with bars ordered from lowest to highest value.
sort.charges <- data[order(data[, 10]), ]
ggplot(
  data = sort.charges,
  aes(
    x = reorder(sort.charges[[3]], sort.charges[[10]]),
    y = sort.charges[[10]]
  )
) +
  geom_bar(stat = "identity", width = .5, colour = "black", fill = "#DE1887") +
  # breaks = NULL hides the x-axis ticks and labels (one bar per hospital).
  scale_x_discrete(breaks = NULL) +
  labs(
    x = "Hospital",
    y = "Average covered charge",
    title = "Average covered charge for treating patients diagnosed with diabetes"
  )
### Largest gaps between covered charge (column 10) and payment (column 11)
# One row per record in `data`: city (column 5), hospital name (column 3) and
# the difference column 10 minus column 11.
# NOTE(review): the variable name `diff` masks base::diff for value lookups;
# the name is kept so anything else referring to `diff` keeps working.
diff <- data.frame(
  "City" = data[, 5],
  "Hospital" = data[, 3],
  "Diff" = as.numeric(data[, 10] - data[, 11])
)

# Keep the 20 largest differences. head() returns fewer rows when fewer than
# 20 exist, whereas indexing with [1:20, ] would pad the result with NA rows.
diff <- head(diff[order(diff$Diff, decreasing = TRUE), ], 20)

# Refer to columns by bare name inside aes(); `diff$...` bypasses ggplot2's
# data masking and triggers a warning in current ggplot2 releases.
ggplot(data = diff, aes(y = Diff, x = reorder(Hospital, Diff))) +
  geom_bar(colour = "black", fill = "#DE1887", width = .5, stat = "identity") +
  xlab("Top 20 Hospitals") + ylab("Average Difference") +
  # breaks = NULL hides the x-axis ticks and labels.
  scale_x_discrete(breaks = NULL) +
  ggtitle("Average difference between covered charges and total\n payments for treating patients diagnosed with diabetes")
library(maps)
## State-level maps of mean payment and mean covered charge for one condition
# Re-read the raw file so every state is available again.
hospital <- read.csv('healthcaredata.csv')

# Select the 52nd distinct value of column 1 across the whole file
# (presumably the diabetes DRG - confirm the index against the data).
conditions <- unique(hospital[, 1])
data <- hospital[hospital[, 1] == conditions[52], ]

# Mean of column 11 (payments) and column 10 (covered charges) per value of
# column 6 (state). avCost is rebuilt from scratch, so no rm() is needed
# (rm() on a missing object only produces a warning).
state_means_pay <- tapply(data[, 11], factor(data[, 6]), mean)
state_means_cov <- tapply(data[, 10], factor(data[, 6]), mean)
avCost <- data.frame(
  Payment = as.vector(state_means_pay),
  CovCharge = as.vector(state_means_cov),
  row.names = names(state_means_pay)
)

# The state codes are the data frame's row names. row.names() on a single
# column is NULL, which silently left the State column uncreated.
avCost$State <- row.names(avCost)

# Bin each measure into 8 equal-width intervals; the bin number picks a colour.
avCost$PayCat <- cut(avCost$Payment, 8, labels = FALSE)
avCost$CovCat <- cut(avCost$CovCharge, 8, labels = FALSE)
cols <- c("#800000", "#8B0000", "#A52A2A", "#B22222",
          "#FF7F50", "#DC143C", "#FF6347", "#FF0000")

# map() matches fill colours one-to-one with its polygons, which are named by
# lowercase full state name (some with a ":sub-region" suffix) and are not in
# avCost's row order. Translate each polygon to a state abbreviation and look
# up the matching avCost row; polygons without data get NA (left unfilled).
region_names <- map("state", plot = FALSE, namesonly = TRUE)
region_state <- sub(":.*$", "", region_names)
abb_by_name <- c(
  setNames(state.abb, tolower(state.name)),
  "district of columbia" = "DC"
)
region_row <- match(abb_by_name[region_state], avCost$State)

# Stack the two maps vertically, then restore the previous graphics layout.
old_par <- par(mfrow = c(2, 1))
map("state", col = cols[avCost$PayCat[region_row]], fill = TRUE)
title("$ amount of Total Payments")
map("state", col = cols[avCost$CovCat[region_row]], fill = TRUE)
title("$ amount of Covered Charge")
par(old_par)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment