The US government released Medicare provider charge data in early 2013. This is an analysis of that data across the US. It compares the cost of treatments per hospital and per state against the total Medicare coverage and shows where the discrepancy is greatest.
Created
March 19, 2014 11:54
-
-
Save ilanman/830502496e6e17b2f133 to your computer and use it in GitHub Desktop.
Analysis of healthcare data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Ilan Man
## 05/17/2013
## U.S. Medicare Provider Charge data
library(ggplot2)

# NOTE(review): machine-specific working directory; the relative read.csv()
# calls in this script depend on it, so it is left in place here.
setwd("D:/Users/iman/Desktop")
hospital <- read.csv("healthcaredata.csv")

# Shorten the provider names (column 3): drop the first " HOSPITAL" and then
# the first " CENTER" occurrence from each name.
hospital[[3]] <- sub(
  " CENTER", "",
  sub(" HOSPITAL", "", as.character(hospital[[3]]))
)

## narrow data
# NOTE(review): an earlier comment here said "Select 4 states in the
# northeast - PA, NY, NJ, MA", but only NY is actually selected below.
# Column 5 is labelled "City" later in this script; this call just lists
# its distinct values.
unique(hospital[[5]])
data <- hospital[hospital[[6]] %in% "NY", ]

# Distinct values of column 1 within the NY subset, in order of first
# appearance.
conditions <- unique(data[[1]])
# Entry 52 is taken to be diabetes (per the plot titles below) -- TODO confirm;
# the index depends on the row order of the input file.
data <- data[data[[1]] == conditions[52], ]
### plot maximum payment
# One bar per provider name (column 3) showing column 11, with the bars
# arranged in ascending order of that value. Axis breaks are suppressed, so
# individual provider names are not printed along the x axis.
sort.payment <- data[order(data[[11]]), ]
ggplot(
  data = sort.payment,
  aes(
    x = reorder(sort.payment[[3]], sort.payment[[11]]),
    y = sort.payment[[11]]
  )
) +
  geom_bar(stat = "identity", width = .5, fill = "#DE1887", colour = "black") +
  scale_x_discrete(breaks = NULL) +
  labs(
    x = "Hospital",
    y = "Average cost",
    title = "Average cost for treating patients diagnosed with diabetes"
  )
### plot maximum covered charge
# One bar per provider name (column 3) showing column 10, with the bars
# arranged in ascending order of that value. Axis breaks are suppressed, so
# individual provider names are not printed along the x axis.
sort.charges <- data[order(data[[10]]), ]
ggplot(
  data = sort.charges,
  aes(
    x = reorder(sort.charges[[3]], sort.charges[[10]]),
    y = sort.charges[[10]]
  )
) +
  geom_bar(stat = "identity", width = .5, fill = "#DE1887", colour = "black") +
  scale_x_discrete(breaks = NULL) +
  labs(
    x = "Hospital",
    y = "Average covered charge",
    title = "Average covered charge for treating patients diagnosed with diabetes"
  )
### find maximum difference
# Per-provider gap between column 10 (plotted above as the covered charge) and
# column 11 (plotted above as the payment), keeping the city and provider name
# alongside it.
# NOTE(review): the name `diff` masks base::diff as a variable (function calls
# to diff() still resolve to the base function); kept for compatibility.
diff <- data.frame(
  "City" = data[, 5],
  "Hospital" = data[, 3],
  "Diff" = as.numeric(data[, 10] - data[, 11])
)

# Keep the 20 largest gaps. head() is used rather than [1:20, ] because the
# latter pads the result with all-NA rows when fewer than 20 providers match.
diff <- head(diff[order(diff$Diff, decreasing = TRUE), ], 20) ## top 20 hospitals

# Columns are referenced by bare name inside aes() so they are looked up in the
# `data` argument instead of reaching around it with diff$...
ggplot(data = diff, aes(y = Diff, x = reorder(Hospital, Diff))) +
  geom_bar(colour = "black", fill = "#DE1887", width = .5, stat = "identity") +
  xlab("Top 20 Hospitals") + ylab("Average Difference") +
  scale_x_discrete(breaks = NULL) +
  ggtitle("Average difference between covered charges and total\n payments for treating patients diagnosed with diabetes")
library(maps)

### state-level maps of mean payment and mean covered charge
# Re-read the raw file so every state is available again (the earlier part of
# the script narrowed `data` to NY only).
hospital <- read.csv("healthcaredata.csv")
conditions <- unique(hospital[, 1])
# Entry 52 is taken to be the diabetes condition used in the plots above --
# TODO confirm; the index depends on the row order of the input file.
data <- hospital[hospital[, 1] == conditions[52], ]

# Mean of column 11 (payment) and column 10 (covered charge) per value of
# column 6 (state). avCost is rebuilt from scratch here, so no rm(avCost) is
# needed beforehand (it only produced a warning when avCost did not exist).
state_group <- factor(data[, 6])
avCost <- data.frame(
  Payment = as.vector(tapply(data[, 11], state_group, mean)),
  CovCharge = as.vector(tapply(data[, 10], state_group, mean)),
  row.names = levels(state_group)
)
# row.names() must be taken from the data frame itself: a single column is a
# plain vector, for which row.names() is NULL and no State column is created.
avCost$State <- row.names(avCost)

# Bin each measure into 8 equal-width intervals, coded 1..8.
avCost$PayCat <- as.numeric(cut(avCost$Payment, 8))
avCost$CovCat <- as.numeric(cut(avCost$CovCharge, 8))
cols <- c("#800000", "#8B0000", "#A52A2A", "#B22222",
          "#FF7F50", "#DC143C", "#FF6347", "#FF0000")

# map() fills polygons in the order of its own region names (lower-case full
# state names, some split into "state:subregion" pieces), not in the row order
# of avCost. Translate each polygon to a state abbreviation and look up its
# row so every polygon gets its own state's colour. Regions with no matching
# row get NA and are left unfilled.
map_regions <- map("state", plot = FALSE, namesonly = TRUE)
region_state <- sub(":.*$", "", map_regions)
abb_lookup <- c(setNames(state.abb, tolower(state.name)),
                "district of columbia" = "DC")
region_row <- match(abb_lookup[region_state], avCost$State)

# Stack the two maps vertically, then restore the previous graphics settings.
old_par <- par(mfrow = c(2, 1))
map("state", col = cols[avCost$PayCat[region_row]], fill = TRUE)
title("$ amount of Total Payments")
map("state", col = cols[avCost$CovCat[region_row]], fill = TRUE)
title("$ amount of Covered Charge")
par(old_par)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment