Skip to content

Instantly share code, notes, and snippets.

@ajdamico
Created November 10, 2010 20:52
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ajdamico/671495 to your computer and use it in GitHub Desktop.
Save ajdamico/671495 to your computer and use it in GitHub Desktop.
draw a curve showing the concentration of all healthcare expenditures in the united states
#warning: excludes the institutionalized population!
#install survey and foreign packages
#-- only run these lines of code the first time
#install.packages("foreign")
#install.packages("survey")
#designate each of the MEPS consolidated files -
consolidated_files <- c( 12 , 20 , 28 , 38 , 50 , 60 , 70 , 79 , 89 , 97 , 105 , 113 )
#-- and their respective years
meps_years <- c( "96" , "97" , "98" , "99" , "00" , "01" , "02" , "03" , "04" , "05" , "06" , "07" )
####################################################
#choose your year of MEPS data to use!
#for 1996, choose m <- 1. for 2007, choose m <- 12.
m <- 12
####################################################
#create the plot
plot(c(0,100),c(0,1),axes=FALSE,col="white",xlab="Percent of Population",ylab="Percent of Spending")
#draw the axes and the title
axis(1,seq(0,100,by=10),lty=1,tck=0,cex.axis=.7,labels=seq(0,100,by=10))
axis(2,at=c(0,.25,.5,.75,1),ylim=c(0,1),labels=c("0%","25%","50%","75%","100%"),cex.axis=.7)
title("Concentration of Health Care Spending in the U.S. Population",cex.main=1,line=3)
#load survey and foreign packages
library(foreign)
library(survey)
#assign the http:// location to download the consolidated file from
meps_zipped_file <-
paste("http://www.meps.ahrq.gov/mepsweb/data_files/pufs/h",consolidated_files[m],"ssp.zip",sep="")
#assign the weight, psu, and strata, depending on the year
weight <- ifelse( meps_years[m] %in% c("96","97","98") , paste("WTDPER",meps_years[m],sep="") , paste("PERWT",meps_years[m],"F",sep="") )
psu <- ifelse( m <= 6 , paste("VARPSU",meps_years[m],sep="") , "VARPSU" )
strata <- ifelse( m <= 6 , paste("VARSTR",meps_years[m],sep="") , "VARSTR" )
#assign the total expenditure variable
totexp <- paste("TOTEXP",meps_years[m],sep="")
#create a temporary file to save the consolidated file to
meps_temp_file <- tempfile()
#download that consolidated file, save to that temporary file
download.file( meps_zipped_file , meps_temp_file )
#unzip and read the consolidated file into R
meps_unzipped_file <- read.xport( unzip( meps_temp_file ) )
#immediately create a survey object, using the complex survey sampling variables assigned above
meps_analysis_file <- svydesign(id = ~ get(psu) ,
strata = ~ get(strata) , weights = ~ get(weight) ,
data = meps_unzipped_file , nest = TRUE)
#determine the cutpoints for each of the quantiles, from 0 to 100
qtiles <- as.vector(svyquantile( ~get(totexp) , meps_analysis_file , seq(.01 , 1 , by=.01) ) )
#return to the unzipped file and assign each individual record to the correct quantile
meps_unzipped_file$QT <- 0
for (i in 1:100){
meps_unzipped_file <- transform(
meps_unzipped_file , QT = ifelse(
eval(parse(text=totexp)) > qtiles[i] , i , QT ) )
}
#re-create the survey object, this time with the quantiles (QT) variable in place
meps_analysis_file <- svydesign(id = ~ get(psu) ,
strata = ~ get(strata) , weights = ~ get(weight) ,
data = meps_unzipped_file , nest = TRUE)
#calculate the total amount spent on healthcare by non-institutionalized civilian americans in the current year
full_year_spending <- svytotal(~get(totexp) , meps_analysis_file )
#calculate the total amount spent on healthcare by each respective quantile
qtile_totals <- svyby(~get(totexp) , ~QT , meps_analysis_file , svytotal)
#divide each respective quantile total by the full year spending total to determine each quantile's share
qtile_shares <- qtile_totals[,2] / full_year_spending
#tack on zeroes at the beginning, since the first ten to fifteen quantiles are all zeroes
num_zeroes <- 100 - length(unique(meps_unzipped_file$QT))
qtile_shares <- c( rep( 0 , num_zeroes) , qtile_shares)
#add each quantile share to the subsequent quantile share, in order to cumulatively grow to 100% of national health spending
qtile_sums<-qtile_shares
for (i in 2:length(qtile_sums)){
qtile_sums[i] <- qtile_sums[i] + qtile_sums[i-1]
}
#label certain points
for (n in c( 50 , 80 , 85 , 90, 95 , 99 ) ) {
text(n , qtile_sums[n] , paste(round(qtile_sums[n]*100,1),"%",sep="") , pos=3)
}
#draw the actual concentration curve
lines(1:99 , qtile_sums[1:99])
#write out the interpretation of the midpoint
text(0,.95,paste("Interpretations:\nThe lowest half are responsible for ",
round(qtile_sums[50]*100,1),"% of all health care spending.\n",
"The highest 1% are responsible for 100% - ",
round(qtile_sums[99]*100,1),"% = ",round(100-qtile_sums[99]*100,1),
"% of all health care spending.",sep=""),pos=4,cex=.75)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment