ajdamico/concentration of health expenditures.R

## concentration of health expenditures.R
#warning: excludes the institutionalized population!

#install survey and foreign packages
#-- only run these lines of code the first time

#install.packages("foreign")
#install.packages("survey")

#designate each of the MEPS consolidated files -
consolidated_files <- c( 12 , 20 , 28 , 38 , 50 , 60 , 70 , 79 , 89 , 97 , 105 , 113 )
#-- and their respective years
meps_years <- c( "96" , "97" , "98" , "99" , "00" , "01" , "02" , "03" , "04" , "05" , "06" , "07" )

####################################################
#choose your year of MEPS data to use!
#for 1996, choose m <- 1. for 2007, choose m <- 12.
m <- 12
####################################################

#create the plot
plot(c(0,100),c(0,1),axes=FALSE,col="white",xlab="Percent of Population",ylab="Percent of Spending")

#draw the axes and the title
axis(1,seq(0,100,by=10),lty=1,tck=0,cex.axis=.7,labels=seq(0,100,by=10))
axis(2,at=c(0,.25,.5,.75,1),ylim=c(0,1),labels=c("0%","25%","50%","75%","100%"),cex.axis=.7)
title("Concentration of Health Care Spending in the U.S. Population",cex.main=1,line=3)


#load survey and foreign packages
library(foreign)
library(survey)


#assign the http:// location to download the consolidated file from
meps_zipped_file <-
	paste("http://www.meps.ahrq.gov/mepsweb/data_files/pufs/h",consolidated_files[m],"ssp.zip",sep="")

#assign the weight, psu, and strata, depending on the year
weight <- ifelse( meps_years[m] %in% c("96","97","98") , paste("WTDPER",meps_years[m],sep="") , paste("PERWT",meps_years[m],"F",sep="") )
psu <- ifelse( m <= 6 , paste("VARPSU",meps_years[m],sep="") , "VARPSU" )
strata <- ifelse( m <= 6 , paste("VARSTR",meps_years[m],sep="") , "VARSTR" )

#assign the total expenditure variable
totexp <- paste("TOTEXP",meps_years[m],sep="")

#create a temporary file to save the consolidated file to
meps_temp_file <- tempfile()

#download that consolidated file, save to that temporary file
download.file( meps_zipped_file , meps_temp_file )

#unzip and read the consolidated file into R
meps_unzipped_file <- read.xport( unzip( meps_temp_file ) )

#immediately create a survey object, using the complex survey sampling variables assigned above
meps_analysis_file <- svydesign(id = ~ get(psu) ,
 strata = ~ get(strata) , weights = ~ get(weight) ,
 data = meps_unzipped_file , nest = TRUE)

#determine the cutpoints for each of the quantiles, from 0 to 100
qtiles <- as.vector(svyquantile( ~get(totexp) , meps_analysis_file , seq(.01 , 1 , by=.01) ) )

#return to the unzipped file and assign each individual record to the correct quantile
meps_unzipped_file$QT <- 0

	for (i in 1:100){
	meps_unzipped_file <- transform(
					meps_unzipped_file , QT = ifelse(
						eval(parse(text=totexp)) > qtiles[i] , i , QT ) )
	}

#re-create the survey object, this time with the quantiles (QT) variable in place
meps_analysis_file <- svydesign(id = ~ get(psu) ,
 strata = ~ get(strata) , weights = ~ get(weight) ,
 data = meps_unzipped_file , nest = TRUE)

#calculate the total amount spent on healthcare by non-institutionalized civilian americans in the current year
full_year_spending <- svytotal(~get(totexp) , meps_analysis_file )

#calculate the total amount spent on healthcare by each respective quantile
qtile_totals <- svyby(~get(totexp) , ~QT , meps_analysis_file , svytotal)

#divide each respective quantile total by the full year spending total to determine each quantile's share
qtile_shares <- qtile_totals[,2] / full_year_spending

#tack on zeroes at the beginning, since the first ten to fifteen quantiles are all zeroes
num_zeroes <- 100 - length(unique(meps_unzipped_file$QT))
qtile_shares <- c( rep( 0 , num_zeroes) , qtile_shares)

#add each quantile share to the subsequent quantile share, in order to cumulatively grow to 100% of national health spending
qtile_sums<-qtile_shares
	for (i in 2:length(qtile_sums)){
	qtile_sums[i] <- qtile_sums[i] + qtile_sums[i-1]
	}


#label certain points
for (n in c( 50 , 80 , 85 , 90, 95 , 99 ) ) {
text(n , qtile_sums[n] , paste(round(qtile_sums[n]*100,1),"%",sep="") , pos=3)
}

#draw the actual concentration curve
lines(1:99 , qtile_sums[1:99])

#write out the interpretation of the midpoint
text(0,.95,paste("Interpretations:\nThe lowest half are responsible for ",
					round(qtile_sums[50]*100,1),"% of all health care spending.\n",
					"The highest 1% are responsible for 100% - ",
					round(qtile_sums[99]*100,1),"% = ",round(100-qtile_sums[99]*100,1),
					"% of all health care spending.",sep=""),pos=4,cex=.75)
	#warning: excludes the institutionalized population!

	#install survey and foreign packages
	#-- only run these lines of code the first time

	#install.packages("foreign")
	#install.packages("survey")

	#designate each of the MEPS consolidated files -
	consolidated_files <- c( 12 , 20 , 28 , 38 , 50 , 60 , 70 , 79 , 89 , 97 , 105 , 113 )
	#-- and their respective years
	meps_years <- c( "96" , "97" , "98" , "99" , "00" , "01" , "02" , "03" , "04" , "05" , "06" , "07" )

	####################################################
	#choose your year of MEPS data to use!
	#for 1996, choose m <- 1. for 2007, choose m <- 12.
	m <- 12
	####################################################

	#create the plot
	plot(c(0,100),c(0,1),axes=FALSE,col="white",xlab="Percent of Population",ylab="Percent of Spending")

	#draw the axes and the title
	axis(1,seq(0,100,by=10),lty=1,tck=0,cex.axis=.7,labels=seq(0,100,by=10))
	axis(2,at=c(0,.25,.5,.75,1),ylim=c(0,1),labels=c("0%","25%","50%","75%","100%"),cex.axis=.7)
	title("Concentration of Health Care Spending in the U.S. Population",cex.main=1,line=3)


	#load survey and foreign packages
	library(foreign)
	library(survey)


	#assign the http:// location to download the consolidated file from
	meps_zipped_file <-
	paste("http://www.meps.ahrq.gov/mepsweb/data_files/pufs/h",consolidated_files[m],"ssp.zip",sep="")

	#assign the weight, psu, and strata, depending on the year
	weight <- ifelse( meps_years[m] %in% c("96","97","98") , paste("WTDPER",meps_years[m],sep="") , paste("PERWT",meps_years[m],"F",sep="") )
	psu <- ifelse( m <= 6 , paste("VARPSU",meps_years[m],sep="") , "VARPSU" )
	strata <- ifelse( m <= 6 , paste("VARSTR",meps_years[m],sep="") , "VARSTR" )

	#assign the total expenditure variable
	totexp <- paste("TOTEXP",meps_years[m],sep="")

	#create a temporary file to save the consolidated file to
	meps_temp_file <- tempfile()

	#download that consolidated file, save to that temporary file
	download.file( meps_zipped_file , meps_temp_file )

	#unzip and read the consolidated file into R
	meps_unzipped_file <- read.xport( unzip( meps_temp_file ) )

	#immediately create a survey object, using the complex survey sampling variables assigned above
	meps_analysis_file <- svydesign(id = ~ get(psu) ,
	strata = ~ get(strata) , weights = ~ get(weight) ,
	data = meps_unzipped_file , nest = TRUE)

	#determine the cutpoints for each of the quantiles, from 0 to 100
	qtiles <- as.vector(svyquantile( ~get(totexp) , meps_analysis_file , seq(.01 , 1 , by=.01) ) )

	#return to the unzipped file and assign each individual record to the correct quantile
	meps_unzipped_file$QT <- 0

	for (i in 1:100){
	meps_unzipped_file <- transform(
	meps_unzipped_file , QT = ifelse(
	eval(parse(text=totexp)) > qtiles[i] , i , QT ) )
	}

	#re-create the survey object, this time with the quantiles (QT) variable in place
	meps_analysis_file <- svydesign(id = ~ get(psu) ,
	strata = ~ get(strata) , weights = ~ get(weight) ,
	data = meps_unzipped_file , nest = TRUE)

	#calculate the total amount spent on healthcare by non-institutionalized civilian americans in the current year
	full_year_spending <- svytotal(~get(totexp) , meps_analysis_file )

	#calculate the total amount spent on healthcare by each respective quantile
	qtile_totals <- svyby(~get(totexp) , ~QT , meps_analysis_file , svytotal)

	#divide each respective quantile total by the full year spending total to determine each quantile's share
	qtile_shares <- qtile_totals[,2] / full_year_spending

	#tack on zeroes at the beginning, since the first ten to fifteen quantiles are all zeroes
	num_zeroes <- 100 - length(unique(meps_unzipped_file$QT))
	qtile_shares <- c( rep( 0 , num_zeroes) , qtile_shares)

	#add each quantile share to the subsequent quantile share, in order to cumulatively grow to 100% of national health spending
	qtile_sums<-qtile_shares
	for (i in 2:length(qtile_sums)){
	qtile_sums[i] <- qtile_sums[i] + qtile_sums[i-1]
	}



	#label certain points
	for (n in c( 50 , 80 , 85 , 90, 95 , 99 ) ) {
	text(n , qtile_sums[n] , paste(round(qtile_sums[n]*100,1),"%",sep="") , pos=3)
	}

	#draw the actual concentration curve
	lines(1:99 , qtile_sums[1:99])

	#write out the interpretation of the midpoint
	text(0,.95,paste("Interpretations:\nThe lowest half are responsible for ",
	round(qtile_sums[50]*100,1),"% of all health care spending.\n",
	"The highest 1% are responsible for 100% - ",
	round(qtile_sums[99]100,1),"% = ",round(100-qtile_sums[99]100,1),
	"% of all health care spending.",sep=""),pos=4,cex=.75)