Created
July 31, 2012 13:41
-
-
Save jwbowers/3217127 to your computer and use it in GitHub Desktop.
Helper functions for nbpMatching
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## This file contains functions that I've found useful when working | |
## with the nbpMatching package. Context for the examples can be seen | |
## in Appendix E in | |
## http://jakebowers.org/PAPERS/WongRacialContext2012.pdf | |
## (forthcoming in the Journal of Politics). | |
## Make matrices of absolute distances | |
scalar.dist<-function(var){ | |
## Utility function to make n x n abs dist matrices | |
## Cribbed from older versions of optmatch | |
outer(var,var,FUN=function(x,y){ abs(x-y) }) | |
} | |
## Add rows and columns of 0s ("sinks'' or ``ghosts") to the distance | |
## matrices to enable the algorithmn to drop bad matches. | |
make.sinks<-function(nsinks,mat){ | |
## nsinks is number of observations one is willing to drop | |
## mat is a distance matrix | |
if(nsinks==0){ return(mat) } | |
##This function is trickier than I had thought. We have to turn | |
##vectors and matrices into a list of arguments for (r/c)bind | |
thelist<-as.list(rep(0,nsinks)) | |
thelist[[nsinks+1]]<-mat | |
tmp1<-do.call("cbind",thelist) | |
thelist[[nsinks+1]]<-tmp1 ## replace the matrix with the new matrix | |
newmat<-do.call("rbind",thelist) | |
newnames<-paste("sink",seq(1,nsinks),sep="") | |
dimnames(newmat)<-list(c(newnames,row.names(mat)), | |
c(newnames,row.names(mat))) | |
return(newmat) | |
} | |
## Usage example: | |
## obj.com.dist.mat3<- make.sinks(2,obj.com.dist.mat) | |
### Extract sets from nonbimatch as a factor for use in analysis | |
get.sets<-function(obj){ | |
##Function to get a factor variable out of a nonbimatch object | |
obj.sets<-factor(apply(obj$matches[,c("Group1.ID","Group2.ID")],1,function(x){paste(sort(x),collapse="-")})) | |
names(obj.sets)<-obj$matches[,"Group1.ID"] | |
obj.sets<-obj.sets[grep("sink|ghost",obj.sets,invert=TRUE),drop=TRUE] | |
return(obj.sets) | |
} | |
## Usage example: | |
## nbm.obj.com<-nonbimatch(obj.com.dist.mat.dm) | |
## nbm.obj.com.sets<-get.sets(nbm.obj.com) | |
## dat.w[names(nbm.obj.com.sets),"nbm.obj.com.sets"]<-nbm.obj.com.sets | |
### Plot distance on one var by distance on another var: Most useful | |
### if one var is the matching variable (so we should see horizontal | |
### or vertical lines) and the other var is the "treatment" (so we | |
### should see some within pair differences available to drive the | |
### analysis) | |
nbmplot<-function(thedata,xvar,yvar,strata,main=NULL,ylim=c(0,1),...){ ## Scatterplot with pairs connected by segments | |
thedata<-na.omit(thedata[,c(xvar,yvar,strata)]) | |
if(is.null(main)){main<-strata} | |
plot(thedata[,xvar],thedata[,yvar],pch=as.numeric(thedata[,strata]),ylim=ylim, | |
xlab=xvar,ylab=yvar,main=main) | |
gooddata<-thedata[thedata[,strata] %in% names(table(thedata[,strata])[table(thedata[,strata])==2]),,drop=TRUE] | |
gooddata[,strata]<-factor(gooddata[,strata]) ## trick to drop unused levels | |
thesets<-sapply(split(gooddata[,c(xvar,yvar)],gooddata[,strata]),function(dat){ unlist(dat) }) | |
segments(thesets[1,],thesets[3,],thesets[2,],thesets[4,],...) | |
} | |
## Usage example: | |
## nbmplot(dat.w,"suj.com","obj.com","nbm.obj.com.sets",main="Matching on Objective `Local Community'") | |
## Other functions of use for working with paired data: | |
pair.diff<-function (x, block,sort=TRUE) | |
{ ##I want higher x minus lower x within pairs | |
sapply(split(x, block), function(x) { | |
if(sort){ x<-sort(x) } | |
diff(x) | |
}) | |
} | |
pair.diff.xy<-function (x, y, block,sort=TRUE) | |
{ ##I want y with higher x minus y with lower x within pairs | |
sapply(split(data.frame(x,y), block), function(dat) { | |
diff(dat$x[order(dat$y)]) | |
}) | |
} | |
align.by.block<-function (x, block, fn = mean, thenames=NULL) | |
{ ## By default, this rescales each observation to be the distance from the group mean. | |
newx<-unsplit(lapply(split(x, block), function(x) { | |
x - fn(x) | |
}), block) | |
if(!is.null(names)){ names(newx)<-thenames } | |
return(newx) | |
} | |
rank.pairs<-function (x, block) | |
{ ## Identify the low and high subj in each pair | |
unsplit(lapply(split(x, block), function(x) { | |
rank(x) | |
}), block) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment