jwbowers/bowers-nbp-functions.R

## bowers-nbp-functions.R
## This file contains functions that I've found useful when working
## with the nbpMatching package. Context for the examples can be seen
## in Appendix E in
## http://jakebowers.org/PAPERS/WongRacialContext2012.pdf
## (forthcoming in the Journal of Politics).

## Make matrices of absolute distances
scalar.dist<-function(var){
  ## Utility function to make n x n abs dist matrices
  ## Cribbed from older versions of optmatch
  outer(var,var,FUN=function(x,y){ abs(x-y) })
}


## Add rows and columns of 0s ("sinks'' or ``ghosts") to the distance
## matrices to enable the algorithmn to drop bad matches.
make.sinks<-function(nsinks,mat){
  ## nsinks is number of observations one is willing to drop
  ## mat is a distance matrix
  if(nsinks==0){ return(mat) }
  ##This function is trickier than I had thought. We have to turn
  ##vectors and matrices into a list of arguments for (r/c)bind
  thelist<-as.list(rep(0,nsinks))
  thelist[[nsinks+1]]<-mat
  tmp1<-do.call("cbind",thelist)
  thelist[[nsinks+1]]<-tmp1 ## replace the matrix with the new matrix
  newmat<-do.call("rbind",thelist)
  newnames<-paste("sink",seq(1,nsinks),sep="")
  dimnames(newmat)<-list(c(newnames,row.names(mat)),
                         c(newnames,row.names(mat)))
  return(newmat)
}

## Usage example:
## obj.com.dist.mat3<- make.sinks(2,obj.com.dist.mat)

### Extract sets from nonbimatch as a factor for use in analysis
get.sets<-function(obj){
  ##Function to get a factor variable out of a nonbimatch object
  obj.sets<-factor(apply(obj$matches[,c("Group1.ID","Group2.ID")],1,function(x){paste(sort(x),collapse="-")}))
  names(obj.sets)<-obj$matches[,"Group1.ID"]
  obj.sets<-obj.sets[grep("sink|ghost",obj.sets,invert=TRUE),drop=TRUE]
  return(obj.sets)
}

## Usage example:
## nbm.obj.com<-nonbimatch(obj.com.dist.mat.dm)
## nbm.obj.com.sets<-get.sets(nbm.obj.com)
## dat.w[names(nbm.obj.com.sets),"nbm.obj.com.sets"]<-nbm.obj.com.sets

### Plot distance on one var by distance on another var: Most useful
### if one var is the matching variable (so we should see horizontal
### or vertical lines) and the other var is the "treatment" (so we
### should see some within pair differences available to drive the
### analysis)

nbmplot<-function(thedata,xvar,yvar,strata,main=NULL,ylim=c(0,1),...){ ## Scatterplot with pairs connected by segments
  thedata<-na.omit(thedata[,c(xvar,yvar,strata)])
  if(is.null(main)){main<-strata}
  plot(thedata[,xvar],thedata[,yvar],pch=as.numeric(thedata[,strata]),ylim=ylim,
       xlab=xvar,ylab=yvar,main=main)

  gooddata<-thedata[thedata[,strata] %in% names(table(thedata[,strata])[table(thedata[,strata])==2]),,drop=TRUE]
  gooddata[,strata]<-factor(gooddata[,strata]) ## trick to drop unused levels

  thesets<-sapply(split(gooddata[,c(xvar,yvar)],gooddata[,strata]),function(dat){ unlist(dat) })
  segments(thesets[1,],thesets[3,],thesets[2,],thesets[4,],...)
}


##  Usage example:
## nbmplot(dat.w,"suj.com","obj.com","nbm.obj.com.sets",main="Matching on Objective `Local Community'")

## Other functions of use for working with paired data:

pair.diff<-function (x, block,sort=TRUE)
{ ##I want higher x minus lower x within pairs
 sapply(split(x, block), function(x) {
   if(sort){ x<-sort(x) }
   diff(x)
 })
}

pair.diff.xy<-function (x, y, block,sort=TRUE)
{ ##I want y with higher x minus y with lower x within pairs
 sapply(split(data.frame(x,y), block), function(dat) {
    diff(dat$x[order(dat$y)])
 })
}


align.by.block<-function (x, block, fn = mean, thenames=NULL)
{ ## By default, this rescales each observation to be the distance from the group mean.
  newx<-unsplit(lapply(split(x, block), function(x) {
    x - fn(x)
  }), block)
  if(!is.null(names)){ names(newx)<-thenames }
  return(newx)
}


rank.pairs<-function (x, block)
{ ## Identify the low and high subj in each pair
    unsplit(lapply(split(x, block), function(x) {
        rank(x)
    }), block)
}
	## This file contains functions that I've found useful when working
	## with the nbpMatching package. Context for the examples can be seen
	## in Appendix E in
	## http://jakebowers.org/PAPERS/WongRacialContext2012.pdf
	## (forthcoming in the Journal of Politics).

	## Make matrices of absolute distances
	scalar.dist<-function(var){
	## Utility function to make n x n abs dist matrices
	## Cribbed from older versions of optmatch
	outer(var,var,FUN=function(x,y){ abs(x-y) })
	}


	## Add rows and columns of 0s ("sinks'' or ``ghosts") to the distance
	## matrices to enable the algorithmn to drop bad matches.
	make.sinks<-function(nsinks,mat){
	## nsinks is number of observations one is willing to drop
	## mat is a distance matrix
	if(nsinks==0){ return(mat) }
	##This function is trickier than I had thought. We have to turn
	##vectors and matrices into a list of arguments for (r/c)bind
	thelist<-as.list(rep(0,nsinks))
	thelist[[nsinks+1]]<-mat
	tmp1<-do.call("cbind",thelist)
	thelist[[nsinks+1]]<-tmp1 ## replace the matrix with the new matrix
	newmat<-do.call("rbind",thelist)
	newnames<-paste("sink",seq(1,nsinks),sep="")
	dimnames(newmat)<-list(c(newnames,row.names(mat)),
	c(newnames,row.names(mat)))
	return(newmat)
	}

	## Usage example:
	## obj.com.dist.mat3<- make.sinks(2,obj.com.dist.mat)

	### Extract sets from nonbimatch as a factor for use in analysis
	get.sets<-function(obj){
	##Function to get a factor variable out of a nonbimatch object
	obj.sets<-factor(apply(obj$matches[,c("Group1.ID","Group2.ID")],1,function(x){paste(sort(x),collapse="-")}))
	names(obj.sets)<-obj$matches[,"Group1.ID"]
	obj.sets<-obj.sets[grep("sink\|ghost",obj.sets,invert=TRUE),drop=TRUE]
	return(obj.sets)
	}

	## Usage example:
	## nbm.obj.com<-nonbimatch(obj.com.dist.mat.dm)
	## nbm.obj.com.sets<-get.sets(nbm.obj.com)
	## dat.w[names(nbm.obj.com.sets),"nbm.obj.com.sets"]<-nbm.obj.com.sets

	### Plot distance on one var by distance on another var: Most useful
	### if one var is the matching variable (so we should see horizontal
	### or vertical lines) and the other var is the "treatment" (so we
	### should see some within pair differences available to drive the
	### analysis)

	nbmplot<-function(thedata,xvar,yvar,strata,main=NULL,ylim=c(0,1),...){ ## Scatterplot with pairs connected by segments
	thedata<-na.omit(thedata[,c(xvar,yvar,strata)])
	if(is.null(main)){main<-strata}
	plot(thedata[,xvar],thedata[,yvar],pch=as.numeric(thedata[,strata]),ylim=ylim,
	xlab=xvar,ylab=yvar,main=main)

	gooddata<-thedata[thedata[,strata] %in% names(table(thedata[,strata])[table(thedata[,strata])==2]),,drop=TRUE]
	gooddata[,strata]<-factor(gooddata[,strata]) ## trick to drop unused levels

	thesets<-sapply(split(gooddata[,c(xvar,yvar)],gooddata[,strata]),function(dat){ unlist(dat) })
	segments(thesets[1,],thesets[3,],thesets[2,],thesets[4,],...)
	}


	## Usage example:
	## nbmplot(dat.w,"suj.com","obj.com","nbm.obj.com.sets",main="Matching on Objective `Local Community'")

	## Other functions of use for working with paired data:

	pair.diff<-function (x, block,sort=TRUE)
	{ ##I want higher x minus lower x within pairs
	sapply(split(x, block), function(x) {
	if(sort){ x<-sort(x) }
	diff(x)
	})
	}

	pair.diff.xy<-function (x, y, block,sort=TRUE)
	{ ##I want y with higher x minus y with lower x within pairs
	sapply(split(data.frame(x,y), block), function(dat) {
	diff(dat$x[order(dat$y)])
	})
	}


	align.by.block<-function (x, block, fn = mean, thenames=NULL)
	{ ## By default, this rescales each observation to be the distance from the group mean.
	newx<-unsplit(lapply(split(x, block), function(x) {
	x - fn(x)
	}), block)
	if(!is.null(names)){ names(newx)<-thenames }
	return(newx)
	}


	rank.pairs<-function (x, block)
	{ ## Identify the low and high subj in each pair
	unsplit(lapply(split(x, block), function(x) {
	rank(x)
	}), block)
	}