# danlwarren/thin.max.R Last active May 21, 2019

# thin.max.R, a function for rarefying point data in any number of dimensions
 # Function to rarefy point data in any number of dimensions. The goal here is to # take a large data set and reduce it in size in such a way as to approximately maximize the # difference between points. For instance, if you have 2000 points but suspect a lot of # spatial autocorrelation between them, you can pass in your data frame, the names (or indices) # of the lat/lon columns, and the number 200, and you get back 200 points from your original data # set that are chosen to be as different from each other as possible given a randomly chosen # starting point # Input is: # # x, a data frame containing the columns to be used to calculate distances along with whatever other data you need # cols, a vector of column names or indices to use for calculating distances # npoints, the number of rarefied points to spit out # # e.g., thin.max(my.data, c("latitude", "longitude"), 200) thin.max <- function(x, cols, npoints){ #Create empty vector for output inds <- vector(mode="numeric") #Create distance matrix this.dist <- as.matrix(dist(x[,cols], upper=TRUE)) #Draw first index at random inds <- c(inds, as.integer(runif(1, 1, length(this.dist[,1])))) #Get second index from maximally distant point from first one #Necessary because apply needs at least two columns or it'll barf #in the next bit inds <- c(inds, which.max(this.dist[,inds])) while(length(inds) < npoints){ #For each point, find its distance to the closest point that's already been selected min.dists <- apply(this.dist[,inds], 1, min) #Select the point that is furthest from everything we've already selected this.ind <- which.max(min.dists) #Get rid of ties, if they exist if(length(this.ind) > 1){ print("Breaking tie...") this.ind <- this.ind } inds <- c(inds, this.ind) } return(x[inds,]) }
