Skip to content

Instantly share code, notes, and snippets.

@dgrapov
Created January 10, 2018 21:53
Show Gist options
  • Save dgrapov/cdae87fc973f7f1bb7108c2f261d8344 to your computer and use it in GitHub Desktop.
Save dgrapov/cdae87fc973f7f1bb7108c2f261d8344 to your computer and use it in GitHub Desktop.
fast (?) implementations of tanimoto distance calculations
#' @title fast_tanimoto
#' @param mat matrix or data frame of numeric values
#' @param output 'matrix' (default) or 'edge list' (non-redundant and undirected)
#' @param progress TRUE, show progress
#' @imports reshape2
fast_tanimoto<-function(mat,output='matrix',progress=TRUE){
mat[is.na(mat)]<-0
#scoring function
score<-function(x){sum(x==2)/sum(x>0)}
if (progress == TRUE){ pb <- txtProgressBar(min = 0, max = nrow(mat), style = 3)} else {pb<-NULL}
out<-lapply(1:nrow(mat), function(i){
tmp.mat<-sweep(mat,2,as.numeric(mat[i,]),"+")
if (progress == TRUE){setTxtProgressBar(pb, i)}
apply(tmp.mat,1,score)
})
if (progress == TRUE){close(pb)}
res<-do.call("rbind",out)
rownames(res)<-rownames(mat)
if(output=='edge list'){
res[upper.tri(res,diag=TRUE)]<-NA
na.omit(melt(res))
} else {
return(res)
}
}
#' @title fast_tanimoto_par
#' @description parallel implementation of \code{\link{fast_tanimoto}}
#' @param mat matrix or data frame of numeric values
#' @param output 'matrix' (default) or 'edge list' (non-redundant and undirected)
#' @param progress TRUE, print progress to 'log.txt'
#' @imports reshape2, foreach, doMC
fast_tanimoto_par<-function(mat,output='matrix'){
mat[is.na(mat)]<-0
#scoring function
score<-function(x){sum(x==2)/sum(x>0)}
sink(paste0(getwd(),"/log.txt"), append=TRUE)
out<-foreach(i=1:nrow(mat)) %dopar% {
sink(paste0(getwd(),"/log.txt"), append=TRUE)
cat(paste("Starting iteration",i,"\n"))
sink()
tmp.mat<-sweep(mat,2,as.numeric(mat[i,]),"+")
apply(tmp.mat,1,score)
}
res<-do.call("rbind",out) #would have to convert each element to a data frame to use faster data.table::rbindlist
rownames(res)<-rownames(mat)
if(output=='edge list'){
res[upper.tri(res,diag=TRUE)]<-NA
na.omit(melt(res))
} else {
return(res)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment