Skip to content

Instantly share code, notes, and snippets.

@Tensibai
Created December 31, 2015 09:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Tensibai/b06c81ece98c34c6099a to your computer and use it in GitHub Desktop.
Save Tensibai/b06c81ece98c34c6099a to your computer and use it in GitHub Desktop.
The code used for benchmarking
library(stringr)
library(microbenchmark)
# Sample input handed to every implementation in the benchmark call below.
str <- "3412123401234"
# Count the non-overlapping matches of a regular expression in a string.
#
# pat: regular expression, passed to gregexpr() unchanged.
# s:   character vector; only the result for its first element is used.
#
# Returns an integer count. gregexpr() reports "no match" as a single -1
# (and a missing input as NA), so the positive start positions are counted:
# that yields 0 in those cases instead of the 1 that length() would give.
replength <- function(pat, s) {
  starts <- gregexpr(pat, s)[[1]]
  sum(starts > 0L, na.rm = TRUE)
}
# Find the most frequently repeated run of two or more digits in a string.
#
# s: character vector; only the matches for its first element are used.
#
# Candidates are the digit runs (length >= 2) that occur again later in the
# string, located with a PCRE back-reference inside a look-ahead. Each
# distinct candidate is counted with replength() and the first candidate
# that reaches the highest count is reported.
#
# Returns a one-row data.frame with columns Pattern, Repeats and Pat.length,
# or a zero-row data.frame with the same columns when no digit run repeats.
repeats <- function(s) {
  found <- regmatches(s, gregexpr("(\\d{2,})(?=.*\\1)", s, perl = TRUE))[[1]]
  candidates <- unique(found)

  # Without this guard an input with no repeated run reaches max() on an
  # empty result and fails.
  if (length(candidates) == 0L) {
    return(data.frame(
      "Pattern" = character(0),
      "Repeats" = integer(0),
      "Pat.length" = integer(0),
      stringsAsFactors = FALSE
    ))
  }

  # vapply() guarantees an integer vector named after the candidates.
  r <- vapply(candidates, replength, integer(1), s = s)
  best <- which.max(r)  # index of the first maximum
  pat <- names(r)[best]
  data.frame(
    "Pattern" = pat,
    "Repeats" = unname(r[best]),
    "Pat.length" = nchar(pat),
    stringsAsFactors = FALSE
  )
}
# Entry point used in the benchmark call for the regex-based approach:
# forwards `str` to repeats() and returns its result unchanged.
tensibai <- function(str) {
  outcome <- repeats(str)
  outcome
}
# Build one column of the position matrix used by jimbou().
#
# x: start position (1-based) within the string.
# y: the string being analysed.
#
# Returns a vector of length nchar(y): the positions x, x + 1, ..., nchar(y)
# followed by NA padding, so every start position yields the same length.
make.matrix <- function(x, y) {
  # Fixed: the length used to be read from an undefined variable `d` rather
  # than the `y` argument, so the function only worked when a global `d`
  # happened to exist.
  vl <- nchar(y)
  OUT <- seq(x, vl, 1)
  if (length(OUT) < vl) {
    OUT <- c(OUT, rep(NA, vl - length(OUT)))
  }
  OUT
}
# Tabulate every substring of length >= 2 that occurs more than once in `d`.
#
# d: a single character string.
#
# Returns a data.frame with one row per repeated substring (row names are the
# substrings): `nlength` is the substring length and `counts` its number of
# occurrences, ordered by decreasing count. Strings shorter than two
# characters yield a zero-row data.frame with the same columns.
jimbou <- function(d) {
  n <- nchar(d)
  # No substring of length >= 2 exists, and the row sequence 2..n used below
  # would be invalid.
  if (n < 2L) {
    return(data.frame("nlength" = integer(0), "counts" = integer(0)))
  }

  # Column i holds the positions i, i + 1, ..., n (NA-padded to n rows), so
  # row 1 is the start of each substring and row k its end for length k.
  mm <- vapply(seq_len(n), make.matrix, numeric(n), y = d)

  # One column per substring length k = 2..n; an NA end position gives an NA
  # substring, which table() leaves out of the counts.
  mm2 <- vapply(
    seq.int(2L, n),
    function(k) str_sub(string = d, start = mm[1, ], end = mm[k, ]),
    character(n)
  )

  # Work with a plain named integer vector instead of a `table`:
  # data.frame() expands a table into separate value/frequency columns,
  # which would leave no column literally named `counts` to filter on.
  tab <- table(mm2)
  counts <- sort(setNames(as.vector(tab), names(tab)), decreasing = TRUE)
  patterns <- names(counts)

  res <- data.frame(
    "nlength" = setNames(nchar(patterns), patterns),
    "counts" = counts
  )
  res[res$counts > 1, ]
}
# Baseline brute-force search for the most repeated substring of `string`.
#
# string: a single character string. Each substring is handed to gregexpr()
#         as a regular expression, so regex metacharacters are not literal.
#
# Every start position is combined with every offset 1..floor(nchar / 2);
# each resulting substring of two or more characters is recorded together
# with its number of non-overlapping matches and its length.
#
# Returns a one-row data.frame (Pattern, Repeats, Pat.Length) holding the
# substring with the most matches, ties broken by greater length. When no
# substring occurs more than once the row is zero-filled (Pattern is "0").
original <- function(string) {
  # Start from an empty 3-column matrix rather than NULL so that inputs too
  # short to produce any candidate still reach the zero-filled result instead
  # of failing when the column names are assigned.
  out <- matrix(character(0), ncol = 3)
  n.chars <- nchar(string)
  check.till <- floor(n.chars / 2)

  # The row-by-row rbind() is kept on purpose: this function is the reference
  # implementation timed by the microbenchmark() call in this file.
  for (start in seq_len(n.chars)) {
    for (end in seq_len(check.till)) {
      pat <- substr(string, start, start + end)
      n.rep <- length(gregexpr(pat, string)[[1]])
      pat.len <- nchar(pat)
      if (pat.len >= 2) out <- rbind(out, c(pat, n.rep, pat.len))
    }
  }

  out <- as.data.frame(out, stringsAsFactors = FALSE)
  colnames(out) <- c("Pattern", "Repeats", "Pat.Length")
  out$Repeats <- as.integer(out$Repeats)
  out$Pat.Length <- as.integer(out$Pat.Length)

  # Keep only real repeats, best first: most matches, then longest pattern.
  out <- out[out$Repeats > 1, ]
  out <- out[order(out[, 2], out[, 3], decreasing = TRUE), ]
  out <- out[1, ]

  # With no repeated substring the selected row is all NA; report zeros.
  out[is.na(out)] <- 0
  out
}
# Time the three implementations on the sample string, three runs each.
microbenchmark(
  original(str),
  jimbou(str),
  tensibai(str),
  times = 3
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment