Skip to content

Instantly share code, notes, and snippets.

@Ironholds
Created November 28, 2014 02:18
Show Gist options
  • Save Ironholds/3829102e3fbc3a90a8c7 to your computer and use it in GitHub Desktop.
Benchmarks for my upcoming string anonymisation package.
library(anonymise)
library(digest)
library(microbenchmark)
# Build the benchmark input: 30,000 random alphanumeric strings. Each string
# is 30 characters drawn without replacement from 0-9, a-z and A-Z, so the
# values are random (and in practice distinct) rather than checked for
# uniqueness.
uniques <- vapply(
  seq_len(30000),
  function(idx) paste(sample(c(0:9, letters, LETTERS), 30), collapse = ""),
  character(1)
)
# Benchmark: c_anonymise() with algorithm = "md5", applied to the whole
# `uniques` vector in a single call.
# NOTE(review): c_anonymise() presumably comes from the anonymise package
# loaded at the top of the script - confirm.
microbenchmark({
anon_test_out <- c_anonymise(uniques, algorithm = "md5")
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: milliseconds
#      min        lq      mean    median        uq       max neval
# 77.73334     80.31  81.11888  80.88899  81.73885  85.85944   100
# Benchmark: digest() with algo = "md5", called once per string from an
# explicit for-loop. serialize = FALSE is the "no serialisation" setting: each
# string is handed to digest() without being serialised first.
# The output vector is preallocated inside the timed expression, so that
# allocation is part of the measurement.
microbenchmark({
anon_test_out <- character(length(uniques))
for(i in seq_along(uniques)){
anon_test_out[i] <- digest(uniques[i], algo = "md5", serialize = FALSE)
}
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: seconds
#      min        lq      mean    median        uq       max neval
# 1.397521  1.400441  1.411395  1.402358  1.411605  1.461821   100
# Benchmark: digest() with algo = "md5", called once per string via lapply().
# serialize = FALSE is the "no serialisation" setting; unlist() flattens the
# list returned by lapply() into a single vector of digests.
microbenchmark({
anon_test_out <- unlist(lapply(uniques, digest, algo = "md5", serialize = FALSE))
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: seconds
#      min        lq      mean    median        uq       max neval
# 1.277343  1.318001   1.33729  1.336168  1.356127  1.414562   100
# Benchmark: c_anonymise() with algorithm = "sha1", applied to the whole
# `uniques` vector in a single call.
# NOTE(review): c_anonymise() presumably comes from the anonymise package
# loaded at the top of the script - confirm.
microbenchmark({
anon_test_out <- c_anonymise(uniques, algorithm = "sha1")
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: milliseconds
#      min        lq      mean    median        uq       max neval
#  89.7401  91.42565  95.07778  93.92603  96.38657  124.4629   100
# Benchmark: digest() with algo = "sha1", called once per string from an
# explicit for-loop. serialize = FALSE is the "no serialisation" setting: each
# string is handed to digest() without being serialised first.
# The output vector is preallocated inside the timed expression, so that
# allocation is part of the measurement.
microbenchmark({
anon_test_out <- character(length(uniques))
for(i in seq_along(uniques)){
anon_test_out[i] <- digest(uniques[i], algo = "sha1", serialize = FALSE)
}
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: seconds
#      min        lq      mean    median        uq       max neval
# 1.395969   1.41004  1.421917  1.414723  1.432235  1.482069   100
# Benchmark: digest() with algo = "sha1", called once per string via lapply().
# serialize = FALSE is the "no serialisation" setting; unlist() flattens the
# list returned by lapply() into a single vector of digests.
microbenchmark({
anon_test_out <- unlist(lapply(uniques, digest, algo = "sha1", serialize = FALSE))
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: seconds
#      min        lq      mean    median        uq       max neval
# 1.314194  1.369632  1.384138  1.382087  1.400361  1.480818   100
# Benchmark: c_anonymise() with algorithm = "sha256", applied to the whole
# `uniques` vector in a single call.
# NOTE(review): c_anonymise() presumably comes from the anonymise package
# loaded at the top of the script - confirm.
microbenchmark({
anon_test_out <- c_anonymise(uniques, algorithm = "sha256")
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: milliseconds
#      min        lq      mean    median        uq       max neval
# 140.0948  143.5231   145.382  144.3923  145.9739   195.133   100
# Benchmark: digest() with algo = "sha256", called once per string from an
# explicit for-loop. serialize = FALSE is the "no serialisation" setting: each
# string is handed to digest() without being serialised first.
# The output vector is preallocated inside the timed expression, so that
# allocation is part of the measurement.
microbenchmark({
anon_test_out <- character(length(uniques))
for(i in seq_along(uniques)){
anon_test_out[i] <- digest(uniques[i], algo = "sha256", serialize = FALSE)
}
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: seconds
#      min        lq      mean    median        uq       max neval
# 1.431169  1.476169  1.492009  1.495912   1.50535  1.556519   100
# Benchmark: digest() with algo = "sha256", called once per string via
# lapply(). serialize = FALSE is the "no serialisation" setting; unlist()
# flattens the list returned by lapply() into a single vector of digests.
microbenchmark({
anon_test_out <- unlist(lapply(uniques, digest, algo = "sha256", serialize = FALSE))
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: seconds
#      min        lq      mean    median        uq       max neval
# 1.419841   1.45139  1.470209  1.462869  1.481413  1.728109   100
# Benchmark: c_anonymise() with algorithm = "sha512", applied to the whole
# `uniques` vector in a single call.
# NOTE(review): c_anonymise() presumably comes from the anonymise package
# loaded at the top of the script - confirm.
microbenchmark({
anon_test_out <- c_anonymise(uniques, algorithm = "sha512")
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: milliseconds
#      min        lq      mean    median        uq       max neval
# 258.1498  259.3524  260.6519  259.7233  260.6193  305.5327   100
# Benchmark: digest() with algo = "sha512", called once per string from an
# explicit for-loop. serialize = FALSE is the "no serialisation" setting: each
# string is handed to digest() without being serialised first.
# The output vector is preallocated inside the timed expression, so that
# allocation is part of the measurement.
microbenchmark({
anon_test_out <- character(length(uniques))
for(i in seq_along(uniques)){
anon_test_out[i] <- digest(uniques[i], algo = "sha512", serialize = FALSE)
}
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: seconds
#      min        lq      mean    median        uq       max neval
# 1.307411  1.316431  1.334668  1.328401  1.342659  1.481539   100
# Benchmark: digest() with algo = "sha512", called once per string via
# lapply(). serialize = FALSE is the "no serialisation" setting; unlist()
# flattens the list returned by lapply() into a single vector of digests.
microbenchmark({
anon_test_out <- unlist(lapply(uniques, digest, algo = "sha512", serialize = FALSE))
})
# Recorded timings for this benchmark (100 evaluations):
# Unit: seconds
#      min        lq      mean    median        uq       max neval
#  1.26656  1.302912  1.412134   1.31475  1.338408  1.993292   100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment