Created
May 6, 2013 23:20
-
-
Save dsparks/5529067 to your computer and use it in GitHub Desktop.
An R example that hashes values into small integer buckets using the digest package. Adapted from http://stackoverflow.com/a/14366546/479554
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(digest)

# Hash each element of `values` into an integer bucket in [0, n_buckets).
#
# digest() is called once per element, hex characters 28-32 of each digest
# string are parsed as a base-16 integer, and that integer is reduced modulo
# `n_buckets`. With digest()'s default md5 algorithm the digest string has
# 32 hex characters, so characters 28-32 are its last five.
#
# Arguments:
#   values     Vector whose elements are hashed one at a time.
#   n_buckets  Number of buckets; the result is taken `%% n_buckets`.
#   serialize  Forwarded to digest(). TRUE hashes R's serialized form of
#              each element; FALSE hashes the element's raw text, in which
#              case `values` must be character.
#
# Returns a numeric vector with one bucket number per element of `values`.
hash_bucket <- function(values, n_buckets, serialize = TRUE) {
  # vapply() guarantees one string per element, even for empty input.
  digests <- vapply(
    values, digest, character(1),
    serialize = serialize, USE.NAMES = FALSE
  )
  # Five hex digits are at most 0xFFFFF, well inside strtoi()'s integer range.
  strtoi(substr(digests, 28, 32), 16L) %% n_buckets
}

# NOTE: the outputs recorded in the comments below come from the original
# run. R >= 3.6.0 changed the default algorithm used by sample(), so the
# values drawn after set.seed(1) (and every hash derived from them) will
# differ on current R unless the old sampler is requested, e.g. with
# set.seed(1, sample.kind = "Rounding").
set.seed(1)
(x <- sample(1e9, size = 6))
# [1] 265508664 372123900 572853364 908207790 201681932 898389685

## To hash R's internal representation of these numbers
hash_bucket(x, 1e3)
# [1] 552 511 233 293 607 819

## Or, for a hash mapping that's comparable to other programs' md5 hash
## implementations
hash_bucket(as.character(x), 1e3, serialize = FALSE)
# [1] 153 180 892 294 267 807

# 5000 random 10-letter words, used to look at how evenly the buckets fill.
someWords <- apply(
  matrix(sample(LETTERS, 50000, replace = TRUE), ncol = 10),
  1, paste, collapse = ""
)
someWords <- sort(someWords)
# How many words occur once, twice, ... (checks for duplicate words).
table(table(someWords))

(hash1 <- hash_bucket(someWords, 1e2))
(hash2 <- hash_bucket(as.character(someWords), 1e2, serialize = FALSE))
# Distribution of bucket sizes: how many buckets received each word count.
table(table(hash1))
table(table(hash2))

# The same two hashing variants applied to a handful of arbitrary strings.
(y <- c("wub", "wub2", "wuc", "testing", "asdfkljwefmklemf,.msp"))
hash_bucket(y, 1e3)
hash_bucket(as.character(y), 1e3, serialize = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment