Skip to content

Instantly share code, notes, and snippets.

@jtilly
Last active January 6, 2017 20:55
Show Gist options
  • Save jtilly/ac00ef85e0d5766a63c31637d4ac79ea to your computer and use it in GitHub Desktop.
Save jtilly/ac00ef85e0d5766a63c31637d4ac79ea to your computer and use it in GitHub Desktop.
# Download the word list that serves as the lookup dictionary. Each line of
# the remote file becomes one entry; duplicate lines are removed.
# This script deliberately does not clear the workspace (no `rm(list = ls())`):
# sourcing it must never delete objects the caller already has.
dict.orig <- unique(readLines("https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english.txt"))
# Translate words into the digit sequence that types them on a phone keypad.
#
# Arguments:
#   word.orig    Character vector of words.
#   return.orig  If TRUE (default), return the digit strings followed by the
#                untouched input, i.e. c(digits, word.orig); if FALSE, return
#                only the digit strings.
#
# Value:
#   A character vector. Letters are matched case-insensitively and mapped
#   abc -> 2, def -> 3, ghi -> 4, jkl -> 5, mno -> 6, pqrs -> 7, tuv -> 8,
#   wxyz -> 9; every other character (digits, punctuation, spaces, ...)
#   becomes 1.
txt2numeric <- function(word.orig, return.orig = TRUE) {
  word <- tolower(word.orig)
  # Anything that is not a plain lowercase letter collapses to "1". This runs
  # before the letter mapping so that only original non-letters are affected.
  word <- gsub("[^a-z]", "1", word)
  # A single positional translation replaces eight separate regex passes:
  # the i-th letter of the alphabet maps to the i-th digit of the second string.
  word <- chartr(
    "abcdefghijklmnopqrstuvwxyz",
    "22233344455566677778889999",
    word
  )
  if (return.orig) {
    c(word, word.orig)
  } else {
    word
  }
}
# Two-column character matrix: column 1 holds each dictionary word's keypad
# digits, column 2 the word itself. txt2numeric() is vectorised, so the whole
# dictionary is encoded in one call instead of one call per word.
# deparse.level = 0 keeps the matrix free of column names.
dict.numeric <- cbind(
  txt2numeric(dict.orig, return.orig = FALSE),
  dict.orig,
  deparse.level = 0
)

# Split the sample sentence on spaces, commas, semicolons and question marks,
# then encode every piece as keypad digits.
text.numeric <- txt2numeric(
  unlist(strsplit(
    x = "Hello, how is it going? Does this actually work or what?",
    split = "([\\ ,;\\?]{1})",
    perl = TRUE
  )),
  return.orig = FALSE
)
# Adjacent separators (e.g. ", ") yield empty pieces; drop them.
text.numeric <- text.numeric[text.numeric != ""]

# For each encoded word, collect every dictionary word with the same digits.
result <- lapply(
  X = text.numeric,
  FUN = function(x) dict.numeric[dict.numeric[, 1] == x, 2]
)
print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment