Created
June 28, 2017 10:10
-
-
Save lissahyacinth/65eeffc02da7e8db5884d8889aa58582 to your computer and use it in GitHub Desktop.
Creating ngrams for characters in base R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Build the n-gram that starts at a given element of a vector
#'
#' Looks `x` up in `y` by value and joins the `n` consecutive elements of `y`
#' that begin at that position into one comma-separated string.
#'
#' @param x The value to locate in `y`; only its first match is used.
#' @param y Vector (typically single characters) the n-gram is taken from.
#' @param n Number of elements in the n-gram. Defaults to 2.
#' @return A length-one character string such as "a,b". The empty string is
#'   returned when `x` does not occur in `y` or when fewer than `n` elements
#'   remain from the matched position onward.
ngram <- function(x, y, n = 2) {
  # match() yields at most one position per lookup (NA when absent), so the
  # condition below is always a scalar even if `x` is repeated in `y` or is
  # missing from it. Because the lookup is by value, a repeated element
  # always resolves to its first occurrence.
  start <- match(x, y)[1L]
  if (is.na(start) || start + n - 1 > length(y)) {
    return("")
  }
  paste0(y[seq(start, length.out = n)], collapse = ",")
}
# Usage
# Split the input into single characters once and reuse the result for both
# the per-call value (`x`) and the lookup vector (`y`).
# strsplit() needs an explicit `split` ("" splits into characters) and
# returns a list, so `[[1]]` extracts the character vector for the first
# string. Assumes `character_input` is a single string and `MaxNGrams` is the
# desired n-gram length -- both are defined by the caller.
# ngram() locates each character in `y` by value, so results for input with
# repeated characters depend on how ngram() resolves multiple matches.
chars <- strsplit(character_input, split = "")[[1]]
mapply(FUN = ngram,
       x = chars,
       MoreArgs = list(y = chars,
                       n = MaxNGrams)
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment