Last active
November 18, 2016 00:21
-
-
Save valentinitnelav/95f7c755b5f8c942312fc039f5a14ca6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get the first n words of each string, upper-cased and joined by single spaces.
#
# Arguments:
#   strg  Character vector (anything gsub() can coerce to character). Each
#         element is processed independently.
#   n     Maximum number of words to keep per string (default 3). Must be a
#         single finite, non-negative number; fractional values are floored.
#
# Returns:
#   A character vector the same length as `strg`. Elements that are NA, empty
#   or contain no words yield "".
#
# Punctuation (everything that is not alphanumeric, whitespace or an
# apostrophe) is treated as a word separator.
get_words <- function(strg, n = 3) {
  stopifnot(is.numeric(n), length(n) == 1L, is.finite(n), n >= 0)
  n <- floor(n)

  # replace all punctuation with space (except apostrophe)
  # NOTE(review): with perl = TRUE the POSIX classes presumably match ASCII
  # only, so accented letters would be treated as separators - confirm if
  # non-ASCII input matters.
  cleaned <- gsub(
    pattern = "[^[:alnum:][:space:]']", replacement = " ", strg, perl = TRUE
  )

  # split on any run of whitespace (spaces, tabs, newlines), so no separate
  # "collapse multiple spaces" pass is needed
  pieces <- strsplit(trimws(cleaned), split = "\\s+", perl = TRUE)

  vapply(
    pieces,
    function(words) {
      # omit NA elements (from NA input) and empty tokens (from leading
      # whitespace that trimws() does not cover, e.g. form feeds)
      words <- words[!is.na(words) & nzchar(words)]
      # seq_len() is safe for n = 0, unlike 1:n which would yield c(1, 0)
      keep <- seq_len(min(n, length(words)))
      toupper(paste0(words[keep], collapse = " "))
    },
    character(1),
    USE.NAMES = FALSE
  )
}
# Useful links: | |
# http://stackoverflow.com/questions/25707647/merge-multiple-spaces-to-single-space-remove-trailing-leading-spaces | |
# http://stackoverflow.com/questions/8697079/remove-all-punctuation-except-apostrophes-in-r |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment