#' Clean raw tweet text
#'
#' Strips @mentions, links, HTML-escaped ampersands and redundant whitespace
#' from each tweet, then drops duplicate tweets.
#'
#' @param tweets A character vector of tweet texts (other atomic input is
#'   coerced to character by `gsub()`).
#' @return A character vector holding the distinct cleaned tweets. Because
#'   duplicates are removed, it may be shorter than `tweets`.
cleanTweets <- function(tweets) {
  # Remove whole @mentions first. Deleting the bare "@" beforehand would leave
  # the handle text behind and make the mention pattern impossible to match.
  tweets <- gsub("@\\w+", " ", tweets)
  tweets <- gsub("@", "", tweets, fixed = TRUE)

  # Remove complete links rather than only the scheme, so no "://t.co/..."
  # residue is left. The literal "https" is still stripped afterwards to keep
  # the previous behaviour for text that has no "://" part.
  tweets <- gsub("https?://\\S+", " ", tweets)
  tweets <- gsub("https", "", tweets, fixed = TRUE)

  # Drop the HTML-escaped ampersand and a standalone "amp" token, without
  # touching words that merely contain "amp" (e.g. "example", "camp").
  tweets <- gsub("&amp;", " ", tweets, fixed = TRUE)
  tweets <- gsub("\\bamp\\b", " ", tweets)

  # Collapse runs of spaces/tabs BEFORE trimming, so that a run at either end
  # shrinks to one space which the trim below then removes.
  tweets <- gsub("[ \t]{2,}", " ", tweets)
  tweets <- gsub("^ | $", "", tweets)

  unique(tweets)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment