Instantly share code, notes, and snippets.

@WilsonMongwe WilsonMongwe/wordcloud.R
Last active Oct 8, 2017

Embed
What would you like to do?
# Build a word cloud from tweet texts:
# clean -> corpus -> remove stopwords -> term-document matrix -> plot.
# Requires: tm, wordcloud, RColorBrewer; cleanTweets() is defined elsewhere
# in the project (assumed to return a character vector of tweet texts).
texts_from_tweets <- cleanTweets(texts_from_tweets)
tweets_corpus <- Corpus(VectorSource(texts_from_tweets))

# Domain-specific stopwords, combined with the standard English list so a
# single removeWords() pass removes everything.
# NOTE(review): removeWords() runs before lowercasing here, so capitalized
# variants survive this pass; the TermDocumentMatrix control below applies
# tolower and the same stopword list, which catches them.
add_more_stopwords <- c(
  "reports", "report", "opinion", "column", "can", "will", "still", "read",
  "wants", "says", "national", "world", "sport", "life", "video",
  "lunchbox", "comment", "must", "miss", "new", "editorial", "popular",
  "cartoon", "international", "national", "politics", "companies",
  "business", "day", "top", "week", "markets", "economy", "subscribe",
  "case", "missed", "ahead", "editor", "premium", "tomorrow", "stories",
  "click", "keep", "needs", "interview", "moneyweb", "year", "soapbox",
  "news", "expo", "register", "today", "need", "now", "podcast", "lineup",
  "bafana", "line-up", "reader", "question", "wednesday", "conversation",
  "money", "company", "writes", "how",
  stopwords("english")
)

# BUG FIX: the original ran removeWords(stopwords()) into
# tweets_corpus_cleaned, then overwrote it with a second tm_map applied to
# the ORIGINAL tweets_corpus, discarding the first pass. Since
# add_more_stopwords already contains stopwords("english"), one pass over
# the original corpus with the combined list gives the same final result.
tweets_corpus_cleaned <- tm_map(
  tweets_corpus,
  function(x) removeWords(x, add_more_stopwords)
)

# Create the term-document matrix, applying standard text transformations.
term_document_matrix <- TermDocumentMatrix(
  tweets_corpus_cleaned,
  control = list(
    removePunctuation = TRUE,
    stopwords = add_more_stopwords,
    removeNumbers = TRUE,
    tolower = TRUE
  )
)

# Convert to a plain matrix so we can take row sums.
term_document_matrix <- as.matrix(term_document_matrix)

# Word counts in decreasing order of frequency.
word_freqs <- sort(rowSums(term_document_matrix), decreasing = TRUE)

# Words and their frequencies. Name kept for compatibility with any
# downstream code, though it shadows the (deprecated) dplyr::data_frame.
data_frame <- data.frame(word = names(word_freqs), freq = word_freqs)

# Plot the word cloud, most frequent words in the center.
wordcloud(
  data_frame$word,
  data_frame$freq,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment