# WilsonMongwe/wordcloud.R

## wordcloud.R
# Build and plot a word cloud from the tweet texts.
# Relies on `texts_from_tweets` and `cleanTweets()` being defined before this
# point, and on the packages that provide Corpus()/tm_map()/removeWords()/
# TermDocumentMatrix()/stopwords(), wordcloud() and brewer.pal() being attached.
texts_from_tweets <- cleanTweets(texts_from_tweets)
tweets_corpus <- Corpus(VectorSource(texts_from_tweets))

# Extra filler words to drop, on top of the standard English stop words.
# unique() guards against entries that also appear in stopwords("english").
add_more_stopwords <- unique(c(
  "reports", "report", "opinion", "column", "can", "will", "still", "read",
  "wants", "says", "national", "world", "sport", "life", "video",
  "lunchbox", "comment", "must", "miss", "new", "editorial", "popular",
  "cartoon", "international", "politics", "companies",
  "business", "day", "top", "week", "markets", "economy", "subscribe",
  "case", "missed", "ahead", "editor", "premium", "tomorrow", "stories",
  "click", "keep", "needs", "interview", "moneyweb", "year", "soapbox",
  "news", "expo", "register", "today", "need", "now", "podcast", "lineup",
  "bafana", "line-up", "reader", "question", "wednesday", "conversation",
  "money", "company", "writes", "how",
  stopwords("english")
))

# One removeWords pass is enough: add_more_stopwords already contains
# stopwords("english"), and an earlier separate pass over the default stop
# words was being overwritten by this assignment anyway.
tweets_corpus_cleaned <- tm_map(tweets_corpus, removeWords, add_more_stopwords)

# Create the term-document matrix; the control list additionally strips
# punctuation and numbers, lower-cases terms and filters the stop words again
# (this second filter catches words that only match after lower-casing).
term_document_matrix <- TermDocumentMatrix(
  tweets_corpus_cleaned,
  control = list(
    removePunctuation = TRUE,
    stopwords = add_more_stopwords,
    removeNumbers = TRUE,
    tolower = TRUE
  )
)
# Convert to a dense matrix (rows are terms, columns are documents).
term_document_matrix <- as.matrix(term_document_matrix)
# Total count per term, most frequent first.
word_freqs <- sort(rowSums(term_document_matrix), decreasing = TRUE)
# Data frame of words and their frequencies; keep `word` as character
# regardless of the R version's stringsAsFactors default.
data_frame <- data.frame(
  word = names(word_freqs),
  freq = word_freqs,
  stringsAsFactors = FALSE
)
# Plot the word cloud, placing the most frequent words first.
wordcloud(
  data_frame$word,
  data_frame$freq,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)
	# Build and plot a word cloud from the tweet texts.
	# NOTE(review): this repeats the pipeline that starts at the top of the
	# file, so texts_from_tweets goes through cleanTweets() a second time and
	# the cloud is drawn again -- confirm whether both copies are intended.
	texts_from_tweets <- cleanTweets(texts_from_tweets)
	tweets_corpus <- Corpus(VectorSource(texts_from_tweets))

	# Extra filler words to drop, on top of the standard English stop words.
	# unique() guards against entries that also appear in stopwords("english").
	add_more_stopwords <- unique(c(
	  "reports", "report", "opinion", "column", "can", "will", "still", "read",
	  "wants", "says", "national", "world", "sport", "life", "video",
	  "lunchbox", "comment", "must", "miss", "new", "editorial", "popular",
	  "cartoon", "international", "politics", "companies",
	  "business", "day", "top", "week", "markets", "economy", "subscribe",
	  "case", "missed", "ahead", "editor", "premium", "tomorrow", "stories",
	  "click", "keep", "needs", "interview", "moneyweb", "year", "soapbox",
	  "news", "expo", "register", "today", "need", "now", "podcast", "lineup",
	  "bafana", "line-up", "reader", "question", "wednesday", "conversation",
	  "money", "company", "writes", "how",
	  stopwords("english")
	))

	# One removeWords pass is enough: add_more_stopwords already contains
	# stopwords("english"), and an earlier separate pass over the default stop
	# words was being overwritten by this assignment anyway.
	tweets_corpus_cleaned <- tm_map(tweets_corpus, removeWords, add_more_stopwords)

	# Create the term-document matrix; the control list additionally strips
	# punctuation and numbers, lower-cases terms and filters the stop words
	# again (this second filter catches words that only match once lower-cased).
	term_document_matrix <- TermDocumentMatrix(
	  tweets_corpus_cleaned,
	  control = list(
	    removePunctuation = TRUE,
	    stopwords = add_more_stopwords,
	    removeNumbers = TRUE,
	    tolower = TRUE
	  )
	)
	# Convert to a dense matrix (rows are terms, columns are documents).
	term_document_matrix <- as.matrix(term_document_matrix)
	# Total count per term, most frequent first.
	word_freqs <- sort(rowSums(term_document_matrix), decreasing = TRUE)
	# Data frame of words and their frequencies; keep `word` as character
	# regardless of the R version's stringsAsFactors default.
	data_frame <- data.frame(
	  word = names(word_freqs),
	  freq = word_freqs,
	  stringsAsFactors = FALSE
	)
	# Plot the word cloud, placing the most frequent words first.
	wordcloud(
	  data_frame$word,
	  data_frame$freq,
	  random.order = FALSE,
	  colors = brewer.pal(8, "Dark2")
	)