rtcastellano/WordCloud.Rmd Secret

## WordCloud.Rmd
```{r wordcloud}
library(dplyr)
library(ggplot2)
library(tm)
library(RColorBrewer)
library(wordcloud)

#Read in data
#Data taken from https://www.kaggle.com/snap/amazon-fine-food-reviews
# Keep text columns as character vectors (FALSE spelled out: `F` is an
# ordinary variable and can be reassigned).
reviews <- read.csv("Reviews.csv", stringsAsFactors = FALSE)

#############################Word clouds####################
# Function to make a word cloud and save it as a PNG file.
#
# Arguments:
#   data        - data frame (or list) holding the text.
#   column      - name of the element of `data` that contains the text.
#   filename    - path of the PNG file to write.
#   colorscheme - name of an RColorBrewer palette (the two lightest of its
#                 nine shades are dropped).
#   extraRemove - additional words you don't want to appear (common English
#                 stopwords are already removed).
#
# Returns whatever dev.off() returns after the PNG device is closed.
#
# This code is adapted from:
# http://www.r-bloggers.com/word-cloud-in-r/
makewordcloud <- function(data, column, filename, colorscheme = "BuGn",
                          extraRemove = NULL) {
  text <- data[[column]]
  if (is.null(text)) {
    stop("column '", column, "' not found in data", call. = FALSE)
  }

  # VectorSource treats every element as one document. DataframeSource
  # expects `doc_id` and `text` columns in current tm releases and rejects a
  # bare one-column data frame.
  corpus <- VCorpus(VectorSource(text))
  corpus <- tm_map(corpus, content_transformer(tolower))

  # Remove stopwords before stripping punctuation: the English list contains
  # contractions such as "don't", which no longer match once they have been
  # reduced to "dont". The text is lower-cased, so lower-case the extra
  # words as well.
  unwanted <- c(stopwords("english"), tolower(extraRemove))
  corpus <- tm_map(corpus, removeWords, unwanted)
  corpus <- tm_map(corpus, removePunctuation)

  # NOTE: as.matrix() expands the sparse term-document matrix into a dense
  # one, so memory use grows with terms x documents.
  tdm <- TermDocumentMatrix(corpus)
  term_totals <- sort(rowSums(as.matrix(tdm)), decreasing = TRUE)
  d <- data.frame(word = names(term_totals), freq = term_totals,
                  stringsAsFactors = FALSE)

  pal <- brewer.pal(9, colorscheme)[-(1:2)]

  png(filename, width = 1280, height = 800)
  device_id <- dev.cur()
  # Close the PNG device even when wordcloud() fails, so a failed call does
  # not leave a graphics device open.
  on.exit(if (device_id %in% dev.list()) dev.off(device_id), add = TRUE)

  wordcloud(d$word, d$freq, scale = c(8, .3), min.freq = 2, max.words = 100,
            random.order = TRUE, rot.per = .15, colors = pal,
            vfont = c("sans serif", "plain"))
  dev.off(device_id)
}

# Positive/negative reviews: scores above 3 are positive, scores below 3 are
# negative (reviews with a score of exactly 3 are left out).
positive <- filter(reviews, Score > 3)
negative <- filter(reviews, Score < 3)

# Sample up to 35000 rows from positive and negative reviews. The wordcloud
# function cannot handle the entire positive and negative dataframes.
# The size is capped at the number of rows available because sampling
# without replacement fails when more rows are requested than exist.
sample_size <- 35000
positivesample <- positive[sample.int(nrow(positive),
                                      min(sample_size, nrow(positive))), ]
negativesample <- negative[sample.int(nrow(negative),
                                      min(sample_size, nrow(negative))), ]

# Make wordclouds.
makewordcloud(data = positivesample, column = "Text",
              filename = "positiveSampleText.png", colorscheme = "Greens")
makewordcloud(data = negativesample, column = "Text",
              filename = "negativeSampleText.png", colorscheme = "OrRd")
```
	```{r wordcloud}
	library(dplyr)
	library(ggplot2)
	library(tm)
	library(RColorBrewer)
	library(wordcloud)

	#Read in data
	#Data taken from https://www.kaggle.com/snap/amazon-fine-food-reviews
	# Keep text columns as character vectors (FALSE spelled out: `F` is an
	# ordinary variable and can be reassigned).
	reviews <- read.csv("Reviews.csv", stringsAsFactors = FALSE)

	#############################Word clouds####################
	# Function to make a word cloud and save it as a PNG file.
	#
	# Arguments:
	#   data        - data frame (or list) holding the text.
	#   column      - name of the element of `data` that contains the text.
	#   filename    - path of the PNG file to write.
	#   colorscheme - name of an RColorBrewer palette (the two lightest of its
	#                 nine shades are dropped).
	#   extraRemove - additional words you don't want to appear (common English
	#                 stopwords are already removed).
	#
	# Returns whatever dev.off() returns after the PNG device is closed.
	#
	# This code is adapted from:
	# http://www.r-bloggers.com/word-cloud-in-r/
	makewordcloud <- function(data, column, filename, colorscheme = "BuGn",
	                          extraRemove = NULL) {
	  text <- data[[column]]
	  if (is.null(text)) {
	    stop("column '", column, "' not found in data", call. = FALSE)
	  }

	  # VectorSource treats every element as one document. DataframeSource
	  # expects `doc_id` and `text` columns in current tm releases and rejects a
	  # bare one-column data frame.
	  corpus <- VCorpus(VectorSource(text))
	  corpus <- tm_map(corpus, content_transformer(tolower))

	  # Remove stopwords before stripping punctuation: the English list contains
	  # contractions such as "don't", which no longer match once they have been
	  # reduced to "dont". The text is lower-cased, so lower-case the extra
	  # words as well.
	  unwanted <- c(stopwords("english"), tolower(extraRemove))
	  corpus <- tm_map(corpus, removeWords, unwanted)
	  corpus <- tm_map(corpus, removePunctuation)

	  # NOTE: as.matrix() expands the sparse term-document matrix into a dense
	  # one, so memory use grows with terms x documents.
	  tdm <- TermDocumentMatrix(corpus)
	  term_totals <- sort(rowSums(as.matrix(tdm)), decreasing = TRUE)
	  d <- data.frame(word = names(term_totals), freq = term_totals,
	                  stringsAsFactors = FALSE)

	  pal <- brewer.pal(9, colorscheme)[-(1:2)]

	  png(filename, width = 1280, height = 800)
	  device_id <- dev.cur()
	  # Close the PNG device even when wordcloud() fails, so a failed call does
	  # not leave a graphics device open.
	  on.exit(if (device_id %in% dev.list()) dev.off(device_id), add = TRUE)

	  wordcloud(d$word, d$freq, scale = c(8, .3), min.freq = 2, max.words = 100,
	            random.order = TRUE, rot.per = .15, colors = pal,
	            vfont = c("sans serif", "plain"))
	  dev.off(device_id)
	}

	#Positive/negative reviews
	# Scores above 3 are positive, scores below 3 are negative (reviews with a
	# score of exactly 3 are left out).
	positive <- filter(reviews, Score > 3)
	negative <- filter(reviews, Score < 3)

	# Sample up to 35000 rows from positive and negative reviews. The wordcloud
	# function cannot handle the entire positive and negative dataframes.
	# The size is capped at the number of rows available because sampling
	# without replacement fails when more rows are requested than exist.
	sample_size <- 35000
	positivesample <- positive[sample.int(nrow(positive),
	                                      min(sample_size, nrow(positive))), ]
	negativesample <- negative[sample.int(nrow(negative),
	                                      min(sample_size, nrow(negative))), ]

	# Make wordclouds.
	makewordcloud(data = positivesample, column = "Text",
	              filename = "positiveSampleText.png", colorscheme = "Greens")
	makewordcloud(data = negativesample, column = "Text",
	              filename = "negativeSampleText.png", colorscheme = "OrRd")
	```