# rer145/unnest_tokens.R

## unnest_tokens.R
library(dplyr)
library(tidytext)
library(janeaustenr)

# Tokenise 'Sense & Sensibility' into words and drop stop words.
#
# Produces two objects:
#   sns   - the rows of austen_books() whose `book` is "Sense & Sensibility"
#   words - `sns` with its `text` column split into a `word` column,
#           minus any word listed in tidytext's `stop_words`

# Using dplyr and janeaustenr, get the contents of 'Sense & Sensibility'
sns <- austen_books() %>%
  filter(book == "Sense & Sensibility")

head(sns)

# tidytext's unnest_tokens() splits text into tokens: here it builds a new
# data frame with a `word` column made up from the `text` column in `sns`
words <- sns %>%
  unnest_tokens(word, text)

head(words)

# Filter out common words (aka "stop words") by checking each token against
# the `word` column of the `stop_words` data frame from tidytext
words <- words %>%
  filter(!(word %in% stop_words$word))

head(words)