# wesslen/r-twitter-example.R

## r-twitter-example.R
# Step 1: Go to https://labs.play-with-docker.com and create an instance (you will need to sign up for a Docker username/pwd)
# Step 2: Run "docker run -e PASSWORD=<YOUR_PASS> -p 8787:8787 rocker/tidyverse". NOTE: <YOUR_PASS> equals a unique password you set.
# Step 3: Click 8787 link to open in browser. Copy token and press ok. (username/pwd rstudio/<YOUR_PASS>)
# Step 4: Download this file by running:
# download.file("https://gist.githubusercontent.com/wesslen/ae9aca04b491a064764b13239fb17489/raw/8c35e746585f719c62e0437ec095a23c21c44ccb/r-docker.R", destfile = "r-docker.R")

# call tidyverse -- if you get an error, do you have tidyverse installed??
library(tidyverse)

# load tweets ----

# Copy of the sample in the workshop's GitHub repository. It is used only when
# no local copy is found in the working directory.
tweets_url <- "https://github.com/wesslen/summer-2017-social-media-workshop/raw/master/data/CharlotteTweets20Sample.csv"

# Did you set your working directory? A bare file name is resolved relative to
# getwd().
file <- "CharlotteTweets20Sample.csv"
if (!file.exists(file)) {
  # Fall back to the remote copy instead of failing on a missing file.
  message("'", file, "' not found in ", getwd(), "; loading it from GitHub.")
  file <- tweets_url
}
tweets <- read_csv(file)

# counts ----

# Tally the rows for each value of actor.location.displayName and print the
# ten largest groups, biggest first. The tally column is called "Count".
tweets %>%
  count(actor.location.displayName, name = "Count", sort = TRUE) %>%
  head(n = 10)

# tidy text ----

# Make sure to install tidytext
# install.packages("tidytext")

library(tidytext)

# One row per token: `body` is split into a `word` column and the remaining
# columns of `tweets` are repeated for every token.
tidy_tweets <- tweets %>%
  unnest_tokens(word, body)

# count words ----

# Frequency of every token, most frequent first.
counts <- tidy_tweets %>%
  count(word, sort = TRUE)

head(counts, n = 10)

# remove stop words ----

data("stop_words")

# Drop every token that appears in `stop_words`, then recount. The join key is
# spelled out so the match is always on `word` alone (a natural join would
# also pick up any other column name the two tables happen to share) and no
# "Joining, by = ..." message is printed.
cleaned_tweets <- tidy_tweets %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)

head(cleaned_tweets, n = 10)

# count sentiment net scores ----

# Bing lexicon: the join below relies on its `word` and `sentiment` columns,
# and the arithmetic on the labels "positive" and "negative".
bing <- get_sentiments("bing")

# Number of tweets posted on each day; this is the denominator of the
# per-tweet score. Assumes `tweets` holds one row per tweet -- TODO confirm.
tweets_per_day <- tweets %>%
  count(day = as.Date(postedTime), name = "tweets")

# Daily net sentiment per tweet:
#   (positive words - negative words) / tweets posted that day.
# The previous version divided by n() inside an ungrouped mutate(), which is
# the number of rows (days) in the table -- a constant, not a per-tweet count.
sentiment <- tidy_tweets %>%
  inner_join(bing, by = "word") %>%
  count(day = as.Date(postedTime), sentiment) %>%
  spread(sentiment, n, fill = 0) %>%
  inner_join(tweets_per_day, by = "day") %>%
  mutate(sentiment = (positive - negative) / tweets)

# plot scores ----

# One point per day plus a loess trend; the small span keeps the smoother
# close to the day-to-day movement.
ggplot(sentiment, aes(day, sentiment)) +
  geom_point() +
  geom_smooth(method = "loess", size = 1.5, span = 0.1) +
  labs(
    title = "Sentiment Analysis",
    subtitle = "Net Sentiment Score per Tweet",
    x = "Day",
    y = "Sentiment"
  )

# locate beer-related tweets with leaflet ----

# Make sure to install leaflet if you don't have it
# install.packages("leaflet")
library(leaflet)

# Case-insensitive regular expression matched against the tweet text.
query <- "beer"

# Tweets whose body matches `query` and that carry both coordinates.
# Named `beer_tweets` rather than `t` so base::t() is not masked, and both
# coordinate columns are checked because both are passed to the map.
beer_tweets <- tweets %>%
  filter(
    grepl(query, body, ignore.case = TRUE),
    !is.na(point_long),
    !is.na(point_lat)
  )

# NOTE(review): `lng` is fed from point_lat and `lat` from point_long, exactly
# as in the original script. Presumably the two CSV columns are labelled the
# wrong way round; confirm against the data and swap if the markers land in
# the wrong place.
# NOTE(review): leaflet renders popup text as HTML, and `body` is raw tweet
# text; consider escaping it before publishing the map.
leaflet(beer_tweets) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~point_lat,
    lat = ~point_long,
    popup = ~body,
    stroke = FALSE,
    fillOpacity = 0.5,
    radius = 10,
    clusterOptions = markerClusterOptions()
  )
	# Step 1: Go to https://labs.play-with-docker.com and create an instance (you will need to sign up for a Docker username/pwd)
	# Step 2: Run "docker run -e PASSWORD=<YOUR_PASS> -p 8787:8787 rocker/tidyverse". NOTE: <YOUR_PASS> equals a unique password you set.
	# Step 3: Click 8787 link to open in browser. Copy token and press ok. (username/pwd rstudio/<YOUR_PASS>)
	# Step 4: Download this file by running:
	# download.file("https://gist.githubusercontent.com/wesslen/ae9aca04b491a064764b13239fb17489/raw/8c35e746585f719c62e0437ec095a23c21c44ccb/r-docker.R", destfile = "r-docker.R")

	# call tidyverse -- if you get an error, do you have tidyverse installed??
	library(tidyverse)

	# load tweets ----

	# NOTE(review): from here on the file repeats, tab-indented, the script that
	# already appears above it; consider deleting the duplicate so the analysis
	# runs only once.

	# Copy of the sample in the workshop's GitHub repository. It is used only when
	# no local copy is found in the working directory.
	tweets_url <- "https://github.com/wesslen/summer-2017-social-media-workshop/raw/master/data/CharlotteTweets20Sample.csv"

	# Did you set your working directory? A bare file name is resolved relative to
	# getwd().
	file <- "CharlotteTweets20Sample.csv"
	if (!file.exists(file)) {
	  # Fall back to the remote copy instead of failing on a missing file.
	  message("'", file, "' not found in ", getwd(), "; loading it from GitHub.")
	  file <- tweets_url
	}
	tweets <- read_csv(file)

	# counts ----

	# Tally the rows for each value of actor.location.displayName and print the
	# ten largest groups, biggest first. The tally column is called "Count".
	tweets %>%
	  count(actor.location.displayName, name = "Count", sort = TRUE) %>%
	  head(n = 10)

	# tidy text ----

	# Make sure to install tidytext
	# install.packages("tidytext")

	library(tidytext)

	# One row per token: `body` is split into a `word` column and the remaining
	# columns of `tweets` are repeated for every token.
	tidy_tweets <- tweets %>%
	  unnest_tokens(word, body)

	# count words ----

	# Frequency of every token, most frequent first.
	counts <- tidy_tweets %>%
	  count(word, sort = TRUE)

	head(counts, n = 10)

	# remove stop words ----

	data("stop_words")

	# Drop every token that appears in `stop_words`, then recount. The join key is
	# spelled out so the match is always on `word` alone (a natural join would
	# also pick up any other column name the two tables happen to share) and no
	# "Joining, by = ..." message is printed.
	cleaned_tweets <- tidy_tweets %>%
	  anti_join(stop_words, by = "word") %>%
	  count(word, sort = TRUE)

	head(cleaned_tweets, n = 10)

	# count sentiment net scores ----

	# Bing lexicon: the join below relies on its `word` and `sentiment` columns,
	# and the arithmetic on the labels "positive" and "negative".
	bing <- get_sentiments("bing")

	# Number of tweets posted on each day; this is the denominator of the
	# per-tweet score. Assumes `tweets` holds one row per tweet -- TODO confirm.
	tweets_per_day <- tweets %>%
	  count(day = as.Date(postedTime), name = "tweets")

	# Daily net sentiment per tweet:
	#   (positive words - negative words) / tweets posted that day.
	# The previous version divided by n() inside an ungrouped mutate(), which is
	# the number of rows (days) in the table -- a constant, not a per-tweet count.
	sentiment <- tidy_tweets %>%
	  inner_join(bing, by = "word") %>%
	  count(day = as.Date(postedTime), sentiment) %>%
	  spread(sentiment, n, fill = 0) %>%
	  inner_join(tweets_per_day, by = "day") %>%
	  mutate(sentiment = (positive - negative) / tweets)

	# plot scores ----

	# One point per day plus a loess trend; the small span keeps the smoother
	# close to the day-to-day movement.
	ggplot(sentiment, aes(day, sentiment)) +
	  geom_point() +
	  geom_smooth(method = "loess", size = 1.5, span = 0.1) +
	  labs(
	    title = "Sentiment Analysis",
	    subtitle = "Net Sentiment Score per Tweet",
	    x = "Day",
	    y = "Sentiment"
	  )

	# locate beer-related tweets with leaflet ----

	# Make sure to install leaflet if you don't have it
	# install.packages("leaflet")
	library(leaflet)

	# Case-insensitive regular expression matched against the tweet text.
	query <- "beer"

	# Tweets whose body matches `query` and that carry both coordinates.
	# Named `beer_tweets` rather than `t` so base::t() is not masked, and both
	# coordinate columns are checked because both are passed to the map.
	beer_tweets <- tweets %>%
	  filter(
	    grepl(query, body, ignore.case = TRUE),
	    !is.na(point_long),
	    !is.na(point_lat)
	  )

	# NOTE(review): `lng` is fed from point_lat and `lat` from point_long, exactly
	# as in the original script. Presumably the two CSV columns are labelled the
	# wrong way round; confirm against the data and swap if the markers land in
	# the wrong place.
	# NOTE(review): leaflet renders popup text as HTML, and `body` is raw tweet
	# text; consider escaping it before publishing the map.
	leaflet(beer_tweets) %>%
	  addTiles() %>%
	  addCircleMarkers(
	    lng = ~point_lat,
	    lat = ~point_long,
	    popup = ~body,
	    stroke = FALSE,
	    fillOpacity = 0.5,
	    radius = 10,
	    clusterOptions = markerClusterOptions()
	  )