Last active
September 1, 2018 02:49
-
-
Save wesslen/ae9aca04b491a064764b13239fb17489 to your computer and use it in GitHub Desktop.
RStudio Tidyverse Docker Demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 1: Go to https://labs.play-with-docker.com and create an instance (you will need to sign up for a Docker username/password).
# Step 2: Run "docker run -e PASSWORD=<YOUR_PASS> -p 8787:8787 rocker/tidyverse". NOTE: <YOUR_PASS> is a unique password that you choose.
# Step 3: Click the 8787 link to open RStudio in the browser. Copy the token and press OK. (username/password: rstudio/<YOUR_PASS>)
# Step 4: Download this file by running:
# download.file("https://gist.githubusercontent.com/wesslen/ae9aca04b491a064764b13239fb17489/raw/8c35e746585f719c62e0437ec095a23c21c44ccb/r-docker.R", destfile = "r-docker.R")
# Load packages and data ----
# Attach the tidyverse. If this errors, install it first with
# install.packages("tidyverse").
library(tidyverse)

# Load the tweets.
# The CSV is expected in the current working directory. When it is not there,
# fall back to reading the same file directly from GitHub instead of failing.
file <- "CharlotteTweets20Sample.csv"
if (!file.exists(file)) {
  message(
    "'", file, "' not found in ", getwd(), "; reading it from GitHub instead."
  )
  file <- "https://github.com/wesslen/summer-2017-social-media-workshop/raw/master/data/CharlotteTweets20Sample.csv"
}
tweets <- read_csv(file)
# Top locations ----
# Print the ten most frequent values of actor.location.displayName together
# with the number of tweets for each, most frequent first.
tweets %>%
  count(actor.location.displayName, name = "Count", sort = TRUE) %>%
  head(n = 10)
# Tidy text ----
# Requires tidytext; if it is missing, run install.packages("tidytext").
library(tidytext)

# Split the `body` column into tokens, one row per token, stored in `word`.
tidy_tweets <- unnest_tokens(tweets, word, body)

# Word frequencies, most frequent first.
counts <- count(tidy_tweets, word, sort = TRUE)

# Print the ten most frequent words.
counts %>%
  head(n = 10)
# Remove stop words ----
# Load the `stop_words` table into the workspace.
data("stop_words")

# Drop every token listed in `stop_words`, then recount the remaining words.
# The join key is spelled out so that the match is on the token only and the
# "Joining, by = ..." message is not emitted.
cleaned_tweets <- tidy_tweets %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)

# Print the ten most frequent remaining words.
head(cleaned_tweets, n = 10)
# Net sentiment score per day ----
# Sentiment lexicon used to label tokens.
bing <- get_sentiments("bing")

sentiment <- tidy_tweets %>%
  # Join on the token only, so that a tweet column sharing a name with another
  # lexicon column can never silently become part of the join key.
  inner_join(bing, by = "word") %>%
  # One row per (day, sentiment label) with the number of matching tokens.
  count(day = as.Date(postedTime), sentiment) %>%
  # One row per day and one column per sentiment label; a label that does not
  # occur on a given day gets 0.
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  # NOTE(review): the table is ungrouped at this point, so n() is the number
  # of rows (days) in the table, not the number of tweets on that day. The
  # score is therefore the net count scaled by a constant. Confirm whether a
  # per-tweet normalisation was intended, as the plot subtitle suggests.
  mutate(sentiment = (positive - negative) / n())

# Plot the daily scores with a LOESS smoother on top.
ggplot(sentiment, aes(x = day, y = sentiment)) +
  geom_point() +
  # `linewidth` replaces `size` for lines (ggplot2 >= 3.4.0).
  geom_smooth(method = "loess", linewidth = 1.5, span = 0.1) +
  labs(
    title = "Sentiment Analysis",
    subtitle = "Net Sentiment Score per Tweet",
    x = "Day",
    y = "Sentiment"
  )
# Map beer-related tweets with leaflet ----
# Requires leaflet; if it is missing, run install.packages("leaflet").
library(leaflet)

# Pattern matched (case-insensitively, as a regular expression) against the
# tweet text.
query <- "beer"

# Keep tweets whose body matches the query and that carry both coordinates.
# The result is not called `t`, which would mask base::t().
beer_tweets <- tweets %>%
  filter(
    grepl(query, body, ignore.case = TRUE),
    !is.na(point_long),
    !is.na(point_lat)
  )

# Clustered circle markers; clicking a marker shows the tweet text.
leaflet(beer_tweets) %>%
  addTiles() %>%
  # NOTE(review): `lng` is fed from point_lat and `lat` from point_long, as in
  # the original script. That looks swapped, but the dataset's columns may be
  # mislabelled; verify against the data before changing it.
  addCircleMarkers(
    lng = ~point_lat,
    lat = ~point_long,
    popup = ~body,
    stroke = FALSE,
    fillOpacity = 0.5,
    radius = 10,
    clusterOptions = markerClusterOptions()
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment