Francisco Lima (monogenea)

#!/Library/Frameworks/R.framework/Resources/Rscript
# Mon Apr 15 18:41:47 2019 ------------------------------
library(rtweet)
# Twitter API
create_token(app = "INSERT_HERE",
             consumer_key = "INSERT_HERE",
             consumer_secret = "INSERT_HERE",
             access_token = "INSERT_HERE",
             access_secret = "INSERT_HERE")
# Google Maps API https://developers.google.com/maps/documentation/javascript/get-api-key
apiKey <- "INSERT_HERE"
# Read GOT tweets from US
newTweets <- search_tweets(q = "game of thrones",
                           retryonratelimit = TRUE, lang = "en",
                           geocode = lookup_coords("usa", apikey = apiKey),
                           include_rts = FALSE, n = 1e5) # 1st day 3e5, to go back ~1 week
# Specify dir
dirPath <- "~/Documents/INSERT_PATH"
# Create dir for storage, if it does not exist yet
if (!dir.exists(dirPath)) dir.create(dirPath)
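# Save this batch (assumption: each crawl is stored as a timestamped .rds file
# inside dirPath, to be merged and de-duplicated by status_id later on)
saveRDS(newTweets, file.path(dirPath,
                             paste0("tweets_", format(Sys.time(), "%Y%m%d%H%M%S"), ".rds")))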
# Wed May 8 21:22:45 2019 ------------------------------
# Use status_id to identify and exclude duplicates
library(rtweet)
# List all files
allFiles <- paste0("tweets/", list.files("tweets/"))
# Write function to merge tweets
mergeTweets <- function(recipient, donor){
  idx <- !donor$status_id %in% recipient$status_id
  rbind(recipient, donor[idx, ]) # append only the new tweets (assumed completion)
}
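# Usage sketch (assumption: every file under tweets/ is one of the .rds batches
# saved by the crawler), merging all batches into a single de-duplicated set
allTweets <- Reduce(mergeTweets, lapply(allFiles, readRDS))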
# Load libraries
library(tidyverse)
library(magrittr) # provides the %<>% assignment pipe used below
library(reshape2)
library(ggplot2)
library(ggridges)
library(lubridate)
library(rtweet)
library(maps)
library(quanteda)
# Convert UTC to EDT
allTweets %<>% dplyr::mutate(created_at = as_datetime(created_at, tz = "UTC")) %>%
  dplyr::mutate(created_at = with_tz(created_at, tzone = "America/New_York"))
# Produce lat and lng coordinates
allTweets <- lat_lng(allTweets)
# Plot
par(mar = rep(12, 4))
map("state", lwd = .25)
# Plot lat and lng points onto the state map (point styling below is illustrative)
points(allTweets$lng, allTweets$lat, pch = 16, cex = .25, col = rgb(0, 0, 1, .25))
# Tokenize words (note: remove_twitter and remove_hyphens are quanteda 1.x arguments)
tkn <- tokens(allTweets$text,
              remove_twitter = TRUE,
              remove_separators = TRUE,
              remove_symbols = TRUE,
              remove_punct = TRUE,
              remove_url = TRUE,
              remove_hyphens = TRUE,
              remove_numbers = TRUE) %>%
  tokens_ngrams(n = 1:2)
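# `gotChars` is not shown in this excerpt; an illustrative stand-in (assumption,
# replace with the actual character list; note the %in% match below is case-sensitive)
gotChars <- c("Daenerys", "Jon", "Tyrion", "Cersei", "Arya", "Sansa", "Bran")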
# Identify tweets containing any of the character names (0/1)
popularity <- as.data.frame(lapply(gotChars, function(x){
  as.integer(sapply(tkn, function(k){ any(k %in% x) }))
}))
# Write colnames
colnames(popularity) <- gotChars
# Add column with the corresponding Eastern (America/New_York) time
popularity$created_at <- allTweets$created_at
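# One possible view of these data with the loaded reshape2/ggridges packages:
# melt the 0/1 matrix and draw per-character ridgelines over time
# (a sketch only; variable names follow the objects built above)
popLong <- reshape2::melt(popularity, id.vars = "created_at",
                          variable.name = "character", value.name = "mention")
ggplot(dplyr::filter(popLong, mention == 1), aes(x = created_at, y = character)) +
  geom_density_ridges() +
  theme_minimal()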
# Sat Oct 5 10:06:01 2019 ------------------------------
# Bonus - remove bots, time-dependent wordclouds & sentiment analysis
rtStats <- do.call("rbind", by(allTweets, INDICES = allTweets$screen_name, function(x){
  return(data.frame(num_tweets = nrow(x),
                    mean_followers = mean(x$followers_count),
                    median_rt = median(x$retweet_count)))
}))
# Plot log10(num_tweets) vs. log10(median_rt)
with(log10(rtStats + 1), plot(num_tweets, median_rt,
                              xlab = "log10(no. tweets + 1)",
                              ylab = "log10(median retweets + 1)")) # labels are illustrative
# Wordcloud
# Remove potential bots w/ > 100 tweets in the dataset
bots <- rownames(rtStats)[which(rtStats$num_tweets > 100)]
reducedTweet <- allTweets[!allTweets$screen_name %in% bots,]
reducedTweet$text <- texts(reducedTweet$text) %>%
  iconv(from = "UTF-8", to = "ASCII", sub = "") %>%
  gsub(pattern = "<[A-Z+0-9]+>", replacement = " ")
# Tokenize words
tkn <- tokens(reducedTweet$text, # options below assumed to mirror the earlier tokens() call
              remove_twitter = TRUE,
              remove_separators = TRUE,
              remove_symbols = TRUE,
              remove_punct = TRUE,
              remove_url = TRUE,
              remove_hyphens = TRUE,
              remove_numbers = TRUE) %>%
  tokens_ngrams(n = 1:2)
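# Wordcloud sketch (assumptions: build a document-feature matrix from the bot-free
# tokens, drop English stopwords, cap the display at 200 words; in quanteda >= 3,
# textplot_wordcloud() lives in the quanteda.textplots package)
dfmat <- dfm(tkn) %>%
  dfm_remove(stopwords("english"))
textplot_wordcloud(dfmat, max_words = 200)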