Skip to content

Instantly share code, notes, and snippets.

@csiu
Last active May 13, 2017 07:25
Show Gist options
  • Save csiu/b5a9d091d9cf5a0df08c775aaf88eb56 to your computer and use it in GitHub Desktop.
Save csiu/b5a9d091d9cf5a0df08c775aaf88eb56 to your computer and use it in GitHub Desktop.
Day 77: Modularizing functionality & Better emoji displays
library(twitteR)
library(dplyr)
library(readr)
library(stringr)
library(ggplot2)
# Prereq ------------------------------------------------------------------
# For visualization of emojis, you will need to download the
# EmojiOne*.ttf file from https://github.com/eosrei/emojione-color-font
# Functions ---------------------------------------------------------------
# Search Twitter and return the matching tweets as data frames.
#
# Args:
#   search_string: query handed to searchTwitter(); it is also stored verbatim
#     in the `hashtag` column of the tidy result.
#   ...: further arguments forwarded unchanged to searchTwitter().
#
# Returns:
#   A list with two elements:
#     raw  - the retweet-stripped tweets as returned by twListToDF()
#     tidy - `raw` plus `hashtag` and `url` columns, with `text` re-encoded,
#            `created` parsed by lubridate and `retweetCount` renamed to
#            `retweets`
pull_tweets <- function(search_string, ...) {
  # Fix the RNG seed (note: this changes the global RNG state)
  set.seed(20170202)

  # Query the API, drop retweets, then flatten the twitteR list
  found <- searchTwitter(search_string, ...)
  originals <- strip_retweets(found, strip_manual = TRUE, strip_mt = TRUE)
  tweets_df <- twListToDF(originals)

  tweets_clean <- mutate(
    tweets_df,
    # Remember which query produced the tweet and where to find it online
    hashtag = search_string,
    url = paste0("https://twitter.com/", screenName, "/status/", id),
    # Bytes that cannot be represented in ASCII are written out as <xx> hex
    # codes
    text = iconv(text, from = "latin1", to = "ASCII", sub = "byte"),
    # Parse the creation timestamp
    created = lubridate::ymd_hms(created, tz = "UTC")
  )
  tweets_clean <- rename(tweets_clean, retweets = retweetCount)

  list(raw = tweets_df, tidy = tweets_clean)
}
# Load the emoji dictionary, downloading it first when it is not on disk.
#
# Args:
#   emoji_file: local path of the dictionary CSV. When the file does not
#     exist, "emDict.csv" is fetched from the today-is-a-good-day/emojis
#     GitHub repository and stored at exactly this path.
#
# Returns:
#   A data frame with the columns `description` (lower-cased), `native`,
#   `bytes` and `r_encoding`.
load_dict_emoji <- function(emoji_file = "emDict.csv") {
  if (!file.exists(emoji_file)) {
    # The remote file name is fixed; only the local destination is chosen by
    # the caller. Keeping the two apart ensures the download lands at (and is
    # later read from) the path that was actually requested.
    emoji_file_url <- paste0(
      "https://raw.githubusercontent.com/today-is-a-good-day/emojis/master/",
      "emDict.csv"
    )
    # Make sure the destination directory exists before downloading into it
    dir.create(dirname(emoji_file), recursive = TRUE, showWarnings = FALSE)
    download.file(emoji_file_url, destfile = emoji_file)
  }

  readr::read_delim(
    emoji_file,
    delim = ";",
    # Replace the header row (skipped below) with our own column names
    col_names = c("description", "native", "bytes", "r_encoding"),
    skip = 1,
    progress = FALSE
  ) %>%
    mutate(description = tolower(description))
}
# Count how often each emoji occurs across a set of tweets.
#
# Args:
#   emoticons: data frame with (at least) the columns `description`, `native`
#     and `r_encoding`; each `r_encoding` value is searched for in the tweets.
#   the_tweets: character vector of tweet texts (may be empty).
#
# Returns:
#   An ungrouped data frame with one row per emoji that occurred at least
#   once (`description`, `native`, `count`), sorted by descending `count`.
count_emoji <- function(emoticons, the_tweets) {
  # Helper: number of matches of `pattern` in every element of `string`.
  # NOTE(review): str_count() interprets `pattern` as a regular expression;
  # consider stringr::fixed() if the encodings are meant to match literally.
  count_pattern <- function(string, pattern) {
    counts <- str_count(string, pattern)
    data.frame(
      counts,
      # seq_along() (unlike 1:length()) is empty for empty input, so an empty
      # tweet vector yields a zero-row data frame instead of an error
      tweet_id = seq_along(counts)
    )
  }

  # One row per (emoji, tweet) pair in which the emoji actually occurs
  emoji_counts <-
    emoticons %>%
    mutate(
      counts = purrr::map(r_encoding, ~count_pattern(the_tweets, .x))
    ) %>%
    tidyr::unnest(counts) %>%
    filter(counts != 0)

  # Total occurrences per emoji, most frequent first
  emoji_counts %>%
    group_by(description, native) %>%
    summarise(count = sum(counts)) %>%
    # summarise() only drops the last grouping level; remove the rest so
    # callers receive a plain, ungrouped table
    ungroup() %>%
    arrange(desc(count))
}
# Draw a bar chart of emoji counts, labelling each bar with the emoji glyph.
#
# Args:
#   count_summary: data frame with the columns `description`, `count` and
#     `native` (e.g. the output of count_emoji()).
#   filename: file to save the plot to; when NULL the plot is printed to the
#     active graphics device instead.
#   titlename: plot title.
#   use_cowplot: attach the cowplot package before building the plot?
#   font_style: emoji font family; "<font_style>.ttf" is handed to
#     load.emojifont().
#   ...: further arguments forwarded to ggsave() (e.g. width, height, scale).
#
# Returns:
#   The ggplot object, invisibly.
visualize_counts <- function(count_summary, filename = NULL, titlename = "",
                             use_cowplot = TRUE, font_style = "EmojiOne", ...) {
  if (use_cowplot) {
    # NOTE(review): presumably attached for its effect on plot appearance;
    # kept as an attach so behaviour does not depend on the cowplot version
    library(cowplot)
  }

  plt <-
    ggplot(count_summary,
           aes(x = reorder(description, -count),
               y = count,
               label = native)) +
    geom_bar(stat = "identity", fill = "grey88") +
    xlab("") +
    ggtitle(titlename) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))

  # emojifont is optional: without it the bars are drawn without glyph labels.
  # NOTE(review): require() is kept (rather than requireNamespace()) so the
  # package is attached as before, in case its attach hooks matter for font
  # rendering - TODO confirm.
  if (require("emojifont")) {
    # EmojiOne.ttf file downloaded from
    # https://github.com/eosrei/emojione-color-font
    load.emojifont(paste0(font_style, ".ttf"))
    # Keep the labelled plot; previously this result was discarded and saving
    # only worked through ggsave()'s implicit last_plot() fallback
    plt <- plt + geom_text(family = font_style, size = 5)
  }

  if (is.null(filename)) {
    # No target file: show the plot rather than silently doing nothing
    print(plt)
  } else {
    # Save exactly the plot built above
    ggsave(filename, plot = plt, ...)
  }

  invisible(plt)
}
# Do something ------------------------------------------------------------
# Twitter API credentials are created at https://apps.twitter.com and are
# expected to define the following four variables:
# api_key <- 'XXX'
# api_secret <- 'XXX'
# access_token <- 'XXX'
# access_token_secret <- 'XXX'
# The real values are kept out of this script and loaded from a separate file:
source("twitter_api_key.R")
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)

# Emoji dictionary and the hashtag shared by both runs below
emoticons <- load_dict_emoji()
search_string <- "#BCVotes2017"

## During election day
tweets <- pull_tweets(
  search_string,
  n = 10000,
  lang = "en",
  since = "2017-05-09",
  until = "2017-05-10"
)
emoji_counts_summary <- count_emoji(emoticons, the_tweets = tweets$tidy$text)
# Plot only the emojis that occur more than once
emoji_counts_summary %>%
  filter(count > 1) %>%
  visualize_counts(
    filename = "~/Desktop/2017-05-09_emoji.pdf",
    titlename = paste(search_string, "during Election Day (May 9, 2017)"),
    height = 7
  )

## After election day
tweets <- pull_tweets(
  search_string,
  n = 10000,
  lang = "en",
  since = "2017-05-10",
  until = "2017-05-11"
)
emoji_counts_summary <- count_emoji(emoticons, the_tweets = tweets$tidy$text)
# Plot only the emojis that occur more than once
emoji_counts_summary %>%
  filter(count > 1) %>%
  visualize_counts(
    filename = "~/Desktop/2017-05-10_emoji.pdf",
    titlename = paste(search_string, "a day after Election Day (May 10, 2017)"),
    scale = 1.5,
    width = 7
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment