Last active
August 29, 2015 14:08
-
-
Save simecek/ba8a9b386ff21a501eb8 to your computer and use it in GitHub Desktop.
Analysis of #IMGC14 tweets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(twitteR)
library(tm)
library(wordcloud)
library(RColorBrewer)
library(dplyr)
library(ggplot2)
# Load tweets ----
# Fetch the archived #IMGC14 tweets and read them into `tweet`.
# NOTE(review): the code below treats `tweet` as a list of objects exposing
# $created, $screenName, $retweetCount and $favoriteCount (presumably twitteR
# status objects) -- confirm against the contents of the .rds file.
download.file("ftp://ftp.jax.org/petrs/other/IMGC14.rds",
              destfile = "IMGC14_tweets.rds", mode = "wb")
tweet <- readRDS("IMGC14_tweets.rds")

# Extract time, user, favorite count and retweet count (one row per tweet).
# vapply() checks that every tweet yields exactly one value of the expected
# type, instead of letting sapply() silently change the column type.
dt <- data.frame(
  time = vapply(tweet, function(x) as.character(x$created), character(1)),
  user = vapply(tweet, function(x) x$screenName, character(1)),
  nRt = vapply(tweet, function(x) x$retweetCount, numeric(1)),
  nFav = vapply(tweet, function(x) x$favoriteCount, numeric(1)),
  stringsAsFactors = FALSE
)

# Time zone change: parse the timestamps as UTC, format them as wall-clock
# time in America/Thunder_Bay, then re-parse that text without a tz argument
# (i.e. in the session time zone). The stored values therefore read as
# Thunder Bay local time regardless of where the script runs.
dt$time <- as.POSIXct(format(as.POSIXct(dt$time, tz = "UTC"),
                             tz = "America/Thunder_Bay"))
### Number of tweets per user
# Count tweets per user and keep only users with more than one tweet.
tweets_per_user <- dt %>%
  group_by(user) %>%
  summarise(n = n()) %>%
  filter(n > 1)

# Horizontal bar chart; the bar order comes from reorder(user, n), so no
# explicit sorting of the summary table is needed.
users_plot <- ggplot(tweets_per_user, aes(x = reorder(user, n), y = n)) +
  geom_bar(stat = "identity") +
  ylab("Number of tweets") +
  xlab("User") +
  ggtitle("Users with at least two #IMGC14 tweets") +
  coord_flip() +
  theme(panel.border = element_rect(colour = "darkgrey", fill = NA))
print(users_plot)

# Pass the plot explicitly so the saved file does not depend on whichever
# plot happens to be the "last plot" at this point.
ggsave("users.jpeg", plot = users_plot)
# Summary counts ----
# The figures in parentheses are the values reported by the original author.

# number of users (79)
n_distinct(dt$user)

# number of tweets (1546)
nrow(dt)

# most active user and their tweet count (Steve Munger, 679);
# ties on the count are broken alphabetically by user name
dt %>%
  group_by(user) %>%
  summarise(n = n()) %>%
  arrange(desc(n), user) %>%
  head(n = 1)
### Time distribution of tweets
# Density of tweet timestamps between 04:00 on 2014-10-26 and 04:00 on
# 2014-10-30. The limits are parsed in the session time zone: as.POSIXct()
# ignores trailing text such as a zone abbreviation, so none is given here.
# NOTE(review): this assumes dt$time holds wall-clock times expressed in the
# session time zone as well -- confirm against where dt$time is built.
ggplot(dt, aes(x = time)) +
  geom_density(fill = "#1B9E77", colour = "#1B9E77", adjust = 0.2) +
  xlim(as.POSIXct("2014-10-26 04:00:00"), as.POSIXct("2014-10-30 04:00:00")) +
  xlab("Time") +
  ylab("Intensity of tweeting") +
  ggtitle("Twitter timestamps") +
  theme(panel.border = element_rect(colour = "darkgrey", fill = NA))
### Most retweeted and favorited tweets
# which.max() gives the position of the first maximum in the count column;
# rows of `dt` are in the same order as `tweet`, so that position indexes the
# matching tweet. Single-bracket indexing keeps the result a one-element list.
tweet[which.max(dt$nRt)]
tweet[which.max(dt$nFav)]
### WORD CLOUD
# make corpus: one document per tweet text
tweet_text <- vapply(tweet, function(x) x$getText(), character(1))
tweet_text_corpus <- Corpus(VectorSource(tweet_text))

# clean corpus
tweet_text_corpus <- tm_map(tweet_text_corpus, content_transformer(tolower))
# Remove stop words BEFORE stripping punctuation: the English stop-word list
# contains contractions such as "don't" and "i'm", which no longer match once
# the apostrophes are gone.
tweet_text_corpus <- tm_map(tweet_text_corpus, removeWords,
                            stopwords("english"))
tweet_text_corpus <- tm_map(tweet_text_corpus, removePunctuation)
# Drop the conference hashtag, two user handles and "mice". This has to run
# after punctuation removal so that "#imgc14" / "@stevemunger" have already
# been reduced to bare words.
tweet_text_corpus <- tm_map(tweet_text_corpus, removeWords,
                            c("imgc14", "stevemunger", "redsoxgal0407", "mice"))

# plot wordcloud: at most 100 words occurring at least twice, most frequent
# words placed first, 35% of words rotated
wordcloud(tweet_text_corpus, scale = c(3.5, 0.5), max.words = 100,
          min.freq = 2, random.order = FALSE, rot.per = 0.35,
          use.r.layout = FALSE, colors = brewer.pal(8, "Dark2"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment