# Makes a chart of tweet rate and second-degree followers, based on code from https://github.com/ShirinG/blog_posts_prep/blob/master/twitter/twitter_codecentric.Rmd and described in https://blog.codecentric.de/en/2017/07/combining-social-network-analysis-topic-modeling-characterize-codecentrics-twitter-friends-followers/
# Make a chart of tweet rate against follower count, with log2 axes,
# as shown in https://blog.codecentric.de/en/2017/07/combining-social-network-analysis-topic-modeling-characterize-codecentrics-twitter-friends-followers/
# and adapted from code at https://github.com/ShirinG/blog_posts_prep/blob/master/twitter/twitter_codecentric.Rmd
library(twitteR) | |
library(dplyr) | |
library(ggrepel) | |
library(cowplot) | |
# Twitter API credentials.
# Each value is taken from an environment variable when one is set (for
# example via ~/.Renviron), so real secrets do not have to be written into
# this script. When a variable is unset the original placeholder string is
# used, so behaviour is unchanged for anyone who edits the strings in place.
api_key <- Sys.getenv("TWITTER_API_KEY", unset = "your API_KEY here")
api_secret <- Sys.getenv("TWITTER_API_SECRET", unset = "your API_SECRET here")
access_token <- Sys.getenv(
  "TWITTER_ACCESS_TOKEN",
  unset = "your ACCESS_TOKEN here"
)
access_token_secret <- Sys.getenv(
  "TWITTER_ACCESS_TOKEN_SECRET",
  unset = "your ACCESS_TOKEN_SECRET here"
)

# Hand the four credentials to twitteR before any of the API calls below.
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
# Look up the @asianturfgrass account, then pull both sides of its network.
user <- getUser("asianturfgrass")

# Accounts that follow @asianturfgrass, as a list and as a data frame.
followers <- user$getFollowers()
followers_df <- twListToDF(followers)

# Accounts that @asianturfgrass follows, as a list and as a data frame.
friends <- user$getFriends()
friends_df <- twListToDF(friends)

# atcFull is the name the rest of the script uses for the full follower table.
atcFull <- followers_df
# Follower lists of three association accounts (GCSAA, BIGGA and AGCSA).
# They are used further down to narrow the @asianturfgrass followers to
# accounts that also follow these associations, which may pull out some of
# the non-turf accounts.

# Step 1: look up each association account.
user2 <- getUser("gcsaa")
user3 <- getUser("BIGGALtd")
user4 <- getUser("AGCSA2")

# Step 2: download the followers of each account.
gcsaa <- user2$getFollowers()
bigga <- user3$getFollowers()
agcsa <- user4$getFollowers()

# Step 3: convert each follower list into a data frame.
gcsaa_followers_df <- twListToDF(gcsaa)
bigga_followers_df <- twListToDF(bigga)
agcsa_followers_df <- twListToDF(agcsa)
# Accounts that are probably really into the profession: keep only the
# @asianturfgrass followers whose screenName also appears in all three of the
# GCSAA, BIGGA and AGCSA follower tables.
atc_assoc_df <- atcFull[
  atcFull$screenName %in% Reduce(
    intersect,
    list(
      gcsaa_followers_df$screenName,
      bigga_followers_df$screenName,
      agcsa_followers_df$screenName
    )
  ),
  ,
  drop = FALSE
]
# Build the four "top 25" tables that decide which accounts get a text label
# on the charts: most followers and highest tweet rate, once for every
# @asianturfgrass follower and once for the association subset.

# Reduce a follower table to screenName, followersCount and the average
# number of tweets per day since the account was created.
#   accounts: data frame with screenName, followersCount, statusesCount and
#             created columns
#   as_of:    date up to which account age is counted; defaults to the day
#             the data were collected (30 July 2017)
# Returns a data frame with columns screenName, followersCount and
# statusesCount_pDay, one row per input row.
# NOTE: an account created on `as_of` has days == 0, so its rate is Inf
# (or NaN when it has no tweets); that is unchanged from the original code.
tweet_rate_table <- function(accounts, as_of = as.Date("2017-07-30")) {
  accounts %>%
    mutate(
      date = as.Date(created, format = "%Y-%m-%d"),
      days = as.numeric(as_of - date),
      statusesCount_pDay = statusesCount / days
    ) %>%
    select(screenName, followersCount, statusesCount_pDay)
}

# head(25) is used instead of .[1:25, ] because indexing rows 1:25 of a table
# with fewer than 25 rows pads the result with all-NA rows, which would then
# be passed on as label data.

# find from all ATC followers who themselves have the most followers
top_fol <- atcFull %>%
  tweet_rate_table() %>%
  arrange(desc(followersCount)) %>%
  head(25)

# find from the subsetted followers who has the most followers
top_fol_assoc <- atc_assoc_df %>%
  tweet_rate_table() %>%
  arrange(desc(followersCount)) %>%
  head(25)

# find from all ATC followers who tweets the most
top_tweet <- atcFull %>%
  tweet_rate_table() %>%
  arrange(desc(statusesCount_pDay)) %>%
  head(25)

# find from the subsetted ATC followers who tweets the most
top_tweet_assoc <- atc_assoc_df %>%
  tweet_rate_table() %>%
  arrange(desc(statusesCount_pDay)) %>%
  head(25)
# All ATC followers: stack the 25 most-followed accounts on top of the 25
# most active tweeters, then keep only the first row for each value of the
# first column (screenName), so an account found in both lists appears once.
top_fol_tweet <- rbind(top_fol, top_tweet)
top_fol_tweet <- top_fol_tweet[
  match(unique(top_fol_tweet[[1]]), top_fol_tweet[[1]]),
]

# Same combination for the ATC followers that also follow AGCSA, BIGGA and
# GCSAA.
top_fol_tweet_assoc <- rbind(top_fol_assoc, top_tweet_assoc)
top_fol_tweet_assoc <- top_fol_tweet_assoc[
  match(unique(top_fol_tweet_assoc[[1]]), top_fol_tweet_assoc[[1]]),
]
# Axis-label formatter that trims the decimals shown on the tweet-rate axis.
# Passes `x` to prettyNum() with digits = 2 and returns the resulting
# character vector (for example 3.14159 becomes "3.1").
axisDigits2 <- function(x) {
  prettyNum(x, digits = 2)
}
# Chart 1: every @asianturfgrass follower, plotted as follower count against
# average tweets per day on log2 axes, with the accounts listed in
# top_fol_tweet labelled by screen name.
full <- ggplot(
  data = mutate(
    atcFull,
    date = as.Date(created, format = "%Y-%m-%d"),
    today = as.Date("2017-07-30", format = "%Y-%m-%d"),
    days = as.numeric(today - date),
    statusesCount_pDay = statusesCount / days
  ),
  mapping = aes(x = followersCount, y = statusesCount_pDay)
) +
  background_grid(major = "xy") +
  # layers in drawing order: linear trend line, the points, then the labels
  geom_smooth(method = "lm", se = FALSE, colour = "grey") +
  geom_point(colour = "#3f7300", alpha = 0.4) +
  geom_text_repel(
    data = top_fol_tweet,
    mapping = aes(label = screenName),
    size = 3
  ) +
  scale_x_continuous(trans = "log2") +
  scale_y_continuous(trans = "log2", labels = axisDigits2) +
  labs(
    title = "asianturfgrass's most influential followers",
    x = expression(log[2]~"number of followers"),
    y = expression(log[2]~"average tweets per day"),
    caption = "Second degree followers and tweet rate of @asianturfgrass Twitter followers (data from 30 July 2017)\nadapted from https://github.com/ShirinG/blog_posts_prep/blob/master/twitter/twitter_codecentric.Rmd"
  )

# Show the chart, then write it to an SVG file.
full
save_plot(
  filename = "atc_followers.svg",
  plot = full,
  base_width = 12,
  base_height = 6.75
)
# Chart 2: the same plot restricted to the followers in atc_assoc_df (those
# that also follow GCSAA, BIGGALtd and AGCSA2), with the accounts listed in
# top_fol_tweet_assoc labelled by screen name.
atc_assoc <- ggplot(
  data = mutate(
    atc_assoc_df,
    date = as.Date(created, format = "%Y-%m-%d"),
    today = as.Date("2017-07-30", format = "%Y-%m-%d"),
    days = as.numeric(today - date),
    statusesCount_pDay = statusesCount / days
  ),
  mapping = aes(x = followersCount, y = statusesCount_pDay)
) +
  background_grid(major = "xy") +
  # layers in drawing order: linear trend line, the points, then the labels
  geom_smooth(method = "lm", se = FALSE, colour = "grey") +
  geom_point(colour = "#3f7300", alpha = 0.4) +
  geom_text_repel(
    data = top_fol_tweet_assoc,
    mapping = aes(label = screenName),
    size = 3
  ) +
  scale_x_continuous(trans = "log2") +
  scale_y_continuous(trans = "log2", labels = axisDigits2) +
  labs(
    title = "asianturfgrass's most influential followers",
    subtitle = "among those accounts that also follow GCSAA, BIGGALtd, & AGCSA2",
    x = expression(log[2]~"number of followers"),
    y = expression(log[2]~"average tweets per day"),
    caption = "Second degree followers and tweet rate of @asianturfgrass Twitter followers (data from 30 July 2017)\nadapted from https://github.com/ShirinG/blog_posts_prep/blob/master/twitter/twitter_codecentric.Rmd"
  )

# Show the chart, then write it to an SVG file.
atc_assoc
save_plot(
  filename = "atc_followers_assoc.svg",
  plot = atc_assoc,
  base_width = 12,
  base_height = 6.75
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment