Last active
July 30, 2017 01:42
-
-
Save micahwoods/ba86d8779e087cc1c91ba2078ec7c425 to your computer and use it in GitHub Desktop.
makes a chart of tweet rate and 2nd degree followers based on code from https://github.com/ShirinG/blog_posts_prep/blob/master/twitter/twitter_codecentric.Rmd and described in https://blog.codecentric.de/en/2017/07/combining-social-network-analysis-topic-modeling-characterize-codecentrics-twitter-friends-followers/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# make a chart of tweet rate by number of followers, with log2 axes,
# as shown in https://blog.codecentric.de/en/2017/07/combining-social-network-analysis-topic-modeling-characterize-codecentrics-twitter-friends-followers/
# and adapted from code at https://github.com/ShirinG/blog_posts_prep/blob/master/twitter/twitter_codecentric.Rmd
library(twitteR) | |
library(dplyr) | |
library(ggrepel) | |
library(cowplot) | |
# Twitter API credentials: replace each placeholder with your own value
# before running the script.
api_key <- "your API_KEY here"
api_secret <- "your API_SECRET here"
access_token <- "your ACCESS_TOKEN here"
access_token_secret <- "your ACCESS_TOKEN_SECRET here"

# Authenticate this R session against the Twitter API.
setup_twitter_oauth(
  consumer_key = api_key,
  consumer_secret = api_secret,
  access_token = access_token,
  access_secret = access_token_secret
)
# Download the followers of one Twitter account as a data frame.
#
# screen_name: the account's handle, without the leading "@".
# Returns the account's follower list converted with twListToDF().
fetch_followers_df <- function(screen_name) {
  twListToDF(getUser(screen_name)$getFollowers())
}

# The account being analysed.
user <- getUser("asianturfgrass")
friends_df <- twListToDF(user$getFriends()) # who I follow
followers_df <- twListToDF(user$getFollowers()) # my followers
atcFull <- followers_df

# Followers of three greenkeeping associations. Requiring an account to
# follow these as well as me is meant to pull out the non-turf accounts.
gcsaa_followers_df <- fetch_followers_df("gcsaa")
bigga_followers_df <- fetch_followers_df("BIGGALtd")
agcsa_followers_df <- fetch_followers_df("AGCSA2")
# Accounts that are probably really into the profession: keep only the ATC
# followers whose screen name also appears among the followers of GCSAA,
# BIGGA and AGCSA (i.e. in the intersection of all three follower sets).
atc_assoc_df <- atcFull[
  atcFull$screenName %in% Reduce(
    intersect,
    list(
      gcsaa_followers_df$screenName,
      bigga_followers_df$screenName,
      agcsa_followers_df$screenName
    )
  ),
  ,
  drop = FALSE
]
# Reduce a follower data frame to the three columns used for ranking and
# labelling: screenName, followersCount and average tweets per day.
#
# accounts: data frame with columns screenName, followersCount,
#           statusesCount and created (as built by twListToDF()).
# as_of:    Date up to which account age is measured; defaults to the day
#           the data were collected.
# Returns a data frame with columns screenName, followersCount and
# statusesCount_pDay (statusesCount divided by account age in days).
account_tweet_rates <- function(accounts, as_of = as.Date("2017-07-30")) {
  accounts %>%
    mutate(
      date = as.Date(created, format = "%Y-%m-%d"),
      days = as.numeric(as_of - date),
      statusesCount_pDay = statusesCount / days
    ) %>%
    select(screenName, followersCount, statusesCount_pDay)
}

# how many accounts to keep from each ranking
top_n_rows <- 25

atc_rates <- account_tweet_rates(atcFull)
assoc_rates <- account_tweet_rates(atc_assoc_df)

# head() is used instead of .[1:25, ]: indexing past the last row pads the
# result with all-NA rows when fewer than 25 accounts are available, which
# can happen for the subset that also follows the three associations.

# find from all ATC followers who themselves have the most followers
top_fol <- atc_rates %>%
  arrange(desc(followersCount)) %>%
  head(top_n_rows)

# find from the subsetted followers who has the most followers
top_fol_assoc <- assoc_rates %>%
  arrange(desc(followersCount)) %>%
  head(top_n_rows)

# find from all ATC followers who tweets the most
top_tweet <- atc_rates %>%
  arrange(desc(statusesCount_pDay)) %>%
  head(top_n_rows)

# find from the subsetted ATC followers who tweets the most
top_tweet_assoc <- assoc_rates %>%
  arrange(desc(statusesCount_pDay)) %>%
  head(top_n_rows)

# for all ATC followers, combine the 25 with the most followers and the 25
# with the most tweets, keeping each screenName only once
top_fol_tweet <- rbind(top_fol, top_tweet)
top_fol_tweet <- top_fol_tweet[!duplicated(top_fol_tweet$screenName), ]

# same combination for the ATC followers who also follow AGCSA, BIGGA,
# and GCSAA
top_fol_tweet_assoc <- rbind(top_fol_assoc, top_tweet_assoc)
top_fol_tweet_assoc <-
  top_fol_tweet_assoc[!duplicated(top_fol_tweet_assoc$screenName), ]
# Label formatter that trims the number of decimals shown: every value is
# passed through prettyNum() with digits = 2 and returned as text.
axisDigits2 <- function(x) {
  prettyNum(x, digits = 2)
}
# Scatter plot of follower count against average tweets per day, both on
# log2 axes, with a linear trend line and repelled labels for selected
# accounts.
#
# accounts: data frame with columns followersCount, statusesCount and
#           created; one point is drawn per row.
# labelled: data frame with columns screenName, followersCount and
#           statusesCount_pDay; these accounts get a text label.
# subtitle: optional subtitle; nothing is added when NULL.
# as_of:    Date up to which account age is measured; defaults to the day
#           the data were collected.
# Returns the ggplot object.
plot_follower_influence <- function(accounts, labelled, subtitle = NULL,
                                    as_of = as.Date("2017-07-30")) {
  rates <- accounts %>%
    mutate(
      date = as.Date(created, format = "%Y-%m-%d"),
      days = as.numeric(as_of - date),
      statusesCount_pDay = statusesCount / days
    )

  p <- ggplot(rates, aes(x = followersCount, y = statusesCount_pDay)) +
    # set the cowplot theme explicitly: recent cowplot releases no longer
    # make it the default on load, so the look would otherwise depend on
    # the installed version
    theme_cowplot() +
    background_grid(major = "xy") +
    geom_smooth(method = "lm", se = FALSE, colour = "grey") +
    geom_point(color = "#3f7300", alpha = 0.4) +
    geom_text_repel(data = labelled, aes(label = screenName), size = 3) +
    scale_x_continuous(trans = "log2") +
    scale_y_continuous(trans = "log2", labels = axisDigits2) +
    labs(
      x = expression(log[2]~"number of followers"),
      y = expression(log[2]~"average tweets per day"),
      title = "asianturfgrass's most influential followers",
      caption = "Second degree followers and tweet rate of @asianturfgrass Twitter followers (data from 30 July 2017)\nadapted from https://github.com/ShirinG/blog_posts_prep/blob/master/twitter/twitter_codecentric.Rmd"
    )

  if (!is.null(subtitle)) {
    p <- p + labs(subtitle = subtitle)
  }
  p
}

# all followers of @asianturfgrass
full <- plot_follower_influence(atcFull, top_fol_tweet)
full
save_plot("atc_followers.svg", full, base_width = 12, base_height = 6.75)

# only the followers who also follow the three associations
atc_assoc <- plot_follower_influence(
  atc_assoc_df,
  top_fol_tweet_assoc,
  subtitle = "among those accounts that also follow GCSAA, BIGGALtd, & AGCSA2"
)
atc_assoc
save_plot("atc_followers_assoc.svg", atc_assoc, base_width = 12, base_height = 6.75)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment