Skip to content

Instantly share code, notes, and snippets.

@micahwoods
Last active July 30, 2017 01:42
Show Gist options
  • Save micahwoods/ba86d8779e087cc1c91ba2078ec7c425 to your computer and use it in GitHub Desktop.
Save micahwoods/ba86d8779e087cc1c91ba2078ec7c425 to your computer and use it in GitHub Desktop.
# make a chart of tweets by followers and with log2 axes
# as shown in https://blog.codecentric.de/en/2017/07/combining-social-network-analysis-topic-modeling-characterize-codecentrics-twitter-friends-followers/
# and adapted from code at https://github.com/ShirinG/blog_posts_prep/blob/master/twitter/twitter_codecentric.Rmd
library(twitteR)
library(dplyr)
library(ggrepel)
library(cowplot)
# Twitter API credentials for the twitteR OAuth handshake.
# Each value is read from an environment variable (for example one set in
# ~/.Renviron) so that real secrets never have to be pasted into this script.
# When a variable is unset, the original placeholder string is used instead,
# so the script behaves exactly as before for anyone who edits the literals.
api_key <- Sys.getenv("TWITTER_API_KEY", unset = "your API_KEY here")
api_secret <- Sys.getenv("TWITTER_API_SECRET", unset = "your API_SECRET here")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = "your ACCESS_TOKEN here")
access_token_secret <- Sys.getenv(
  "TWITTER_ACCESS_TOKEN_SECRET",
  unset = "your ACCESS_TOKEN_SECRET here"
)
# Register the four credentials with twitteR for the API calls that follow.
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
# Look up the @asianturfgrass account (abbreviated ATC in this script).
user <- getUser("asianturfgrass")
# Accounts that @asianturfgrass follows, converted to a data frame.
# NOTE(review): friends and friends_df are not referenced again in the visible
# script -- confirm whether this fetch is still needed.
friends <- user$getFriends() # who I follow
friends_df <- twListToDF(friends)
# Accounts that follow @asianturfgrass, converted to a data frame.
followers <- user$getFollowers() # my followers
followers_df <- twListToDF(followers)
# atcFull is the name the rest of the script uses for the full follower table.
atcFull <- followers_df
# Fetch the followers of three greenkeeping associations. Requiring an ATC
# follower to also follow these accounts is intended to filter out some of the
# non-turf accounts (the filtering itself happens further down).
# First association: GCSAA.
user2 <- getUser("gcsaa")
gcsaa <- user2$getFollowers() # gcsaa followers
gcsaa_followers_df <- twListToDF(gcsaa)
# Second association: BIGGA (@BIGGALtd).
user3 <- getUser("BIGGALtd")
bigga <- user3$getFollowers() # bigga followers
bigga_followers_df <- twListToDF(bigga)
# Third association: AGCSA (@AGCSA2).
user4 <- getUser("AGCSA2")
agcsa <- user4$getFollowers()
agcsa_followers_df <- twListToDF(agcsa)
# Narrow the ATC followers down to the accounts that are probably in the
# profession: keep only the rows whose screenName also appears among the
# followers of GCSAA, BIGGA, and AGCSA (all three are required).
atc_assoc_df <- local({
  association_tables <- list(
    gcsaa_followers_df,
    bigga_followers_df,
    agcsa_followers_df
  )
  # One logical mask per association: does this ATC follower follow it too?
  follows_each <- lapply(
    association_tables,
    function(assoc) atcFull$screenName %in% assoc$screenName
  )
  # Element-wise AND of the three masks, then plain row indexing.
  atcFull[Reduce(`&`, follows_each), , drop = FALSE]
})
# Rank every ATC follower by how many followers they have themselves and keep
# the top 25 (or all of them when there are fewer than 25).
# statusesCount_pDay is the account's total tweet count divided by its age in
# days on the 2017-07-30 snapshot date.
# NOTE: an account created on the snapshot date has days = 0, so its rate is
# Inf (NaN when it has no tweets).
top_fol <- atcFull %>%
  mutate(
    date = as.Date(created, format = "%Y-%m-%d"),
    today = as.Date("2017-07-30", format = "%Y-%m-%d"),
    days = as.numeric(today - date),
    statusesCount_pDay = statusesCount / days
  ) %>%
  select(screenName, followersCount, statusesCount_pDay) %>%
  arrange(desc(followersCount)) %>%
  # head() stops at the last available row, whereas `.[1:25, ]` padded a
  # shorter table with all-NA rows.
  head(25)
# Among the ATC followers that also follow all three associations, rank by
# follower count and keep the top 25 (or all of them when there are fewer
# than 25).
# statusesCount_pDay is the account's total tweet count divided by its age in
# days on the 2017-07-30 snapshot date.
# NOTE: an account created on the snapshot date has days = 0, so its rate is
# Inf (NaN when it has no tweets).
top_fol_assoc <- atc_assoc_df %>%
  mutate(
    date = as.Date(created, format = "%Y-%m-%d"),
    today = as.Date("2017-07-30", format = "%Y-%m-%d"),
    days = as.numeric(today - date),
    statusesCount_pDay = statusesCount / days
  ) %>%
  select(screenName, followersCount, statusesCount_pDay) %>%
  arrange(desc(followersCount)) %>%
  # The association subset can easily hold fewer than 25 accounts; head()
  # returns what exists, whereas `.[1:25, ]` padded the result with all-NA
  # rows that later surfaced as NA labels.
  head(25)
# Rank every ATC follower by average tweets per day and keep the top 25 (or
# all of them when there are fewer than 25).
# statusesCount_pDay is the account's total tweet count divided by its age in
# days on the 2017-07-30 snapshot date.
# NOTE: an account created on the snapshot date has days = 0, so its rate is
# Inf (NaN when it has no tweets); Inf sorts to the top of this ranking.
top_tweet <- atcFull %>%
  mutate(
    date = as.Date(created, format = "%Y-%m-%d"),
    today = as.Date("2017-07-30", format = "%Y-%m-%d"),
    days = as.numeric(today - date),
    statusesCount_pDay = statusesCount / days
  ) %>%
  select(screenName, followersCount, statusesCount_pDay) %>%
  arrange(desc(statusesCount_pDay)) %>%
  # head() stops at the last available row, whereas `.[1:25, ]` padded a
  # shorter table with all-NA rows.
  head(25)
# Among the ATC followers that also follow all three associations, rank by
# average tweets per day and keep the top 25 (or all of them when there are
# fewer than 25).
# statusesCount_pDay is the account's total tweet count divided by its age in
# days on the 2017-07-30 snapshot date.
# NOTE: an account created on the snapshot date has days = 0, so its rate is
# Inf (NaN when it has no tweets); Inf sorts to the top of this ranking.
top_tweet_assoc <- atc_assoc_df %>%
  mutate(
    date = as.Date(created, format = "%Y-%m-%d"),
    today = as.Date("2017-07-30", format = "%Y-%m-%d"),
    days = as.numeric(today - date),
    statusesCount_pDay = statusesCount / days
  ) %>%
  select(screenName, followersCount, statusesCount_pDay) %>%
  arrange(desc(statusesCount_pDay)) %>%
  # The association subset can easily hold fewer than 25 accounts; head()
  # returns what exists, whereas `.[1:25, ]` padded the result with all-NA
  # rows that later surfaced as NA labels.
  head(25)
# Accounts to label in the all-followers plot: the 25 most-followed accounts
# stacked on top of the 25 most-active ones. An account that appears in both
# lists is kept once (its first occurrence), matched on screenName, which is
# the first column of both tables.
top_fol_tweet <- rbind(top_fol, top_tweet)
top_fol_tweet <- top_fol_tweet[!duplicated(top_fol_tweet$screenName), ]
# Accounts to label in the association-followers plot (ATC followers that also
# follow AGCSA, BIGGA, and GCSAA): the 25 most-followed accounts stacked on top
# of the 25 most-active ones. An account that appears in both lists is kept
# once (its first occurrence), matched on screenName, which is the first
# column of both tables.
top_fol_tweet_assoc <- rbind(top_fol_assoc, top_tweet_assoc)
top_fol_tweet_assoc <-
  top_fol_tweet_assoc[!duplicated(top_fol_tweet_assoc$screenName), ]
# Axis label formatter: renders each value with at most two significant
# digits so that the log2 axis labels carry fewer decimal places.
#   x: numeric vector of axis break values
#   returns: character vector of formatted labels, one per element of x
axisDigits2 <- function(x) {
  prettyNum(x, digits = 2)
}
# Scatter plot of all ATC followers: follower count (x) against average tweets
# per day (y), both on log2-transformed axes, with a grey linear-model trend
# line and repelled text labels for the accounts in top_fol_tweet.
# The per-day tweet rate is derived the same way as for the ranking tables:
# total tweets divided by account age in days on 2017-07-30.
full <- ggplot(
  data = mutate(
    atcFull,
    date = as.Date(created, format = "%Y-%m-%d"),
    today = as.Date("2017-07-30", format = "%Y-%m-%d"),
    days = as.numeric(today - date),
    statusesCount_pDay = statusesCount / days
  ),
  mapping = aes(x = followersCount, y = statusesCount_pDay)
) +
  background_grid(major = "xy") +
  geom_smooth(method = "lm", se = FALSE, colour = "grey") +
  geom_point(colour = "#3f7300", alpha = 0.4) +
  geom_text_repel(
    data = top_fol_tweet,
    mapping = aes(label = screenName),
    size = 3
  ) +
  scale_x_continuous(trans = "log2") +
  scale_y_continuous(trans = "log2", labels = axisDigits2) +
  labs(
    x = expression(log[2]~"number of followers"),
    y = expression(log[2]~"average tweets per day"),
    title = "asianturfgrass's most influential followers",
    caption = "Second degree followers and tweet rate of @asianturfgrass Twitter followers (data from 30 July 2017)\nadapted from https://github.com/ShirinG/blog_posts_prep/blob/master/twitter/twitter_codecentric.Rmd"
  )
# Show the plot, then write it out as a 12 x 6.75 SVG.
full
save_plot(
  "atc_followers.svg",
  plot = full,
  base_width = 12,
  base_height = 6.75
)
# Same scatter plot as for all followers, restricted to the ATC followers that
# also follow GCSAA, BIGGALtd, and AGCSA2: follower count (x) against average
# tweets per day (y) on log2-transformed axes, with a grey linear-model trend
# line and repelled text labels for the accounts in top_fol_tweet_assoc.
atc_assoc <- ggplot(
  data = mutate(
    atc_assoc_df,
    date = as.Date(created, format = "%Y-%m-%d"),
    today = as.Date("2017-07-30", format = "%Y-%m-%d"),
    days = as.numeric(today - date),
    statusesCount_pDay = statusesCount / days
  ),
  mapping = aes(x = followersCount, y = statusesCount_pDay)
) +
  background_grid(major = "xy") +
  geom_smooth(method = "lm", se = FALSE, colour = "grey") +
  geom_point(colour = "#3f7300", alpha = 0.4) +
  geom_text_repel(
    data = top_fol_tweet_assoc,
    mapping = aes(label = screenName),
    size = 3
  ) +
  scale_x_continuous(trans = "log2") +
  scale_y_continuous(trans = "log2", labels = axisDigits2) +
  labs(
    x = expression(log[2]~"number of followers"),
    y = expression(log[2]~"average tweets per day"),
    title = "asianturfgrass's most influential followers",
    subtitle = "among those accounts that also follow GCSAA, BIGGALtd, & AGCSA2",
    caption = "Second degree followers and tweet rate of @asianturfgrass Twitter followers (data from 30 July 2017)\nadapted from https://github.com/ShirinG/blog_posts_prep/blob/master/twitter/twitter_codecentric.Rmd"
  )
# Show the plot, then write it out as a 12 x 6.75 SVG.
atc_assoc
save_plot(
  "atc_followers_assoc.svg",
  plot = atc_assoc,
  base_width = 12,
  base_height = 6.75
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment