Skip to content

Instantly share code, notes, and snippets.

@epijim
Last active November 20, 2023 23:29
Show Gist options
  • Star 10 You must be signed in to star a gist
  • Fork 7 You must be signed in to fork a gist
  • Save epijim/8524938 to your computer and use it in GitHub Desktop.
Save epijim/8524938 to your computer and use it in GitHub Desktop.
scrape facebook from R. Based off
###############################################################################################
## ##
## Setup ##
## ##
###############################################################################################
# One-off installation of the packages this script attaches (all from CRAN):
# install.packages("Rfacebook") # from CRAN
# install.packages("Rook") # from CRAN
# install.packages("igraph") # from CRAN
# NOTE(review): hard-coded, machine-specific working directory. Every relative
# path below ("fb_oauth", the *.pdf outputs, "photos/") resolves against it, so
# it must be adjusted before running anywhere else.
setwd("//fs-home/home$/jab254/Desktop/Facebook R")
# Facebook Graph API client: getUsers(), getFriends(), getNetwork(), getPage()
library(Rfacebook)
# Not referenced directly in this script - presumably needed by fbOAuth(); TODO confirm
library(Rook)
# Graph construction, layouts, betweenness and network plotting
library(igraph)
# quantcut() for the betweenness colour bins
library(gtools)
# Presumably the source of rbind.fill() used when combining batches - TODO confirm
library(reshape)
# mdy() for parsing birthday strings
library(lubridate)
# Time-series plot of page metrics
library(ggplot2)
# date_format() for the plot's x-axis labels
library(scales)
# First run only: create an app at https://developers.facebook.com/apps and
# paste its credentials below to obtain an OAuth token.
#fb_oauth <- fbOAuth(app_id="APP ID", app_secret="APP SECRET")
# Once the fb_oauth connection exists, save it to disk so it can be reused
# in later sessions without authenticating again:
#save(fb_oauth, file="fb_oauth")
# On every later run, restoring the saved token is all that is needed:
load("fb_oauth")
# getUsers() returns public information about one or more Facebook users
me <- getUsers("me", token=fb_oauth)
me$name # my name
# Friend list of the authenticated user; later code reads its `id` column
my_friends <- getFriends(token=fb_oauth, simplify=TRUE)
head(my_friends, n=10) # first 10 friends (author's note: ID reflects the order they joined FB - unverified)
nrow(my_friends) # number of friends
###############################################################################################
## ##
## Download info ##
## ##
###############################################################################################
# Requesting too many friends in one call gets rejected by the Facebook API,
# so the friend ids are looked up in batches of 80 and the per-batch results
# are stitched back together.
#
# The batches are derived from the actual number of friends. The previous
# hard-coded row ranges (1:80, ..., 481:nrow(my_friends)) only worked for
# 481-560 friends: with fewer, the slices contained all-NA rows and
# 481:nrow() counted downwards; with more, the last batch exceeded 80 ids.
friends_per_request <- 80L
friend_ids <- my_friends$id
# Batch number (1, 1, ..., 2, 2, ...) for each friend, in original order.
batch_of_friend <- ceiling(seq_along(friend_ids) / friends_per_request)
my_friends_info_batches <- lapply(
  split(friend_ids, batch_of_friend),
  function(ids) getUsers(ids, token = fb_oauth, private_info = TRUE)
)
# rbind.fill() is used (rather than rbind) because batches may not all carry
# the same set of columns; missing ones are filled with NA.
my_friends_info <- do.call(rbind.fill, unname(my_friends_info_batches))
# SAVE IT! Once as an R object, once as a tab-separated text file.
save(my_friends_info, file = "my_friends_info.Rda")
write.table(my_friends_info, "my_friends_info.txt", sep = "\t")
###############################################################################################
## ##
## Take a gander ##
## ##
###############################################################################################
# Summary: frequency tables of the main categorical profile fields.
table(my_friends_info$relationship_status)
table(my_friends_info$gender)
table(my_friends_info$location)
table(my_friends_info$hometown)
# Age histogram
# Parse the birthday strings as month/day/year dates. Despite the column
# name, `year` holds the parsed date as returned by mdy(), not just the year.
# NOTE(review): birthdays published without a year presumably fail to parse
# and become NA - confirm against the data.
my_friends_info$year <- mdy(my_friends_info$birthday)
x <- my_friends_info$year
# Write the histogram to bornhist.pdf in the working directory.
pdf("bornhist.pdf")
# freq = TRUE (spelled out: `T` is an ordinary variable that can be
# reassigned) plots counts rather than densities.
h <- hist(x,
  breaks = 15, freq = TRUE,
  col = "blue", xlab = "Year born (according to FB)",
  main = ""
)
dev.off()
# Print the names of friends for each of two relationship statuses, one
# status at a time. (Author's observation: not surprisingly, it's the least
# mature/youngest people I know on FB who have these statuses...)
# %in% treats a missing status as "no match", so friends without a status
# are skipped.
invisible(lapply(
  c("It's complicated", "In an open relationship"),
  function(status) {
    has_status <- my_friends_info$relationship_status %in% status
    print(my_friends_info$name[has_status])
  }
))
###############################################################################################
## ##
## prepare net ##
## ##
###############################################################################################
# Fetch the friendship network among my own friends (who is friends with
# whom) as an adjacency matrix.
my_network <- getNetwork(fb_oauth, format = "adj.matrix")

# Friends whose row sums to zero share no mutual friend with me: singletons.
singletons <- rowSums(my_network) == 0

# Build the graph from the non-singleton rows and columns only.
# NOTE(review): graph.adjacency() builds a directed graph unless told
# otherwise - confirm an undirected graph is not what was intended.
my_graph <- graph.adjacency(my_network[!singletons, !singletons])

# Collapse the graph to its simplified form (author: "make connections one way").
my_graph_simple <- simplify(my_graph)

# Vertex coordinates shared by plots 1-4: DrL layout with the simmer-phase
# attraction turned off.
layout <- layout.drl(
  my_graph_simple,
  options = list(simmer.attraction = 0)
)

# Edge styling: faint, semi-transparent olive lines, as thin as possible.
E(my_graph_simple)$color <- rgb(0.5, 0.5, 0, 0.15)
E(my_graph_simple)$width <- 1e-04
# Plot 0 - layout left on auto, no vertex labels.
# (The stray trailing comma after vertex.label = NA, which passed an empty
# extra argument to plot(), has been removed.)
pdf("0_auto.pdf")
plot(my_graph_simple,
  layout = layout.auto,
  vertex.label = NA
)
dev.off()
# Plot 1 - precomputed layout, with (tiny) vertex names.
# edge.arrow.size = 0 hides the arrow heads.
pdf("1_default.pdf")
plot(my_graph_simple,
  vertex.size = 2,
  vertex.label.cex = 0.2,
  edge.arrow.size = 0, edge.curved = TRUE, layout = layout
)
dev.off()
# Plot 2 - same as plot 1 but without names.
pdf("2_default.pdf")
plot(my_graph_simple,
  vertex.size = 2,
  vertex.label = NA,
  vertex.label.cex = 0.2,
  edge.arrow.size = 0, edge.curved = TRUE, layout = layout
)
dev.off()
# Let's colour based on connectedness, measured by betweenness: the number of
# "shortest paths" going through a particular individual.
hc4 <- heat.colors(10)
g.bet <- betweenness(my_graph_simple)
# Bin betweenness into deciles. quantcut() returns a factor, so indexing the
# palette with it picks one colour per bin via the factor's integer codes.
# (A previous cut(g.bet, quantile(g.bet)) assignment was removed: its result
# was overwritten immediately, and cut() stops with "'breaks' are not unique"
# whenever two quartiles coincide, aborting the script for a dead statement.)
vcolors <- quantcut(g.bet, q = seq(0, 1, by = 0.1))
vcolors2 <- hc4[vcolors]
# Plot 3 - vertices coloured by betweenness bin, names hidden.
pdf("3_coloured.pdf")
plot(
  my_graph_simple,
  vertex.size = 2,
  vertex.color = vcolors2,
  vertex.label = NA,
  vertex.label.cex = 2,
  edge.arrow.size = 0,
  edge.curved = TRUE,
  layout = layout
)
dev.off()

# Plot 4 - same colouring, names shown (labels left at their default text).
pdf("4_coloured_withnames.pdf")
plot(
  my_graph_simple,
  vertex.size = 2,
  vertex.color = vcolors2,
  vertex.label.cex = 0.5,
  edge.arrow.size = 0,
  edge.curved = TRUE,
  layout = layout
)
dev.off()
###############################################################################################
## ##
## A facebook page ##
## ##
###############################################################################################
# Download up to 5000 posts from each of three public pages.
# Our MCR social events page
mcrents <- getPage("jesusmcrents", token = fb_oauth, n = 5000)
# The pub on my street
brewhouse <- getPage("TheCambridgeBrewHouse", token = fb_oauth, n = 5000)
# A brilliant cartoon
archer <- getPage("ArcherFX", token = fb_oauth, n = 5000)

# Show, for each page, the post with the highest number of likes.
# The figures quoted are the author's results at the time of writing.
# -> most popular post ever had 3 likes and 0 comments
mcrents[which.max(mcrents$likes_count), ]
# -> most popular post ever had 16 likes and 2 comments
brewhouse[which.max(brewhouse$likes_count), ]
# -> most popular post ever had 73,013 likes and 2146 comments,
#    and 9315 shares and was this video
#    https://www.facebook.com/photo.php?v=10200194370090771
archer[which.max(archer$likes_count), ]
# Convert Facebook's timestamp strings to R date-times.
#
# datestring: character vector in the form "YYYY-MM-DDTHH:MM:SS+0000",
#             e.g. "2014-01-20T12:34:56+0000" (the "+0000" suffix is matched
#             literally, so only UTC-offset-zero stamps parse).
# Returns:    a POSIXct vector in the GMT time zone; strings that do not
#             match the format become NA.
#
# The value is returned directly (visibly). Previously the body ended in an
# assignment to a local `date`, which made the result invisible when the
# function was called at the prompt.
format.facebook.date <- function(datestring) {
  as.POSIXct(datestring, format = "%Y-%m-%dT%H:%M:%S+0000", tz = "GMT")
}
# Average one per-post metric by calendar month.
#
# metric: name of the metric without its "_count" suffix, e.g. "likes"
#         reads the column `likes_count`.
# page:   data frame of posts with a `month` column ("YYYY-MM") and the
#         `<metric>_count` column. Defaults to the global `archer`, which is
#         what the function was previously hard-wired to, so existing
#         one-argument calls behave as before.
# Returns: data frame with columns `month` (Date, set to the 15th of each
#          month so points sit mid-month), `x` (mean count) and `metric`.
aggregate.metric <- function(metric, page = archer) {
  count_column <- paste0(metric, "_count")
  counts <- page[[count_column]]
  if (is.null(counts)) {
    stop("page has no column '", count_column, "'", call. = FALSE)
  }
  m <- aggregate(counts, by = list(month = page$month), FUN = mean)
  m$month <- as.Date(paste0(m$month, "-15"))
  m$metric <- metric
  m
}
# Build one long data frame of monthly averages for likes, comments and
# shares on the Archer page.
# 1. Parse each post's creation timestamp and derive its "YYYY-MM" month key.
archer[["datetime"]] <- format.facebook.date(archer[["created_time"]])
archer[["month"]] <- format(archer[["datetime"]], "%Y-%m")
# 2. Aggregate each metric separately, then stack the results row-wise.
df.list <- lapply(
  c("likes", "comments", "shares"),
  aggregate.metric
)
df <- do.call(rbind, df.list)
# Visualize the evolution of each metric over time.
# Plot last - archer page: one line per metric, log-scaled y axis, one x-axis
# tick per year.
pdf("archer.pdf")
# The plot is wrapped in print() because ggplot objects are only drawn when
# printed. At the interactive prompt that happens automatically, but not when
# this script is run through source(), which left archer.pdf empty.
print(
  ggplot(df, aes(x = month, y = x, group = metric)) +
    geom_line(aes(color = metric)) +
    scale_x_date(breaks = "years", labels = date_format("%Y")) +
    scale_y_log10("Average count per post",
      breaks = c(10, 100, 1000, 10000, 50000)
    ) +
    theme_bw() +
    theme(axis.title.x = element_blank())
)
dev.off()
#########################################################################
# Pull photos from Facebook:
# download the small profile picture of each friend into photos/<id>.jpg.
#
# friends.id / friends.pic are not defined anywhere in this script, so fall
# back to the downloaded friend info when they do not already exist.
# NOTE(review): assumes getUsers() output carries a `picture` column holding
# the picture URL - TODO confirm against the Rfacebook version in use.
if (!exists("friends.id")) friends.id <- my_friends_info$id
if (!exists("friends.pic")) friends.pic <- my_friends_info$picture
# showWarnings = FALSE: re-running with an existing photos/ directory is fine.
dir.create("photos", showWarnings = FALSE)
# seq_along() (not 1:length()) so an empty friend list means zero iterations
# instead of iterating over c(1, 0).
for (i in seq_along(friends.id)) {
  download.file(
    as.character(friends.pic[i]),
    destfile = file.path("photos", paste0(friends.id[i], ".jpg")),
    mode = "wb" # binary mode so the JPEGs are not corrupted on Windows
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment