Skip to content

Instantly share code, notes, and snippets.

@statsmaths
Last active August 29, 2015 14:26
Show Gist options
  • Save statsmaths/87d66ccd17f3e88013c6 to your computer and use it in GitHub Desktop.
Save statsmaths/87d66ccd17f3e88013c6 to your computer and use it in GitHub Desktop.
#' Title: Facebook Page API Downloads
#' Author: Taylor Arnold (taylor.arnold@acm.org)
#' Created: 2015-07-28 20:50
#' Updated: 2015-08-15 15:43
#' Description: Cycle over the Facebook API to grab all
#' of the project page posts and save them as a
#' single csv file.
# Required packages: the script calls curl_download() (package curl),
# fromJSON() (jsonlite), and rbind.fill() (plyr). Load them up front so a
# missing dependency fails immediately rather than mid-download.
library(curl)
library(jsonlite)
library(plyr)

# You need to fill these three fields in appropriately
access_token <- ""
page_id <- ""

# Output locations for the scraped tables and downloaded image files
output_path_posts <- "~/Desktop/facebook_api_output_posts.csv"
output_path_photo <- "~/Desktop/facebook_api_output_photos.csv"
output_path_posts_comments <- "~/Desktop/facebook_api_output_posts_comments.csv"
output_path_photo_comments <- "~/Desktop/facebook_api_output_photo_comments.csv"
output_dir_photo_files <- "~/Desktop/img"
# Then, run the remainder of the file all at once; that's it!

# Page through the /posts endpoint, accumulating each page of results.
dataOutput <- NULL
url_start <- "https://graph.facebook.com/v2.4/"
url_end <- "/posts?limit=50"
since_str <- ""
url <- paste0(url_start, page_id, url_end, "&access_token=", access_token, since_str)
while (TRUE) {
  # Download the current API page to a temp file, parse it, then clean up.
  tmp <- tempfile()
  curl_download(url, tmp)
  l <- fromJSON(tmp)
  unlink(tmp)
  if (length(l$data) == 0L) break
  dataOutput <- rbind.fill(dataOutput, l$data)
  # Follow the pagination cursor; progress report after each page.
  url <- l$paging$`next`
  print(nrow(dataOutput))
  # BUG FIX: on the final page the API omits paging$next, so url is NULL;
  # the original looped again and crashed inside curl_download(NULL, ...).
  if (is.null(url)) break
}
# Flatten embedded whitespace so each post stays on one output line.
# Also strip carriage returns (the photo section already does this at its
# scrub step; the original posts scrub missed \r, which could still split
# records across lines on Windows-style text).
dataOutput$message <- gsub('[\n\r\t]', ' ', dataOutput$message)
dataOutput$story <- gsub('[\n\r\t]', ' ', dataOutput$story)
write.table(dataOutput, output_path_posts, row.names = FALSE)
# Now, repeat for photos
dataOutput <- NULL
url_start <- "https://graph.facebook.com/v2.4/"
url_end <- "/photos?fields=id,created_time,link,name,images&type=uploaded"
since_str <- ""
url <- paste0(url_start, page_id, url_end, "&access_token=", access_token, since_str)
while (TRUE) {
  # Download the current API page to a temp file, parse it, then clean up.
  tmp <- tempfile()
  curl_download(url, tmp)
  l <- fromJSON(tmp)
  unlink(tmp)
  # Guard against an empty page (consistent with the posts loop): calling
  # rbind.fill on an empty payload would otherwise fail.
  if (length(l$data) == 0L) break
  dataOutput <- rbind.fill(dataOutput, l$data)
  url <- l$paging$`next`
  print(nrow(dataOutput))
  # Stop once the API reports no further page.
  if (is.null(url)) break
}
# Each row's `images` entry is a table of renditions; take the first
# rendition's source URL for every photo.
url <- sapply(lapply(dataOutput$images, function(v) v$source), function(v) v[1])
# NOTE(review): columns are dropped by position (5 = images, then 4 after the
# shift) — this assumes the API returns fields in the requested order
# (id, created_time, link, name, images); confirm against a live response.
dataOutput <- dataOutput[, -5]
dataOutput$url <- url
# Scrub newlines, carriage returns, and tabs from every column so the
# tab-separated output stays one record per line.
# seq_len is safe even for a zero-column frame (1:ncol would give c(1, 0)).
for (i in seq_len(ncol(dataOutput))) {
  dataOutput[, i] <- gsub('[\n\r\t]', ' ', dataOutput[, i])
}
dataOutput <- dataOutput[, -4]
write.table(dataOutput, output_path_photo, row.names = FALSE, sep = "\t")
# grab comments from posts:
posts <- read.table(output_path_posts, as.is = TRUE, header = TRUE)$id
url_end <- "/comments?fields=from,message,created_time,id"
dataOutput <- NULL
# seq_along handles the zero-posts case (1:length(posts) would iterate 1, 0
# and issue two bogus API requests).
for (i in seq_along(posts)) {
  url <- paste0(url_start, posts[i], url_end, "&access_token=", access_token, since_str)
  tmp <- tempfile()
  curl_download(url, tmp)
  l <- fromJSON(tmp)$data
  unlink(tmp)
  if (length(l)) {
    # Flatten the nested `from` record down to the commenter's display name.
    if (any(names(l) == "from")) {
      l$from <- l$from$name
      names(l)[names(l) == "from"] <- "commenter_name"
    }
    if (length(l)) dataOutput <- rbind.fill(dataOutput, l)
  }
  print(nrow(dataOutput))
}
# NOTE(review): positional reorder to id, commenter_name, message,
# created_time — relies on the field order requested in url_end.
write.table(dataOutput[, c(4, 1, 2, 3)], output_path_posts_comments, row.names = FALSE, sep = "\t")
# grab comments from photos:
photos <- as.character(read.table(output_path_photo, as.is = TRUE, header = TRUE)$id)
# The comments endpoint addresses photos as <page_id>_<photo_id>.
photos <- paste0(page_id, "_", photos)
url_end <- "/comments?fields=from,message,created_time,id"
dataOutput <- NULL
# seq_along handles the zero-photos case (1:length would iterate 1, 0).
for (i in seq_along(photos)) {
  url <- paste0(url_start, photos[i], url_end, "&access_token=", access_token, since_str)
  tmp <- tempfile()
  curl_download(url, tmp)
  l <- fromJSON(tmp)$data
  unlink(tmp)
  # Guard empty payloads before touching names(l) — consistent with the
  # posts-comments loop above, which wraps this in a length check.
  if (length(l)) {
    # Flatten the nested `from` record down to the commenter's display name.
    if (any(names(l) == "from")) {
      l$from <- l$from$name
      names(l)[names(l) == "from"] <- "commenter_name"
    }
    dataOutput <- rbind.fill(dataOutput, l)
  }
  print(nrow(dataOutput))
}
# NOTE(review): positional reorder to id, commenter_name, message,
# created_time — relies on the field order requested in url_end.
write.table(dataOutput[, c(4, 1, 2, 3)], output_path_photo_comments, row.names = FALSE, sep = "\t")
# download all of the photos
# showWarnings = FALSE: re-running the script with the directory already
# present should not emit a warning.
dir.create(output_dir_photo_files, showWarnings = FALSE)
photos <- read.table(output_path_photo, as.is = TRUE, header = TRUE)$url
# BUG FIX: the original `for (i in length(photos))` iterated over the single
# value length(photos), so only the LAST photo was ever downloaded.
# seq_along visits every index and handles the zero-photos case.
for (i in seq_along(photos)) {
  out <- basename(photos[i])
  # Trim anything after the ".jpg" extension (CDN query strings etc.).
  # regexpr returns -1 when ".jpg" is absent; in that case keep the full
  # basename instead of producing a 2-character garbage filename.
  pos <- regexpr(".jpg", out, fixed = TRUE)
  if (pos > 0L) out <- substr(out, 1, pos + 3L)
  curl_download(photos[i], paste0(output_dir_photo_files, "/", out))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment