Skip to content

Instantly share code, notes, and snippets.

@statsmaths
Last active August 29, 2015 14:26
Show Gist options
  • Save statsmaths/87d66ccd17f3e88013c6 to your computer and use it in GitHub Desktop.
Save statsmaths/87d66ccd17f3e88013c6 to your computer and use it in GitHub Desktop.
#' Title: Facebook Page API Downloads
#' Author: Taylor Arnold (taylor.arnold@acm.org)
#' Created: 2015-07-28 20:50
#' Updated: 2015-08-15 15:43
#' Description: Cycle over the Facebook API to grab all
#' of the project page posts and save them as a
#' single csv file.
# Required packages: the script calls curl_download() (package curl),
# fromJSON() (jsonlite), and rbind.fill() (plyr). Load them up front so a
# missing dependency fails immediately rather than mid-download.
library(curl)
library(jsonlite)
library(plyr)

# You need to fill these three fields in appropriately
access_token <- ""
page_id <- ""

# Output locations for the scraped tables and downloaded image files
output_path_posts <- "~/Desktop/facebook_api_output_posts.csv"
output_path_photo <- "~/Desktop/facebook_api_output_photos.csv"
output_path_posts_comments <- "~/Desktop/facebook_api_output_posts_comments.csv"
output_path_photo_comments <- "~/Desktop/facebook_api_output_photo_comments.csv"
output_dir_photo_files <- "~/Desktop/img"
# Then, run the remainder of the file all at once; that's it!

# Page through the /posts endpoint, accumulating each page of results.
dataOutput <- NULL
url_start <- "https://graph.facebook.com/v2.4/"
url_end <- "/posts?limit=50"
since_str <- ""
url <- paste0(url_start, page_id, url_end, "&access_token=", access_token, since_str)
while (TRUE) {
  # Download the current API page to a temp file, parse it, then clean up.
  tmp <- tempfile()
  curl_download(url, tmp)
  l <- fromJSON(tmp)
  unlink(tmp)
  if (length(l$data) == 0L) break
  dataOutput <- rbind.fill(dataOutput, l$data)
  # Follow the pagination cursor; progress report after each page.
  url <- l$paging$`next`
  print(nrow(dataOutput))
  # BUG FIX: on the final page the API omits paging$next, so url is NULL;
  # the original looped again and crashed inside curl_download(NULL, ...).
  if (is.null(url)) break
}
# Flatten embedded whitespace so each post stays on one output line.
# Also strip carriage returns (the photo section already does this at its
# scrub step; the original posts scrub missed \r, which could still split
# records across lines on Windows-style text).
dataOutput$message <- gsub('[\n\r\t]', ' ', dataOutput$message)
dataOutput$story <- gsub('[\n\r\t]', ' ', dataOutput$story)
write.table(dataOutput, output_path_posts, row.names = FALSE)
# Now, repeat for photos
dataOutput <- NULL
url_start <- "https://graph.facebook.com/v2.4/"
url_end <- "/photos?fields=id,created_time,link,name,images&type=uploaded"
since_str <- ""
url <- paste0(url_start, page_id, url_end, "&access_token=", access_token, since_str)
while (TRUE) {
  # Download the current API page to a temp file, parse it, then clean up.
  tmp <- tempfile()
  curl_download(url, tmp)
  l <- fromJSON(tmp)
  unlink(tmp)
  # Guard against an empty page (consistent with the posts loop): calling
  # rbind.fill on an empty payload would otherwise fail.
  if (length(l$data) == 0L) break
  dataOutput <- rbind.fill(dataOutput, l$data)
  url <- l$paging$`next`
  print(nrow(dataOutput))
  # Stop once the API reports no further page.
  if (is.null(url)) break
}
# Each row's `images` entry is a table of renditions; take the first
# rendition's source URL for every photo.
url <- sapply(lapply(dataOutput$images, function(v) v$source), function(v) v[1])
# NOTE(review): columns are dropped by position (5 = images, then 4 after the
# shift) — this assumes the API returns fields in the requested order
# (id, created_time, link, name, images); confirm against a live response.
dataOutput <- dataOutput[, -5]
dataOutput$url <- url
# Scrub newlines, carriage returns, and tabs from every column so the
# tab-separated output stays one record per line.
# seq_len is safe even for a zero-column frame (1:ncol would give c(1, 0)).
for (i in seq_len(ncol(dataOutput))) {
  dataOutput[, i] <- gsub('[\n\r\t]', ' ', dataOutput[, i])
}
dataOutput <- dataOutput[, -4]
write.table(dataOutput, output_path_photo, row.names = FALSE, sep = "\t")
# grab comments from posts:
posts <- read.table(output_path_posts, as.is = TRUE, header = TRUE)$id
url_end <- "/comments?fields=from,message,created_time,id"
dataOutput <- NULL
# seq_along handles the zero-posts case (1:length(posts) would iterate 1, 0
# and issue two bogus API requests).
for (i in seq_along(posts)) {
  url <- paste0(url_start, posts[i], url_end, "&access_token=", access_token, since_str)
  tmp <- tempfile()
  curl_download(url, tmp)
  l <- fromJSON(tmp)$data
  unlink(tmp)
  if (length(l)) {
    # Flatten the nested `from` record down to the commenter's display name.
    if (any(names(l) == "from")) {
      l$from <- l$from$name
      names(l)[names(l) == "from"] <- "commenter_name"
    }
    if (length(l)) dataOutput <- rbind.fill(dataOutput, l)
  }
  print(nrow(dataOutput))
}
# NOTE(review): positional reorder to id, commenter_name, message,
# created_time — relies on the field order requested in url_end.
write.table(dataOutput[, c(4, 1, 2, 3)], output_path_posts_comments, row.names = FALSE, sep = "\t")
# grab comments from photos:
photos <- as.character(read.table(output_path_photo, as.is = TRUE, header = TRUE)$id)
# The comments endpoint addresses photos as <page_id>_<photo_id>.
photos <- paste0(page_id, "_", photos)
url_end <- "/comments?fields=from,message,created_time,id"
dataOutput <- NULL
# seq_along handles the zero-photos case (1:length would iterate 1, 0).
for (i in seq_along(photos)) {
  url <- paste0(url_start, photos[i], url_end, "&access_token=", access_token, since_str)
  tmp <- tempfile()
  curl_download(url, tmp)
  l <- fromJSON(tmp)$data
  unlink(tmp)
  # Guard empty payloads before touching names(l) — consistent with the
  # posts-comments loop above, which wraps this in a length check.
  if (length(l)) {
    # Flatten the nested `from` record down to the commenter's display name.
    if (any(names(l) == "from")) {
      l$from <- l$from$name
      names(l)[names(l) == "from"] <- "commenter_name"
    }
    dataOutput <- rbind.fill(dataOutput, l)
  }
  print(nrow(dataOutput))
}
# NOTE(review): positional reorder to id, commenter_name, message,
# created_time — relies on the field order requested in url_end.
write.table(dataOutput[, c(4, 1, 2, 3)], output_path_photo_comments, row.names = FALSE, sep = "\t")
# download all of the photos
# showWarnings = FALSE: re-running the script with the directory already
# present should not emit a warning.
dir.create(output_dir_photo_files, showWarnings = FALSE)
photos <- read.table(output_path_photo, as.is = TRUE, header = TRUE)$url
# BUG FIX: the original `for (i in length(photos))` iterated over the single
# value length(photos), so only the LAST photo was ever downloaded.
# seq_along visits every index and handles the zero-photos case.
for (i in seq_along(photos)) {
  out <- basename(photos[i])
  # Trim anything after the ".jpg" extension (CDN query strings etc.).
  # regexpr returns -1 when ".jpg" is absent; in that case keep the full
  # basename instead of producing a 2-character garbage filename.
  pos <- regexpr(".jpg", out, fixed = TRUE)
  if (pos > 0L) out <- substr(out, 1, pos + 3L)
  curl_download(photos[i], paste0(output_dir_photo_files, "/", out))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment