Skip to content

Instantly share code, notes, and snippets.

@miriamfs
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save miriamfs/b78a8b26ea0f2895788f to your computer and use it in GitHub Desktop.
Save miriamfs/b78a8b26ea0f2895788f to your computer and use it in GitHub Desktop.
ESWC 2014 Tutorial. Facebook Data Collector
package uk.ac.open.kmi.data.collector;
import com.restfb.Connection;
import com.restfb.DefaultFacebookClient;
import com.restfb.FacebookClient;
import com.restfb.types.*;
import java.io.FileInputStream;
import java.io.PrintWriter;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
/**
 * Command-line tool that downloads posts and comments from Facebook groups
 * and pages through the RestFB client and writes them to three CSV files:
 * <ul>
 *   <li>{@code posts.csv}   - one row per post or comment,</li>
 *   <li>{@code replies.csv} - one row per (post, comment) reply link,</li>
 *   <li>{@code groups.csv}  - one row per group or page.</li>
 * </ul>
 *
 * <p>Configuration (app credentials, group/page ids, post limit) is read from
 * a {@code .properties} file whose path is passed to the constructor.
 *
 * <p>Errors are reported with {@code printStackTrace()} and collection then
 * continues with the next item; no method of this class propagates a checked
 * exception to its caller.
 */
public class FacebookDataCollector {

    /* Constants for the collection */
    private static final int DEFAULT_MAX_POSTS = 10;

    /* Output files */
    private static final String POSTS = "posts.csv";
    private static final String REPLIES = "replies.csv";
    private static final String GROUPS = "groups.csv";

    /* Encoding of the output files; fixed so that non-ASCII text does not depend on the platform default */
    private static final String OUTPUT_CHARSET = "UTF-8";

    /* Timestamp layout used in the created_time column */
    private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /* List of properties to read from the configuration file */
    private String appId;
    private String appSecret;
    private String[] groupList = new String[0];
    private String[] pageList = new String[0];
    private int maxPosts = DEFAULT_MAX_POSTS;

    /* Facebook client */
    private FacebookClient facebookClient;

    /* List of writers for the output files */
    private PrintWriter postsOutputFile;
    private PrintWriter repliesOutputFile;
    private PrintWriter groupsOutputFile;

    /* Set to avoid duplications in the users information */
    private HashSet<Long> userIds = new HashSet<Long>();

    /**
     * Reads the configuration, obtains an app access token and opens the
     * output files.
     *
     * <p>Any failure is printed and swallowed, so the instance may be only
     * partially initialised afterwards (for example without a Facebook
     * client); later calls then report their own errors.
     *
     * @param propertiesFile path to the {@code .properties} configuration file
     */
    public FacebookDataCollector(String propertiesFile) {
        try {
            readProperties(propertiesFile);
            FacebookClient.AccessToken accessToken =
                    new DefaultFacebookClient().obtainAppAccessToken(appId, appSecret);
            this.facebookClient = new DefaultFacebookClient(accessToken.getAccessToken());
            openOutputFiles();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Collects every configured page and then every configured group,
     * printing how long each one took (in whole seconds).
     */
    public void collectData() {
        for (String page : this.pageList) {
            long start = System.currentTimeMillis();
            collectPageData(page);
            long duration = (System.currentTimeMillis() - start) / 1000;
            System.out.println("Getting information for the page " + page + " took " + duration + " (secs)");
        }
        for (String group : this.groupList) {
            long start = System.currentTimeMillis();
            collectGroupData(group);
            long duration = (System.currentTimeMillis() - start) / 1000;
            System.out.println("Getting information for the group " + group + " took " + duration + " (secs)");
        }
    }

    /**
     * Fetches one page, stores its description row and collects its feed.
     *
     * @param pageID Facebook id of the page
     */
    public void collectPageData(String pageID) {
        try {
            System.out.println("page -----> " + pageID);
            Page page = facebookClient.fetchObject(pageID, Page.class);
            storePageInformation(page);
            collectPosts(pageID);
        } catch (Exception e) {
            System.out.println("information for the page " + pageID + " could not be collected");
            e.printStackTrace();
        }
    }

    /**
     * Fetches one group, stores its description row and collects its feed.
     *
     * @param fbGroupId Facebook id of the group
     */
    public void collectGroupData(String fbGroupId) {
        try {
            System.out.println("group -----> " + fbGroupId);
            Group group = facebookClient.fetchObject(fbGroupId, Group.class);
            storeGroupInformation(group);
            collectPosts(fbGroupId);
        } catch (Exception e) {
            System.out.println("information for the group " + fbGroupId + " could not be collected");
            e.printStackTrace();
        }
    }

    /**
     * Requests the {@code <id>/feed} connection of a group or page and stores
     * its posts. The set of seen user ids is reset for every feed.
     *
     * @param id Facebook id of the group or page
     */
    public void collectPosts(String id) {
        Connection<Post> pageFeed = facebookClient.fetchConnection(id + "/feed", Post.class);
        this.userIds = new HashSet<Long>();
        storePostInfo(pageFeed);
    }

    // -----------------------
    // CSV Storage
    // -----------------------

    /**
     * Stores the posts of a feed, together with their comments, until
     * {@code maxPosts} posts have been handled.
     *
     * <p>The limit is enforced per post, not per result page, and it is
     * re-checked before the connection is asked for another page so that no
     * page is requested once the limit has been reached.
     *
     * @param groupFeed paged feed connection; {@code null} is ignored
     */
    public void storePostInfo(Connection<Post> groupFeed) {
        if (groupFeed == null) {
            return;
        }
        int numPostsCollected = 0;
        for (List<Post> feedPage : groupFeed) {
            for (Post post : feedPage) {
                if (numPostsCollected >= this.maxPosts) {
                    return;
                }
                storePost(post);
                extractCommentsFromPost(post);
                numPostsCollected++;
            }
            // Leave before the iterator advances to the next result page.
            if (numPostsCollected >= this.maxPosts) {
                return;
            }
        }
    }

    /** Stores every comment that was delivered inline with the given post. */
    private void extractCommentsFromPost(Post post) {
        Post.Comments comments = post.getComments();
        if (comments == null) {
            return;
        }
        List<Comment> commentList = comments.getData();
        if (commentList == null) {
            return;
        }
        for (Comment comment : commentList) {
            storeComment(post, comment);
        }
    }

    /**
     * Writes one row to the posts file for an initial post (starter flag
     * {@code true}) and records its author id.
     *
     * <p>The post id is expected to have the form {@code <groupId>_<postId>}.
     * If anything goes wrong (unexpected id layout, missing author or
     * creation time, ...) the error is printed and the post is skipped.
     *
     * @param post post to store
     */
    public void storePost(Post post) {
        try {
            String[] groupPostId = post.getId().split("_");
            String fbGroupId = groupPostId[0];
            String postId = groupPostId[1];
            String userId = post.getFrom().getId();
            String userName = post.getFrom().getName();

            this.postsOutputFile.println(formatPostRow(postId, fbGroupId, userId, userName,
                    post.getMessage(), post.getCreatedTime(), true));
            rememberUser(userId);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes one row to the posts file for a comment (starter flag
     * {@code false}), one row to the replies file linking it to its post, and
     * records its author id.
     *
     * <p>The comment id may be either {@code <commentId>} or
     * {@code <postId>_<commentId>}; any other layout is reported and the
     * comment is skipped.
     *
     * @param post    post the comment belongs to
     * @param comment comment to store
     */
    public void storeComment(Post post, Comment comment) {
        try {
            String[] groupPostId = post.getId().split("_");
            String fbGroupId = groupPostId[0];
            String postId = groupPostId[1];

            String[] postCommentId = comment.getId().split("_");
            String commentId;
            if (postCommentId.length == 1) {
                commentId = postCommentId[0];
            } else if (postCommentId.length == 2) {
                commentId = postCommentId[1];
            } else {
                throw new IllegalArgumentException("illegal comment id exception");
            }

            String userId = comment.getFrom().getId();
            String userName = comment.getFrom().getName();

            // Same column layout as storePost, including the separator before the starter flag.
            this.postsOutputFile.println(formatPostRow(commentId, fbGroupId, userId, userName,
                    comment.getMessage(), comment.getCreatedTime(), false));
            this.repliesOutputFile.println(fbGroupId + "," + postId + "," + commentId);
            rememberUser(userId);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes one row describing a group to the groups file. A group without
     * an owner gets owner id {@code 0} and an empty owner name.
     *
     * @param group group to describe
     */
    public void storeGroupInformation(Group group) {
        try {
            String groupDescription = stripQuotesAndBackslashes(group.getDescription());
            NamedFacebookType owner = group.getOwner();
            String userId = "0";
            String userName = "";
            if (owner != null) {
                userId = owner.getId();
                userName = owner.getName();
            }
            this.groupsOutputFile.println(group.getId() + ",\"" + group.getName() + "\",\"" + groupDescription
                    + "\"," + group.getPrivacy() + "," + group.getLink() + "," + userId + ",\"" + userName + "\"");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes one row describing a page to the groups file, using the same
     * columns as a group. Pages have no owner, so the owner columns are
     * {@code 0} and empty. The privacy column is {@code OPEN} when the page
     * reports itself as published and {@code CLOSED} otherwise (including
     * when the flag is absent).
     *
     * @param page page to describe
     */
    public void storePageInformation(Page page) {
        try {
            // Null-safe: an absent flag must not abort the whole row.
            String pagePrivacy = Boolean.TRUE.equals(page.getIsPublished()) ? "OPEN" : "CLOSED";
            // Sanitised like the group description so quotes cannot break the quoted field.
            String pageDescription = stripQuotesAndBackslashes(page.getDescription());
            String userId = "0";
            String userName = "";
            this.groupsOutputFile.println(page.getId() + ",\"" + page.getName() + "\",\"" + pageDescription
                    + "\"," + pagePrivacy + "," + page.getLink() + "," + userId + ",\"" + userName + "\"");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds one line of the posts file:
     * {@code id,group,user,"name","message",created_time,starter}.
     */
    private static String formatPostRow(String rowId, String groupId, String userId, String userName,
                                        String message, Date created, boolean starter) {
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per row.
        String dateTime = new java.text.SimpleDateFormat(DATE_PATTERN).format(created);
        return rowId + "," + groupId + "," + userId + ",\"" + userName + "\",\""
                + cleanMessage(message) + "\"," + dateTime + "," + starter;
    }

    /**
     * Makes a message safe for a single quoted CSV field: double quotes and
     * backslashes become spaces, commas are removed and line feeds become
     * full stops. {@code null} is returned unchanged.
     */
    private static String cleanMessage(String text) {
        if (text == null) {
            return null;
        }
        return stripQuotesAndBackslashes(text).replace(",", "").replace("\n", ".");
    }

    /** Replaces double quotes and backslashes with spaces; {@code null} is returned unchanged. */
    private static String stripQuotesAndBackslashes(String text) {
        if (text == null) {
            return null;
        }
        return text.replace("\"", " ").replace("\\", " ");
    }

    /** Adds the numeric user id to the set of users seen in the current feed. */
    private void rememberUser(String userId) {
        this.userIds.add(Long.parseLong(userId));
    }

    // -----------------------
    // Setting up properties
    // -----------------------

    /**
     * Loads the configuration file and fills in the credentials, the group
     * and page id lists and the post limit.
     *
     * <p>Recognised keys: {@code appId}, {@code appSecret}, {@code FbGroups},
     * {@code FbPages} (both comma separated) and {@code maxPosts}. Missing or
     * blank list keys yield empty lists; a missing or blank {@code maxPosts}
     * keeps the default. Defaults are assigned before the file is opened, so
     * an unreadable file leaves the collector with nothing to collect instead
     * of with {@code null} lists.
     *
     * @param propertiesFile path to the {@code .properties} file
     */
    public void readProperties(String propertiesFile) {
        this.groupList = new String[0];
        this.pageList = new String[0];
        this.maxPosts = DEFAULT_MAX_POSTS;
        try {
            Properties properties = new Properties();
            FileInputStream input = new FileInputStream(propertiesFile);
            try {
                properties.load(input);
            } finally {
                input.close();
            }

            //Get the key data to connect
            this.appId = properties.getProperty("appId");
            this.appSecret = properties.getProperty("appSecret");

            //Get the set of fb groups and pages from which collect information
            this.groupList = splitIds(properties.getProperty("FbGroups"));
            this.pageList = splitIds(properties.getProperty("FbPages"));

            String maxPostsNumber = properties.getProperty("maxPosts");
            if (maxPostsNumber != null && !maxPostsNumber.trim().isEmpty()) {
                this.maxPosts = Integer.parseInt(maxPostsNumber.trim());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Splits a comma separated id list, trimming each entry and dropping
     * blank ones. A {@code null} or blank input gives an empty array.
     */
    private static String[] splitIds(String commaSeparated) {
        java.util.ArrayList<String> ids = new java.util.ArrayList<String>();
        if (commaSeparated != null) {
            for (String candidate : commaSeparated.split(",")) {
                String id = candidate.trim();
                if (!id.isEmpty()) {
                    ids.add(id);
                }
            }
        }
        return ids.toArray(new String[ids.size()]);
    }

    /**
     * Creates (or truncates) the three output files in the working directory
     * and writes their header lines. Failures are printed, not thrown.
     */
    public void openOutputFiles() {
        try {
            this.postsOutputFile = new PrintWriter(POSTS, OUTPUT_CHARSET);
            this.postsOutputFile.println("post_id, group_id, user_id, name, message, created_time, starter");
            this.repliesOutputFile = new PrintWriter(REPLIES, OUTPUT_CHARSET);
            this.repliesOutputFile.println("group_id, original_post_id, comment_id");
            this.groupsOutputFile = new PrintWriter(GROUPS, OUTPUT_CHARSET);
            this.groupsOutputFile.println("group_id, group_name, group_description, group_privacy, group_link, group_owner, group_owner_name");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Flushes and closes every output file that was successfully opened. */
    public void close() {
        try {
            closeIfOpen(this.postsOutputFile);
            closeIfOpen(this.repliesOutputFile);
            closeIfOpen(this.groupsOutputFile);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Closes the writer unless it was never opened. */
    private static void closeIfOpen(PrintWriter writer) {
        if (writer != null) {
            writer.close();
        }
    }

    // -----------------------
    // Main
    // -----------------------

    /**
     * Entry point. Expects the path to the properties file as the first
     * argument; without it, a usage message describing the file format is
     * printed.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.out.println("*** Input required: path to the .properties file [e.g. java -jar fb_data_collector.jar ./fbCollector.properties] ***");
            System.out.println();
            System.out.println("== Format of the properties file ===");
            System.out.println();
            System.out.println("appId= 12345");
            System.out.println("appSecret= 37263d8a3eac");
            System.out.println("FbGroups= 244737125545840,113652365383847");
            System.out.println("FbPages= 12367");
            System.out.println("maxPosts=100");
            System.out.println();
            System.out.println("== appId and appSecret are random numbers. To obtain yours go to https://developers.facebook.com/ ===");
            return;
        }

        FacebookDataCollector fbDataCollector = new FacebookDataCollector(args[0]);
        try {
            fbDataCollector.collectData();
        } finally {
            // Always flush what has been collected, even if collection fails unexpectedly.
            fbDataCollector.close();
        }
    }
}
# Credentials of your Facebook app (left blank here; fill in your own values)
appId=
appSecret=
# Comma-separated ids of the Facebook groups to download
FbGroups=244737125545840,113652365383847
# Comma-separated ids of the Facebook pages to download (blank: no pages)
FbPages=
# Limit on the number of initial posts downloaded per group/page
maxPosts=100
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the Facebook data collector -->
<modelVersion>4.0.0</modelVersion>
<!-- Coordinates of this project -->
<groupId>fb_data_collector</groupId>
<artifactId>fb_data_collector</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- RestFB supplies the com.restfb classes imported by FacebookDataCollector -->
<dependency>
<groupId>com.restfb</groupId>
<artifactId>restfb</artifactId>
<version>1.6.12</version>
</dependency>
</dependencies>
</project>
@miriamfs
Copy link
Author

Hello ESWC Tutorial (http://tutorials.oeg-upm.net/socialweb/) attendees! :)

We just wanted to provide you with a small piece of code so that you can quickly download and analyse Facebook data.

For this exercise we provide you with three main files:

(1) FacebookDataCollector.java contains the code that you need to download data from Facebook open groups or Facebook pages.

(2) pom.xml contains the dependencies. If you prefer not to use a maven project, just go to http://restfb.com/#download and download the corresponding library

(3) fbCollector.properties. This is the properties file that you need to set up. You need to add:
- appId and appSecret. These are your Facebook app's credentials. To get them you need to go to https://developers.facebook.com/
- FbGroups and FbPages are the lists of groups and pages for which you want to download information. If you have more than one group or one page, please provide them separated by ","
- maxPosts = maximum number of [initial] posts that you want to download for each group/page. Note that for each initial post, all of its corresponding comments will also be included in the download.

This program provides as output three main files:
(1) posts.csv -> contains all the posts and comments that have been downloaded, including their text, time, the user who generated them and the group or page they come from.
(2) replies.csv -> contains the reply chain, i.e., information about which posts have been generated as comments to other posts
(3) groups.csv -> contains information about the groups from which information has been downloaded

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment