Skip to content

Instantly share code, notes, and snippets.

@tf0054
Last active December 11, 2015 02:09
Show Gist options
  • Save tf0054/4528493 to your computer and use it in GitHub Desktop.
Save tf0054/4528493 to your computer and use it in GitHub Desktop.
Java code for extracting contents from a Facebook page.
package com.digipepper.test.html;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.io.FileUtils;
// http://jsoup.org/
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/*
* Extracts post contents from a saved Facebook page (HTML file).
*/
/**
 * Command-line tool that pulls timeline posts out of a saved Facebook page.
 *
 * <p>The input HTML is parsed with jsoup. Every {@code .timelineUnitContainer} element whose
 * {@code .userContentWrapper} markup is non-empty is printed to standard output as one
 * tab-separated row, and all matched containers are then written to an output HTML file.
 */
public class GetContentsFromFbPage {

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "/Users/tf0054/test/fblog.txt";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "/Users/tf0054/test/fblog.html";

    /** Charset name used for both reading the input and writing the output. */
    private static final String ENCODING = "UTF-8";

    /**
     * Parses the input page, prints one row per post, and saves the matched containers.
     *
     * <p>Row columns, separated by tabs: running index, HTML of the first {@code abbr} found
     * under {@code .uiLinkSubtle} (empty when there is none), {@code .userContent} HTML,
     * {@code .scaledImageFitWidth} elements, {@code .shareUnit img} elements and
     * {@code .photoUnit img} elements. Line breaks inside the last four columns are replaced
     * by spaces so each post stays on one output line.
     *
     * @param args optional; {@code args[0]} overrides the input path and {@code args[1]} the
     *             output path. Missing arguments fall back to the built-in defaults.
     * @throws Exception if the input cannot be read or parsed, or the output cannot be written
     */
    public final static void main(String[] args) throws Exception {
        int argCount = (args == null) ? 0 : args.length;
        File objRead = new File(argCount > 0 ? args[0] : DEFAULT_INPUT);
        File objWrite = new File(argCount > 1 ? args[1] : DEFAULT_OUTPUT);

        Document doc = Jsoup.parse(objRead, ENCODING);
        Elements timelineUnitContainers = doc.select(".timelineUnitContainer");

        int rowIndex = 0;
        for (Element container : timelineUnitContainers) {
            // Skip containers that carry no user content at all.
            if (container.select(".userContentWrapper").html().length() == 0) {
                continue;
            }

            // Not every post has a timestamp <abbr>; calling get(0) on an empty result
            // would throw IndexOutOfBoundsException, so fall back to an empty column.
            Elements timestamps = container.select(".uiLinkSubtle").select("abbr");
            String postedAt = timestamps.isEmpty() ? "" : timestamps.get(0).html();

            StringBuilder row = new StringBuilder();
            row.append(rowIndex).append('\t');
            row.append(postedAt).append('\t');
            row.append(flatten(container.select(".userContent").html())).append('\t');
            row.append(flatten(container.select(".scaledImageFitWidth").toString())).append('\t');
            row.append(flatten(container.select(".shareUnit img").toString())).append('\t');
            row.append(flatten(container.select(".photoUnit img").toString())).append('\n');
            System.out.print(row);

            rowIndex++;
        }

        // Write with the same explicit encoding used for reading instead of the platform default.
        FileUtils.writeStringToFile(objWrite, timelineUnitContainers.toString(), ENCODING);
    }

    /** Replaces every line feed with a single space so the text fits on one output row. */
    private static String flatten(String text) {
        return text.replace("\n", " ");
    }

    /**
     * Reads a whole file as a UTF-8 string.
     *
     * @param strFilename path of the file to read
     * @return the file contents, or {@code null} if the file could not be read (the stack
     *         trace is printed to standard error in that case)
     */
    private static String getListFromFile(String strFilename) {
        try {
            return FileUtils.readFileToString(new File(strFilename), ENCODING);
        } catch (IOException e) {
            // Best-effort helper: report the failure and signal it to the caller with null.
            e.printStackTrace();
            return null;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment