Skip to content

Instantly share code, notes, and snippets.

Created February 10, 2016 19:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save anonymous/daa2917b7fba7a94f867 to your computer and use it in GitHub Desktop.
Save anonymous/daa2917b7fba7a94f867 to your computer and use it in GitHub Desktop.
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package so2;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Downloads chat transcript pages with jsoup and writes the text of every element of class
 * {@code content} to a local text file, one element per line. Starting from a fixed transcript
 * URL, the crawl keeps following the page's "next day" / "day later" links until none is left.
 *
 * @author tristan.wiley
 */
public class SO2 {
    /** Destination file for the extracted transcript text. */
    private static final String OUTPUT_PATH = "H:\\My Documents\\temp\\Room15Transcript.txt";

    /** Transcript page at which the crawl starts. */
    private static final String START_URL = "http://chat.stackoverflow.com/transcript/15/2010/10/15";

    /** Writer used by {@link #writeToFile(String)}; assigned in {@link #main(String[])}. */
    static BufferedWriter bw;

    /** Output file; assigned in {@link #main(String[])}. */
    static File file;

    // Not read anywhere in this class; kept because it is package-visible.
    static boolean preparingToFinal = false;

    /**
     * Opens the output file, runs the crawl from {@code START_URL} and closes the file again.
     *
     * @param args ignored
     * @throws FileNotFoundException if the output file cannot be opened for writing
     * @throws IOException if closing (flushing) the output file fails
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        file = new File(OUTPUT_PATH);
        // try-with-resources: the writer is flushed and closed however the crawl ends.
        // UTF-8 is set explicitly so the output does not depend on the platform default charset.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
            bw = writer;
            try {
                writeStuff(START_URL, false, false);
            } catch (Exception e) {
                // Best effort: keep what was written so far, but report the exception type as
                // well as its message (getMessage() alone may be null).
                System.err.println("Transcript crawl stopped: " + e);
            }
        }
    }

    /**
     * Writes {@code msg} followed by a line separator to the shared writer {@code bw}.
     *
     * @param msg text to write
     * @throws IOException if the underlying writer fails
     */
    public static void writeToFile(String msg) throws IOException {
        bw.write(msg);
        bw.newLine();
    }

    /**
     * Fetches {@code url}, writes its posts, and keeps following the day-navigation links,
     * writing every page reached on the way.
     *
     * <p>The crawl is a loop rather than a recursion, so its depth does not grow with the number
     * of days visited. An extra pager page contributes only its posts; day navigation always
     * continues from the day's first page, so no day is visited twice.
     *
     * <p>NOTE(review): a page reached through a "day later" link never follows another
     * "day later" link, so the crawl ends there unless that page has a "next day" link.
     * This mirrors the original {@code isFinal} handling; confirm that it is intended.
     *
     * @param url first page to fetch
     * @param gotoNext when {@code true}, the pager of the first fetched page is not followed
     * @param isFinal when {@code true} and the first fetched page has no "next day" link, the
     *     crawl stops instead of following that page's "day later" link
     * @throws IOException if a page cannot be fetched or the output cannot be written
     */
    public static void writeStuff(String url, boolean gotoNext, boolean isFinal) throws IOException {
        String currentUrl = url;
        boolean skipPager = gotoNext;
        boolean lastHop = isFinal;

        while (true) {
            System.out.println(currentUrl);
            Document doc = Jsoup.connect(currentUrl).get();
            writePosts(doc);

            if (!skipPager) {
                String pagerUrl = lastPagerUrl(doc);
                if (!pagerUrl.isEmpty()) {
                    System.out.println(pagerUrl);
                    writePosts(Jsoup.connect(pagerUrl).get());
                }
            }

            String nextUrl;
            boolean nextIsLastHop;
            Elements nextDayLinks = doc.select("a:contains(next day)");
            if (!nextDayLinks.isEmpty()) {
                nextUrl = nextDayLinks.attr("abs:href");
                nextIsLastHop = false;
            } else if (!lastHop) {
                nextUrl = doc.select("a:contains(day later)").attr("abs:href");
                nextIsLastHop = true;
            } else {
                return;
            }

            // No usable link means the end of the transcript: stop cleanly instead of handing
            // an empty URL to Jsoup.connect.
            if (nextUrl.isEmpty()) {
                return;
            }
            currentUrl = nextUrl;
            skipPager = false;
            lastHop = nextIsLastHop;
        }
    }

    /** Writes the text of every element of class {@code content} in {@code doc}, one per line. */
    private static void writePosts(Document doc) throws IOException {
        for (Element post : doc.getElementsByClass("content")) {
            writeToFile(post.text());
        }
    }

    /**
     * Returns the absolute link target of the last child of the first {@code pager} element,
     * or an empty string when there is no pager, it has fewer than two children, or that
     * child carries no link. Only the last pager entry is considered, as in the original code.
     */
    private static String lastPagerUrl(Document doc) {
        Elements pagers = doc.getElementsByClass("pager");
        if (pagers.isEmpty()) {
            return "";
        }
        Elements children = pagers.first().children();
        if (children.size() < 2) {
            return "";
        }
        return children.get(children.size() - 1).attr("abs:href");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment