Created
February 10, 2016 19:33
-
-
Save anonymous/daa2917b7fba7a94f867 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* To change this license header, choose License Headers in Project Properties. | |
* To change this template file, choose Tools | Templates | |
* and open the template in the editor. | |
*/ | |
package so2; | |
import java.io.BufferedWriter; | |
import java.io.File; | |
import java.io.FileNotFoundException; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.io.OutputStreamWriter; | |
import org.jsoup.Jsoup; | |
import org.jsoup.nodes.Document; | |
import org.jsoup.nodes.Element; | |
import org.jsoup.select.Elements; | |
/** | |
* | |
* @author tristan.wiley | |
*/ | |
public class SO2 { | |
static BufferedWriter bw; | |
static File file; | |
static boolean preparingToFinal = false; | |
public static void main(String[] args) throws FileNotFoundException, IOException { | |
file = new File("H:\\My Documents\\temp\\Room15Transcript.txt"); | |
FileOutputStream fos = new FileOutputStream(file); | |
bw = new BufferedWriter(new OutputStreamWriter(fos)); | |
try { | |
writeStuff("http://chat.stackoverflow.com/transcript/15/2010/10/15", false, false); | |
} catch (Exception e) { | |
System.out.println(e.getMessage()); | |
} | |
bw.close(); | |
} | |
public static void writeToFile(String msg) throws IOException { | |
bw.write(msg); | |
bw.newLine(); | |
} | |
public static void writeStuff(String url, boolean gotoNext, boolean isFinal) throws IOException { | |
System.out.println(url); | |
Document doc = Jsoup.connect(url).get(); | |
Elements posts = doc.getElementsByClass("content"); | |
for (Element e : posts) { | |
writeToFile(e.text()); | |
} | |
if(!gotoNext){ | |
if(!doc.getElementsByClass("pager").isEmpty()){ | |
Elements children = doc.getElementsByClass("pager").first().children(); | |
if (!children.isEmpty()) { | |
if (children.size() > 1) { | |
String nextUrl = children.get(children.size()-1).attr("abs:href"); | |
writeStuff(nextUrl, true, false); | |
} | |
} | |
} | |
} | |
if (!doc.select("a:contains(next day)").isEmpty()) { | |
String next = doc.select("a:contains(next day)").attr("abs:href"); | |
writeStuff(next, false, false); | |
} else { | |
if (!isFinal) { | |
String next = doc.select("a:contains(day later)").attr("abs:href"); | |
writeStuff(next, false, true); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment