Skip to content

Instantly share code, notes, and snippets.

@wololock
Created November 1, 2014 09:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wololock/b0e31cb174123d463e3e to your computer and use it in GitHub Desktop.
Save wololock/b0e31cb174123d463e3e to your computer and use it in GitHub Desktop.
Extracting data from nested tables example
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.HashMap;
import java.util.Map;
/**
 * Example showing how to pull data out of nested HTML tables with jsoup.
 *
 * <p>The sample markup alternates "location" rows with rows that hold one or
 * more nested tables. For every location the program prints the location text
 * followed by the title, date and time cells of each nested-table row.
 *
 * <p>Note: the sample markup reuses the id {@code titleList} on several nested
 * tables; it is reproduced here unchanged.
 */
public class JsoupTableExample {

    /** Sample markup: an outer table whose rows alternate between a location cell and nested tables. */
    private static final String SAMPLE_HTML = "<table id=\"tableMain\">\n" +
            " <tr>\n" +
            " <td class=\"location\">Location A text</td>\n" +
            " </tr>\n" +
            " <tr>\n" +
            " <td>\n" +
            " <table id=\"titleList\">\n" +
            " <tr>\n" +
            " <td class=\"title\">Title A.A</td>\n" +
            " <td class=\"date\">date</td>\n" +
            " <td class=\"time\">time</td>\n" +
            " </tr>\n" +
            " </table>\n" +
            " <table id=\"titleList\">\n" +
            " <tr>\n" +
            " <td class=\"title\">Title A.B</td>\n" +
            " <td class=\"date\">date</td>\n" +
            " <td class=\"time\">time</td>\n" +
            " </tr>\n" +
            " </table>\n" +
            " <table id=\"titleList\">\n" +
            " <tr>\n" +
            " <td class=\"title\">Title A.C</td>\n" +
            " <td class=\"date\">date</td>\n" +
            " <td class=\"time\">time</td>\n" +
            " </tr>\n" +
            " </table>\n" +
            " </td>\n" +
            " </tr>\n" +
            "\n" +
            " <tr>\n" +
            " <td class=\"location\">Location B text</td>\n" +
            " </tr>\n" +
            " <tr>\n" +
            " <td>\n" +
            " <table id=\"titleList\">\n" +
            " <tr>\n" +
            " <td class=\"title\">Title B.A</td>\n" +
            " <td class=\"date\">date</td>\n" +
            " <td class=\"time\">time</td>\n" +
            " </tr>\n" +
            " </table>\n" +
            " <table id=\"titleList\">\n" +
            " <tr>\n" +
            " <td class=\"title\">Title B.B</td>\n" +
            " <td class=\"date\">date</td>\n" +
            " <td class=\"time\">time</td>\n" +
            " </tr>\n" +
            " </table>\n" +
            " <table id=\"titleList\">\n" +
            " <tr>\n" +
            " <td class=\"title\">Title B.C</td>\n" +
            " <td class=\"date\">date</td>\n" +
            " <td class=\"time\">time</td>\n" +
            " </tr>\n" +
            " </table>\n" +
            " </td>\n" +
            " </tr>\n" +
            "</table>";

    /** Selects, inside the main table, every row that directly follows a row containing a location cell. */
    private static final String ROWS_AFTER_LOCATION = "#tableMain tr:has(td.location) + tr";

    /** Selects the rows of the nested title tables, relative to the row that holds them. */
    private static final String NESTED_TITLE_ROWS = "#titleList > tbody > tr";

    /**
     * Parses the sample markup and prints each location together with its nested title rows.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        Document parsed = Jsoup.parse(SAMPLE_HTML);
        Elements contentRows = parsed.select(ROWS_AFTER_LOCATION);
        for (Element contentRow : contentRows) {
            printLocationGroup(contentRow);
        }
    }

    /**
     * Prints the location announced by the row preceding {@code contentRow},
     * then one line per nested title row found inside {@code contentRow}.
     */
    private static void printLocationGroup(Element contentRow) {
        // The selector used by the caller only yields rows that come right
        // after a location row, so the previous sibling carries the location cell.
        Element locationRow = contentRow.previousElementSibling();
        Elements locationCells = locationRow.select("td.location");
        System.out.printf("Current location: '%s'%n", locationCells.text());

        Elements nestedRows = contentRow.select(NESTED_TITLE_ROWS);
        for (Element nestedRow : nestedRows) {
            printTitleRow(nestedRow);
        }
    }

    /** Prints the title, date and time cell texts of a single nested-table row. */
    private static void printTitleRow(Element nestedRow) {
        String titleText = nestedRow.select("td.title").text();
        String dateText = nestedRow.select("td.date").text();
        String timeText = nestedRow.select("td.time").text();
        System.out.printf("Title: %s, Date: %s, Time: %s%n", titleText, dateText, timeText);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment