Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import java.io.IOException;
import java.util.ArrayList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Small command-line scraper: fetches one fixed web page with jsoup and prints
 * the text and the {@code href} attribute of every anchor selected from it.
 */
public class myscrapy {

    /**
     * Fetches the page, gathers a {@code "Text: ..."} line followed by a
     * {@code "Link: ..."} line for each element matched by {@code a[href]},
     * and then writes all gathered lines to standard output.
     *
     * @param args command-line arguments; not used
     * @throws IOException propagated from the jsoup fetch of the page
     */
    public static void main(String[] args) throws IOException {
        // Retrieve and parse the target page over HTTP.
        String url = "https://www.waterless-toilet.com/top-6-best-composting-toilets-to-choose/";
        Document doc = Jsoup.connect(url).get();

        // All anchor elements that carry an href attribute.
        Elements anchors = doc.select("a[href]");

        // Two output lines per anchor: its text first, then its href attribute value.
        ArrayList<String> lines = new ArrayList<String>();
        for (int i = 0; i < anchors.size(); i++) {
            Element anchor = anchors.get(i);
            String label = anchor.text();
            String target = anchor.attr("href");
            lines.add("Text: " + label);
            lines.add("Link: " + target);
        }

        // Emit the collected lines in the order they were gathered.
        for (int i = 0; i < lines.size(); i++) {
            System.out.println(lines.get(i));
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment