Created
January 12, 2021 15:24
-
-
Save saasindustries/2de842bbf6930c1dd82994da84dafdd6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
import java.util.ArrayList; | |
import org.jsoup.Jsoup; | |
import org.jsoup.nodes.Document; | |
import org.jsoup.nodes.Element; | |
import org.jsoup.select.Elements; | |
public class myscrapy { | |
public static void main(String[] args) throws IOException { | |
// TODO Auto-generated method stub | |
//fetching web page via HTTP | |
Document page = Jsoup.connect("https://www.waterless-toilet.com/top-6-best-composting-toilets-to-choose/").get(); | |
//selecting all hyperlinks | |
Elements pageElements = page.select("a[href]"); | |
ArrayList<String> hyperLinks = new ArrayList<String>(); | |
//iterating and extracting | |
for (Element e:pageElements) { | |
hyperLinks.add("Text: " + e.text()); | |
hyperLinks.add("Link: " + e.attr("href")); | |
} | |
for (String s : hyperLinks) { | |
System.out.println(s); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment