Skip to content

Instantly share code, notes, and snippets.

@mezcel
Last active December 22, 2017 10:56
Show Gist options
  • Save mezcel/607d9832c7771ce75c24048738f39577 to your computer and use it in GitHub Desktop.
Save mezcel/607d9832c7771ce75c24048738f39577 to your computer and use it in GitHub Desktop.
Example Java web scrapers using Jsoup and Eclipse
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Prints the address of every {@code <img>} element found on a fixed web page.
 *
 * <p>Each {@code src} value is resolved to an absolute URL against the
 * document's base URI using jsoup's own resolver, so relative paths such as
 * {@code images/a.gif}, {@code ../a.gif}, {@code /a.gif} and protocol-relative
 * {@code //host/a.gif} are all handled.
 */
public class ImageGrabber {

    /** Page whose images are listed. */
    private static final String PAGE_URL = "http://www.kidzone.ws/lw/frogs/facts.htm";

    /**
     * Downloads {@link #PAGE_URL} and writes one image URL per line to standard output.
     *
     * @param args command-line arguments; not used
     * @throws IOException propagated from the jsoup fetch of the page
     */
    public static void main(String[] args) throws IOException {
        Document doc = Jsoup.connect(PAGE_URL).get();
        Elements images = doc.getElementsByTag("img");
        for (Element image : images) {
            System.out.println(resolve(image, "src"));
        }
    }

    /**
     * Returns the given attribute of {@code element} as an absolute URL.
     *
     * <p>An empty or missing attribute yields an empty string (so the caller
     * still prints one line per element). If jsoup cannot build an absolute
     * URL, the raw attribute value is returned rather than being lost.
     *
     * @param element      element carrying the URL attribute
     * @param attributeKey name of the attribute, e.g. {@code "src"}
     * @return the absolute URL, the raw value if it cannot be resolved, or
     *         an empty string if the attribute is empty or absent
     */
    private static String resolve(Element element, String attributeKey) {
        String raw = element.attr(attributeKey);
        if (raw.isEmpty()) {
            // Do not resolve an empty reference: it would resolve to the page itself.
            return raw;
        }
        String absolute = element.absUrl(attributeKey);
        return absolute.isEmpty() ? raw : absolute;
    }
}
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Prints the target of every {@code <a>} element found on a fixed web page.
 *
 * <p>Each {@code href} value is resolved to an absolute URL against the
 * document's base URI using jsoup's own resolver, so relative paths such as
 * {@code page.html}, {@code ../page.html}, {@code /page.html} and
 * protocol-relative {@code //host/page.html} are all handled.
 */
public class WebsiteParser {

    /** Page whose links are listed. */
    private static final String PAGE_URL = "https://jsoup.org/download";

    /**
     * Downloads {@link #PAGE_URL} and writes one link target per line to standard output.
     *
     * @param args command-line arguments; not used
     * @throws IOException propagated from the jsoup fetch of the page
     */
    public static void main(String[] args) throws IOException {
        Document doc = Jsoup.connect(PAGE_URL).get();
        Elements links = doc.getElementsByTag("a");
        for (Element link : links) {
            System.out.println(resolve(link, "href"));
        }
    }

    /**
     * Returns the given attribute of {@code element} as an absolute URL.
     *
     * <p>An empty or missing attribute yields an empty string (so the caller
     * still prints one line per element). If jsoup cannot build an absolute
     * URL, the raw attribute value is returned rather than being lost.
     *
     * @param element      element carrying the URL attribute
     * @param attributeKey name of the attribute, e.g. {@code "href"}
     * @return the absolute URL, the raw value if it cannot be resolved, or
     *         an empty string if the attribute is empty or absent
     */
    private static String resolve(Element element, String attributeKey) {
        String raw = element.attr(attributeKey);
        if (raw.isEmpty()) {
            // Do not resolve an empty reference: it would resolve to the page itself.
            return raw;
        }
        String absolute = element.absUrl(attributeKey);
        return absolute.isEmpty() ? raw : absolute;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment