Skip to content

Instantly share code, notes, and snippets.

@loopiezlol
Created May 17, 2015 16:16
Show Gist options
  • Save loopiezlol/d8e2923d2ceb6a314858 to your computer and use it in GitHub Desktop.
Save loopiezlol/d8e2923d2ceb6a314858 to your computer and use it in GitHub Desktop.
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Command-line scraper for the medicine index of sfatulmedicului.ro.
 *
 * <p>Starting from the index page it follows one link per initial letter, walks every listing
 * page of that letter, opens each linked medicine page and prints two lines to standard output:
 * the page title, then the plain text of the article container.
 *
 * <p>All selectors are tied to the site's markup. When an element the crawl cannot continue
 * without is missing, an {@link IOException} naming the selector and the URL is thrown.
 */
public class Main {

    /** Entry page that holds one link per initial letter. */
    private static final String INDEX_URL = "http://www.sfatulmedicului.ro/medicamente";

    /** Inserted between a letter URL and the 1-based page number to address one listing page. */
    private static final String PAGE_SUFFIX = "_pagina_";

    /** Value returned by {@link #getLastPage(String)} when a listing shows no pager links. */
    private static final int NO_PAGINATION = 0;

    /** Letter listings that are always fetched directly instead of through "_pagina_N" URLs. */
    private static final String[] SINGLE_PAGE_LETTER_URLS = {
        "http://www.sfatulmedicului.ro/medicamente_j",
        "http://www.sfatulmedicului.ro/medicamente_w",
        "http://www.sfatulmedicului.ro/medicamente_y",
        "http://www.sfatulmedicului.ro/medicamente_q",
    };

    /**
     * Crawls every letter listing reachable from the index page and prints each medicine found.
     *
     * @param args ignored
     * @throws IOException if a page cannot be fetched or lacks an element the crawl depends on
     */
    public static void main(String[] args) throws IOException {
        Document index = Jsoup.connect(INDEX_URL).get();
        Element letterBar = requireFirst(index, "div.container-insiruire", INDEX_URL);

        for (Element letterLink : letterBar.select("li a")) {
            String letterUrl = letterLink.attr("abs:href");
            if (letterUrl.isEmpty()) {
                // "abs:href" yields "" when the link has no resolvable target; nothing to fetch.
                continue;
            }

            int lastPage = isSinglePageLetter(letterUrl) ? NO_PAGINATION : getLastPage(letterUrl);
            if (lastPage < 1) {
                // No pager: the letter URL itself is the only listing page.
                scrapeListing(letterUrl);
                continue;
            }
            for (int pageNumber = 1; pageNumber <= lastPage; pageNumber++) {
                scrapeListing(letterUrl + PAGE_SUFFIX + pageNumber);
            }
        }
    }

    /** Returns whether {@code letterUrl} is one of the listings known to have a single page. */
    private static boolean isSinglePageLetter(String letterUrl) {
        for (String known : SINGLE_PAGE_LETTER_URLS) {
            if (known.equals(letterUrl)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Fetches one listing page and processes every medicine link found in it.
     *
     * @param listingUrl absolute URL of the listing page
     * @throws IOException if the page cannot be fetched or has no {@code div.listare_index}
     */
    private static void scrapeListing(String listingUrl) throws IOException {
        Document listing = Jsoup.connect(listingUrl).get();
        Element container = requireFirst(listing, "div.listare_index", listingUrl);

        // Drop the pager before collecting "li a" so that only the remaining links are visited.
        container.select("div.paginatie").remove();

        for (Element medicineLink : container.select("li a")) {
            String medicineUrl = medicineLink.attr("abs:href");
            if (!medicineUrl.isEmpty()) {
                getInfo(medicineUrl);
            }
        }
    }

    /**
     * Reads the number of listing pages from the last pager link of a letter listing.
     *
     * <p>The number is taken from the text after the final {@code '_'} of that link's URL.
     *
     * @param page absolute URL of the letter listing
     * @return the page number encoded in the last pager link, or {@link #NO_PAGINATION} when the
     *     listing contains no pager links
     * @throws IOException if the page cannot be fetched, has no {@code div.listare_index}, or the
     *     last pager link does not end in a number
     */
    private static int getLastPage(String page) throws IOException {
        Document listing = Jsoup.connect(page).get();
        Element container = requireFirst(listing, "div.listare_index", page);

        Element lastPagerLink = container.select("div.paginatie").select("a").last();
        if (lastPagerLink == null) {
            return NO_PAGINATION;
        }

        String href = lastPagerLink.attr("abs:href");
        // lastIndexOf returns -1 when there is no '_', which makes the tail the whole href and
        // lets the parse below report the problem.
        String tail = href.substring(href.lastIndexOf('_') + 1);
        try {
            return Integer.parseInt(tail);
        } catch (NumberFormatException e) {
            throw new IOException(
                    "Cannot read page count from pager link '" + href + "' on " + page, e);
        }
    }

    /**
     * Fetches one medicine page and prints its title and the text of its article container.
     *
     * @param last_link absolute URL of the medicine page
     * @throws IOException if the page cannot be fetched or the article container is missing
     */
    private static void getInfo(String last_link) throws IOException {
        Document doc = Jsoup.connect(last_link).get();

        Element wrapper = requireFirst(doc.body(), "div#wrapper.wrapper", last_link);
        Element container = requireFirst(wrapper, "div#container.container", last_link);
        Element content = requireFirst(container, "div.content", last_link);
        Elements columns = content.select("div.col-alpha");
        Element article = columns.select("div.content-articol").first();
        if (article == null) {
            throw new IOException(
                    "No element matching 'div.col-alpha div.content-articol' on " + last_link);
        }

        // Only the first rating widget is removed, and only if the page has one.
        Element rating = article.select("div.rating_medicament").first();
        if (rating != null) {
            rating.remove();
        }
        article.select("a").remove();
        removeComments(article);

        // The page title is used as the name of the medicine.
        String title = doc.title();
        System.out.println(title);

        // text() yields the plain text of the remaining article content.
        String leaflet = article.text();
        System.out.println(leaflet);
    }

    /**
     * Returns the first element under {@code scope} that matches {@code cssQuery}.
     *
     * @param scope element to search in
     * @param cssQuery jsoup CSS selector
     * @param url URL of the page being processed, used only in the error message
     * @return the first match, never {@code null}
     * @throws IOException if nothing matches
     */
    private static Element requireFirst(Element scope, String cssQuery, String url)
            throws IOException {
        Element match = scope.select(cssQuery).first();
        if (match == null) {
            throw new IOException("No element matching '" + cssQuery + "' on " + url);
        }
        return match;
    }

    /**
     * Recursively removes every comment node below {@code node}.
     *
     * @param node root of the subtree to clean; the node itself is never removed
     */
    private static void removeComments(Node node) {
        int i = 0;
        while (i < node.childNodes().size()) {
            Node child = node.childNode(i);
            if (child.nodeName().equals("#comment")) {
                // Removal shifts the following siblings down, so the index stays where it is.
                child.remove();
            } else {
                removeComments(child);
                i++;
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment