Skip to content

Instantly share code, notes, and snippets.

@hbprotoss
Created December 20, 2013 13:32
Show Gist options
  • Save hbprotoss/8054763 to your computer and use it in GitHub Desktop.
Save hbprotoss/8054763 to your computer and use it in GitHub Desktop.
抓取58代码片段
// Scrapes one listing page at `url` into a TPGoods record and builds a plain-text
// description from the page's "item" elements (a car listing, judging by the carInfo naming).
// Jsoups, TPGoods and Htmls are project helpers not shown here; presumably getDocByUrl
// fetches and parses the page -- confirm against their definitions.
Document doc = Jsoups.getDocByUrl(url);
TPGoods goods = new TPGoods();
// The id is the numeric text between the last "/" and the last "x.shtml" in the URL.
// NOTE(review): substring/Long.valueOf will throw if the URL lacks that suffix or the text is not numeric.
goods.setId(Long.valueOf(url.substring(url.lastIndexOf("/") + 1, url.lastIndexOf("x.shtml"))));
goods.setRealUrl(url);
/* main */
// Everything below is looked up relative to the first div obtained from the element with id "main".
Element main = doc.getElementById("main").getElementsByTag("div").first();
// NOTE(review): title and time are dereferenced without the null check that chengse gets below.
Element title = main.select("h1").first();
goods.setTitle(title.html());
Element time = main.select("li.time").first();
goods.setTime(time.html());
/* summary */
// summary holds the div.su_con elements under the first ul.suUl; entries are read by fixed position.
Elements summary = main.select("ul.suUl").first().select("div.su_con");
// Entry 0: price, taken from its first span and suffixed with the unit "万元" (ten-thousand yuan).
Element price = summary.get(0).select("span").first();
goods.setPrice(price.html() + "万元");
Element chengse = summary.get(1).select("span").first(); // chengse (condition / newness grade)
// Falls back to an empty string when entry 1 has no span.
goods.setChengse(chengse == null ? "" : chengse.html());
// Area lookup is disabled for this listing type; area is always stored as empty.
// Element area = summary.get(2).select("a").first();
goods.setArea("");
/* content */
// First element with class "des_con"; its "item" elements supply the description text.
Element element = doc.getElementsByClass("des_con").first();
Elements carInfo = element.getElementsByClass("item");
StringBuilder carSb = new StringBuilder();
// Append each item's HTML, passed through Htmls.simpleClean, followed by a newline.
for(int i = 0; i < carInfo.size(); i++) {
carSb.append(Htmls.simpleClean(carInfo.get(i).html()));
carSb.append("\n");
}
String descriptionStr = carSb.toString();
// Scrapes one listing page at `url` into a TPGoods record (a housing/rental listing, judging
// by the houseInfo naming and the "元/月" price unit) and starts building its description.
// Jsoups, TPGoods and Htmls are project helpers not shown here; presumably getDocByUrl
// fetches and parses the page -- confirm against their definitions.
Document doc = Jsoups.getDocByUrl(url);
TPGoods goods = new TPGoods();
// The id is the numeric text between the last "/" and the last "x.shtml" in the URL.
// NOTE(review): substring/Long.valueOf will throw if the URL lacks that suffix or the text is not numeric.
goods.setId(Long.valueOf(url.substring(url.lastIndexOf("/") + 1, url.lastIndexOf("x.shtml"))));
goods.setRealUrl(url);
/* main */
// Everything below is looked up relative to the first div obtained from the element with id "main".
Element main = doc.getElementById("main").getElementsByTag("div").first();
// NOTE(review): title is dereferenced without a null check.
Element title = main.select("h1").first();
goods.setTitle(title.html());
// The page's li.time is not read here; the time is set to the current date (yyyy-MM-dd) instead.
// Element time = main.select("li.time").first();
SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd");
goods.setTime(sdf.format(new java.util.Date()));
/* summary */
// summary holds the div.su_con elements under the first ul.suUl; entries are read by fixed position.
Elements summary = main.select("ul.suUl").first().select("div.su_con");
// Entry 0: price, taken from its first span and suffixed with the unit "元/月" (yuan per month).
Element price = summary.get(0).select("span").first();
goods.setPrice(price.html() + "元/月");
// chengse is always stored as empty for this listing type.
goods.setChengse("");
// Entry 4: area. The cleaned text has the literal "\n ( 地图▪街景 )" (map / street-view link text) removed.
// NOTE(review): summary.get(4) assumes at least five div.su_con entries -- confirm against the page layout.
Element area = summary.get(4).select("div.su_con").first();
goods.setArea(area == null ? "" : Htmls.simpleClean(area.html()).replace("\n ( 地图▪街景 )", ""));
// Add some of the housing info found in the summary to the description
Elements houseInfo = main.select("ul.suUl").first().select("li");
StringBuilder descBuilder = new StringBuilder();
// Uses the li entries at indices 1 through 3; newlines inside each entry's HTML are removed
// and a single newline is appended after each entry.
for(int i = 1; i <= 3; i++) {
descBuilder.append(houseInfo.get(i).html().replace("\n", ""));
descBuilder.append("\n");
}
String descriptionStr = Htmls.simpleClean(descBuilder.toString()) + "\n";
/* content */
// Second element with class "cur" (index 1); how it is used is not shown in this fragment.
Element element = doc.getElementsByClass("cur").get(1);
// Scrapes one listing page at `url` into a TPGoods record and builds a description from the
// page's "sub_1" section (a bike/vehicle listing, judging by the bikeInfo naming).
// Jsoups, TPGoods and Htmls are project helpers not shown here; presumably getDocByUrl
// fetches and parses the page -- confirm against their definitions.
Document doc = Jsoups.getDocByUrl(url);
TPGoods goods = new TPGoods();
// The id is the numeric text between the last "/" and the last "x.shtml" in the URL.
// NOTE(review): substring/Long.valueOf will throw if the URL lacks that suffix or the text is not numeric.
goods.setId(Long.valueOf(url.substring(url.lastIndexOf("/") + 1, url.lastIndexOf("x.shtml"))));
goods.setRealUrl(url);
/* main */
// Everything below is looked up relative to the first div obtained from the element with id "main".
Element main = doc.getElementById("main").getElementsByTag("div").first();
// NOTE(review): title and time are dereferenced without the null check that chengse and area get below.
Element title = main.select("h1").first();
goods.setTitle(title.html());
Element time = main.select("li.time").first();
goods.setTime(time.html());
/* summary */
// summary holds the div.su_con elements under the first ul.suUl; entries are read by fixed position.
Elements summary = main.select("ul.suUl").first().select("div.su_con");
// Entry 0: price, stored as the raw HTML of its first span (no unit suffix is appended here).
Element price = summary.get(0).select("span").first();
goods.setPrice(price.html());
Element chengse = summary.get(1).select("span").first(); // chengse (condition / newness grade)
// Falls back to an empty string when entry 1 has no span.
goods.setChengse(chengse == null ? "" : chengse.html());
// Entry 2: area, from its first span.c_25d; empty when that span is absent.
Element area = summary.get(2).select("span.c_25d").first();
goods.setArea(area == null ? "" : Htmls.simpleClean(area.html()));
/* content */
Element element = doc.getElementById("sub_1");
// Vehicle info
Element bikeInfo = element.select("div.sub_1_c").first();
// "|" separators in the HTML are turned into newlines before cleaning; a trailing newline is appended.
String descriptionStr = Htmls.simpleClean(bikeInfo.html().replace("|", "\n")) + "\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment