Skip to content

Instantly share code, notes, and snippets.

@omkz
Created March 7, 2012 21:40
Show Gist options
  • Save omkz/1996417 to your computer and use it in GitHub Desktop.
Save omkz/1996417 to your computer and use it in GitHub Desktop.
Java screen scraping with HtmlUnit
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.List;
import java.util.Map;
import javax.annotation.PostConstruct;
import javax.annotation.Resource;
import org.apache.commons.lang.StringUtils;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import com.artivisi.eticket.domain.Flight;
import com.artivisi.eticket.service.AirlineService;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;
import com.gargoylesoftware.htmlunit.util.Cookie;
/**
 * {@link AirlineService} implementation that looks up Sriwijaya Air fares by filling in the
 * airline's public search form with HtmlUnit and scraping the result page.
 *
 * <p>For every row on the result page up to three {@link Flight} entries are produced
 * (Promo, Ekonomi, Bisnis); a fare class is only reported when its price is greater than zero.
 */
@Service("sriwijayaService")
public class SriwijayaService implements AirlineService {

    private static final String AIRLINE_NAME = "Sriwijaya Air";
    private static final String COOKIE_DOMAIN = "sriwijayaair.co.id";
    private static final String SEARCH_URL = "http://www.sriwijayaair.co.id/id";

    /** Orders flights from cheapest to most expensive. */
    private static final Comparator<Flight> BY_PRICE_ASCENDING = new Comparator<Flight>() {
        @Override
        public int compare(Flight f1, Flight f2) {
            return f1.getPrice().compareTo(f2.getPrice());
        }
    };

    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    @Resource(name = "airlineServices")
    private Map<String, String> airlineServices;

    /** Registers this service's bean name and display name in the shared airline map. */
    @PostConstruct
    public void init() {
        airlineServices.put("sriwijayaService", "Sriwijaya Airline");
    }

    /**
     * Searches the Sriwijaya Air web site for flights on the given day.
     *
     * @param departure   day of departure; its date part selects the day/month/year on the form
     * @param origin      value of the "from" option to select
     * @param destination value of the "to" option to select
     * @param adult       number of adult passengers
     * @param child       currently ignored; the value is not submitted to the form
     * @param infant      currently ignored; the value is not submitted to the form
     * @return the available fares sorted by ascending price; an empty list when the search
     *         fails for any reason (the failure is logged), never {@code null}
     */
    @Override
    public List<Flight> search(Date departure, String origin, String destination, Integer adult, Integer child, Integer infant) {
        WebClient client = null;
        try {
            // Initialise HtmlUnit.
            client = new WebClient();
            client.setThrowExceptionOnScriptError(false);
            client.setCssEnabled(false);
            addSiteCookies(client);

            HtmlPage searchPage = client.getPage(SEARCH_URL);
            logger.debug("Title : {}", searchPage.getTitleText());

            HtmlPage resultPage = submitSearch(searchPage, departure, origin, destination, adult);
            logger.debug("Page 2 : {}", resultPage.getTitleText());

            List<Flight> result = extractFlights(resultPage, departure);
            Collections.sort(result, BY_PRICE_ASCENDING);
            return result;
        } catch (Exception e) {
            // Boundary catch: scraping can fail in many ways (network, changed markup, bad input).
            logger.error(e.getMessage(), e);
            return new ArrayList<Flight>();
        } finally {
            // Always release the browser windows, also when scraping failed half-way.
            if (client != null) {
                client.closeAllWindows();
            }
        }
    }

    /** Adds the language/location cookies that are sent along with the search request. */
    private void addSiteCookies(WebClient client) {
        Date expiry = new DateTime().plusDays(30).toDate();
        client.getCookieManager().addCookie(new Cookie(COOKIE_DOMAIN, "language", "in", "/", expiry, false));
        client.getCookieManager().addCookie(new Cookie(COOKIE_DOMAIN, "location", "id", "/", expiry, false));
        client.getCookieManager().addCookie(new Cookie(COOKIE_DOMAIN, "dest", "home", "/", expiry, false));
    }

    /** Fills in form "form1" on the search page and returns the page produced by submitting it. */
    private HtmlPage submitSearch(HtmlPage searchPage, Date departure, String origin, String destination,
            Integer adult) throws java.io.IOException {
        HtmlForm form = searchPage.getFormByName("form1");
        form.getSelectByName("from").setSelectedAttribute(origin, true);
        form.getSelectByName("to").setSelectedAttribute(destination, true);

        DateTime depart = new DateTime(departure);
        // The form splits the date over two selects: "<day>-" and "<month>-<year>".
        form.getSelectByName("departDate1").setSelectedAttribute(depart.getDayOfMonth() + "-", true);
        form.getSelectByName("departDate2").setSelectedAttribute(depart.getMonthOfYear() + "-" + depart.getYear(), true);
        form.getSelectByName("adult").setSelectedAttribute(adult.toString(), true);

        HtmlSubmitInput button = form.getInputByName("Submit");
        return button.click();
    }

    /** Reads the parallel result columns from the result page and turns every row into flights. */
    private List<Flight> extractFlights(HtmlPage resultPage, Date departure) {
        List<HtmlElement> flightCodes = elementsByXPath(resultPage, SriwijayaConstants.XPATH_FLIGHT_CODE);
        List<HtmlElement> origins = elementsByXPath(resultPage, SriwijayaConstants.XPATH_FLIGHT_FROM);
        List<HtmlElement> destinations = elementsByXPath(resultPage, SriwijayaConstants.XPATH_FLIGHT_TO);
        List<HtmlElement> pricePromo = elementsByXPath(resultPage, SriwijayaConstants.XPATH_FLIGHT_PRICE_PROMO);
        List<HtmlElement> priceEkonomi = elementsByXPath(resultPage, SriwijayaConstants.XPATH_FLIGHT_PRICE_EKONOMI);
        List<HtmlElement> priceBisnis = elementsByXPath(resultPage, SriwijayaConstants.XPATH_FLIGHT_PRICE_BISNIS);

        debugArrays(flightCodes, origins, destinations, pricePromo, priceEkonomi, priceBisnis);
        validateArraySize(flightCodes, origins, destinations, pricePromo, priceEkonomi, priceBisnis);

        List<Flight> result = new ArrayList<Flight>();
        for (int i = 0; i < flightCodes.size(); i++) {
            addIfPriced(result, parseFlight(departure, flightCodes, origins, destinations, i, "Promo", pricePromo.get(i).asText()));
            addIfPriced(result, parseFlight(departure, flightCodes, origins, destinations, i, "Ekonomi", priceEkonomi.get(i).asText()));
            addIfPriced(result, parseFlight(departure, flightCodes, origins, destinations, i, "Bisnis", priceBisnis.get(i).asText()));
        }
        return result;
    }

    /** Evaluates the XPath on the page; the unchecked cast is confined to this helper. */
    @SuppressWarnings("unchecked")
    private List<HtmlElement> elementsByXPath(HtmlPage page, String xpath) {
        return (List<HtmlElement>) page.getByXPath(xpath);
    }

    /** Adds the flight to the target list only when its price is greater than zero. */
    private void addIfPriced(List<Flight> target, Flight flight) {
        if (flight.getPrice().compareTo(BigDecimal.ZERO) > 0) {
            target.add(flight);
        }
    }

    /**
     * Builds one {@link Flight} for row {@code i} of the result table and the given fare class.
     *
     * @param departure    day of departure, used as the date part of the departure/arrival times
     * @param flightCodes  flight-code cells, one per row
     * @param origins      origin cells; each is expected to hold "HH:mm" and a place on separate lines
     * @param destinations destination cells, same layout as {@code origins}
     * @param i            row index into the three lists
     * @param flightClass  fare class label stored on the flight
     * @param rawHarga     raw text of the price cell for this fare class
     * @return the populated flight; its price is zero when the fare is not available
     * @throws IllegalStateException when an origin/destination cell does not have the expected layout
     */
    private Flight parseFlight(Date departure, List<HtmlElement> flightCodes,
            List<HtmlElement> origins, List<HtmlElement> destinations, int i, String flightClass, String rawHarga) {
        Flight f = new Flight();
        f.setAirline(AIRLINE_NAME);
        f.setCode(flightCodes.get(i).asText().trim());
        f.setFlightClass(flightClass);
        f.setPrice(parsePrice(rawHarga));

        String[] rawOrigin = splitTimeAndPlace("Origin", origins.get(i).asText());
        f.setDeparture(parseTime(departure, rawOrigin).toDate());
        f.setOrigin(rawOrigin[1].trim());

        // NOTE(review): arrival uses the departure day as well, so a flight landing after
        // midnight would get an arrival earlier than its departure - confirm with the site.
        String[] rawDestination = splitTimeAndPlace("Destination", destinations.get(i).asText());
        f.setArrival(parseTime(departure, rawDestination).toDate());
        f.setDestination(rawDestination[1].trim());
        return f;
    }

    /**
     * Converts the text of a price cell to an amount.
     *
     * @return the parsed price, or {@link BigDecimal#ZERO} when the cell is empty after cleaning
     *         or does not contain a number
     */
    private BigDecimal parsePrice(String rawHarga) {
        String harga = parseHarga(rawHarga);
        logger.debug("Harga : {}", harga);
        if (harga.length() == 0) {
            return BigDecimal.ZERO;
        }
        try {
            return new BigDecimal(harga);
        } catch (NumberFormatException e) {
            // One odd cell must not abort the whole search; treat the fare as unavailable.
            logger.warn("Unparseable price '{}', fare treated as unavailable", rawHarga);
            return BigDecimal.ZERO;
        }
    }

    /**
     * Splits the text of an origin/destination cell on line breaks and checks that both the
     * time (index 0) and the place (index 1) are present.
     *
     * @throws IllegalStateException when the cell has fewer than two lines
     */
    private String[] splitTimeAndPlace(String label, String cellText) {
        String[] parts = cellText.trim().split("\n");
        if (parts.length < 2) {
            throw new IllegalStateException("Unexpected " + label + " cell, expected time and place on separate lines : " + cellText);
        }
        debugOriginDestination(label, parts);
        return parts;
    }

    /** Logs the pieces of a split origin/destination cell; expects at least two elements. */
    private void debugOriginDestination(String label, String[] rawOrigin) {
        if (logger.isDebugEnabled()) {
            logger.debug("Split {} : {}", label, rawOrigin.length);
            logger.debug("Split {} 1 : {}", label, rawOrigin[0]);
            logger.debug("Split {} 2 : {}", label, rawOrigin[1]);
        }
    }

    /**
     * Combines the day of {@code departure} with the "HH:mm" time in {@code rawString[0]}.
     * Seconds and milliseconds are set to zero because the scraped time has minute precision.
     *
     * @throws IllegalStateException when {@code rawString[0]} is not of the form "HH:mm"
     * @throws NumberFormatException when the hour or minute part is not a number
     */
    private DateTime parseTime(Date departure, String[] rawString) {
        logger.debug("Departure : {} , RawString[0] : {}", departure, rawString[0]);
        String[] hourAndMinute = rawString[0].split(":");
        if (hourAndMinute.length < 2) {
            throw new IllegalStateException("Unexpected time format, expected HH:mm : " + rawString[0]);
        }
        logger.debug("Hour : {}, Minute {}", hourAndMinute[0], hourAndMinute[1]);
        int hour = Integer.parseInt(hourAndMinute[0].trim());
        int minute = Integer.parseInt(hourAndMinute[1].trim());
        return new DateTime(departure).withTime(hour, minute, 0, 0);
    }

    /** Logs the number of elements found for every scraped column. */
    private void debugArrays(List<HtmlElement> flightCodes,
            List<HtmlElement> origins, List<HtmlElement> destinations,
            List<HtmlElement> pricePromo, List<HtmlElement> priceEkonomi,
            List<HtmlElement> priceBisnis) {
        if (logger.isDebugEnabled()) {
            logger.debug("Flights : {}", flightCodes.size());
            logger.debug("Origins : {}", origins.size());
            logger.debug("Destinations : {}", destinations.size());
            logger.debug("Price Promo : {}", pricePromo.size());
            logger.debug("Price Ekonomi : {}", priceEkonomi.size());
            logger.debug("Price Bisnis : {}", priceBisnis.size());
        }
    }

    /**
     * Strips everything that is not part of the number from a price cell: the markers
     * "HABIS" and "N/A", the literal text "unchecked" and the "." thousands separators.
     *
     * @return the cleaned text; empty when the cell held no price
     */
    private String parseHarga(String rawHarga) {
        String harga = rawHarga.trim();
        harga = StringUtils.remove(harga, "HABIS");
        // Presumably the text HtmlUnit renders for an unselected input in the cell - TODO confirm.
        harga = StringUtils.remove(harga, "unchecked");
        harga = StringUtils.remove(harga, "N/A");
        harga = StringUtils.remove(harga, ".");
        return harga.trim();
    }

    /**
     * Verifies that every scraped column has as many elements as there are flight codes, so
     * that the columns can safely be read by the same row index.
     *
     * @throws IllegalStateException when any column differs in size from {@code flightCodes}
     */
    private void validateArraySize(List<HtmlElement> flightCodes,
            List<HtmlElement> origins, List<HtmlElement> destinations,
            List<HtmlElement> pricePromo, List<HtmlElement> priceEkonomi,
            List<HtmlElement> priceBisnis) {
        requireSameSize("Origins", flightCodes, origins);
        requireSameSize("Destinations", flightCodes, destinations);
        requireSameSize("Price Promo", flightCodes, pricePromo);
        requireSameSize("Price Ekonomi", flightCodes, priceEkonomi);
        requireSameSize("Price Bisnis", flightCodes, priceBisnis);
    }

    /** Throws {@link IllegalStateException} when {@code column} and {@code flightCodes} differ in size. */
    private void requireSameSize(String label, List<HtmlElement> flightCodes, List<HtmlElement> column) {
        if (flightCodes.size() != column.size()) {
            throw new IllegalStateException("Array size mismatch, Flight Code " + flightCodes.size() + " " + label + " : " + column.size());
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment