Created
March 7, 2012 21:40
-
-
Save omkz/1996417 to your computer and use it in GitHub Desktop.
Java scraping with HtmlUnit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.math.BigDecimal; | |
import java.util.ArrayList; | |
import java.util.Collections; | |
import java.util.Comparator; | |
import java.util.Date; | |
import java.util.List; | |
import java.util.Map; | |
import javax.annotation.PostConstruct; | |
import javax.annotation.Resource; | |
import org.apache.commons.lang.StringUtils; | |
import org.joda.time.DateTime; | |
import org.slf4j.Logger; | |
import org.slf4j.LoggerFactory; | |
import org.springframework.stereotype.Service; | |
import com.artivisi.eticket.domain.Flight; | |
import com.artivisi.eticket.service.AirlineService; | |
import com.gargoylesoftware.htmlunit.WebClient; | |
import com.gargoylesoftware.htmlunit.html.HtmlElement; | |
import com.gargoylesoftware.htmlunit.html.HtmlForm; | |
import com.gargoylesoftware.htmlunit.html.HtmlPage; | |
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput; | |
import com.gargoylesoftware.htmlunit.util.Cookie; | |
@Service("sriwijayaService") | |
public class SriwijayaService implements AirlineService { | |
private final Logger logger = LoggerFactory.getLogger(this.getClass()); | |
@Resource(name = "airlineServices") | |
private Map<String, String> airlineServices; | |
@PostConstruct | |
public void init() { | |
airlineServices.put("sriwijayaService", "Sriwijaya Airline"); | |
} | |
@SuppressWarnings("unchecked") | |
@Override | |
public List<Flight> search(Date departure, String origin, String destination, Integer adult, Integer child, Integer infant) { | |
try { | |
// inisialisasi HTML Unit | |
WebClient client = new WebClient(); | |
client.setThrowExceptionOnScriptError(false); | |
client.setCssEnabled(false); | |
DateTime expire = new DateTime().plusDays(30); | |
client.getCookieManager().addCookie(new Cookie("sriwijayaair.co.id", "language", "in", "/", expire.toDate(), false)); | |
client.getCookieManager().addCookie(new Cookie("sriwijayaair.co.id", "location", "id", "/", expire.toDate(), false)); | |
client.getCookieManager().addCookie(new Cookie("sriwijayaair.co.id", "dest", "home", "/", expire.toDate(), false)); | |
HtmlPage page = client.getPage("http://www.sriwijayaair.co.id/id"); | |
logger.debug("Title : {}", page.getTitleText()); | |
HtmlForm form = page.getFormByName("form1"); | |
form.getSelectByName("from").setSelectedAttribute(origin, true); | |
form.getSelectByName("to").setSelectedAttribute(destination, true); | |
DateTime depart = new DateTime(departure); | |
form.getSelectByName("departDate1").setSelectedAttribute(depart.getDayOfMonth() + "-", true); | |
form.getSelectByName("departDate2").setSelectedAttribute(depart.getMonthOfYear() + "-" + depart.getYear(), true); | |
form.getSelectByName("adult").setSelectedAttribute(adult.toString(), true); | |
final HtmlSubmitInput button = form.getInputByName("Submit"); | |
final HtmlPage page2 = button.click(); | |
logger.debug("Page 2 : " + page2.getTitleText()); | |
List<HtmlElement> flightCodes = (List<HtmlElement>) page2.getByXPath(SriwijayaConstants.XPATH_FLIGHT_CODE); | |
List<HtmlElement> origins = (List<HtmlElement>) page2.getByXPath(SriwijayaConstants.XPATH_FLIGHT_FROM); | |
List<HtmlElement> destinations = (List<HtmlElement>) page2.getByXPath(SriwijayaConstants.XPATH_FLIGHT_TO); | |
List<HtmlElement> pricePromo = (List<HtmlElement>) page2.getByXPath(SriwijayaConstants.XPATH_FLIGHT_PRICE_PROMO); | |
List<HtmlElement> priceEkonomi = (List<HtmlElement>) page2.getByXPath(SriwijayaConstants.XPATH_FLIGHT_PRICE_EKONOMI); | |
List<HtmlElement> priceBisnis = (List<HtmlElement>) page2.getByXPath(SriwijayaConstants.XPATH_FLIGHT_PRICE_BISNIS); | |
debugArrays(flightCodes, origins, destinations, pricePromo, priceEkonomi, priceBisnis); | |
validateArraySize(flightCodes, origins, destinations, pricePromo, priceEkonomi, priceBisnis); | |
List<Flight> result = new ArrayList<Flight>(); | |
for (int i = 0; i < flightCodes.size(); i++) { | |
Flight promo = parseFlight(departure, flightCodes, origins, destinations, i, "Promo", pricePromo.get(i).asText()); | |
if (promo.getPrice().compareTo(BigDecimal.ZERO) > 0) { | |
result.add(promo); | |
} | |
Flight ekonomi = parseFlight(departure, flightCodes, origins, destinations, i, "Ekonomi", priceEkonomi.get(i).asText()); | |
if (ekonomi.getPrice().compareTo(BigDecimal.ZERO) > 0) { | |
result.add(ekonomi); | |
} | |
Flight bisnis = parseFlight(departure, flightCodes, origins, destinations, i, "Bisnis", priceBisnis.get(i).asText()); | |
if (bisnis.getPrice().compareTo(BigDecimal.ZERO) > 0) { | |
result.add(bisnis); | |
} | |
} | |
client.closeAllWindows(); | |
Collections.sort(result, new Comparator<Flight>() { | |
@Override | |
public int compare(Flight f1, Flight f2) { | |
return f1.getPrice().compareTo(f2.getPrice()); | |
} | |
}); | |
return result; | |
} catch (Exception e) { | |
logger.error(e.getMessage(), e); | |
} | |
return null; | |
} | |
private Flight parseFlight(Date departure, List<HtmlElement> flightCodes, | |
List<HtmlElement> origins, List<HtmlElement> destinations, int i, String flightClass, String rawHarga) { | |
Flight f = new Flight(); | |
f.setAirline("Sriwijaya Air"); | |
f.setArrival(departure); | |
f.setCode(flightCodes.get(i).asText().trim()); | |
f.setFlightClass(flightClass); | |
String harga = parseHarga(rawHarga); | |
logger.debug("Harga : " + harga); | |
if (harga.length() > 0) { | |
f.setPrice(new BigDecimal(harga)); | |
} else { | |
f.setPrice(BigDecimal.ZERO); | |
} | |
String[] rawOrigin = origins.get(i).asText().trim().split("\n"); | |
debugOriginDestination("Origin", rawOrigin); | |
DateTime departureTime = parseTime(departure, rawOrigin); | |
f.setDeparture(departureTime.toDate()); | |
f.setOrigin(rawOrigin[1].trim()); | |
String[] rawDestination = destinations.get(i).asText().trim().split("\n"); | |
debugOriginDestination("Destination", rawDestination); | |
DateTime arrivalTime = parseTime(departure, rawDestination); | |
f.setArrival(arrivalTime.toDate()); | |
f.setDestination(rawDestination[1].trim()); | |
return f; | |
} | |
private void debugOriginDestination(String label, String[] rawOrigin) { | |
if (logger.isDebugEnabled()) { | |
logger.debug("Split {} : {}", new Object[]{label, rawOrigin.length}); | |
logger.debug("Split {} 1 : {}", new Object[]{label, rawOrigin[0]}); | |
logger.debug("Split {} 2 : {}", new Object[]{label, rawOrigin[1]}); | |
} | |
} | |
private DateTime parseTime(Date departure, String[] rawString) { | |
logger.debug("Departure : {} , RawString[0] : {}", new Object[]{departure, rawString[0]}); | |
String[] rawTimeDepart = rawString[0].split(":"); | |
logger.debug("Hour : {}, Minute {}", new Object[]{rawTimeDepart[0], rawTimeDepart[1]}); | |
DateTime departureTime = new DateTime(departure) | |
.withHourOfDay(Integer.valueOf(rawTimeDepart[0].trim())) | |
.withMinuteOfHour(Integer.valueOf(rawTimeDepart[1].trim())); | |
return departureTime; | |
} | |
private void debugArrays(List<HtmlElement> flightCodes, | |
List<HtmlElement> origins, List<HtmlElement> destinations, | |
List<HtmlElement> pricePromo, List<HtmlElement> priceEkonomi, | |
List<HtmlElement> priceBisnis) { | |
if (logger.isDebugEnabled()) { | |
logger.debug("Flights : " + flightCodes.size()); | |
logger.debug("Origins : " + origins.size()); | |
logger.debug("Destinations : " + destinations.size()); | |
logger.debug("Price Promo : " + pricePromo.size()); | |
logger.debug("Price Ekonomi : " + priceEkonomi.size()); | |
logger.debug("Price Bisnis : " + priceBisnis.size()); | |
} | |
} | |
private String parseHarga(String rawHarga) { | |
String harga = rawHarga.trim(); | |
harga = StringUtils.remove(harga, "HABIS"); | |
harga = StringUtils.remove(harga, "unchecked"); | |
harga = StringUtils.remove(harga, "N/A"); | |
harga = StringUtils.remove(harga, "."); | |
harga = harga.trim(); | |
return harga; | |
} | |
private void validateArraySize(List<HtmlElement> flightCodes, | |
List<HtmlElement> origins, List<HtmlElement> destinations, | |
List<HtmlElement> pricePromo, List<HtmlElement> priceEkonomi, | |
List<HtmlElement> priceBisnis) { | |
if (flightCodes.size() != origins.size()) { | |
throw new IllegalStateException("Array size mismatch, Flight Code " + flightCodes.size() + " Origins : " + origins.size()); | |
} | |
if (flightCodes.size() != destinations.size()) { | |
throw new IllegalStateException("Array size mismatch, Flight Code " + flightCodes.size() + " Destinations : " + destinations.size()); | |
} | |
if (flightCodes.size() != pricePromo.size()) { | |
throw new IllegalStateException("Array size mismatch, Flight Code " + flightCodes.size() + " Price Promo : " + pricePromo.size()); | |
} | |
if (flightCodes.size() != priceEkonomi.size()) { | |
throw new IllegalStateException("Array size mismatch, Flight Code " + flightCodes.size() + " Price Ekonomi : " + priceEkonomi.size()); | |
} | |
if (flightCodes.size() != priceBisnis.size()) { | |
throw new IllegalStateException("Array size mismatch, Flight Code " + flightCodes.size() + " Price Bisnis : " + priceBisnis.size()); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment