Skip to content

Instantly share code, notes, and snippets.

@QQism
Created September 10, 2012 00:05
Show Gist options
  • Save QQism/3688026 to your computer and use it in GitHub Desktop.
Save QQism/3688026 to your computer and use it in GitHub Desktop.
vietjet air flights crawler
<!DOCTYPE html>
<html>
<head>
<title>Vietjet Air Crawlee</title>
<script type='text/javascript' src='https://ajax.googleapis.com/ajax/libs/jquery/1.8.1/jquery.min.js'></script>
<link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.1.1/css/bootstrap-combined.min.css" rel="stylesheet">
</head>
<body>
<script>
window.onload = function() {
var url = 'https://ameliaweb5.intelisys.ca/VietJet/ViewFlights.aspx';
resp = '';
// "Start" button: download the booking page and stage its form for editing.
$('#scrape-btn').click(function() {
  $('#dummy-status').text('scraping...');
  $.ajax({
    url: url,
    crossDomain: true,
    success: function(data) {
      $('#dummy-status').text('OK');
      console.log('Get flight page');
      // Keep the first <form> found in the response inside the hidden
      // #dummy1 holder. `resp` is the global initialised at the top of this
      // onload handler.
      resp = $(data).find('form')[0];
      $('#dummy1').html(resp);
      show_input();
      fill_data();
    },
    // Report failures: without this callback the status stayed at
    // "scraping..." forever when the request did not succeed (for example
    // when the browser rejects the cross-domain call).
    error: function(jqXHR, textStatus, errorThrown) {
      $('#dummy-status').text('Failed: ' + (errorThrown || textStatus));
    }
  });
});
// "Get flights" button: submit the staged form and export the returned flights.
$('#post-btn').click(function() {
  // Name/value pairs of every input and select in the staged form (#dummy1).
  // Kept local: the original assigned an undeclared `data`, which leaked a
  // global and was then shadowed by the success callback's parameter.
  var fields = {};
  $('#dummy1 input').each(function(index, e) { fields[e.name] = e.value; });
  $('#dummy1 select').each(function(index, e) { fields[e.name] = e.value; });
  $('#dummy-status').text('scraping...');
  $.ajax({
    type: 'POST',
    url: url,
    data: fields,
    crossDomain: true,
    success: function(data) {
      $('#dummy-status').text('OK');
      console.log('Get flight page');
      // Stage the first <form> of the result page in the hidden #dummy2
      // holder, where export_data() reads it. `resp` is the global
      // initialised at the top of this onload handler.
      resp = $(data).find('form')[0];
      $('#dummy2').html(resp);
      export_data();
    },
    // Report failures instead of leaving the status at "scraping..." forever.
    error: function(jqXHR, textStatus, errorThrown) {
      $('#dummy-status').text('Failed: ' + (errorThrown || textStatus));
    }
  });
});
/**
 * Pre-populates the form staged in #dummy1 with a default search:
 * origin SGN, destination HAN, date 10/09/2012 (written to the text fields
 * and to the month/day selects), currency VND, and the value 'vfto' for the
 * #button field.
 */
var fill_data = function() {
  // [selector, value] pairs, applied top to bottom.
  var defaults = [
    ['#dummy1 #lstOrigAP', 'SGN'],
    ['#dummy1 #lstDestAP', 'HAN'],
    ['#dummy1 #departure1', '10/09/2012'],
    ['#dummy1 #departure2', '10/09/2012'],
    ['#dummy1 #dlstDepDate_Month', '2012/09'],
    ['#dummy1 #dlstDepDate_Day', '10'],
    ['#dummy1 #dlstRetDate_Month', '2012/09'],
    ['#dummy1 #dlstRetDate_Day', '10'],
    ['#dummy1 #lstCurrency', 'VND'],
    ['#dummy1 #button', 'vfto']
  ];

  console.log('Filling info');
  for (var i = 0; i < defaults.length; i += 1) {
    $(defaults[i][0]).val(defaults[i][1]);
  }
};
/**
 * Reads the flight rows out of the result page staged in #dummy2, turns each
 * row into a plain object, and writes the XML produced by flights_to_xml()
 * into #dummy-result.
 *
 * Each flight object carries startTime, startPlace, endTime, endPlace and
 * detail, plus either promo/eco/flexi (when the pricing cell holds exactly
 * three sub-cells) or error (any other count).
 */
var export_data = function() {
  // Splits a cell's markup on <br>. A missing cell yields [''] instead of
  // throwing, so one malformed row cannot abort the whole export.
  var split_lines = function(cell) {
    return ($(cell).html() || '').split('<br>');
  };

  // Local list; the original assigned an undeclared (global) `flights`.
  var flights = [];
  $('#dummy2 .FlightsGrid').each(function(index, table) {
    $(table).children('tbody').children('tr [class^=gridFlight]').each(function(index, row) {
      var flight = {};
      var tds = $(row).children('td');

      // Flight info: first cell = departure, second = arrival, third = detail.
      var info = $(tds[0]).find('table > tbody > tr > td');
      var depart = split_lines(info[0]);
      flight.startTime = depart[0];
      flight.startPlace = depart[1];
      var arrive = split_lines(info[1]);
      flight.endTime = arrive[0];
      flight.endPlace = arrive[1];
      flight.detail = $(info[2]).text().trim();

      // Availability, pricing
      var pricing = $(tds[1]).find('table > tbody > tr > td');
      if (pricing.length === 3) {
        // NOTE(review): promo is the only price stored untrimmed; kept as in
        // the original - confirm whether that is intentional.
        flight.promo = $(pricing[0]).text();
        flight.eco = $(pricing[1]).text().trim();
        flight.flexi = $(pricing[2]).text().trim();
      } else {
        // sold out
        flight.error = pricing.text();
      }
      flights.push(flight);
    });
  });

  var xml_result = flights_to_xml(flights);
  $('#dummy-result').text(xml_result);
};
};
/**
 * Serialises a list of flight records into a single-line XML document.
 *
 * Output shape: an XML declaration, then <flights> containing one <flight>
 * per record; every own property of a record becomes a child element named
 * after the property, with the property's value as its text.
 *
 * @param {Array<Object>} data - Flight records (plain objects).
 * @returns {string} The XML text, with no whitespace between elements.
 */
var flights_to_xml = function(data) {
  // Escape the characters that are markup in XML text content so a value
  // such as "A & B" cannot produce a malformed document.
  var escape_xml = function(text) {
    return text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  };

  // Declared locally; the original assigned undeclared `xml` / `result`,
  // leaking globals (and throwing a ReferenceError in strict mode).
  var xml = [];
  xml.push('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>');
  xml.push('<flights>');
  data.forEach(function(flight) {
    xml.push('<flight>');
    for (var prop in flight) {
      // Skip anything inherited through the prototype chain.
      if (!Object.prototype.hasOwnProperty.call(flight, prop)) {
        continue;
      }
      xml.push('<' + prop + '>');
      // unescape() is deprecated but kept so %XX sequences decode exactly as
      // before; it also coerces non-string values to strings.
      xml.push(escape_xml(unescape(flight[prop])));
      xml.push('</' + prop + '>');
    }
    xml.push('</flight>');
  });
  xml.push('</flights>');
  return xml.join('');
};
/**
 * Builds the visible search controls. Clones the date input and the
 * origin/destination selects from the form staged in #dummy1, strips their
 * inline event attributes, and appends the clones (each preceded by a label)
 * to #prepare. Changing a clone copies its value back to the staged original.
 */
var show_input = function() {
  // Clone a staged <select>, drop its inline handlers, and mirror every
  // change of the clone back to the original identified by `selector`.
  var mirror_select = function(selector) {
    var copy = $(selector).clone();
    copy.removeAttr('onclick onchange onblur onfocus');
    copy.change(function() {
      $(selector).val($(this).val());
    });
    return copy;
  };

  var startTime = $('#dummy1 #departure1').clone();
  startTime.removeAttr('onclick readonly onblur onkeydown onfocus onchange');
  startTime.change(function() {
    // Declared locally; the original leaked an undeclared global `date`.
    var date = $(this).val().split(/[\/|-]/); // DD/MM/YYYY
    // Ignore incomplete input rather than writing "undefined/undefined"
    // into the month fields of the staged form.
    if (date.length < 3) {
      return;
    }
    var day = date[0];
    var month = date[2] + '/' + date[1]; // YYYY/MM, as the month selects expect
    $('#dummy1 #departure1').val(date.join('/'));
    $('#dummy1 #departure2').val(date.join('/'));
    $('#dummy1 #dlstDepDate_Month').val(month);
    $('#dummy1 #dlstDepDate_Day').val(day);
    $('#dummy1 #dlstRetDate_Month').val(month);
    $('#dummy1 #dlstRetDate_Day').val(day);
  });

  var startPlace = mirror_select('#dummy1 #lstOrigAP');
  var endPlace = mirror_select('#dummy1 #lstDestAP');

  var container = $('#prepare');
  container.append('<br/><span>Start Time </span>');
  container.append(startTime);
  container.append('<br/><span>Start </span>');
  container.append(startPlace);
  container.append('<br/><span>End </span>');
  container.append(endPlace);
};
</script>
<h2>This is the micro crawler that scrapes Vietjet air flights</h2><br>
<b>On Mac, it works only with Safari (Chrome and Firefox both throw errors due to their security policies). It has not been tested on Linux or Windows yet.</b>
<h3>Steps</h3>
<ul>
<li>Click <b>Start</b> to prepare</li>
<li>Input the date (e.g. 12/09/2012) and select the departure and destination</li>
<li>Click <b>Get flights</b> to retrieve xml flight data</li>
</ul>
<div id='prepare'>
<button class='btn' id='scrape-btn'>Start</button>
</div><br/>
<button class='btn' id='post-btn'>Get flights</button><br/>
<!-- status footer -->
<div style="position: fixed; bottom: 0px; right: 0; left: 0; background: grey; color: white">
<span id='dummy-status'></span>
<span class='pull-right'>Quang Quach</span>
</div>
<!-- this part is for storing scraping content-->
<div id='dummy1' style='display: none'></div>
<div id='dummy2' style='display: none'></div>
<br/>
<div id='dummy3'>
<h4>Result</h4>
<code id='dummy-result'></code>
</div>
<br/><br/><br/>
</body>
</html>
@carotsay
Copy link

Hi Mr. Quang Quach, I'd like to see more demos of your crawlers; please share more of them. I am really interested in and appreciate your work. Thank you very much.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment