Skip to content

Instantly share code, notes, and snippets.

@hiiamyes
Created September 13, 2016 17:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hiiamyes/52b186f839baaa95871264966adaefef to your computer and use it in GitHub Desktop.
Save hiiamyes/52b186f839baaa95871264966adaefef to your computer and use it in GitHub Desktop.
bed crawler
let moment = require('moment');
let cheerio = require('cheerio');
let MongoClient = require('mongodb').MongoClient;
let co = require('co');
var rp = require('request-promise');
// MongoDB connection string; crawl() passes it to MongoClient.connect.
let dbUrl = 'mongodb://localhost:27017/tw-huts-dev';
/**
 * Crawl the bed calendar of one hut and store the result in MongoDB.
 *
 * Steps:
 *   1. GET `url` to read the hidden `__EVENTVALIDATION` / `__VIEWSTATE` fields.
 *   2. POST a search for `room`; the response is treated as "this month".
 *   3. POST again with the following month selected ("next month").
 *   4. Run both responses through `parser` and `$set` the combined list as
 *      `capacityStatus` on the `huts` document matching `{name}`.
 *
 * @param {Object} options
 * @param {string} options.name - Hut name, used as the MongoDB filter `{name}`.
 * @param {string} options.url - Page URL used for the GET and both POSTs.
 * @param {number|string} options.room - Value sent as `ctl00$ContentPlaceHolder1$rooms`.
 * @returns {Promise<void>} Settles when the crawl has finished. Failures are
 *   logged with `console.log` and not rethrown, so the promise always fulfils.
 */
const crawl = ({name, url, room}) => {
  return co(function* () {
    // init: plain GET, only needed for the hidden form-state fields
    const dataInit = yield rp({
      url,
      transform: body => cheerio.load(body)
    });

    // this month
    const dataThisMonth = yield rp.post({
      url,
      form: {
        __EVENTVALIDATION: dataInit('#__EVENTVALIDATION').val(),
        __VIEWSTATE: dataInit('#__VIEWSTATE').val(),
        ctl00$ContentPlaceHolder1$rooms: room,
        ctl00$ScriptManager1: 'ctl00$ScriptManager1|ctl00$ContentPlaceHolder1$btnsearch',
        ctl00$ContentPlaceHolder1$btnsearch: '查詢'
      },
      transform: body => cheerio.load(body)
    });

    // next month
    // Let moment do the calendar arithmetic so December rolls over to
    // January of the following year (instead of month 13 of this year).
    // Sampling the clock once also keeps month and year consistent.
    const nextMonth = moment().add(1, 'months');
    const nextMonthNumber = nextMonth.month() + 1; // moment months are 0-based
    const nextMonthYear = nextMonth.year();
    const dataNextMonth = yield rp.post({
      url,
      form: {
        // Form state must come from the previous response, not the initial GET.
        __EVENTVALIDATION: dataThisMonth('#__EVENTVALIDATION').val(),
        __VIEWSTATE: dataThisMonth('#__VIEWSTATE').val(),
        ctl00$ContentPlaceHolder1$ddlMonth: nextMonthNumber,
        ctl00$ContentPlaceHolder1$ddlYear: nextMonthYear,
        ctl00$ContentPlaceHolder1$hidMonth: nextMonthNumber,
        ctl00$ContentPlaceHolder1$hidYear: nextMonthYear,
        ctl00$ContentPlaceHolder1$rooms: room,
        ctl00$ScriptManager1: 'ctl00$ScriptManager1|ctl00$ContentPlaceHolder1$btnsearch',
        ctl00$ContentPlaceHolder1$btnsearch: '查詢'
      },
      transform: body => cheerio.load(body)
    });

    const capacityStatus = parser(dataThisMonth).concat(parser(dataNextMonth));

    const db = yield MongoClient.connect(dbUrl);
    try {
      const col = db.collection('huts');
      yield col.updateOne({name}, {$set: {capacityStatus}});
    } finally {
      // Release the connection even when the update fails.
      db.close();
    }
  }).catch(err => {
    console.log('err: ', err);
  });
};
/**
 * Extract per-day capacity records from one loaded calendar page.
 *
 * Looks at the cells `#ContentPlaceHolder1_cc_01` .. `#ContentPlaceHolder1_cc_42`
 * (presumably a 6x7 calendar grid - confirm against the page). For every cell
 * that contains a link, the date is taken from the `sdate=Y-M-D` part of the
 * link's `href` and the counts from the link's first three `<span>` elements.
 *
 * @param {Function} $ - Selector function for the page (the value returned by
 *   `cheerio.load`); called with CSS selectors.
 * @returns {Array<{date: string, remaining: number, applying: number, isDrawn: boolean}>}
 *   One entry per cell with a usable link, in cell order. `applying` is the
 *   sum of the second and third span values; `isDrawn` is always `true`.
 */
const parser = ($) => {
  const capacityStatus = [];

  // Reads the n-th <span> inside a cell's link as a base-10 integer.
  const readCount = (cellSelector, n) =>
    parseInt($(`${cellSelector} span:nth-of-type(${n})`).text(), 10);

  for (let i = 1; i <= 42; i++) {
    // Cell ids are zero-padded to two digits: 01, 02, ..., 42.
    const indexString = `0${i}`.slice(-2);
    const cellSelector = `#ContentPlaceHolder1_cc_${indexString} a`;
    const date = $(cellSelector);
    if (!date.length) {
      continue;
    }

    // Skip links without an `sdate=` query value instead of throwing and
    // aborting the whole crawl.
    const href = date.attr('href') || '';
    const sdate = href.split('sdate=')[1];
    if (!sdate) {
      continue;
    }

    const [year, month, day] = sdate.split('-');
    const remaining = readCount(cellSelector, 1);
    const waiting = readCount(cellSelector, 2);
    const applying = readCount(cellSelector, 3);

    capacityStatus.push({
      // The year in the link is offset by 1911 (presumably an ROC/Minguo
      // calendar year - confirm); add it back before formatting.
      date: moment.utc(`${parseInt(year, 10) + 1911}-${month}-${day}`).format(),
      remaining,
      applying: waiting + applying,
      isDrawn: true,
    });
  }

  return capacityStatus;
};
// Script entry point: run one crawl for a single hut (name, page URL, room value 63).
crawl({name: '新達山屋', url:'https://npm.cpami.gov.tw/bed_1.aspx', room:63})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment