Skip to content

Instantly share code, notes, and snippets.

@yhsiang
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yhsiang/be11bd02eb0559cfd544 to your computer and use it in GitHub Desktop.
Save yhsiang/be11bd02eb0559cfd544 to your computer and use it in GitHub Desktop.
# Modules used by the scraper. `async` is loaded here but is not referenced
# anywhere in the visible code.
require! <[request cheerio fs async prelude-ls]>
_ = prelude-ls
# Create one cookie jar and rebind `request` to a wrapper that uses it by
# default, so calls made through `request` below share the same cookies.
jar = request.jar!
request = request.defaults jar: jar
# Form templates for the five-step query wizard. Each step's POST body is
# built by merging `base-form` (as returned by `fetch`) with that step's
# template.

# ASP.NET postback fields sent with every step. `fetch` overwrites
# __EVENTVALIDATION and __VIEWSTATE_ID after each response.
base-form =
  '__EVENTTARGET': ''
  '__EVENTARGUMENT': ''
  '__VIEWSTATE': ''
  '__EVENTVALIDATION': ''
  '__VIEWSTATE_ID': ''
  'WR1_1$btnNext': '下一步>'

# Step 1: three selector fields followed by 21 checkboxes whose control ids
# run from ctl138 to ctl238 in steps of 5, all switched on.
form-step1 =
  'WR1_1_Q_DataII': '1'
  'WR1_1_Q_PointII': 'D'
  'WR1_1_Q_SexII': '3'
for n from 138 to 238 by 5
  form-step1["WR1_1$ctl#{n}"] = 'on'

# Step 2: year range.
form-step2 =
  'WR1_1_Q_YearBeginII': '1979'
  'WR1_1_Q_YearEndII': '2010'

# Step 3: age selection (all ages, so the from/to bounds stay empty).
form-step3 =
  'WR1_1$Q_AgeKind': 'Q_AgeKindAll'
  'WR1_1_Q_AgeFromII': ''
  'WR1_1_Q_AgeToII': ''

# Step 4: area selection. The concrete area checkbox is supplied per export
# by `export-file`; the commented-out entry below is kept from the original.
form-step4 =
  'WR1_1_QP_AreaRegionII': 'ROOT'
  'WR1_1$Q_AreaGrp': 'Q_AreaTown'
  #'WR1_1_ctl10_0':'' #CRA_3102'

# Step 5: report/chart kind and the button that generates the report.
form-step5 =
  'WR1_1_Q_ReportKindII': 'T'
  'WR1_1_Q_ChartKindII': 'L'
  'WR1_1$cmdQuery': '產生報表'
# Send one request to the query wizard and refresh the shared postback
# tokens from the response.
#
# options.url    - target URL; defaults to the wizard's home page.
# options.method - HTTP method; defaults to 'POST'.
# options.form   - form fields to send as the request body (may be absent).
# next           - callback, invoked as next(error, res, body, {form: base-form}).
#
# Side effect: on a response with a body, base-form.__EVENTVALIDATION and
# base-form.__VIEWSTATE_ID are overwritten with the values of the matching
# elements in the returned HTML.
fetch = (options, next) ->
  home-url = 'https://cris.hpa.gov.tw/pagepub/Home.aspx?itemNo=cr.q.10'
  url = options.url || home-url
  method = options.method || 'POST'
  (error, res, body) <- request do
    url: url
    method: method
    headers:
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131'
      'Referer': home-url
    strictSSL: false
    form: options.form
  # A failed request delivers no body; report the error to the caller with the
  # usual callback shape instead of handing undefined to cheerio.
  if error or not body?
    return next error, res, body, form: base-form
  $ = cheerio.load body
  base-form.__EVENTVALIDATION = $('#__EVENTVALIDATION').val!
  base-form.__VIEWSTATE_ID = $('#__VIEWSTATE_ID').val!
  next error, res, body, form: base-form
# Walk the five-step query wizard for one area and stream the resulting
# Excel export to data/<code>.xls.
#
# area-code - object holding the area checkbox field and its code, e.g.
#             {'WR1_1_ctl95_0': 'CRA_0201'}; its first value names the file.
#
# Each step POSTs the postback fields returned by the previous `fetch` merged
# with that step's template. A failed step is logged and aborts this export.
export-file = (area-code) ->
  site = 'https://cris.hpa.gov.tw'
  label = (_.values area-code).0

  # Build a fresh payload from the given parts, left to right, so the shared
  # step templates are never modified.
  merge = (...parts) ->
    payload = {}
    for part in parts
      payload <<<< part
    payload

  # Log and return true when a step produced an error or no response.
  failed = (step, err, res) ->
    ok = not err and res?
    console.error "Export #{label} aborted at #{step}: #{err or 'no response'}" unless ok
    not ok

  # Log and return true when a value scraped from a page is missing.
  missing = (what, value) ->
    console.error "Export #{label} aborted: #{what} not found" unless value
    not value

  (err, res, body, ctx) <- fetch do
    url: site + '/pagepub/Home.aspx?itemNo=cr.q.10'
    method: 'GET'
  return if failed 'home page', err, res
  form = merge ctx.form, form-step1
  (err, res, body, ctx) <- fetch do
    form: form
  return if failed 'step 1', err, res
  form = merge ctx.form, form-step2
  (err, res, body, ctx) <- fetch do
    form: form
  return if failed 'step 2', err, res
  form = merge ctx.form, form-step3
  (err, res, body, ctx) <- fetch do
    form: form
  return if failed 'step 3', err, res
  # The area checkbox goes into this request's payload only; merging it into
  # form-step4 itself would leak it into every later export.
  form = merge ctx.form, form-step4, area-code
  (err, res, body, ctx) <- fetch do
    form: form
  return if failed 'step 4', err, res
  form = merge ctx.form, form-step5
  (err, res, body, ctx) <- fetch do
    form: form
  return if failed 'step 5', err, res
  # The final POST is expected to answer with a redirect to the report page.
  location = res.headers.location
  return if missing 'report redirect', location
  report-url = site + location
  (err, res, body, ctx) <- fetch do
    url: report-url
    method: 'GET'
  return if failed 'report page', err, res
  $ = cheerio.load body
  frame-src = $('iframe').attr('src')
  return if missing 'report iframe', frame-src
  export-url = site + frame-src
  (err, res, body, ctx) <- fetch do
    url: export-url
    method: 'GET'
  return if failed 'report viewer', err, res
  $ = cheerio.load body
  report-src = $('#report').attr('src')
  return if missing 'report source', report-src
  # Keep the first six query segments of the viewer URL and append the
  # parameters that request an Excel attachment.
  paths = report-src / '&'
  xls-url = site + paths.slice(0, 6).join '&'
  xls-url += '&OpType=Export&FileName=CrReportN_A01_C&ContentDisposition=AlwaysAttachment&Format=Excel'
  console.log "Export to data/#{label}.xls"
  file = fs.createWriteStream "data/#{label}.xls"
  download = request do
    url: xls-url
    headers:
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131'
    strictSSL: false
  # Without a listener a failed download would surface as an uncaught error.
  download.on 'error', (e) -> console.error "Export #{label} download failed: #{e}"
  download.pipe file
# Areas to export: one single-entry object per area, mapping the wizard's
# area checkbox field name to the area code. Entries are grouped by control
# id (ctl95, ctl100, ... ctl130); the code also names the output file.
lists =
  * 'WR1_1_ctl95_0': 'CRA_0201'
  * 'WR1_1_ctl95_1': 'CRA_0202'
  * 'WR1_1_ctl95_2': 'CRA_0203'
  * 'WR1_1_ctl95_3': 'CRA_0204'
  * 'WR1_1_ctl95_4': 'CRA_0205'
  * 'WR1_1_ctl95_5': 'CRA_0206'
  * 'WR1_1_ctl95_6': 'CRA_0207'
  * 'WR1_1_ctl95_7': 'CRA_0208'
  * 'WR1_1_ctl95_8': 'CRA_0209'
  * 'WR1_1_ctl95_9': 'CRA_0210'
  * 'WR1_1_ctl95_10': 'CRA_0211'
  * 'WR1_1_ctl100_0': 'CRA_4301'
  * 'WR1_1_ctl100_1': 'CRA_4302'
  * 'WR1_1_ctl100_2': 'CRA_4303'
  * 'WR1_1_ctl100_3': 'CRA_4304'
  * 'WR1_1_ctl100_4': 'CRA_4305'
  * 'WR1_1_ctl100_5': 'CRA_4306'
  * 'WR1_1_ctl100_6': 'CRA_4307'
  * 'WR1_1_ctl100_7': 'CRA_4308'
  * 'WR1_1_ctl100_8': 'CRA_4309'
  * 'WR1_1_ctl100_9': 'CRA_4310'
  * 'WR1_1_ctl100_10': 'CRA_4311'
  * 'WR1_1_ctl100_11': 'CRA_4312'
  * 'WR1_1_ctl100_12': 'CRA_4313'
  * 'WR1_1_ctl100_13': 'CRA_4314'
  * 'WR1_1_ctl100_14': 'CRA_4315'
  * 'WR1_1_ctl100_15': 'CRA_4316'
  * 'WR1_1_ctl100_16': 'CRA_4317'
  * 'WR1_1_ctl100_17': 'CRA_4318'
  * 'WR1_1_ctl100_18': 'CRA_4319'
  * 'WR1_1_ctl100_19': 'CRA_4320'
  * 'WR1_1_ctl100_20': 'CRA_4321'
  * 'WR1_1_ctl100_21': 'CRA_4322'
  * 'WR1_1_ctl100_22': 'CRA_4323'
  * 'WR1_1_ctl100_23': 'CRA_4324'
  * 'WR1_1_ctl100_24': 'CRA_4325'
  * 'WR1_1_ctl100_25': 'CRA_4326'
  * 'WR1_1_ctl100_26': 'CRA_4327'
  * 'WR1_1_ctl100_27': 'CRA_4328'
  * 'WR1_1_ctl100_28': 'CRA_4329'
  * 'WR1_1_ctl100_29': 'CRA_4330'
  * 'WR1_1_ctl100_30': 'CRA_4331'
  * 'WR1_1_ctl100_31': 'CRA_4332'
  * 'WR1_1_ctl100_32': 'CRA_4333'
  * 'WR1_1_ctl105_0': 'CRA_3401'
  * 'WR1_1_ctl105_1': 'CRA_3402'
  * 'WR1_1_ctl105_2': 'CRA_3403'
  * 'WR1_1_ctl105_3': 'CRA_3404'
  * 'WR1_1_ctl105_4': 'CRA_3405'
  * 'WR1_1_ctl105_5': 'CRA_3406'
  * 'WR1_1_ctl105_6': 'CRA_3407'
  * 'WR1_1_ctl105_7': 'CRA_3408'
  * 'WR1_1_ctl105_8': 'CRA_3409'
  * 'WR1_1_ctl105_9': 'CRA_3410'
  * 'WR1_1_ctl105_10': 'CRA_3411'
  * 'WR1_1_ctl105_11': 'CRA_3412'
  * 'WR1_1_ctl110_0': 'CRA_4501'
  * 'WR1_1_ctl110_1': 'CRA_4502'
  * 'WR1_1_ctl110_2': 'CRA_4503'
  * 'WR1_1_ctl110_3': 'CRA_4504'
  * 'WR1_1_ctl110_4': 'CRA_4505'
  * 'WR1_1_ctl110_5': 'CRA_4506'
  * 'WR1_1_ctl110_6': 'CRA_4507'
  * 'WR1_1_ctl110_7': 'CRA_4508'
  * 'WR1_1_ctl110_8': 'CRA_4509'
  * 'WR1_1_ctl110_9': 'CRA_4510'
  * 'WR1_1_ctl110_10': 'CRA_4511'
  * 'WR1_1_ctl110_11': 'CRA_4512'
  * 'WR1_1_ctl110_12': 'CRA_4513'
  * 'WR1_1_ctl115_0': 'CRA_4601'
  * 'WR1_1_ctl115_1': 'CRA_4602'
  * 'WR1_1_ctl115_2': 'CRA_4603'
  * 'WR1_1_ctl115_3': 'CRA_4604'
  * 'WR1_1_ctl115_4': 'CRA_4605'
  * 'WR1_1_ctl115_5': 'CRA_4606'
  * 'WR1_1_ctl115_6': 'CRA_4607'
  * 'WR1_1_ctl115_7': 'CRA_4608'
  * 'WR1_1_ctl115_8': 'CRA_4609'
  * 'WR1_1_ctl115_9': 'CRA_4610'
  * 'WR1_1_ctl115_10': 'CRA_4611'
  * 'WR1_1_ctl115_11': 'CRA_4612'
  * 'WR1_1_ctl115_12': 'CRA_4613'
  * 'WR1_1_ctl115_13': 'CRA_4614'
  * 'WR1_1_ctl115_14': 'CRA_4615'
  * 'WR1_1_ctl115_15': 'CRA_4616'
  * 'WR1_1_ctl120_0': 'CRA_4401'
  * 'WR1_1_ctl120_1': 'CRA_4402'
  * 'WR1_1_ctl120_2': 'CRA_4403'
  * 'WR1_1_ctl120_3': 'CRA_4404'
  * 'WR1_1_ctl120_4': 'CRA_4405'
  * 'WR1_1_ctl120_5': 'CRA_4406'
  * 'WR1_1_ctl125_0': 'CRA_9001'
  * 'WR1_1_ctl125_1': 'CRA_9002'
  * 'WR1_1_ctl125_2': 'CRA_9003'
  * 'WR1_1_ctl125_3': 'CRA_9004'
  * 'WR1_1_ctl125_4': 'CRA_9005'
  * 'WR1_1_ctl125_5': 'CRA_9006'
  * 'WR1_1_ctl130_0': 'CRA_9101'
  * 'WR1_1_ctl130_1': 'CRA_9102'
  * 'WR1_1_ctl130_2': 'CRA_9103'
  * 'WR1_1_ctl130_3': 'CRA_9104'
# Entry point: make sure the output directory exists, then start one export
# per area in `lists`.
# The directory is created synchronously: `fs.mkdir` without a callback throws
# on current Node releases and would not guarantee the directory exists
# before the exports begin writing into it.
fs.mkdirSync 'data' unless fs.existsSync 'data'
# Each call returns as soon as its first request is issued, so all exports
# are started back to back.
for code in lists
  export-file code
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment