Skip to content

Instantly share code, notes, and snippets.

@hletrd
Created April 7, 2016 14:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hletrd/90f9719188bb08954c0c8472897dd816 to your computer and use it in GitHub Desktop.
Save hletrd/90f9719188bb08954c0c8472897dd816 to your computer and use it in GitHub Desktop.
Kyohaksa textbook fetcher
# -*- coding: utf-8 -*-
import httplib
import re
import urllib
import os
# Crawl the Kyohaksa textbook microsite: for every board index in
# [418, 1000) fetch the listing page, and when it advertises downloadable
# files, save each of them under downloaded/<index>_<title>/<file name>.

_HOST = 'www.kyohak.co.kr'
_LIST_PATH = '/textbook/microsite/microsite_board_list.asp?tch_div=1&sub_det_idx=%03d'
_DOWNLOAD_PATH = '/textbook/inc/board/File_Down.asp?Data_Idx=%s&File_Name=%s'
_OUTPUT_ROOT = 'downloaded'

# Compiled once instead of on every loop iteration; raw strings keep the
# regex escapes (\( and \)) from being interpreted as string escapes.
_FILE_LINK_RE = re.compile(r"JavaScript:FileDown\('([a-zA-Z0-9]+)','([^']+)'\)")
_TITLE_RE = re.compile(r' title="([^"]+)"')


def _http_get(path):
    """Return the raw response body of ``GET path`` on the textbook host.

    The connection is always closed, even when the request or the read
    raises, so a long crawl does not accumulate open sockets.
    """
    conn = httplib.HTTPConnection(_HOST)
    try:
        conn.request('GET', path)
        return conn.getresponse().read()
    finally:
        conn.close()


def _as_path_component(name):
    """Return *name* with '/' replaced by ',' so it stays one path component."""
    return name.replace('/', ',')


for i in range(418, 1000):
    # The listing pages are decoded as EUC-KR before being searched.
    page = _http_get(_LIST_PATH % i).decode('euc-kr')
    files = _FILE_LINK_RE.findall(page)
    print(i)
    if not files:
        continue

    titles = _TITLE_RE.findall(page)
    if titles:
        # The first title attribute carries a leading Korean label
        # (presumably a "favorites" link caption) that is stripped here.
        title = _as_path_component(titles[0].replace(u'즐겨찾기 ', ''))
    else:
        # No title attribute on the page: fall back to "<index>_" rather
        # than aborting the whole crawl with an IndexError.
        title = ''

    target_dir = os.path.join(_OUTPUT_ROOT, str(i) + '_' + title)
    # makedirs also creates the "downloaded" root on first use; the isdir
    # check lets an interrupted crawl be re-run over existing directories.
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    for data_idx, file_name in files:
        # The download endpoint takes the file name as EUC-KR bytes,
        # percent-encoded.
        quoted_name = urllib.quote_plus(file_name.encode('euc-kr'))
        # Fetch the whole body before opening the output file so a failed
        # request does not leave an empty file behind.
        body = _http_get(_DOWNLOAD_PATH % (data_idx, quoted_name))
        print(file_name)
        # The file name comes from the server; keep it inside target_dir.
        out_path = os.path.join(target_dir, _as_path_component(file_name))
        with open(out_path, 'wb') as output:
            output.write(body)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment