Created
July 24, 2013 23:46
-
-
Save wagurano/6075661 to your computer and use it in GitHub Desktop.
전국 어린이집 목록(코드, 이름) 출력하는 루비 코드
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8
#
# Prints the (code, name) pair of every nursery listed on the
# m.childcare.go.kr search pages, one province code at a time.
#
# Output is line oriented on stdout:
#   FETCH,<province>                  a province is starting
#   FETCH,<province>,<offset>         a page request is being attempted
#   NEXT,<province>,<offset>,<count>  number of codes found on that page
#   <code>,<name>                     one line per nursery
#   FETCH,<province>,<offset>,ERROR   the request failed with an HTTP error
#   sleep / raise                     a request timed out (retrying / giving up)
#   END                               every province has been processed
require 'net/http'
require 'nokogiri'
require 'open-uri'
require 'timeout'

# Maximum number of attempts for a single page before giving up.
TIMEOUT_CNT = 42
# Seconds allowed for one download-and-parse attempt.
FETCH_TIMEOUT_SECONDS = 5
# The pause before retry k is k * BACKOFF_STEP_SECONDS.
BACKOFF_STEP_SECONDS = 0.42

# Markup the scraper looks for: <ul class="nursry_search_list">
# Example request:
# http://m.childcare.go.kr/nursery/mAllNurserySlPL.jsp?programId=null&flag=NSSlPL&offset=10000&ctprvn=11000&signgu=&dong=&road=&crtype=&crspec=&crcert=&crpub=&crname=&x=36&y=14

# Builds the search-page URL for a two-digit province prefix and a result
# offset. The ctprvn query parameter is the prefix followed by "000".
def nursery_page_url(prvn, offset)
  "http://m.childcare.go.kr/nursery/mAllNurserySlPL.jsp?programId=null&flag=NSSlPL&offset=#{offset}&ctprvn=#{prvn}000&signgu=&dong=&road=&crtype=&crspec=&crcert=&crpub=&crname="
end

# Extracts [code, name] pairs from a parsed result page.
#
# Codes are the text between the first and last single quote of each link's
# href inside the result list; names are the contents of the <strong>
# elements matched by the name selector. Pairs are matched by position, so a
# code without a corresponding name is paired with nil.
#
# Returns an empty array when the page has no result list at all, instead of
# failing on nil.
def nursery_rows(doc)
  list = doc.at_css("ul.nursry_search_list")
  return [] if list.nil?

  codes = []
  list.css("a").each do |link|
    href = link['href']
    next if href.nil? # an anchor without href carries no code
    href.scan(/'(.*)'/) { |captures| codes << captures[0] }
  end
  names = list.css("li[@class = 'first orange']/strong").map { |node| node.content }
  codes.zip(names)
end

# Downloads and parses one result page, retrying on failure.
#
# Both timeouts and HTTP errors share a budget of TIMEOUT_CNT attempts with a
# linearly growing pause between attempts.
#
# Returns the array of [code, name] pairs for the page, or nil when the page
# kept failing with HTTP errors and the budget ran out.
# Raises Timeout::Error when the page kept timing out and the budget ran out.
def fetch_nursery_page(prvn, offset)
  url = nursery_page_url(prvn, offset)
  attempts_left = TIMEOUT_CNT
  begin
    Timeout.timeout(FETCH_TIMEOUT_SECONDS) do
      puts "FETCH,#{prvn},#{offset}"
      # URI#open (from open-uri) works on old and current Rubies alike, unlike
      # Kernel#open on a URL string; the block form also closes the stream.
      doc = URI.parse(url).open { |io| Nokogiri::HTML(io) }
      nursery_rows(doc)
    end
  rescue OpenURI::HTTPError
    puts "FETCH,#{prvn},#{offset},ERROR"
    attempts_left -= 1
    # Bounded: without this the same failing page would be requested forever.
    return nil if attempts_left <= 0
    sleep BACKOFF_STEP_SECONDS * (TIMEOUT_CNT - attempts_left)
    retry
  rescue Timeout::Error
    attempts_left -= 1
    if attempts_left > 0
      puts "sleep"
      sleep BACKOFF_STEP_SECONDS * (TIMEOUT_CNT - attempts_left)
      retry
    else
      puts "raise"
      raise
    end
  end
end

province = ['11', '26', '27', '28', '29', '30', '36', '41', '42', '43', '44', '45', '46', '47', '48', '49']
#province = ['45']
province.each do |prvn|
  puts "FETCH,#{prvn}"
  offset = 1
  loop do
    rows = fetch_nursery_page(prvn, offset)
    break if rows.nil? # page unreachable: move on to the next province

    # Rows are printed only after the fetch has fully succeeded, so a retry
    # can never emit the same page twice.
    puts "NEXT,#{prvn},#{offset},#{rows.length}"
    rows.each { |code, name| puts "#{code},#{name}" }

    break if rows.empty? # an empty page marks the end of this province
    offset += rows.length
  end
end
puts "END"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Tested with Ruby 1.9.3.