Skip to content

Instantly share code, notes, and snippets.

@wagurano
Created July 24, 2013 23:46
Show Gist options
  • Save wagurano/6075661 to your computer and use it in GitHub Desktop.
Save wagurano/6075661 to your computer and use it in GitHub Desktop.
전국 어린이집 목록(코드, 이름) 출력하는 루비 코드
#encoding: utf-8
require 'net/http'
require 'nokogiri'
require 'open-uri'
require 'timeout'
# Prints "code,name" rows for every nursery listed on m.childcare.go.kr,
# walking the paginated mobile listing one province at a time.
#
# Progress lines ("FETCH,...", "NEXT,...", "sleep", "raise", "END") are
# interleaved with the data rows on stdout.

# Maximum number of attempts for a single page before the error is re-raised.
TIMEOUT_CNT = 42
# Seconds allowed for one download + parse attempt.
FETCH_TIMEOUT_SEC = 5
# Back-off step: the n-th failed attempt sleeps n * BACKOFF_STEP_SEC seconds.
BACKOFF_STEP_SEC = 0.42

# Listing markup of interest: <ul class="nursry_search_list">
# Example request:
#http://m.childcare.go.kr/nursery/mAllNurserySlPL.jsp?programId=null&flag=NSSlPL&offset=10000&ctprvn=11000&signgu=&dong=&road=&crtype=&crspec=&crcert=&crpub=&crname=&x=36&y=14

# Two-digit prefixes sent as the `ctprvn` parameter (suffixed with "000").
# NOTE(review): presumably province/metropolitan-city codes - confirm.
PROVINCE_CODES = %w[11 26 27 28 29 30 36 41 42 43 44 45 46 47 48 49].freeze

# Builds the listing URL for one province prefix and result offset.
def nursery_list_url(prvn, offset)
  "http://m.childcare.go.kr/nursery/mAllNurserySlPL.jsp?programId=null&flag=NSSlPL&offset=#{offset}&ctprvn=#{prvn}000&signgu=&dong=&road=&crtype=&crspec=&crcert=&crpub=&crname="
end

# Extracts [code, name] pairs from a parsed listing page.
#
# Codes are the single-quoted text found in each link's href; names are the
# <strong> text of the highlighted list items. A page without the result list
# yields an empty array, which the caller treats as "no more results".
def parse_nurseries(doc)
  list = doc.at_css("ul.nursry_search_list")
  return [] if list.nil?

  hrefs = list.css("a").map { |link| link['href'] }.compact
  codes = hrefs.flat_map { |href| href.scan(/'(.*)'/).map { |captures| captures[0] } }
  names = list.css("li[@class = 'first orange']/strong").map { |node| node.content }
  # zip pads missing names with nil, which prints as an empty field.
  codes.zip(names)
end

# Downloads and parses one listing page, returning its [code, name] pairs.
#
# Timeouts and HTTP errors share one bounded retry budget (TIMEOUT_CNT) with a
# linearly growing sleep between attempts; once the budget is spent the last
# error is re-raised. Other exceptions propagate immediately.
def fetch_nurseries(prvn, offset)
  retries = TIMEOUT_CNT
  begin
    Timeout.timeout(FETCH_TIMEOUT_SEC) do
      puts "FETCH,#{prvn},#{offset}"
      # URI#open (from open-uri) works on old and new Rubies, and the block
      # form closes the response handle when parsing is done.
      URI.parse(nursery_list_url(prvn, offset)).open do |io|
        parse_nurseries(Nokogiri::HTML(io))
      end
    end
  rescue OpenURI::HTTPError, Timeout::Error => e
    puts "FETCH,#{prvn},#{offset},ERROR" if e.is_a?(OpenURI::HTTPError)
    retries -= 1
    if retries > 0
      puts "sleep"
      sleep BACKOFF_STEP_SEC * (TIMEOUT_CNT - retries)
      retry
    else
      puts "raise"
      raise
    end
  end
end

PROVINCE_CODES.each do |prvn|
  puts "FETCH,#{prvn}"
  offset = 1
  loop do
    nurseries = fetch_nurseries(prvn, offset)
    puts "NEXT,#{prvn},#{offset},#{nurseries.length}"
    # An empty page marks the end of this province's listing.
    break if nurseries.empty?

    nurseries.each { |code, name| puts "#{code},#{name}" }
    # The next page starts right after the rows just consumed.
    offset += nurseries.length
  end
end
puts "END"
@wagurano
Copy link
Author

Tested with Ruby 1.9.3.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment