Skip to content

Instantly share code, notes, and snippets.

@wagurano
Created August 11, 2013 05:30
Show Gist options
  • Save wagurano/6203520 to your computer and use it in GitHub Desktop.
Save wagurano/6203520 to your computer and use it in GitHub Desktop.
전국 어린이집 좌표 가져오기
#encoding: utf-8
require 'net/http'
require 'nokogiri'
require 'open-uri'
require 'timeout'
# Number of attempts allowed per request: each retry loop starts its counter at
# this value, decrements it on every failure, and re-raises once it reaches 0.
# The back-off sleep before each retry is also derived from it
# (TIMEOUT_CNT - remaining retries, so the pause grows with every failure).
TIMEOUT_CNT = 42
#cd = '29200000641'
#cd = '11620000061'
# Fetches the map page of one nursery from www.childcare.go.kr and prints the
# coordinates embedded in it to stdout as a comma-separated line.
#
# Output lines:
#   GEO,<code>,<a>,<b>        <a> and <b> are the two arguments of the first
#                             "position: new daum.maps.LatLng(a, b)" in the page
#   GEO,<code>,null,null      the page contains no such LatLng expression
#   PRINT,<code>,ERROR        the HTTP call returned nil
#   ERROR,PRINT,<code>,HTTP   OpenURI::HTTPError was raised (not retried)
#   ERROR,PRINT,<code>,EOF    EOFError was raised (the request is retried)
#
# Each attempt is limited to 5 seconds. EOFError and Timeout::Error are retried
# with a growing sleep (1s, 2s, 3s, ...) until TIMEOUT_CNT attempts have failed,
# after which the last exception is re-raised.
#
# @param nursery_code [String] nursery identifier, sent as the `stcode` query
#   parameter
# @return [nil]
# @raise [EOFError, Timeout::Error] when every attempt has failed
def print_nursery_geo(nursery_code)
  # Must live outside `begin` so that `retry` does not reset the budget.
  retries = TIMEOUT_CNT
  req = "/cpis2gi/nursery/NurseryMap.jsp?flag=NA&stcode=#{nursery_code}&ctprvn1=41000&ctprvn2=41000&signgu1=40100&signgu2=40100&dong1=&dong2=&road1=&road2="
  begin
    Timeout.timeout(5) do
      doc = Net::HTTP.get('www.childcare.go.kr', req)
      if doc.nil?
        puts "PRINT,#{nursery_code},ERROR"
      else
        # String#match yields nil when nothing matches, so the "no coordinates"
        # case is detected reliably (String#scan returns [] rather than nil).
        geo = doc.match(/position: new daum.maps.LatLng\((.*), (.*)\)/)
        if geo
          puts "GEO,#{nursery_code},#{geo[1]},#{geo[2]}"
        else
          puts "GEO,#{nursery_code},null,null"
        end
      end
    end
  rescue OpenURI::HTTPError
    # NOTE(review): Net::HTTP.get does not appear to raise this class; the
    # handler is kept so the method's behavior stays backward-compatible.
    puts "ERROR,PRINT,#{nursery_code},HTTP"
  rescue EOFError, Timeout::Error => e
    # Only the EOF case is logged; timeouts are retried silently.
    puts "ERROR,PRINT,#{nursery_code},EOF" if e.is_a?(EOFError)
    retries -= 1
    if retries > 0
      puts "sleep"
      sleep TIMEOUT_CNT - retries
      retry
    else
      puts "raise"
      raise
    end
  end
end
# Crawl driver: for every province code, page through the mobile nursery
# listing on m.childcare.go.kr, print each nursery's code and name, and call
# print_nursery_geo for its coordinates. Progress is reported on stdout with
# FETCH / NEXT / PRINT / INFO lines; "END" is printed when all provinces are done.
province = ['11', '26', '27', '28', '29', '30', '36', '41', '42', '43', '44', '45', '46', '47', '48', '49']
province.each do |prvn|
  puts "FETCH,#{prvn}"
  # Listing offset to request; starts at 1 and advances by the number of
  # entries returned by each page.
  offset = 1
  loop do
    codes = []
    names = []
    url = "http://m.childcare.go.kr/nursery/mAllNurserySlPL.jsp?programId=null&flag=NSSlPL&offset=#{offset}&ctprvn=#{prvn}000&signgu=&dong=&road=&crtype=&crspec=&crcert=&crpub=&crname="
    # Must live outside `begin` so that `retry` does not reset the budget.
    retries = TIMEOUT_CNT
    begin
      Timeout.timeout(5) do
        puts "FETCH,#{prvn},#{offset}"
        # URI.open rather than Kernel#open: since Ruby 3.0 Kernel#open no longer
        # accepts URLs.
        doc = Nokogiri::HTML(URI.open(url))
        # Reset in case a previous attempt timed out half-way through parsing.
        codes = []
        names = []
        list = doc.at_css("ul.nursry_search_list")
        # A page without the result list is treated as an empty page instead of
        # crashing on nil.
        if list
          list.css("a").each do |link|
            # The nursery code is the quoted argument inside the link's href.
            link['href'].to_s.scan(/'(.*)'/) { |x| codes << x[0] }
          end
          list.css("li[@class = 'first orange']/strong").each { |nm| names << nm.content }
        end
        puts "NEXT,#{prvn},#{offset},#{codes.length}"
      end
    rescue OpenURI::HTTPError, Timeout::Error => e
      puts "FETCH,#{prvn},#{offset},ERROR" if e.is_a?(OpenURI::HTTPError)
      # HTTP errors share the bounded retry budget with timeouts; retrying
      # them without a limit would spin forever on a persistent failure.
      retries -= 1
      if retries > 0
        puts "sleep"
        sleep 0.42 * (TIMEOUT_CNT - retries)
        retry
      else
        puts "raise"
        raise
      end
    end
    codes.zip(names).each do |code, name|
      puts "PRINT,#{code}"
      puts "INFO,#{code},이름,#{name}"
      print_nursery_geo(code)
    end
    # An empty page marks the end of this province's listing.
    break if codes.empty?
    offset += codes.length
  end
end
puts "END"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment