Created
August 11, 2013 05:30
-
-
Save wagurano/6203520 to your computer and use it in GitHub Desktop.
전국 어린이집 좌표 가져오기
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#encoding: utf-8
require 'net/http'
require 'nokogiri'
require 'open-uri'
require 'timeout'
# Retry budget for one HTTP request: the counter starts here and the error is
# re-raised once it has been decremented to zero.
TIMEOUT_CNT = 42

# Sample nursery codes left by the original author for manual runs:
# cd = '29200000641'
# cd = '11620000061'

# Matches the `position: new daum.maps.LatLng(<a>, <b>)` call in the map page
# and captures its two arguments. The captures stop at the first `,` / `)` so
# that anything following on the same line is not swallowed.
NURSERY_LATLNG_PATTERN = /position: new daum\.maps\.LatLng\(([^,)]*), ([^)]*)\)/

# Fetches the map page for one nursery and prints its coordinates to stdout.
#
# Output lines:
#   GEO,<code>,<a>,<b>          the two LatLng arguments found in the page
#   GEO,<code>,null,null        the page contains no LatLng call
#   PRINT,<code>,ERROR          the HTTP call returned nil
#   ERROR,PRINT,<code>,HTTP     an OpenURI::HTTPError was raised
#   ERROR,PRINT,<code>,EOF      an EOFError was raised (the request is retried)
#
# @param nursery_code [String] value substituted into the `stcode` query parameter
# @return [nil]
# @raise [EOFError, Timeout::Error] once the retry budget (TIMEOUT_CNT) is used up
def print_nursery_geo(nursery_code)
  retries = TIMEOUT_CNT
  path = "/cpis2gi/nursery/NurseryMap.jsp?flag=NA&stcode=#{nursery_code}&ctprvn1=41000&ctprvn2=41000&signgu1=40100&signgu2=40100&dong1=&dong2=&road1=&road2="

  # Explicit begin/end: `retry` must restart only the request, not the
  # `retries = TIMEOUT_CNT` initialisation above.
  begin
    Timeout.timeout(5) do
      body = Net::HTTP.get('www.childcare.go.kr', path)
      if body.nil?
        puts "PRINT,#{nursery_code},ERROR"
      else
        found = NURSERY_LATLNG_PATTERN.match(body)
        if found
          puts "GEO,#{nursery_code},#{found[1]},#{found[2]}"
        else
          # No coordinates in the page: report nulls instead of crashing.
          puts "GEO,#{nursery_code},null,null"
        end
      end
    end
  rescue OpenURI::HTTPError
    puts "ERROR,PRINT,#{nursery_code},HTTP"
  rescue EOFError, Timeout::Error => e
    puts "ERROR,PRINT,#{nursery_code},EOF" if e.is_a?(EOFError)
    retries -= 1
    if retries > 0
      puts "sleep"
      # Linear back-off: 1s after the first failure, 2s after the second, ...
      sleep TIMEOUT_CNT - retries
      retry
    else
      puts "raise"
      raise
    end
  end
end
# Downloads one page of the mobile nursery list and extracts codes and names.
#
# Prints "FETCH,<prvn>,<offset>" before every attempt and
# "NEXT,<prvn>,<offset>,<count>" after a successful parse.
# Timeouts and HTTP errors are retried with a growing delay, at most
# TIMEOUT_CNT attempts in total.
#
# @param prvn [String] prefix substituted (with "000" appended) into the `ctprvn` query parameter
# @param offset [Integer] value substituted into the `offset` query parameter
# @return [Array(Array<String>, Array<String>), nil] `[codes, names]`; both are
#   empty when the page has no result list; nil when HTTP errors persisted
#   through every retry
# @raise [Timeout::Error] when the last allowed attempt timed out
def fetch_nursery_page(prvn, offset)
  url = "http://m.childcare.go.kr/nursery/mAllNurserySlPL.jsp?programId=null&flag=NSSlPL&offset=#{offset}&ctprvn=#{prvn}000&signgu=&dong=&road=&crtype=&crspec=&crcert=&crpub=&crname="
  retries = TIMEOUT_CNT

  # Explicit begin/end so that `retry` does not reset the retry counter.
  begin
    Timeout.timeout(5) do
      puts "FETCH,#{prvn},#{offset}"
      # URI.open: Kernel#open no longer opens URLs since Ruby 3.0.
      doc = Nokogiri::HTML(URI.open(url))
      list = doc.at_css("ul.nursry_search_list")
      codes = []
      names = []
      if list # a page without the result list counts as an empty page
        # Each code is the single-quoted text inside a link's href.
        codes = list.css("a").flat_map { |link| link['href'].to_s.scan(/'(.*)'/).map(&:first) }
        names = list.css("li[@class = 'first orange']/strong").map(&:content)
      end
      puts "NEXT,#{prvn},#{offset},#{codes.length}"
      [codes, names]
    end
  rescue OpenURI::HTTPError, Timeout::Error => e
    http_error = e.is_a?(OpenURI::HTTPError)
    puts "FETCH,#{prvn},#{offset},ERROR" if http_error
    retries -= 1
    if retries > 0
      puts "sleep"
      sleep 0.42 * (TIMEOUT_CNT - retries)
      retry
    elsif http_error
      # Best effort: the caller moves on instead of re-requesting the same
      # failing page forever.
      nil
    else
      puts "raise"
      raise
    end
  end
end

# Prefixes iterated by the crawl; each becomes "<prefix>000" in the list URL.
province = %w[11 26 27 28 29 30 36 41 42 43 44 45 46 47 48 49]

province.each do |prvn|
  puts "FETCH,#{prvn}"
  # Paging starts at 1. The original carried commented-out alternative start
  # values (5191 with prvn=41, 6710) — presumably resume points for
  # interrupted runs.
  offset = 1
  loop do
    page = fetch_nursery_page(prvn, offset)
    break if page.nil? # persistent HTTP failure: give up on this prefix

    codes, names = page
    # Names are paired with codes by position; a missing name prints as empty.
    codes.zip(names) do |code, name|
      puts "PRINT,#{code}"
      puts "INFO,#{code},이름,#{name}" # the literal field label is Korean for "name"
      print_nursery_geo(code)
    end

    break if codes.empty? # an empty page ends the pagination
    offset += codes.length
  end
end
puts "END"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment