Skip to content

Instantly share code, notes, and snippets.

@wagurano
Created December 10, 2013 02:17
Show Gist options
  • Save wagurano/7884787 to your computer and use it in GitHub Desktop.
Save wagurano/7884787 to your computer and use it in GitHub Desktop.
건강보험심사평가원 병원 위치 등 일반현황 가져오기
#encoding: utf-8
require 'net/http'
require 'nokogiri'
require 'open-uri'
require 'timeout'
# Maximum number of attempts for one page fetch: the retry counters below start
# at this value, lose one per Timeout::Error, and re-raise once they reach zero.
TIMEOUT_CNT = 42
# Fetches the m.hira.or.kr detail page of one hospital and prints what it finds
# to stdout as comma-separated lines.
#
# Output:
# * one "code,label,value" line per label, printed from the last label to the
#   first. Labels come from every <h3> (labelled "병원이름", i.e. "hospital
#   name"), every <dl>/<dt> (text before " :") and every table <th>; values come
#   from the <h3>, <dd> and <td> texts in the same order.
# * one "code,<captures>" line per element child of `.lst_clinic`, where the
#   captures are taken from text shaped like "name (a/b)".
#
# Labels and values are paired by position. Surplus values are dropped and a
# label without a value is printed with an empty value field.
# NOTE(review): pairing is positional across the whole page, so a section with
# unequal label/value counts shifts the following sections -- confirm against
# the real page markup.
#
# @param code [String] hospital code used for the `code=` query parameter
# @return [void]
# @raise [Timeout::Error] when all TIMEOUT_CNT fetch attempts timed out
def print_info(code)
  doc = fetch_info_document("http://m.hira.or.kr/eva/data.do?view=hos&code=#{code}")

  headings = doc.xpath('//h3')
  labels = headings.map { '병원이름' }
  values = headings.map(&:inner_text)

  labels.concat(doc.xpath('//dl/dt').map { |dt| dt.inner_text.scan(/(.*) :/).join })
  values.concat(doc.xpath('//dl/dd').map(&:inner_text))

  labels.concat(doc.xpath('//table/tbody/tr/th').map(&:inner_text))
  values.concat(doc.xpath('//table/tbody/tr/td').map(&:inner_text))

  # zip keeps exactly one entry per label: extra values are ignored and a
  # missing value becomes nil, which interpolates as an empty field instead of
  # raising TypeError the way String#+ does.
  labels.zip(values).reverse_each do |label, value|
    puts "#{code},#{label},#{value}"
  end

  doc.css('.lst_clinic').children.each do |node|
    next unless node.element? # skip text/whitespace nodes between the entries

    puts "#{code},#{node.inner_text.scan(/(.*) \((.*)\/(.*)\)/).join(',')}"
  end
end # def print_info

# Downloads +url+ and returns the parsed HTML document.
#
# Only the download and parse are retried, so a retry can never repeat lines
# that were already printed. Each attempt is limited to 5 seconds; after a
# timeout the method prints "sleep", waits 1.42 seconds and tries again, and
# once TIMEOUT_CNT attempts have failed it prints "raise" and re-raises.
def fetch_info_document(url)
  attempts_left = TIMEOUT_CNT
  begin
    Timeout.timeout(5) do
      # URI#open (from open-uri) replaces Kernel#open, which no longer accepts
      # URLs since Ruby 3.0; the block form closes the response when done.
      URI.parse(url).open { |io| Nokogiri::HTML(io) }
    end
  rescue Timeout::Error
    attempts_left -= 1
    if attempts_left > 0
      puts 'sleep'
      sleep 1.42
      retry
    end
    puts 'raise'
    raise
  end
end
# Walks the m.hira.or.kr hospital list page by page and calls print_info for
# every hospital code found on each page.
#
# Per page it prints:
# * "page,index,code,'list',retries_left" for every code found in an
#   <li><a href="...code=..."> link,
# * "page,index,code,FETCH,-" before each detail fetch,
# * "page,index,code,NOT_FOUND,-" when the detail fetch raises
#   OpenURI::HTTPError (that hospital is skipped).
# After the last page it prints "next_page,count_on_last_page,0_LT_CNT,-,-".
#
# The page +begin_n+ is always fetched, even when begin_n >= thru_n. Crawling
# stops after the first page without codes or once +thru_n+ is reached.
#
# @param begin_n [Integer] first list page to fetch (inclusive)
# @param thru_n [Integer] list page to stop at (exclusive)
# @return [void]
# @raise [Timeout::Error] when a list page (or, via print_info, a detail page)
#   timed out on all TIMEOUT_CNT attempts
def get_info(begin_n, thru_n) # [ )
  n = begin_n
  cnt = 0
  loop do
    retries = TIMEOUT_CNT
    # Only the download and parse sit inside the retried section, so a retry
    # cannot collect the same codes twice.
    doc = begin
      Timeout.timeout(5) do
        # URI#open (from open-uri) replaces Kernel#open, which no longer accepts
        # URLs since Ruby 3.0; the block form closes the response when done.
        URI.parse("http://m.hira.or.kr/eva/list.do?cateID=&p=#{n}").open { |io| Nokogiri::HTML(io) }
      end
    rescue Timeout::Error
      retries -= 1
      if retries > 0
        puts 'sleep'
        sleep 1.42
        retry
      end
      puts 'raise'
      raise
    end

    code_list = doc.xpath('//li/a').each_with_object([]) do |link, codes|
      # to_s guards against anchors that carry no href attribute at all.
      captures = link['href'].to_s.scan(/code=(.*)/)
      next if captures.empty?

      code = captures.join
      codes << code
      puts "#{n},#{codes.length},#{code},'list',#{retries}"
    end
    cnt = code_list.length

    code_list.each.with_index(1) do |cl, position|
      # The per-code position is printed here so that FETCH and NOT_FOUND lines
      # for the same hospital carry the same index.
      puts "#{n},#{position},#{cl},FETCH,-"
      begin
        print_info(cl)
      rescue OpenURI::HTTPError
        puts "#{n},#{position},#{cl},NOT_FOUND,-"
      end
    end

    n += 1
    sleep 0.42 # short pause between list pages
    break unless cnt > 0 && n < thru_n
  end
  puts "#{n},#{cnt},0_LT_CNT,-,-"
end # def
# Command-line entry point: with exactly two arguments, crawl the list pages
# from the first argument (inclusive) up to the second (exclusive). Both are
# converted with to_i. With any other argument count nothing is done.
if ARGV.size == 2
  first_page, stop_page = ARGV.map(&:to_i)
  get_info(first_page, stop_page)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment