Skip to content

Instantly share code, notes, and snippets.

@wagurano
Created December 10, 2013 02:10
Show Gist options
  • Save wagurano/7884729 to your computer and use it in GitHub Desktop.
Save wagurano/7884729 to your computer and use it in GitHub Desktop.
건강보험심사평가원 병원 평가 등급 가져오기
#encoding: utf-8
require 'net/http'
require 'nokogiri'
require 'open-uri'
require 'timeout'
# Walks the paginated listing at m.hira.or.kr/eva/list.do, collects every
# `code=` value linked from a page, then requests data.do for each code and
# prints one CSV-like line per <li> that carries an `alt="..."` attribute.
# Paging stops after the first listing page that yields no codes.
#
# Output line shapes (all written to stdout):
#   page,index,code,'list',retries_left
#   page,codes_on_page,code,FETCH,-
#   page,index,code,<dt text>,<alt text>
#   page,index,code,NOT_FOUND,-
#   page,codes_on_page,0_LT_CNT,-,-

# Number of timed-out attempts tolerated before the Timeout::Error is re-raised.
TIMEOUT_CNT = 42

# Downloads +url+ and parses it with Nokogiri.
#
# URI.open is used instead of Kernel#open because open-uri stopped patching
# Kernel#open for URLs in Ruby 3.0. The block form closes the underlying IO
# once the document has been parsed.
#
# @param url [String] absolute http URL
# @return [Nokogiri::HTML::Document]
# @raise [OpenURI::HTTPError] on a non-success HTTP status
def fetch_html(url)
  URI.open(url) { |io| Nokogiri::HTML(io) }
end

n = 1 # 500 # 1614
cnt = 0 # declared outside the loop so the final summary line can read it

loop do
  retries = TIMEOUT_CNT
  code_list = []

  begin
    # The list is built inside the timed section and assigned only on
    # success, so a timeout + retry can never leave duplicate codes behind.
    code_list = Timeout.timeout(5) do
      doc = fetch_html("http://m.hira.or.kr/eva/list.do?cateID=&p=#{n}")
      found = []
      doc.xpath('//li/a').each do |a|
        # to_s guards against anchors that have no href attribute.
        matches = a['href'].to_s.scan(/code=(.*)/)
        found.push(matches.join) unless matches.empty?
      end
      found
    end
  rescue Timeout::Error
    retries -= 1
    if retries > 0
      puts "sleep"
      sleep 0.42
      retry
    else
      puts "raise"
      raise
    end
  end

  cnt = code_list.size
  code_list.each.with_index(1) do |code, idx|
    puts "#{n},#{idx},#{code},'list',#{retries}"
  end

  # The retry budget for detail pages is shared by all codes of this page.
  retries = TIMEOUT_CNT
  # with_index supplies the item number, so a retry does not advance it.
  code_list.each.with_index(1) do |cl, cl_cnt|
    begin
      puts "#{n},#{cnt},#{cl},FETCH,-"
      rows = Timeout.timeout(5) do
        doc = fetch_html("http://m.hira.or.kr/eva/data.do?code=#{cl}")
        grades = []
        doc.xpath('//li').each do |li|
          markup = li.to_s
          title = markup.scan(%r{<dt>(.*)</dt>}).join
          alts = markup.scan(/alt="(.*)"/)
          grades.push([title, alts.join]) unless alts.empty?
        end
        grades
      end
      rows.each { |title, grade| puts "#{n},#{cl_cnt},#{cl},#{title},#{grade}" }
    rescue OpenURI::HTTPError
      # Missing detail page: report it and move on to the next code.
      puts "#{n},#{cl_cnt},#{cl},NOT_FOUND,-"
    rescue Timeout::Error
      retries -= 1
      if retries > 0
        puts "sleep h"
        sleep 0.42
        retry
      else
        puts "raise h"
        raise
      end
    end
  end

  n += 1
  break unless cnt > 0
end

puts "#{n},#{cnt},0_LT_CNT,-,-"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment