Skip to content

Instantly share code, notes, and snippets.

@choipd
Created October 5, 2012 00:03
Show Gist options
  • Save choipd/3837240 to your computer and use it in GitHub Desktop.
Save choipd/3837240 to your computer and use it in GitHub Desktop.
행안부의 착한가격업소 crawling 소스와 GoodShop model, 그리고 리버스 지오코딩 적용한 것
# encoding: UTF-8
require 'open-uri'
namespace :data do
# Crawls the shop detail pages on mulga.go.kr, issuing one request per
# bestStoreSeq in 1..7133, and saves every page that carries a title as a
# GoodShop document.
desc "crawling data"
task :crawl => :environment do
  (1..7133).each do |n|
    begin
      uri = "http://www.mulga.go.kr/price/bestStore/regionInfo.do?bestStoreSeq=#{n}"
      doc = Nokogiri::HTML(open(uri))
    rescue OpenURI::HTTPError => the_error
      # An HTTP error for one sequence number is reported and the crawl moves on.
      puts "#{n}: [#{the_error.io.status[0]}]"
    else
      # Query the title once; an empty title means the page holds no shop.
      title = doc.xpath("//div[attribute::id='content']/h4").text
      next if title.empty?

      # Concatenated text of every element that follows the <th> labelled +label+.
      cell_text = lambda do |label|
        doc.xpath("//th[text()='#{label}']/following-sibling::*").text
      end

      shop = GoodShop.new
      shop.crawl_no = n
      shop.title = title
      shop.address = cell_text.call('주소')
      shop.phone = cell_text.call('전화번호')
      shop.business_type = cell_text.call('업종')

      # The item cells are consumed as a flat [key, value, key, value, ...]
      # list. Hash[*list] raises ArgumentError when the list has odd length.
      item_cells = doc.xpath("//th[text()='주요품목']/following-sibling::*").map(&:text)
      shop.items = Hash[*item_cells]

      shop.remark = doc.xpath("//h5[text()='업소자랑거리']/following-sibling::*").text
      shop.save!
      puts "#{n} done!"
    end
  end
end
# Daum Open API key used by call_reverse_geocoding. It is a constant because a
# local variable assigned at this level is not visible inside a `def` body.
DAUM_API_KEY = "당신의 다음 Open API 키"

# Looks up coordinates for +address+ through the Daum addr2coord API.
#
# When the response carries no <lng> element the lookup is retried with a
# shortened address. Returns the parsed Nokogiri XML document on success, or
# nil when the address is blank, the HTTP request fails, or the address
# cannot be shortened any further.
def call_reverse_geocoding(address)
  return nil if address.to_s.strip.empty?

  # Escape the address as a query-string value so characters such as '&' or
  # '#' cannot break the request URL.
  query = URI.encode_www_form_component(address)
  uri = "http://apis.daum.net/local/geo/addr2coord?apikey=#{DAUM_API_KEY}&output=xml&q=#{query}"
  doc = Nokogiri::XML(open(uri))
  return doc unless doc.xpath("//lng[1]").text.empty?

  puts "#{address} failed!"
  # NOTE(review): this pattern drops everything from the first whitespace on,
  # i.e. only the first word survives — confirm that trimming a single trailing
  # component per retry was not what was intended.
  shortened = address.gsub(/\s.*$/, "")
  # Stop once shortening no longer changes the address; otherwise the retry
  # would repeat the identical request forever.
  return nil if shortened.empty? || shortened == address

  call_reverse_geocoding(shortened)
rescue OpenURI::HTTPError => the_error
  puts "[#{the_error.io.status[0]}]"
  nil
end
# Stores coordinates on every GoodShop that is not yet flagged geo_active,
# using the document returned by call_reverse_geocoding for the shop's address.
desc "reverse geocoding"
task :reverse_geocoding => :environment do
  GoodShop.all.each_with_index do |shop, i|
    next if shop.geo_active

    doc = call_reverse_geocoding(shop.address)
    # call_reverse_geocoding can come back with nil (e.g. after an HTTP
    # error). Leave the shop un-flagged so a later run picks it up again
    # instead of aborting the whole task on doc.xpath.
    if doc.nil?
      puts "#{i}: #{shop.address} skipped!"
      next
    end

    shop.location = {:lng => doc.xpath("//lng[1]").text, :lat => doc.xpath("//lat[1]").text}
    shop.geo_active = true
    shop.save!
    puts "#{i}: #{shop.address} done!"
  end
end
end
# Gem declarations (Gemfile-style `gem` calls).
# NOTE(review): httparty and geocoder are not referenced by the rake tasks or
# the GoodShop model shown alongside — confirm they are still needed.
gem 'httparty'
# HTML/XML parsing (Nokogiri::HTML / Nokogiri::XML in the rake tasks).
gem 'nokogiri'
# Supplies Mongoid::Document and Mongoid::Timestamps included by GoodShop.
gem 'mongoid', '~> 3.0.0'
gem 'geocoder'
# Supplies Mongoid::Geospatial included by GoodShop.
gem 'mongoid_geospatial'
# Mongoid document for one shop collected by the data:crawl rake task and
# later given coordinates by data:reverse_geocoding.
class GoodShop
  include Mongoid::Document
  include Mongoid::Timestamps
  include Mongoid::Geospatial

  # --- Values written by the crawl task ---

  # Sequence number (bestStoreSeq) of the page the shop was read from.
  field :crawl_no, type: Integer
  field :address, type: String
  field :title, type: String
  field :phone, type: String
  field :business_type, type: String
  # Key/value pairs built from the item cells of the page.
  field :items, type: Hash
  field :remark, type: String

  # --- Values written by the geocoding task ---

  # Assigned from a {:lng, :lat} hash.
  field :location, type: Point, spatial: true
  # Set to true once location has been stored; flagged shops are skipped.
  field :geo_active, type: Boolean

  spatial_index :location
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment