# Ruby script that collects data from the official site and builds a GeoJSON file.
|
require "json"
require "net/http"
require "uri"

require "nokogiri"
|
|
|
# Yahoo! application ID sent as the `appid` query parameter of the geocoder
# request. Either replace the "" default below or export
# YAHOO_APPLICATION_ID in the environment before running the script.
YAHOO_APPLICATION_ID = ENV.fetch("YAHOO_APPLICATION_ID", "").freeze

# XPath for the href of the first page link that follows the "current" page
# marker inside the first wp-pagenavi block; no match means no further page.
NEXT_PAGE_XPATH = "//div[@class='wp-pagenavi'][1]/span[@class='current']/following-sibling::a[contains(@class,'page')][1]/@href".freeze

# XPath for every <article> element of a listing page; each match is handed
# to Sponsor.parse.
SPONSOR_XPATH = "//article".freeze
|
|
|
# An address string together with the coordinates resolved for it.
#
# +value+ holds the address text; +latitude+ and +longitude+ stay nil until
# #geocoding! stores a result.
Address = Struct.new(:value, :latitude, :longitude) do
  # @param value [String, nil] address text; nil is stored as ""
  def initialize(value)
    self.value = value || ""
  end

  # @return [Boolean] true when the address text is empty
  def empty?
    value.empty?
  end

  # Queries the Yahoo! geocoder for this address and stores the coordinates.
  #
  # Nothing is requested when the address is empty or a longitude has already
  # been stored. The response is expected to carry the coordinates as a
  # "longitude,latitude" string under Feature[0].Geometry.Coordinates; when
  # that path is absent (e.g. an empty Feature list) the method returns nil
  # instead of raising.
  #
  # @return [Array(Float, Float), nil] [latitude, longitude] on success,
  #   nil when skipped or when the response has no coordinates
  def geocoding!
    return if empty? || !longitude.nil?

    response = JSON.parse(Net::HTTP.get(geocoder_uri))
    coordinates = response.dig("Feature", 0, "Geometry", "Coordinates")
    return unless coordinates

    lng, lat = coordinates.split(",")
    self.longitude = lng.to_f
    self.latitude = lat.to_f
    [latitude, longitude]
  end

  # Builds the geocoder request URI with a form-encoded query string
  # (URI.encode, used previously, no longer exists in Ruby 3).
  def geocoder_uri
    query = URI.encode_www_form(appid: YAHOO_APPLICATION_ID, query: value, output: "json")
    URI.parse("http://geo.search.olp.yahooapis.jp/OpenLocalPlatform/V1/geoCoder?#{query}")
  end
  private :geocoder_uri
end
|
|
|
# One sponsor entry scraped from a listing page.
#
# String fields default to "", +duration+ is an Integer (0 when unknown) and
# +address+ is an Address object.
Sponsor = Struct.new(:name, :grade, :duration, :job_type, :address, :phone_number, :web_address, :prize, :comment) do
  # Builds a Sponsor from one sponsor element.
  #
  # The name comes from the first node under the h3, grade and duration from
  # the h3's span (split on a trailing "<digits>年"), and the remaining six
  # fields from the table cells in document order.
  #
  # @param element [#at, #search] node answering XPath queries
  #   (a Nokogiri element in this script)
  # @return [Sponsor]
  def self.parse(element)
    name = node_text(element.at("./h3//child::node()")).delete(" ")
    # Splitting on a capture group yields [grade, digits]; when the heading
    # has no "<digits>年" suffix the duration is simply missing.
    grade, duration = node_text(element.at("./h3//span")).split(/(\d+)年$/)
    job_type, address, phone_number, web_address, prize, comment =
      element.search("./table/tr/td").map { |cell| cell.text.chomp }

    new(name, grade, duration, job_type, address, phone_number, web_address, prize, comment)
  end

  # Text of +node+, or "" when the XPath lookup found nothing.
  def self.node_text(node)
    node ? node.text : ""
  end
  private_class_method :node_text

  # Every argument may be nil: strings fall back to "", +duration+ to 0 and
  # +address+ to an empty Address. Spaces are removed from the address text.
  def initialize(name, grade, duration, job_type, address, phone_number, web_address, prize, comment)
    self.name = name || ""
    self.grade = grade || ""
    self.duration = duration.to_s.chomp.to_i
    self.job_type = job_type || ""
    self.address = Address.new(address.to_s.delete(" "))
    self.phone_number = phone_number || ""
    self.web_address = web_address || ""
    self.prize = prize || ""
    self.comment = comment || ""
  end

  # @return [Hash] the struct members, with :address flattened to its text
  def to_h
    super().merge(address: address.value)
  end
end
|
|
|
# Downloads a listing page and every page reachable through its "next" link.
#
# Each page is fetched with Net::HTTP.get, forced to UTF-8 and parsed with
# Nokogiri. The next link is looked up with NEXT_PAGE_XPATH and resolved
# against the current page URL, so relative hrefs work as well as absolute
# ones. The script pauses one second before requesting each following page.
#
# @param url [URI] address of the first page to fetch
# @param pages [Array] pages collected so far; not modified
# @return [Array] +pages+ followed by the parsed documents, in fetch order
def get_page(url, pages = [])
  collected = pages.dup
  current = url

  loop do
    puts "スポンサー一覧取得:#{current}"
    body = Net::HTTP.get(current)
    body.force_encoding("UTF-8")
    page = Nokogiri::HTML.parse(body)
    collected << page

    next_link = page.at(NEXT_PAGE_XPATH)
    break unless next_link

    sleep 1
    current = URI.join(current, next_link.to_s)
  end

  collected
end
|
|
|
# Listing pages to scrape; each one is followed through its pagination.
region_urls = [
  "http://www.consadole-sapporo.jp/partner/support/center/",
  "http://www.consadole-sapporo.jp/partner/support/east/",
  "http://www.consadole-sapporo.jp/partner/support/north/",
  "http://www.consadole-sapporo.jp/partner/support/south/",
  "http://www.consadole-sapporo.jp/partner/support/other/"
]

pages = region_urls.flat_map { |e| get_page(URI.parse(e)) }
sponsors = pages.flat_map { |page| page.search(SPONSOR_XPATH) }
parsed_sponsors = sponsors.map { |e| Sponsor.parse(e) }

# Sponsors without an address can never be geocoded; split them off once.
without_address, with_address = parsed_sponsors.partition { |s| s.address.empty? }

# Fetch latitude/longitude, pausing one second after each lookup.
with_address.each do |s|
  puts "緯度経度を取得:#{s.name}"
  s.address.geocoding!
  sleep 1
end

# Report the entries that will be left out of the output.
without_address.each { |s| puts "#{s.name}:住所が登録されていませんでした" }

located, unlocated = with_address.partition { |s| s.address.latitude }
unlocated.each { |s| puts "#{s.name}:住所から緯度経度が取得できませんでした" }

# One GeoJSON point feature per sponsor with coordinates; GeoJSON positions
# are written as [longitude, latitude].
features = located.map do |s|
  {
    type: "Feature",
    properties: s.to_h,
    geometry: {
      type: "Point",
      coordinates: [s.address.longitude, s.address.latitude]
    }
  }
end

geojson = {
  type: "FeatureCollection",
  features: features
}.to_json

File.write("supportship-partner.geojson", geojson)