-
-
Save thatblue/0d662f09e047e0863c1ad02ecd8f84bd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'net/http'
require 'uri'
require 'json'
require 'nokogiri'

# Lists the distinct countries named in the event headings of events.html.
#
# Each heading is expected to look like "City", "City, Country" or
# "City, Region, Country". The country part is normalized with a few
# hand-written rules and then looked up through the REST Countries
# name-search endpoint so that differently spelled names collapse to one
# canonical name.

# Base URL of the REST Countries "search by name" endpoint.
# NOTE(review): the restcountries.eu host may no longer be served — confirm
# the endpoint is still reachable before relying on the output.
API_URL = "https://restcountries.eu/rest/v2/name/"

# US state and city names that appear in the country position of a heading.
US_PLACES = ["Utah", "Washington", "NC", "Louisiana", "Fairfax"].freeze

# Cities, regions and abbreviations mapped to the country they belong to.
COUNTRY_ALIASES = {
  "Nairobi" => "Kenya",             # capital of Kenya
  "Belgrade" => "Serbia",           # capital of Serbia
  "Urcuquí" => "Ecuador",           # canton in Ecuador
  "Bogotá" => "Colombia",           # city in Colombia
  "Scotland" => "United Kingdom",   # part of the United Kingdom
  "UK" => "United Kingdom"          # abbreviation
}.freeze

# Countries whose identity is obvious even though the name search does not
# narrow down to exactly one result.
UNAMBIGUOUS_NAMES = ["China", "India", "Ireland"].freeze

# Splits a heading on commas and returns [city, country], both stripped.
# The country is the last of up to three fields (the third field when there
# are more). Returns nil when the heading contains no fields at all.
def city_and_country(heading)
  parts = heading.split(",").map(&:strip)
  return nil if parts.empty?

  [parts.first, parts[[parts.size, 3].min - 1]]
end

# Applies the hand-written normalization rules to a raw country field.
def normalize_country(city, country)
  # Atlanta is in the US state of Georgia, not the country Georgia.
  return "USA" if country == "Georgia" && city == "Atlanta"
  # Washington, DC.
  return "USA" if country == "DC" && city == "Washington"
  return "USA" if US_PLACES.include?(country)

  COUNTRY_ALIASES.fetch(country, country)
end

# Queries the name-search endpoint for +term+ and returns the parsed list of
# matches. Anything other than a JSON array (for example a JSON error object)
# is treated as "no matches".
def search_countries(term)
  # URI.encode was removed in Ruby 3.0. Percent-encode the path segment
  # explicitly; form encoding turns spaces into "+", which is not valid for a
  # path, so map those to "%20" (a literal "+" is already encoded as "%2B").
  segment = URI.encode_www_form_component(term).gsub("+", "%20")
  parsed = JSON.parse(Net::HTTP.get(URI.parse(API_URL + segment)))
  parsed.is_a?(Array) ? parsed : []
end

# Resolves a normalized country name to its canonical name, or nil when it
# cannot be determined.
def resolve_country(country)
  words = country.split(" ")
  if words.size > 1
    # For multi-word names, first try the second word on its own and accept
    # it when it identifies exactly one country.
    matches = search_countries(words[1])
    return matches[0]["name"] if matches.size == 1
  end

  matches = search_countries(country)
  return matches[0]["name"] if matches.size == 1
  return country if UNAMBIGUOUS_NAMES.include?(country)

  nil
end

# The block form closes the file handle once the document has been parsed.
doc = File.open('events.html') { |file| Nokogiri::HTML(file) }

countries = []
not_found_countries = []
resolved = {} # normalized country name => canonical name (nil if unknown)

doc.xpath('//a[@class="event"]/div/h3').each do |heading|
  city, raw_country = city_and_country(heading.inner_text)
  # Skip headings with no usable country text instead of crashing on nil.
  next if raw_country.nil? || raw_country.empty?

  country = normalize_country(city, raw_country)

  # Look each distinct country up only once; pause before hitting the API.
  unless resolved.key?(country)
    sleep 1
    resolved[country] = resolve_country(country)
  end

  canonical = resolved[country]
  if canonical
    countries << canonical
  else
    # Could not tell which country this is.
    not_found_countries << country
  end
end

p countries.uniq.sort
p countries.uniq.size
p not_found_countries
=begin
Output of a run on 2019-05-01:
["Argentina", "Australia", "Austria", "Belgium", "Brazil", "Bulgaria", "Canada", "Chile", "China", "Colombia", "Croatia", "Czech Republic", "Denmark", "Ecuador", "Egypt", "Estonia", "Finland", "France", "Germany", "Greece", "Hong Kong", "India", "Ireland", "Israel", "Italy", "Japan", "Jordan", "Kenya", "Latvia", "Lithuania", "Luxembourg", "Macedonia (the former Yugoslav Republic of)", "Malaysia", "Moldova (Republic of)", "Mozambique", "Netherlands", "New Zealand", "Nigeria", "Norway", "Peru", "Philippines", "Poland", "Portugal", "Romania", "Serbia", "Singapore", "Slovakia", "Slovenia", "South Africa", "Spain", "Sweden", "Switzerland", "Taiwan", "Tunisia", "Turkey", "Uganda", "Ukraine", "United Kingdom of Great Britain and Northern Ireland", "United States of America", "Uruguay", "Viet Nam"]
61
[]
=end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment