-
-
Save dsisnero/5bb77ab39a2a4a10b3a9b2de30cba134 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# encoding: utf-8
# frozen_string_literal: true
require 'pry'
require 'json'
require 'rest-client'
# Endpoint that wikidata_data sends its SPARQL queries to.
WIKIDATA_SPARQL_URL = 'https://query.wikidata.org/sparql'

# Wikidata property IDs. The main statement uses P_POSITION; the others are
# attached to it as qualifiers (see the pq: clauses in the SPARQL query).
P_POSITION = 'P39'
P_START_DATE = 'P580'
P_END_DATE = 'P582'
P_CONSTITUENCY = 'P768'
P_PARTY = 'P4100'
P_ELECTED_IN = 'P2715'
P_TERM = 'P2937'

# Key paired with the quoted reference URL in each generated statement line.
# NOTE(review): presumably the QuickStatements "source" form of P854 — confirm.
SOURCE = 'S854'
# Wraps a single binding value from a SPARQL JSON result: a hash with
# symbol keys :type and :value.
class WikidataValue
  # URIs beginning with this prefix are treated as links to Wikidata entities.
  ENTITY_URI_PREFIX = 'http://www.wikidata.org/entity'.freeze

  # @param data [Hash] binding hash providing :type and :value
  def initialize(data)
    @data = data
  end

  # @return [String] the last path segment of the URI (e.g. "Q42") when the
  #   value links to a Wikidata entity; otherwise the raw value unchanged
  def value
    return raw_value.split('/').last if wikidata_link?

    raw_value
  end

  # @return [String] the binding's :type field (e.g. "uri")
  def type
    @data[:type]
  end

  # @return [Boolean] true when the binding is a URI under the Wikidata
  #   entity prefix
  def wikidata_link?
    type == 'uri' && raw_value.start_with?(ENTITY_URI_PREFIX)
  end

  private

  # The unprocessed :value field of the binding.
  def raw_value
    @data[:value]
  end
end
# Runs a SPARQL query against WIKIDATA_SPARQL_URL and returns its result rows.
#
# @param query [String] complete SPARQL query text
# @return [Array<Hash{Symbol => WikidataValue}>] one hash per result binding,
#   keyed by variable name, with every value wrapped in a WikidataValue
# @raise [RuntimeError] when the HTTP request fails; the message includes the
#   query and the underlying RestClient error message
def wikidata_data(query)
  result = RestClient.get WIKIDATA_SPARQL_URL, params: { query: query, format: 'json' }
  json = JSON.parse(result, symbolize_names: true)
  json[:results][:bindings].map do |binding_row|
    binding_row.map { |name, raw| [name, WikidataValue.new(raw)] }.to_h
  end
rescue RestClient::Exception => e
  raise "Wikidata query #{query} failed: #{e.message}"
end
# Fetches every row of a morph.io scraper's `data` table.
#
# @param scraper [String] scraper path in "owner/name" form, interpolated
#   into the morph.io API URL
# @return [Array, Hash] the parsed JSON response with symbolized keys
# @raise [KeyError] when the MORPH_API_KEY environment variable is not set
# @raise [RuntimeError] when the HTTP request fails
def morph_data(scraper)
  # Fail fast with a clear error instead of sending a request with a nil key.
  morph_api_key = ENV.fetch('MORPH_API_KEY')
  morph_api_url = format('https://api.morph.io/%s/data.json', scraper)

  begin
    result = RestClient.get morph_api_url, params: {
      key: morph_api_key,
      query: 'SELECT * FROM data',
    }
  rescue RestClient::Exception => e
    # Same reporting style as wikidata_data.
    raise "morph.io query for #{scraper} failed: #{e.message}"
  end

  JSON.parse(result, symbolize_names: true)
end
# SPARQL template with two %s placeholders, filled in order with the position
# item ID and the term item ID. It selects every item holding that position
# with that term qualifier, plus the optional constituency and party
# qualifiers, ordered by English label.
query = <<~SPARQL
  SELECT DISTINCT ?item ?itemLabel ?constituency ?party WHERE {
    ?item p:P39 ?mem .
    ?mem ps:P39 wd:%s ; pq:P2937 wd:%s .
    OPTIONAL { ?mem pq:P768 ?constituency }
    OPTIONAL { ?mem pq:P4100 ?party }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
  }
  ORDER BY ?itemLabel
SPARQL
#-----------------------------------------------------------------------
# Item IDs substituted into the SPARQL template and written into each statement.
# NOTE(review): presumably the Dáil member position and the 32nd Dáil term,
# judging by the scraper name and source URL below — confirm on Wikidata.
MEMBERSHIP = 'Q654291'
TERM = 'Q28976095'

# Items that already hold the position for this term, keyed by item ID.
existing = wikidata_data(query % [MEMBERSHIP, TERM]).map do |r|
  [r[:item].value, {
    party: r[:party]&.value,
    constituency: r[:constituency]&.value,
  }]
end.to_h

incoming = morph_data('everypolitician-scrapers/ireland-dail-members-wikipedia')

# One tab-separated line per scraped member not yet present in Wikidata:
# the member's ID followed by alternating property / value columns.
statements = incoming.reject { |r| existing.key?(r[:id]) }.map do |r|
  data = {
    P_POSITION => MEMBERSHIP,
    P_TERM => TERM,
    P_PARTY => r[:party_wikidata],
    P_CONSTITUENCY => r[:constituency_wikidata],
    SOURCE => '"https://en.wikipedia.org/wiki/Members_of_the_32nd_D%C3%A1il"',
  }
  # A nil value would be joined as an empty column, emitting a property with
  # no value; drop unset properties instead.
  present = data.reject { |_, v| v.to_s.empty? }
  # Array#join flattens the nested [property, value] pairs into one row.
  [r[:id], present.to_a].join("\t")
end

puts statements
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment