Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dsisnero/5bb77ab39a2a4a10b3a9b2de30cba134 to your computer and use it in GitHub Desktop.
Save dsisnero/5bb77ab39a2a4a10b3a9b2de30cba134 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# encoding: utf-8
# frozen_string_literal: true
require 'pry'
require 'json'
require 'rest-client'
# URL that SPARQL queries are sent to (see wikidata_data).
WIKIDATA_SPARQL_URL = 'https://query.wikidata.org/sparql'
# Wikidata property IDs. P_POSITION, P_TERM, P_PARTY and P_CONSTITUENCY are
# used as column keys when the output statements are built; P_START_DATE,
# P_END_DATE and P_ELECTED_IN are not referenced in the visible part of this
# script.
# NOTE(review): the meaning of each ID is taken from the constant name only —
# confirm against wikidata.org before relying on it.
P_POSITION = 'P39'
P_START_DATE = 'P580'
P_END_DATE = 'P582'
P_CONSTITUENCY = 'P768'
P_PARTY = 'P4100'
P_ELECTED_IN = 'P2715'
P_TERM = 'P2937'
# Column key paired with the quoted Wikipedia URL in every output statement.
# NOTE(review): presumably the "S" prefix marks a source/reference for P854 in
# the target import format — confirm.
SOURCE = 'S854'
# Wraps one value hash (symbol keys :type and :value) and exposes a
# simplified view of it.
class WikidataValue
  # @param data [Hash] hash with :type and :value entries
  def initialize(data)
    @data = data
  end

  # @return [Object] whatever is stored under :type
  def type
    @data[:type]
  end

  # @return [Boolean] true when the value is a URI under the Wikidata
  #   entity namespace
  def wikidata_link?
    return false unless type == 'uri'

    raw_value.start_with?('http://www.wikidata.org/entity')
  end

  # @return [String] the last path segment for Wikidata entity links,
  #   otherwise the stored value unchanged
  def value
    wikidata_link? ? raw_value.split('/').last : raw_value
  end

  private

  # The untouched :value entry.
  def raw_value
    @data[:value]
  end
end
# Runs a SPARQL query against WIKIDATA_SPARQL_URL and wraps every cell of
# every result row in a WikidataValue.
#
# @param query [String] SPARQL query text
# @return [Array<Hash>] one hash per result binding, symbol keys mapped to
#   WikidataValue instances
# @raise [RuntimeError] when RestClient reports a failure; the message
#   contains the query and the underlying error message
def wikidata_data(query)
  response = RestClient.get(WIKIDATA_SPARQL_URL, params: { query: query, format: 'json' })
  bindings = JSON.parse(response, symbolize_names: true)[:results][:bindings]

  bindings.map do |row|
    row.each_with_object({}) do |(name, cell), wrapped|
      wrapped[name] = WikidataValue.new(cell)
    end
  end
rescue RestClient::Exception => e
  raise "Wikidata query #{query} failed: #{e.message}"
end
# Downloads every row of a morph.io scraper's `data` table as parsed JSON.
#
# @param scraper [String] scraper path inserted into the API URL
#   (e.g. "owner/scraper-name")
# @return [Object] the parsed JSON response with symbolized keys
#   (presumably an Array of row hashes — the caller reads r[:id] from each)
# @raise [KeyError] when the MORPH_API_KEY environment variable is not set
# @note RestClient errors are not rescued here and propagate to the caller.
def morph_data(scraper)
  # Fail before touching the network: a missing key would otherwise be sent
  # as an empty parameter and surface only as an opaque HTTP error.
  morph_api_key = ENV.fetch('MORPH_API_KEY')
  morph_api_url = "https://api.morph.io/#{scraper}/data.json"

  result = RestClient.get(
    morph_api_url,
    params: { key: morph_api_key, query: 'SELECT * FROM data' }
  )
  JSON.parse(result, symbolize_names: true)
end
# SPARQL template with two `%s` placeholders, filled via String#% with the
# position item ID (first) and the term item ID (second). It selects every
# item that has a P39 statement for that position qualified with that P2937
# term, together with the statement's optional P768 (constituency) and P4100
# (party) qualifiers and the item's English label, ordered by label.
query = <<~SPARQL
SELECT DISTINCT ?item ?itemLabel ?constituency ?party WHERE {
?item p:P39 ?mem .
?mem ps:P39 wd:%s ; pq:P2937 wd:%s .
OPTIONAL { ?mem pq:P768 ?constituency }
OPTIONAL { ?mem pq:P4100 ?party }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY ?itemLabel
SPARQL
#-----------------------------------------------------------------------
# Wikidata item IDs for the position and the term. Both are substituted into
# the SPARQL template and written into every generated statement.
# NOTE(review): presumably "member of the Dáil" and the 32nd Dáil, going by
# the source URL used below — confirm on wikidata.org.
MEMBERSHIP = 'Q654291'
TERM = 'Q28976095'

# Item ID => { party:, constituency: } for every item Wikidata already lists
# with this position and term. The qualifiers are optional in the query, so
# either value may be nil.
existing = wikidata_data(query % [MEMBERSHIP, TERM]).map do |r|
  [r[:item].value, { party: r[:party]&.value, constituency: r[:constituency]&.value }]
end.to_h

incoming = morph_data('everypolitician-scrapers/ireland-dail-members-wikipedia')

# A scraped row without an id has no subject to attach a statement to, so it
# is reported on stderr (keeping stdout clean) instead of being emitted as a
# line that starts with an empty column.
identified, unidentified = incoming.partition { |r| !r[:id].to_s.empty? }
warn "Skipping #{unidentified.size} scraped row(s) without an id" unless unidentified.empty?

# One tab-separated line per member not yet on Wikidata:
#   <item id> <property> <value> [<property> <value> ...]
# NOTE(review): this looks like QuickStatements input — confirm.
statements = identified.reject { |r| existing.key?(r[:id]) }.map do |r|
  data = {
    P_POSITION => MEMBERSHIP,
    P_TERM => TERM,
    P_PARTY => r[:party_wikidata],
    P_CONSTITUENCY => r[:constituency_wikidata],
    SOURCE => '"https://en.wikipedia.org/wiki/Members_of_the_32nd_D%C3%A1il"',
  }
  # Drop property/value pairs whose value is missing; joining a nil would
  # leave a property followed by an empty column.
  present = data.reject { |_property, value| value.to_s.empty? }
  # Array#join flattens the nested [property, value] pairs.
  [r[:id], present.to_a].join("\t")
end

puts statements
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment