Skip to content

Instantly share code, notes, and snippets.

@tmtmtmtm
Created May 21, 2018 09:39
Show Gist options
  • Save tmtmtmtm/28de60e167ddd658493d17548d573715 to your computer and use it in GitHub Desktop.
Save tmtmtmtm/28de60e167ddd658493d17548d573715 to your computer and use it in GitHub Desktop.
#!/bin/env ruby
require 'csv'
require 'rest-client'
#---------------------------------------------------------------------------
# Find all memberships in the 2016-20 term that have no P4100 qualifier, and
# add one using the data from the Wikipedia members list page:
# https://ro.wikipedia.org/wiki/Legislatura_2016-2020_(Camera_Deputaților)
# as scraped at
# https://morph.io/everypolitician-scrapers/romania-deputies-2016-wikipedia
#
# The output of this can then be passed to PositionStatements
# https://github.com/everypolitician/position_statements
#---------------------------------------------------------------------------
WIKIDATA_SPARQL_URL = 'https://query.wikidata.org/sparql'
# Runs +query+ against the endpoint in WIKIDATA_SPARQL_URL, requesting CSV.
#
# @param query [String] SPARQL query text, sent as the `query` URL parameter
# @return [CSV::Table] parsed response; column headers are converted to symbols
# @raise [RuntimeError] when RestClient reports a failure; the message carries
#   both the query text and the underlying error message
def sparql(query)
  response = RestClient.get(
    WIKIDATA_SPARQL_URL,
    accept: 'text/csv',
    params: { query: query }
  )
  CSV.parse(response.body, headers: true, header_converters: :symbol)
rescue RestClient::Exception => error
  raise "Wikidata query #{query} failed: #{error.message}"
end
# Selects every P39 statement (?ps) whose position is Q17556530 or a subclass
# of it, qualified with P2937 = Q28726607 (presumably the 2016-20 term --
# confirm), and which has no P4100 qualifier at all.
#
# The filter uses a blank node (`[]`) so that a statement with ANY P4100 value
# is excluded. Comparing against wd:Q28726607 (the P2937 value above) only
# excluded statements whose P4100 was that exact item, so statements that
# already carried a different P4100 value were selected again.
memberships_query = <<SPARQL
SELECT ?item ?ps
{
?item p:P39 ?ps .
?ps ps:P39/wdt:P279* wd:Q17556530 ; pq:P2937 wd:Q28726607 .
FILTER NOT EXISTS { ?ps pq:P4100 [] }
}
SPARQL

# Scraped rows from MORPH.csv, indexed by their :id column (presumably the
# person's Wikidata item id, since it is compared with the id taken from
# ?item below -- confirm against the scraper output).
morph_data = CSV.table('MORPH.csv').group_by { |row| row[:id] }

# Emit one tab-separated line per membership:
#   <item id> P39 <statement id> P4100 <party_id>
sparql(memberships_query).map(&:to_h).each do |row|
  id = row[:item].split('/').last
  matches = morph_data.fetch(id, [])

  # Only act on an unambiguous match: exactly one scraped row for this id.
  unless matches.size == 1
    warn "#{matches.size} rows for #{id}"
    next
  end

  # Skip rows for which the scraper has no party_id value.
  party = matches.first[:party_id]
  next unless party

  puts [id, 'P39', row[:ps].split('/').last, 'P4100', party].join("\t")
end
#!/bin/env ruby
require 'csv'
require 'rest-client'
#---------------------------------------------------------------------------
# Find all memberships in the 2016-20 term that have no P4100 qualifier, and
# add one using the data from the Wikipedia members list page:
# https://ro.wikipedia.org/wiki/Legislatura_2016-2020_(Camera_Deputaților)
# as scraped at
# https://morph.io/everypolitician-scrapers/romania-deputies-2016-wikipedia
#
# The output of this can then be passed to PositionStatements
# https://github.com/everypolitician/position_statements
#---------------------------------------------------------------------------
WIKIDATA_SPARQL_URL = 'https://query.wikidata.org/sparql'
# Runs +query+ against the endpoint in WIKIDATA_SPARQL_URL, requesting CSV.
#
# @param query [String] SPARQL query text, sent as the `query` URL parameter
# @return [CSV::Table] parsed response; column headers are converted to symbols
# @raise [RuntimeError] when RestClient reports a failure; the message carries
#   both the query text and the underlying error message
def sparql(query)
  response = RestClient.get(
    WIKIDATA_SPARQL_URL,
    accept: 'text/csv',
    params: { query: query }
  )
  CSV.parse(response.body, headers: true, header_converters: :symbol)
rescue RestClient::Exception => error
  raise "Wikidata query #{query} failed: #{error.message}"
end
# Selects every P39 statement (?ps) whose position is Q17556530 or a subclass
# of it, qualified with P2937 = Q28726607 (presumably the 2016-20 term --
# confirm), and which has no P4100 qualifier at all.
#
# The filter uses a blank node (`[]`) so that a statement with ANY P4100 value
# is excluded. Comparing against wd:Q28726607 (the P2937 value above) only
# excluded statements whose P4100 was that exact item, so statements that
# already carried a different P4100 value were selected again.
memberships_query = <<SPARQL
SELECT ?item ?ps
{
?item p:P39 ?ps .
?ps ps:P39/wdt:P279* wd:Q17556530 ; pq:P2937 wd:Q28726607 .
FILTER NOT EXISTS { ?ps pq:P4100 [] }
}
SPARQL

# Scraped rows from MORPH.csv, indexed by their :id column (presumably the
# person's Wikidata item id, since it is compared with the id taken from
# ?item below -- confirm against the scraper output).
morph_data = CSV.table('MORPH.csv').group_by { |row| row[:id] }

# Emit one tab-separated line per membership:
#   <item id> P39 <statement id> P4100 <party_id>
sparql(memberships_query).map(&:to_h).each do |row|
  id = row[:item].split('/').last
  matches = morph_data.fetch(id, [])

  # Only act on an unambiguous match: exactly one scraped row for this id.
  unless matches.size == 1
    warn "#{matches.size} rows for #{id}"
    next
  end

  # Skip rows for which the scraper has no party_id value.
  party = matches.first[:party_id]
  next unless party

  puts [id, 'P39', row[:ps].split('/').last, 'P4100', party].join("\t")
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment