Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@connorshea
Last active January 10, 2019 05:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save connorshea/943ce81e8ff21abe6db7fbeaf38c3b69 to your computer and use it in GitHub Desktop.
Save connorshea/943ce81e8ff21abe6db7fbeaf38c3b69 to your computer and use it in GitHub Desktop.
Run with `WIKIDATA_USERNAME=username WIKIDATA_PASSWORD=password ruby pcgw_to_wikidata.rb`. You'll need a CSV named `pcgw_steam_ids.csv`, in the same directory as the script, listing all the PCGW articles and their Steam App IDs.
# gem install sparql
# gem install mediawiki_api-wikidata
#http://www.rubydoc.info/github/ruby-rdf/sparql/frames
require 'sparql/client'
require 'json'
require 'csv'
require 'open-uri'
require 'mediawiki_api'
require "mediawiki_api/wikidata/wikidata_client"
# Builds the SPARQL query used to look up a game by its Steam App ID.
#
# Pass the Steam App ID and it returns the query text that finds any Wikidata
# items whose P1733 value equals that ID (at most 10 results).
#
# steam_app_id - the Steam App ID; converted with to_s, so a String or an
#                Integer both work.
#
# Returns the query as a String.
def query(steam_app_id)
  # The ID lands inside a double-quoted SPARQL literal, so escape the
  # characters that would otherwise terminate or corrupt that literal.
  escaped_id = steam_app_id.to_s.gsub(/[\\"\n\r]/) do |char|
    { '\\' => '\\\\', '"' => '\\"', "\n" => '\\n', "\r" => '\\r' }.fetch(char)
  end

  <<~SPARQL
    SELECT ?item ?itemLabel WHERE {
    ?item wdt:P1733 "#{escaped_id}".
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    LIMIT 10
  SPARQL
end
# Looks up the Wikidata item that carries the given Steam App ID.
#
# Sends the SPARQL text produced by `query` to the Wikidata query endpoint.
#
# app_id - the Steam App ID to search for.
#
# Returns a hash with :url and :title (both Strings) when exactly one item
# matches, or nil when there are zero matches or more than one.
def find_wikidata_item_by_steam_app_id(app_id)
  sparql_client = SPARQL::Client.new("https://query.wikidata.org/sparql", :method => :get)
  solutions = sparql_client.query(query(app_id))

  # No match, or an ambiguous one: there is nothing safe to act on.
  return nil unless solutions.size == 1

  match = solutions.first.to_h
  { url: match[:item].to_s, title: match[:itemLabel].to_s }
end
# Go through the CSV and build one { pcgw_id:, steam_app_id: } hash for each
# PCGW article. The CSV has no header row and is in a format like this:
# Half-Life,70
# Half-Life_2,220
# Half-Life_2:_Deathmatch,320
# Half-Life_2:_Episode_One,380
# Half-Life_2:_Episode_Two,420
# Half-Life_2:_Lost_Coast,340
# Half-Life_Deathmatch:_Source,360
# Half-Life:_Blue_Shift,130
# Half-Life:_Opposing_Force,50
# Half-Life:_Source,280
pcgw_steam_ids = []
csv_path = File.join(File.dirname(__FILE__), 'pcgw_steam_ids.csv')
CSV.foreach(csv_path, skip_blanks: true, headers: false, encoding: 'ISO-8859-1') do |row|
  pcgw_id, steam_app_id = row

  # skip_blanks only drops completely empty lines, so a row can still be
  # missing either column (e.g. ",70" or "Half-Life"). Skip those instead of
  # crashing on nil or looking up an empty Steam App ID later.
  next if pcgw_id.nil? || steam_app_id.nil?

  # Skip the row if the title is longer than 40 characters. This is a hack to
  # get around a weird issue where some game titles have really screwy
  # encoding problems.
  # NOTE(review): possibly caused by reading the file as ISO-8859-1 - confirm
  # the CSV's real encoding.
  next if pcgw_id.length > 40

  pcgw_steam_ids << { pcgw_id: pcgw_id, steam_app_id: steam_app_id }
end
# Authenticate with Wikidata. The credentials come from the WIKIDATA_USERNAME
# and WIKIDATA_PASSWORD environment variables. ENV.fetch raises a KeyError
# naming the missing variable, rather than attempting a login with nil.
wikidata_username = ENV.fetch("WIKIDATA_USERNAME")
wikidata_password = ENV.fetch("WIKIDATA_PASSWORD")
wikidata_client = MediawikiApi::Wikidata::WikidataClient.new "https://www.wikidata.org/w/api.php"
wikidata_client.log_in wikidata_username, wikidata_password
# Returns true unless the wbgetclaims API reports an empty claim set for
# property P6337 (the PCGW ID) on the given Wikidata entity.
def pcgw_id_claimed?(wikidata_id)
  claims_url = "https://www.wikidata.org/w/api.php?action=wbgetclaims&entity=#{wikidata_id}&property=P6337&format=json"
  # Kernel#open stopped accepting URLs in Ruby 3.0, so go through URI.open
  # (from open-uri). The block form closes the response once it has been read.
  # JSON.parse is used instead of JSON.load because the payload is remote data.
  claims = JSON.parse(URI.open(claims_url, &:read))
  claims["claims"] != {}
end

# Adds a PCGW ID (P6337) claim to the Wikidata item matched by the game's
# Steam App ID, provided the item's label equals the PCGW title and the item
# has no PCGW ID yet. Prints what it did (or why it skipped) to stdout.
#
# game            - hash with :pcgw_id and :steam_app_id; a :title key is
#                   added to it as a side effect.
# wikidata_client - the logged-in client used to create the claim.
def add_pcgw_id_claim(game, wikidata_client)
  # Get the wikidata item for the current game's Steam App ID.
  wikidata_item = find_wikidata_item_by_steam_app_id(game[:steam_app_id])
  # If no wikidata item is returned, skip this PCGW item.
  return if wikidata_item.nil?

  # Replace the underscores in the PCGW ID with spaces to get as close as
  # possible to the normal name.
  game[:title] = game[:pcgw_id].tr('_', ' ')
  unless game[:title] == wikidata_item[:title]
    puts "#{game[:title]} does not equal #{wikidata_item[:title]}"
    return
  end

  wikidata_id = wikidata_item[:url].sub('http://www.wikidata.org/entity/', '')
  puts "Wikidata Item ID: #{wikidata_id}, game[:pcgw_id]: #{game[:pcgw_id]}"

  # Check if the property already exists, and skip if it already does.
  if pcgw_id_claimed?(wikidata_id)
    puts "This already has a PCGW ID"
    return
  end

  puts "This doesn't have a PCGW ID yet"
  wikidata_client.create_claim wikidata_id, "value", "P6337", "\"#{game[:pcgw_id]}\""
  puts "Updated #{game[:title]}: #{wikidata_item[:url]}"
end

# For every PCGW item created from the CSV, find the respective wikidata item
# and then compare the title of the PCGW item and the Wikidata item found via
# the Steam App ID.
pcgw_steam_ids.each do |game|
  add_pcgw_id_claim(game, wikidata_client)
  # Sleep for 1 second to ensure we don't get rate limited. This runs after
  # every game, including skipped ones, because each one has already issued at
  # least one request.
  sleep(1)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment