Skip to content

Instantly share code, notes, and snippets.

@paluchas
Created June 16, 2019 20:20
Show Gist options
  • Save paluchas/b80b69367ceac900cfbf36586780667e to your computer and use it in GitHub Desktop.
Save paluchas/b80b69367ceac900cfbf36586780667e to your computer and use it in GitHub Desktop.
require 'csv'
require 'faraday'
require 'faraday_middleware'
require 'pry'
# Look up every DOI from an Altmetric CSV export in the Unpaywall API and
# store a summary of each response as one row of a new CSV file.

# Process DOIs from Altmetrics CSV file (the DOI is read from its 'DOI' column)
altmetric_csv_file = 'Altmetric - Mentions - University of York - 2019-04-16.csv'
# Store all processed unpaywall responses in new csv file
unpaywall_csv_file = 'Unpaywall_York.csv'

# Output columns, in the order they are written for every row.
csv_headers = ['doi', 'is_oa', 'best_oa_evidence', 'best_oa_host_type', 'best_oai_license',
               'best_oa_url_for_pdf', 'best_oa_version', 'journal_is_oa', 'genre',
               'journal_name', 'oa_status', 'published_date', 'publisher']
# Keys copied from the first OA location into the five best_* columns.
best_oa_fields = %w[evidence host_type license url_for_pdf version]
# Top-level response keys copied into the remaining columns.
paper_fields = %w[journal_is_oa genre journal_name oa_status published_date publisher]

# Unpaywall connection
email = 'sebastian.palucha@gmail.com'
unpaywall = Faraday.new(url: 'https://api.unpaywall.org/v2') do |conn|
  # Parse the body into a Hash only when the response content type ends in "json"
  conn.response :json, content_type: /\bjson$/
  conn.adapter Faraday.default_adapter
end

# Block form closes the output file even if an exception escapes the loop.
CSV.open(unpaywall_csv_file, 'wb') do |csv|
  csv << csv_headers

  # For each DOI, call Unpaywall API and save returned data
  # NOTE(review): the input is read as windows-1251 and transcoded to UTF-8;
  # confirm that this (and not e.g. windows-1252) matches the Altmetric export.
  CSV.foreach(altmetric_csv_file,
              encoding: 'windows-1251:utf-8',
              col_sep: ',',
              row_sep: :auto,
              headers: true) do |row|
    # A blank DOI would turn into a request against the bare API root; skip it.
    doi = row['DOI'].to_s.strip
    next if doi.empty?

    # Call Unpaywall API with DOI from Altmetric CSV file
    begin
      response = unpaywall.get doi, email: email
    rescue URI::InvalidURIError, Faraday::Error => e
      # One malformed DOI or one failed request must not abort the whole batch.
      puts "Error #{e} while processing #{row['DOI']}"
      next
    end

    # Process Unpaywall response; response.body is a Hash only when JSON was parsed
    body = response.body
    unless response.success? && body.is_a?(Hash)
      puts "Error HTTP #{response.status} while processing #{doi}"
      next
    end

    # Use first set of data e.g. Unpaywall mark it as best_oa. Falls back to an
    # empty Hash so papers without an OA location still fill all five best_*
    # columns (with nil) and the later columns stay under their own headers.
    best_location = (body['oa_locations']&.first if body['is_oa']) || {}

    # Save processed data: always exactly one value per header column
    csv << [body['doi'], body['is_oa'],
            *best_location.values_at(*best_oa_fields),
            *body.values_at(*paper_fields)]
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment