Created
June 16, 2019 20:20
-
-
Save paluchas/b80b69367ceac900cfbf36586780667e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv'
require 'faraday'
require 'faraday_middleware'
require 'pry'

# Reads DOIs from an Altmetric CSV export, looks each one up in the Unpaywall
# API and writes one row of open-access metadata per DOI to a new CSV file.
# DOIs that cannot be looked up are reported on stdout and skipped.

# Process DOIs from Altmetrics CSV file
altmetric_csv_file = 'Altmetric - Mentions - University of York - 2019-04-16.csv'

# Store all processed unpaywall responses in new csv file
unpaywall_csv_file = 'Unpaywall_York.csv'

# Output columns, in the order they are written. Every row is built from this
# list so values always line up with the header, even when some are missing.
columns = ['doi', 'is_oa', 'best_oa_evidence', 'best_oa_host_type', 'best_oai_license',
           'best_oa_url_for_pdf', 'best_oa_version', 'journal_is_oa', 'genre',
           'journal_name', 'oa_status', 'published_date', 'publisher'].freeze

# Output column => key inside the first entry of the response's 'oa_locations'.
# Columns not listed here are read from the top level of the response body
# under their own name.
best_oa_fields = {
  'best_oa_evidence' => 'evidence',
  'best_oa_host_type' => 'host_type',
  'best_oai_license' => 'license',
  'best_oa_url_for_pdf' => 'url_for_pdf',
  'best_oa_version' => 'version'
}.freeze

# Unpaywall connection
email = 'sebastian.palucha@gmail.com'
unpaywall = Faraday.new(url: 'https://api.unpaywall.org/v2') do |conn|
  conn.response :json, content_type: /\bjson$/
  conn.adapter Faraday.default_adapter
end

# Block form guarantees the output file is flushed and closed even if the
# run is interrupted by an exception.
CSV.open(unpaywall_csv_file, 'wb') do |csv|
  csv << columns

  # For each DOI, call Unpaywall API and save returned data
  # NOTE(review): 'windows-1251' is kept from the original; confirm it is the
  # real encoding of the Altmetric export.
  CSV.foreach(altmetric_csv_file,
              encoding: 'windows-1251:utf-8',
              col_sep: ',',
              row_sep: :auto,
              headers: true) do |row|
    doi = row['DOI'].to_s.strip
    # Without a DOI the request would hit the bare API root, so skip the row.
    next if doi.empty?

    # Call Unpaywall API with DOI from Altmetric CSV file
    begin
      response = unpaywall.get doi, email: email
    rescue URI::InvalidURIError, Faraday::Error => e
      # A single bad DOI or a transient network failure must not abort the run.
      puts "Error #{e} while processing #{doi}"
      next
    end
    # binding.pry

    # response.body is a Hash only when the JSON middleware parsed the reply;
    # error replies and non-JSON bodies carry no usable record.
    body = response.body
    unless response.success? && body.is_a?(Hash)
      puts "Error HTTP #{response.status} while processing #{doi}"
      next
    end

    # Process Unpaywall response.
    # Use first set of data e.g. Unpaywall mark it as best_oa; fall back to an
    # empty hash when the paper is not OA or no location is listed.
    best_location = {}
    best_location = (body['oa_locations'] || []).first || {} if body['is_oa']

    csv << columns.map do |column|
      location_key = best_oa_fields[column]
      location_key ? best_location[location_key] : body[column]
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment