-
-
Save pmgreen/bf23a3a7b44ef3b8234a4de8a3a06330 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require_relative './../lib/ils_sql' | |
require 'csv' | |
## Basis: an Analytics report of items in ReCAP with 'X' call numbers.
## Queries the WorldCat Metadata API to return matching records
## and writes out a report for review.
## NOTE: the search API returns call numbers as a string without subfields,
## so it cannot be used for this process.
## API scopes: https://gist.github.com/rococodogs/346f333f88519c804d97
## Look up one record's OCLC number in the WorldCat Metadata API and collect
## the values of any MARC 050 fields on the record that comes back.
##
## record: single-entry hash shaped like { mms_id => { oclc: '...' } };
##   only the first key and the :oclc values are read.
## Returns { mms_id => { oclc_nums:, oclc_lc_callnum:, oclc_lc_callnum_val: } }
##   after a 200 response, or nil when there is no OCLC number to search on
##   or the API answers with any other status.
## Side effects: prints the API response status, writes each returned record
##   that has an 050 to output/f050/, and terminates the script (non-zero
##   exit) when the token request fails or the API answers 401.
def get_wcapi_results(record:)
  mms_id = record.keys.first.to_s
  oclc = record.values.map { |h| h[:oclc] }.compact
  ## Nothing to search on. Bail out before any request is made; a `break`
  ## inside the request block would leave the response nil and the status
  ## check below would raise NoMethodError.
  return if oclc.empty?

  auth_conn = alma_conn('https://oauth.oclc.org/token')
  authorization = Base64.strict_encode64("#{WORLDCAT_MD_CLIENT_ID}:#{WORLDCAT_MD_CLIENT_SECRET}")
  auth_response = auth_conn.post do |req|
    req.headers['Accept'] = 'application/json'
    req.headers['Authorization'] = "Basic #{authorization}"
    req.params['grant_type'] = 'client_credentials'
    ## see 'scopes' note above
    req.params['scope'] = 'WorldCatMetadataAPI'
  end
  ## Without a token the lookup cannot be authorized, so stop right away
  ## instead of sending a request with an empty Authorization header.
  abort("OCLC token request failed (HTTP #{auth_response.status})") unless auth_response.status == 200
  token = JSON.parse(auth_response.body)['access_token']

  api_conn = alma_conn('https://metadata.api.oclc.org/worldcat/manage/')
  response = api_conn.get do |req|
    req.url "bibs/#{oclc.first}"
    req.headers['accept'] = 'application/marcxml+xml'
    req.headers['Authorization'] = "Bearer #{token}"
  end
  puts response.status
  abort('WorldCat Metadata API rejected the access token (HTTP 401)') if response.status == 401
  return unless response.status == 200

  oclc_responses = { mms_id => {} }
  entry = oclc_responses[mms_id]
  reader = MARC::XMLReader.new(StringIO.new(response.body), parser: 'nokogiri', ignore_namespace: true)
  reader.each do |marc_record|
    oclc_num = marc_record['001'].value
    entry[:oclc_nums] = oclc_num
    entry[:oclc_lc_callnum] = []
    entry[:oclc_lc_callnum_val] = []
    next if marc_record['050'].nil?

    ## keep a copy of the full record for the later enhancement step
    writer = MARC::XMLWriter.new("#{ROOT_DIR}/output/f050/oclc_records_with_050_to_add_#{oclc_num}.xml")
    begin
      writer.write(marc_record)
    ensure
      writer.close
    end
    marc_record.fields('050').each do |field|
      ## no indicator filter
      #entry[:oclc_lc_callnum] << field.to_marchash.to_json
      entry[:oclc_lc_callnum_val] << field.value
    end
  end
  oclc_responses
end
## OCLC API credentials, read from the environment (nil when unset)
WORLDCAT_MD_CLIENT_ID = ENV['WORLDCAT_MD_CLIENT_ID']
WORLDCAT_MD_CLIENT_SECRET = ENV['WORLDCAT_MD_CLIENT_SECRET']

## build a hash from analytics report, keyed by MMS ID
analytics_csv = "#{ROOT_DIR}/tmp/recap_needing_050.csv"
analytics_hash = {}
## CSV.foreach closes the input file once iteration finishes
CSV.foreach(analytics_csv, headers: true, header_converters: :symbol) do |line|
  analytics_hash[line[:mms_id]] = {
    isbn: line[:isbn]&.split(';')&.first, # first ISBN of a ';'-separated list
    oclc: line[:oclc] # assumes a single oclc number
  }
end

## check OCLC num against WorldCat
## may want to search ISBN secondarily
lc_callnum_list = []
analytics_hash.each do |mms_id, values|
  next unless values[:oclc]

  results = get_wcapi_results(record: { mms_id => { oclc: values[:oclc] } })
  ## lookups that return nil are left out of the report
  lc_callnum_list << results if results
end

## write out report for review (prior to enhancing records)
## block form of CSV.open flushes and closes the file even if a row raises
CSV.open("#{ROOT_DIR}/output/recap_needing_050_w_callnums.csv", 'w') do |report|
  report << %w[mms oclc lc_callnum marchash]
  lc_callnum_list.each do |hash|
    hash.each do |mms_id, values|
      report << [
        mms_id,
        values[:oclc_nums],
        array_to_blob(values[:oclc_lc_callnum_val]),
        array_to_blob(values[:oclc_lc_callnum])
      ]
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment