Skip to content

Instantly share code, notes, and snippets.

@pmgreen
Last active November 6, 2024 22:04
Show Gist options
  • Save pmgreen/bf23a3a7b44ef3b8234a4de8a3a06330 to your computer and use it in GitHub Desktop.
Save pmgreen/bf23a3a7b44ef3b8234a4de8a3a06330 to your computer and use it in GitHub Desktop.
require_relative './../lib/ils_sql'
require 'csv'
## Input: an Analytics report of ReCAP items that have 'X' call numbers.
## For each item, queries the WorldCat Metadata API for the matching record
## and writes out a report for review.
## NOTE: the search API returns call numbers as a single string without
## subfields, so it cannot be used for this process.
## API scopes: https://gist.github.com/rococodogs/346f333f88519c804d97
## Looks up one bib record in the WorldCat Metadata API by OCLC number and
## collects the values of its 050 fields.
##
## record - Hash keyed by MMS ID whose value is a Hash with an :oclc entry,
##          e.g. { '99123' => { oclc: '12345' } }. Only the first key and
##          the first usable :oclc value are consulted.
##
## Returns { mms_id => { oclc_nums:, oclc_lc_callnum:, oclc_lc_callnum_val: } }
## when the API answers 200. Returns nil when there is no OCLC number to look
## up, or when the API answers with any status other than 200.
## Exits the process when the API answers 401.
## Side effect: every returned record that has an 050 is written as MARCXML
## under ROOT_DIR/output/f050/.
def get_wcapi_results(record:)
  mms_id = record.keys.first.to_s
  oclc = record.values
               .map { |h| h[:oclc] }
               .reject { |num| num.nil? || num.to_s.strip.empty? }
  ## Decide this before any HTTP traffic. Previously the request block did a
  ## `break`, which left the response nil and crashed on `response.status`.
  return if oclc.empty?

  ## alma_conn is defined outside this file; presumably it returns a
  ## Faraday-style connection for the given base URL -- TODO confirm.
  auth_conn = alma_conn('https://oauth.oclc.org/token')
  authorization = Base64.strict_encode64("#{WORLDCAT_MD_CLIENT_ID}:#{WORLDCAT_MD_CLIENT_SECRET}")
  api_conn = alma_conn('https://metadata.api.oclc.org/worldcat/manage/')

  auth_response = auth_conn.post do |req|
    req.headers['Accept'] = 'application/json'
    req.headers['Authorization'] = "Basic #{authorization}"
    req.params['grant_type'] = 'client_credentials'
    ## see the 'API scopes' link in the file header
    req.params['scope'] = 'WorldCatMetadataAPI'
  end

  ## When the token request fails the header stays nil and the bib request
  ## is still sent; the status checks below deal with the outcome.
  api_authorization = nil
  if auth_response.status == 200
    token = JSON.parse(auth_response.body)['access_token']
    api_authorization = "Bearer #{token}"
  end

  response = api_conn.get do |req|
    req.url "bibs/#{oclc.first}"
    req.headers['accept'] = 'application/marcxml+xml'
    req.headers['Authorization'] = api_authorization
  end
  puts response.status
  ## A 401 stops the whole run.
  exit if response.status == 401
  return unless response.status == 200

  oclc_responses = { mms_id => {} }
  entry = oclc_responses[mms_id]
  reader = MARC::XMLReader.new(StringIO.new(response.body), parser: 'nokogiri', ignore_namespace: true)
  reader.each do |marc_record|
    oclc_num = marc_record['001'].value
    entry[:oclc_nums] = oclc_num
    entry[:oclc_lc_callnum] = []
    entry[:oclc_lc_callnum_val] = []
    next if marc_record['050'].nil?

    writer = MARC::XMLWriter.new("#{ROOT_DIR}/output/f050/oclc_records_with_050_to_add_#{oclc_num}.xml")
    begin
      writer.write(marc_record)
    ensure
      ## close even if the write raises, so the handle is not leaked
      writer.close
    end

    marc_record.fields('050').each do |field|
      ## no indicator filter: every 050 is taken
      # entry[:oclc_lc_callnum] << field.to_marchash.to_json
      entry[:oclc_lc_callnum_val] << field.value
    end
  end
  oclc_responses
end
## API credentials are read from the environment
WORLDCAT_MD_CLIENT_ID = ENV['WORLDCAT_MD_CLIENT_ID']
WORLDCAT_MD_CLIENT_SECRET = ENV['WORLDCAT_MD_CLIENT_SECRET']
## build a hash from analytics report, keyed by MMS ID
analytics_csv = "#{ROOT_DIR}/tmp/recap_needing_050.csv"
analytics_hash = {}
## CSV.foreach closes the input file once iteration finishes
## (a bare CSV.open would leave the handle open)
CSV.foreach(analytics_csv, headers: true, header_converters: :symbol) do |line|
  analytics_hash[line[:mms_id]] = {
    ## only the first of any ';'-separated ISBNs is kept
    isbn: line[:isbn]&.split(';')&.first,
    oclc: line[:oclc] # assumes a single oclc number
  }
end
## Look each OCLC number up in WorldCat, keeping only lookups that return
## a result. Rows without an OCLC number are skipped.
## (an ISBN search could be added as a secondary lookup)
lc_callnum_list = analytics_hash.each_with_object([]) do |(mms_id, values), found|
  oclc_number = values[:oclc]
  next unless oclc_number

  results = get_wcapi_results(record: { mms_id => { oclc: oclc_number } })
  found << results if results
end
## write out report for review (prior to enhancing records)
## Block form of CSV.open flushes and closes the file when the block ends,
## instead of leaving the handle open until the interpreter exits.
CSV.open("#{ROOT_DIR}/output/recap_needing_050_w_callnums.csv", 'w') do |report|
  report << %w[mms oclc lc_callnum marchash]
  lc_callnum_list.each do |hash|
    ## each entry is a hash of { mms_id => lookup values }
    hash.each do |mms_id, values|
      report << [
        mms_id,
        values[:oclc_nums],
        array_to_blob(values[:oclc_lc_callnum_val]),
        array_to_blob(values[:oclc_lc_callnum])
      ]
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment