Skip to content

Instantly share code, notes, and snippets.

@Beck-Davis
Created February 1, 2024 21:11
Show Gist options
  • Save Beck-Davis/f7d8f42f6b95e1c0dddb2eb30ef0b47e to your computer and use it in GitHub Desktop.
Save Beck-Davis/f7d8f42f6b95e1c0dddb2eb30ef0b47e to your computer and use it in GitHub Desktop.
require_relative './../lib/marc_cleanup'
ISBN13PREFIX = '978'.freeze
# Tells whether subfield $c of the given field carries the
# "Invalid data in 1st $a in ... 020" error note.
#
# @param field [#[]] object whose subfield value is read with field['c']
# @return [Integer, nil] offset of the phrase inside $c (truthy), or nil when
#   $c is missing or does not contain the phrase
def contains_020error_indicator?(field)
  note = field['c']
  note =~ /Invalid data in 1st \$a in .* 020/
end
# Reports whether any 020 field of the record has a subfield $a shaped like
# "<non-whitespace token> <whitespace> (<qualifier>)", e.g. "0306406152 (pbk.)".
# A missing $a is treated as an empty string and therefore never matches.
#
# @param record [#fields] record answering fields('020') with the 020 fields
# @return [Boolean] true when at least one 020 $a has that shape
def field_020_error?(record)
  qualifier_pattern = /^\s*([^\s]+)\s+(\(.*?\))\s*$/
  record.fields('020').any? do |isbn_field|
    !(isbn_field['a'].to_s =~ qualifier_pattern).nil?
  end
end
# Splits a trailing parenthesised qualifier out of every 020 $a and stores it
# (parentheses included) in a new subfield $q appended to the same field.
#
# A $a is split only when it consists of digits/hyphens, optionally ending in
# an ISBN-10 "X" check digit, followed by one parenthesised group. Accepting
# the "X" matters: otherwise values such as "080442957X (pbk.)" are left
# unsplit and the qualifier is not preserved in a $q.
#
# @param record [#fields] record answering fields('020') with the 020 fields
# @return the same record object, modified in place
def new_020_q(record)
  record.fields('020').each do |f020|
    # Select the $a subfields up front so that appending $q below does not
    # modify the list while it is being iterated.
    isbn_subfields = f020.subfields.select { |subfield| subfield.code == 'a' }
    isbn_subfields.each do |subfield|
      isbn_parts = /^\s*([\d\-]+[Xx]?)\s*(\(.*?\))\s*$/.match(subfield.value)
      next if isbn_parts.nil?

      subfield.value = isbn_parts[1]
      # The matched "(qualifier)" text becomes a new subfield $q.
      f020.append(MARC::Subfield.new('q', isbn_parts[2]))
    end
  end
  record
end
### Convert ISBN-10 to ISBN-13
#
# Takes the first nine characters as the ISBN-10 stem, optionally verifies the
# tenth character against the computed ISBN-10 check digit, then prefixes the
# stem with ISBN13PREFIX and appends the ISBN-13 check digit.
#
# @param isbn [String, nil] 9-character stem or 10-character ISBN-10
#   (no hyphens); the check digit "X" is accepted in either case
# @return [String, nil] the 13-character ISBN, or nil when the stem is not
#   exactly nine digits or the supplied check digit is wrong
def isbn10_to_13(isbn)
  text = isbn.to_s
  stem = text[0..8]
  # Require a complete stem; a shorter one cannot produce a valid ISBN-13.
  return nil unless stem =~ /\A\d{9}\z/

  existing_check = text[9]
  # checkdigit_10 reports ten as an upper-case 'X', so compare case-insensitively.
  return nil if existing_check && existing_check.upcase != checkdigit_10(stem)

  main = ISBN13PREFIX + stem
  main + checkdigit_13(main)
end
### Calculate check digit for ISBN-10
#
# Each character is weighted 10, 9, 8, ... by position (non-digit characters
# count as 0 via to_i); the check value is whatever brings the weighted total
# up to a multiple of 11.
#
# @param stem [String] the ISBN-10 characters preceding the check digit
# @return [String] '0'..'9', or 'X' when the check value is ten
def checkdigit_10(stem)
  weighted_total = stem.each_char.each_with_index.inject(0) do |total, (char, position)|
    total + char.to_i * (10 - position)
  end
  check_value = (11 - (weighted_total % 11)) % 11
  return 'X' if check_value == 10

  check_value.to_s
end
### Calculate check digit for ISBN-13
#
# Characters at even (0-based) positions are weighted 1 and those at odd
# positions 3 (non-digit characters count as 0 via to_i); the check digit is
# whatever brings the weighted total up to a multiple of 10.
#
# @param stem [String] the ISBN-13 characters preceding the check digit
# @return [String] a single character '0'..'9'
def checkdigit_13(stem)
  weighted_total = stem.each_char.each_with_index.inject(0) do |total, (char, position)|
    weight = position.odd? ? 3 : 1
    total + char.to_i * weight
  end
  remainder = weighted_total % 10
  ((10 - remainder) % 10).to_s
end
### Normalize ISBN-13
#
# Takes the first twelve characters as the stem, computes the ISBN-13 check
# digit with checkdigit_13 and, when a thirteenth character is present,
# requires it to equal that check digit.
#
# @param raw_isbn [String, nil] 12-digit stem or 13-digit ISBN (no hyphens)
# @return [String, nil] the 13-digit ISBN, or nil when the stem is not exactly
#   twelve digits or the supplied check digit is wrong
def isbn13_normalize(raw_isbn)
  text = raw_isbn.to_s
  stem = text[0..11]
  # Require a complete stem; a shorter one cannot produce a valid ISBN-13.
  return nil unless stem =~ /\A\d{12}\z/

  checkdigit = checkdigit_13(stem)
  supplied_check = text[12]
  return nil if supplied_check && supplied_check != checkdigit

  stem + checkdigit
end
### Normalize any given string that is supposed to include an ISBN
#
# Strips hyphens, backslashes, parenthesised qualifiers, "$c"/"$q" tails and
# surrounding junk from the string, then hands the remaining 9/10-character
# value to isbn10_to_13 or the 12/13-character value to isbn13_normalize.
#
# @param isbn [String, nil] raw text, e.g. the value of an 020 $a
# @return [String, nil] whatever isbn10_to_13 / isbn13_normalize returns for
#   the cleaned value, or nil when the input is nil or the cleaned value does
#   not have a length of 9, 10, 12 or 13
def isbn_normalize(isbn)
  return nil unless isbn

  raw_isbn = isbn.dup
  raw_isbn.delete!('-')
  raw_isbn.delete!('\\')
  raw_isbn.gsub!(/\([^\)]*\)/, '')      # drop "(pbk.)"-style qualifiers
  raw_isbn.gsub!(/^(.*)\$c.*$/, '\1')   # drop a literal "$c..." tail
  raw_isbn.gsub!(/^(.*)\$q.*$/, '\1')   # drop a literal "$q..." tail
  raw_isbn.gsub!(/^\D+([0-9].*)$/, '\1') # drop leading non-digits
  if raw_isbn =~ /^97[89]/
    # Both 978 and 979 are handled here. Sending a 979 ISBN-13 through the
    # ISBN-10 pattern below would cut it down to its first ten characters.
    # A trailing X is kept so that a ten-character value starting with
    # 978/979 still has its check digit verified.
    raw_isbn.gsub!(/^(97[89][0-9 ]+[xX]?).*$/, '\1')
    raw_isbn.delete!(' ')
  else
    # Collapse a spaced-out ISBN-10 (1 + 4 + 4 + check) and drop what follows.
    raw_isbn.gsub!(/([0-9])\s*([0-9]{4})\s*([0-9]{4})\s*([0-9xX]).*$/, '\1\2\3\4')
  end
  raw_isbn.gsub!(/^([0-9]{9,13}[xX]?)[^0-9xX].*$/, '\1')
  # Cut everything from the first non-digit onwards, except when the value is
  # exactly nine digits plus an X check digit: stripping that X would let an
  # ISBN-10 with a wrong X check digit pass unverified.
  raw_isbn.gsub!(/^([0-9]+?)\D.*$/, '\1') unless raw_isbn =~ /\A[0-9]{9}[xX]\z/
  # Present the check digit as upper-case 'X', the form checkdigit_10 returns.
  raw_isbn.upcase!
  if raw_isbn.length > 6 && raw_isbn.length < 9 && raw_isbn =~ /^[0-9]+$/
    # 7- and 8-digit values are right-padded with zeros to a 9-character stem.
    raw_isbn = raw_isbn.ljust(9, '0')
  end
  valid_lengths = [9, 10, 12, 13] # ISBN10 and ISBN13 with/out check digits
  return nil unless valid_lengths.include? raw_isbn.length

  if raw_isbn.length < 12
    isbn10_to_13(raw_isbn)
  else
    isbn13_normalize(raw_isbn)
  end
end
# Runs every 020 $a through isbn_normalize. When a normalized value comes
# back it replaces the subfield value; otherwise the subfield keeps its value
# and its code is changed from 'a' to 'z'.
#
# @param record [#fields] record answering fields('020') with the 020 fields
# @return the same record object, modified in place
def move_invalid_isbn(record)
  record.fields('020').each do |isbn_field|
    isbn_field.subfields.each do |sf|
      next if sf.code != 'a'

      cleaned = isbn_normalize(sf.value)
      unless cleaned
        sf.code = 'z'
        next
      end
      sf.value = cleaned
    end
  end
  record
end
# Full 020 cleanup for one record:
#   1. new_020_q splits parenthesised qualifiers out of $a into $q;
#   2. move_invalid_isbn normalizes each $a or recodes it to $z;
#   3. 915 fields flagged by contains_020error_indicator? are removed;
#   4. 914 fields are removed when the record no longer has any 915 field.
#
# @param record record object passed through the helpers above; must also
#   answer fields (the modifiable field list) and record['915']
# @return the record returned by the helpers, modified in place
def cleanup_020(record)
  cleaned = move_invalid_isbn(new_020_q(record))
  cleaned.fields.delete_if do |field|
    field.tag == '915' && contains_020error_indicator?(field)
  end
  cleaned.fields.delete_if do |field|
    field.tag == '914' && cleaned['915'].nil?
  end
  cleaned
end
# Script entry point: read every MARC XML file matching the input glob, run
# cleanup_020 on each record, and write only the records whose string form
# changed to a single output file.
# NOTE(review): 'filepath' looks like a placeholder for both the output path
# and the input glob; confirm the real paths before running.
writer = MARC::XMLWriter.new('filepath')
begin
  Dir.glob('filepath').each do |file|
    puts File.basename(file)
    reader = MARC::XMLReader.new(file)
    reader.each do |record|
      # Snapshot the record's text before cleanup to detect changes afterwards.
      originalrecord = record.to_s
      cleanup_020(record)
      writer.write(record) if originalrecord != record.to_s
    end
  end
ensure
  # Always close the writer, even when a file or record raises part-way through.
  writer.close
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment