Skip to content

Instantly share code, notes, and snippets.

@billdueber
Created September 27, 2013 15:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save billdueber/6730218 to your computer and use it in GitHub Desktop.
Save billdueber/6730218 to your computer and use it in GitHub Desktop.
partial implementation of MarcExtractor specset and spec objects
module Traject
class MarcExtractor
# A set of Spec objects, with knowledge about the collection as a whole.
#
# Besides storing one spec per tag, the set keeps track of which tags are
# "interesting" (a spec might cover a field carrying that tag) so callers can
# cheaply skip fields nobody asked for.
class SpecSet
  attr_reader :interesting_tags, :options

  # @param opts [Hash, nil] options; only the :alternate_script key is read
  #   here (false, :only, or anything else for the default behavior).
  def initialize(opts = {})
    @specs = {}
    # Tags are "interesting" if we have a spec that might cover them
    @interesting_tags_hash = {}
    @interesting_tags = []
    set_options(opts)
  end

  # Replace the options and recompute the interesting tags from scratch, so
  # that changing :alternate_script after specs were added stays consistent.
  #
  # @param opts [Hash, nil] new options (nil is treated as an empty hash)
  # @return [SpecSet] self, to allow chaining
  def set_options(opts)
    @options = opts || {}
    rebuild_interesting_tags
    self
  end

  # Register a spec under its tag.
  #
  # @param spec [#tag] the spec to add
  # @return [SpecSet] self, to allow chaining
  # @raise [ArgumentError] if a spec for the same tag was already added
  def add_spec(spec)
    if @specs[spec.tag]
      raise ArgumentError, "MARC tag #{spec.tag} specified twice in the same spec. Use two calls to to_field if you need that"
    end

    @specs[spec.tag] = spec
    # Recompute rather than reset: earlier tags (and '880') must survive.
    rebuild_interesting_tags
    self
  end

  # @param tag [String]
  # @return [Boolean] whether some spec might cover a field with this tag
  def interesting_tag?(tag)
    @interesting_tags_hash.key?(tag)
  end

  # Find the spec that applies to the given field, if any.
  #
  # @param field [#tag, #[]] a field responding to #tag and to #[] for
  #   subfield lookup
  # @return [Object, nil] the matching spec, or nil when none applies
  def spec_covering_field(field)
    tag = field.tag

    # Short-circuit the uninteresting stuff
    return nil unless interesting_tag?(tag)

    # For an 880 with a subfield 6, look the spec up under the first three
    # bytes of that subfield instead (presumably the linked tag).
    #
    # Due to bug in jruby https://github.com/jruby/jruby/issues/886 , we need
    # to do this weird encode gymnastics, which fixes it for mysterious reasons.
    if tag == '880' && (linkage = field['6'])
      tag = linkage.encode(linkage.encoding).byteslice(0, 3)
    end

    # nil when there is no spec for the resulting tag
    @specs[tag]
  end

  private

  # Derive @interesting_tags_hash / @interesting_tags from the current
  # options and the full set of registered specs.
  def rebuild_interesting_tags
    tags = {}

    # If we *are* interested in alternate scripts, include the 880
    tags['880'] = true if @options[:alternate_script] != false

    # Spec tags are interesting unless we *only* care about alternate
    # scripts (i.e., we only care about 880s).
    unless @options[:alternate_script] == :only
      @specs.each_key { |spec_tag| tags[spec_tag] = true }
    end

    @interesting_tags_hash = tags
    @interesting_tags = tags.keys
  end
end
# Base class for extraction specs: it only holds the tag a spec applies to.
class GenericSpec
  # The tag this spec targets; both readable and writable.
  attr_reader :tag
  attr_writer :tag

  # @param tag [Object] the tag, stored as given
  def initialize(tag)
    self.tag = tag
  end
end
# Spec for a field that is read through #value only (no indicators or
# subfields are consulted), optionally restricted to a single byte or an
# inclusive byte range of that value.
class ControlSpec < GenericSpec
  attr_reader :bytes

  # @param tag [Object] the tag this spec applies to
  # @param byte1 [#to_i, nil] first (or only) byte offset; nil means the
  #   whole value is used
  # @param byte2 [#to_i, nil] last byte offset (inclusive); nil means only
  #   the single byte at byte1 is used
  def initialize(tag, byte1 = nil, byte2 = nil)
    super(tag)
    set_bytes(byte1, byte2)
  end

  # Fields handled by this spec are not filtered by indicator.
  #
  # @return [true]
  def matches_indicators(field)
    true
  end

  # Configure which bytes of the field value are extracted.
  #
  # With both offsets, @bytes becomes an inclusive Range; with only byte1 it
  # becomes an Integer; with neither it is cleared so the whole value is
  # returned again.
  #
  # @param byte1 [#to_i, nil]
  # @param byte2 [#to_i, nil]
  # @return [ControlSpec] self
  def set_bytes(byte1, byte2 = nil)
    @bytes =
      if byte1 && byte2
        (byte1.to_i)..(byte2.to_i)
      elsif byte1
        byte1.to_i
      end
    self
  end

  # @return [Array] one-element array holding the configured byte slice of
  #   the field value (the element is nil when String#byteslice finds the
  #   offset out of range)
  def byte_value(field)
    [field.value.byteslice(@bytes)]
  end

  # @return [Array] one-element array holding the entire field value
  def whole_value(field)
    [field.value]
  end

  # Extract the value(s) for this spec from the field: the byte slice when
  # bytes are configured, otherwise the whole value.
  #
  # The decision is made per call so that #values and #value always agree
  # with the current @bytes setting.
  #
  # @param field [#value]
  # @return [Array]
  def values(field)
    @bytes ? byte_value(field) : whole_value(field)
  end

  # Kept so callers using the singular name continue to work.
  alias_method :value, :values
end
# Spec for a field with indicators and subfields: it can restrict matching by
# indicator values and restrict extraction to a set of subfield codes.
class DataSpec < GenericSpec
  # Hash whose keys are the wanted subfield codes (values are always true).
  attr_reader :subfields

  # @param tag [Object] the tag this spec applies to
  # @param indicators [String, nil] up to two characters; '*' or a missing
  #   character means "any value" for that indicator
  # @param subfields [String, nil] wanted subfield codes, one character
  #   each; nil or '' selects every subfield
  def initialize(tag, indicators, subfields)
    super(tag)
    @subfields = {}
    set_indicators(indicators)
    subfields.to_s.each_char { |code| add_subfield(code) }
  end

  # Set the indicator constraints from a two-character string.
  #
  # Both indicators are always reassigned, so calling this again with '*'
  # (or nil) really removes an earlier constraint.
  #
  # @param two_char_ind_string [String, nil]
  # @return [DataSpec] self
  def set_indicators(two_char_ind_string)
    ind_string = two_char_ind_string || ''
    @ind1 = wildcard_to_nil(ind_string[0])
    @ind2 = wildcard_to_nil(ind_string[1])
    self
  end

  # @param field [#indicator1, #indicator2]
  # @return [Boolean] whether the field satisfies the indicator constraints
  def matches_indicators(field)
    if @ind1 || @ind2
      sometimes_matches_indicators(field)
    else
      always_matches_indicators(field)
    end
  end

  # Used when no indicator constraint is set.
  #
  # @return [true]
  def always_matches_indicators(field)
    true
  end

  # Compare each constrained indicator with the field's; an unconstrained
  # (nil) indicator matches anything.
  #
  # @return [Boolean]
  def sometimes_matches_indicators(field)
    (@ind1.nil? || field.indicator1 == @ind1) &&
      (@ind2.nil? || field.indicator2 == @ind2)
  end

  # Mark a subfield code as wanted.
  #
  # @param code [String]
  def add_subfield(code)
    @subfields[code] = true
  end

  # Collect the values of the wanted subfields, in field order.
  #
  # When no subfield codes were configured every subfield value is returned.
  # Non-matching subfields are skipped rather than contributing nil entries.
  #
  # @param field [#subfields] each subfield responds to #code and #value
  # @return [Array]
  def values(field)
    return field.subfields.map(&:value) if @subfields.empty?

    field.subfields
         .select { |sf| @subfields.key?(sf.code) }
         .map(&:value)
  end

  private

  # Translate the '*' wildcard into nil ("no constraint").
  def wildcard_to_nil(indicator)
    indicator == '*' ? nil : indicator
  end
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment