Created
September 27, 2013 15:15
-
-
Save billdueber/6730218 to your computer and use it in GitHub Desktop.
partial implementation of MarcExtractor specset and spec objects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Traject | |
class MarcExtractor | |
# A set of Spec objects, with knowledge about the collection as a whole.
#
# Specs are stored by their tag (at most one spec per tag). The set also
# tracks which field tags are "interesting", i.e. might be covered by one
# of its specs, so that callers can cheaply skip irrelevant fields.
#
# The :alternate_script option controls how 880 fields are treated:
#   false  -> 880 fields are not interesting
#   :only  -> *only* 880 fields are interesting
#   other  -> both the specs' own tags and 880 are interesting
class SpecSet
  attr_reader :interesting_tags, :options

  # opts - options Hash; only :alternate_script is read here.
  def initialize(opts = {})
    @specs = {}
    # Tags are "interesting" if we have a spec that might cover them
    @interesting_tags_hash = {}
    @interesting_tags = []
    set_options(opts)
  end

  # Replace the options and re-derive the interesting tags from them and
  # from every spec registered so far. Returns self.
  def set_options(opts)
    @options = opts || {}
    rebuild_interesting_tags
    self
  end

  # Register a spec under spec.tag. Returns self so calls can be chained.
  #
  # Raises ArgumentError if a spec for the same tag was already added.
  def add_spec(spec)
    if @specs[spec.tag]
      raise ArgumentError, "MARC tag #{spec.tag} specified twice in the same spec. Use two calls to to_field if you need that"
    end

    @specs[spec.tag] = spec
    # Re-derive rather than reset: earlier specs' tags and the 880 entry
    # must survive the addition of a new spec.
    rebuild_interesting_tags
    self
  end

  # True if a field with this tag might be covered by one of our specs.
  def interesting_tag?(tag)
    @interesting_tags_hash.has_key?(tag)
  end

  # Return the spec that applies to the given field, or nil if there is
  # none. An 880 field carrying a subfield 6 is looked up under the first
  # three bytes of that subfield instead of under "880".
  def spec_covering_field(field)
    tag = field.tag

    # Short-circuit the uninteresting stuff
    return nil unless interesting_tag?(tag)

    # Due to bug in jruby https://github.com/jruby/jruby/issues/886 , we need
    # to do this weird encode gymnastics, which fixes it for mysterious reasons.
    linkage = field['6']
    if tag == "880" && linkage
      tag = linkage.encode(linkage.encoding).byteslice(0, 3)
    end

    # Take the resulting tag and get the spec for it (nil if there isn't one)
    @specs[tag]
  end

  private

  # Recompute @interesting_tags_hash / @interesting_tags from the current
  # options and the registered specs.
  def rebuild_interesting_tags
    tags = {}

    # By default, interesting tags are those we hold a spec for. Skip them
    # if we only care about alternate scripts (i.e., we *only* care about 880s).
    unless @options[:alternate_script] == :only
      @specs.each_key { |tag| tags[tag] = true }
    end

    # If we *are* interested in alternate scripts, add the 880
    tags['880'] = true unless @options[:alternate_script] == false

    @interesting_tags_hash = tags
    @interesting_tags = tags.keys
  end
end
# Common base for extraction specs: it only records which tag the spec
# applies to. SpecSet stores and looks up specs by this tag.
class GenericSpec
  # The tag this spec applies to (expected to be a String such as "245",
  # since SpecSet compares field tags against it).
  attr_accessor :tag

  # tag - the tag this spec applies to.
  def initialize(tag)
    @tag = tag
  end
end
# Spec for a field whose content is a single string read via field.value,
# optionally narrowed to one byte or a byte range of that string.
class ControlSpec < GenericSpec
  # nil (whole value), an Integer (single byte offset) or a Range of
  # byte offsets, as established by #set_bytes.
  attr_reader :bytes

  # tag   - the tag this spec applies to.
  # byte1 - optional first (or only) byte offset; converted with to_i.
  # byte2 - optional last byte offset (inclusive); converted with to_i.
  def initialize(tag, byte1 = nil, byte2 = nil)
    super(tag)
    set_bytes(byte1, byte2)
  end

  # This spec never filters on indicators; always matches.
  def matches_indicators(field)
    true
  end

  # Choose which bytes #values extracts. With both arguments the slice is
  # the inclusive range byte1..byte2; with only byte1 it is that single
  # byte; with neither, any previous slice is cleared and the whole value
  # is used again. Returns self.
  def set_bytes(byte1, byte2 = nil)
    @bytes =
      if byte1 && byte2
        (byte1.to_i)..(byte2.to_i)
      elsif byte1
        byte1.to_i
      end
    self
  end

  # One-element Array holding the configured byte slice of field.value.
  # NOTE(review): String#byteslice returns nil when the offset lies beyond
  # the end of the string, so the result can be [nil] for short values.
  def byte_value(field)
    [field.value.byteslice(@bytes)]
  end

  # One-element Array holding the complete field.value.
  def whole_value(field)
    [field.value]
  end

  # Extract this spec's value(s) from the field as an Array: the byte slice
  # when one is configured, otherwise the whole value.
  #
  # Dispatching on @bytes here (rather than installing a per-object
  # singleton method) keeps #values and #value in agreement with the
  # configured bytes at all times.
  def values(field)
    @bytes ? byte_value(field) : whole_value(field)
  end

  # Singular name kept for callers that used it.
  alias_method :value, :values
end
# Spec for a field made of indicators and coded subfields: optionally
# restricts matching by indicator values and extraction by subfield code.
class DataSpec < GenericSpec
  # Hash whose keys are the wanted subfield codes. Empty means "no
  # restriction": #values then returns every subfield's value.
  attr_reader :subfields

  # tag        - the tag this spec applies to.
  # indicators - optional two-character String; see #set_indicators.
  # subfields  - optional String of one-character subfield codes, e.g. "abc".
  def initialize(tag, indicators = nil, subfields = nil)
    super(tag)
    @subfields = {}
    set_indicators(indicators)
    subfields.to_s.each_char { |code| add_subfield(code) }
  end

  # Set the required indicators from a two-character string. A '*' or a
  # missing character in either position means "any value". Each call
  # fully replaces the previous requirement, so a later '*' clears an
  # indicator that was required before. Returns self.
  def set_indicators(two_char_ind_string)
    two_char_ind_string ||= ''
    i1 = two_char_ind_string[0]
    i2 = two_char_ind_string[1]
    @ind1 = (i1 == '*') ? nil : i1
    @ind2 = (i2 == '*') ? nil : i2
    self
  end

  # True if the field's indicators satisfy this spec.
  def matches_indicators(field)
    if @ind1 || @ind2
      sometimes_matches_indicators(field)
    else
      always_matches_indicators(field)
    end
  end

  # Used when no indicator is required.
  def always_matches_indicators(field)
    true
  end

  # Each required indicator must equal the field's; an unset (nil)
  # requirement matches anything.
  def sometimes_matches_indicators(field)
    (@ind1.nil? || field.indicator1 == @ind1) &&
      (@ind2.nil? || field.indicator2 == @ind2)
  end

  # Add one subfield code to the set of wanted codes.
  def add_subfield(code)
    @subfields[code] = true
  end

  # Values of the field's subfields, in field order, limited to the wanted
  # codes. When no codes were specified, all subfield values are returned.
  # Non-matching subfields are dropped rather than left behind as nils.
  def values(field)
    wanted = field.subfields
    unless @subfields.empty?
      wanted = wanted.select { |sf| @subfields.has_key?(sf.code) }
    end
    wanted.map { |sf| sf.value }
  end
end
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment