Skip to content

Instantly share code, notes, and snippets.

@anarchivist
Last active August 29, 2015 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save anarchivist/3c6acd513f744ebbcb19 to your computer and use it in GitHub Desktop.
Save anarchivist/3c6acd513f744ebbcb19 to your computer and use it in GitHub Desktop.
Krikri mapper DSL mockup
# Note: this is all pseudocode to mock up a DSL at this point.
# THIS IS A VERY INCOMPLETE EXAMPLE; much work to do for mods.
#
# Mockup of a mapper for Digital Commonwealth MODS records delivered as XML.
# Each DSL call names a target property and says where its value comes from:
# a literal string, an XPath relative to `default_parent`, or a block.
bpl_mods_mapper = Krikri::Mapper.new
bpl_mods_mapper.build do
  input_format :xml # :json, :csv, :tsv others?
  # The positional argument has to come first: Ruby rejects a positional
  # argument that follows keyword-style arguments.
  selector "//record", type: :xpath
  # select an XPath relative to `selector` as the default parent for mappings
  default_parent xpath: "metadata/mods:mods"

  aggregation do
    provider do
      # Set "literals" and "identifiers" just as strings
      prefLabel "Digital Commonwealth"
      providedLabel prefLabel # refer to a previously set value
      uri "http://jux/wuz"
    end

    # specify a particular XML element via XPath
    dataProvider xpath: "mods:location/mods:physicalLocation"
    # specify using XPath when attributes have specific values
    isShownAt xpath: "mods:location/mods:url[@usage='primary' and @access='object in context']"
    preview xpath: "mods:location/mods:url[@access='preview']"

    aggregatedCHO do
      collection do
        title xpath: "dc:collection"
        # run a function based on a parsed value
        uri generate_opaque_uri(xpath: selector + "/header/setspec")
      end

      # creator and contributor are derived together because both are read
      # from mods:name.
      # NOTE(review): how the block hands its results back to `multiple_map`
      # is not decided yet; returning a hash keyed by target field is an
      # assumption to confirm once the DSL is implemented.
      multiple_map(:contributor, :creator) do |record|
        creator = record.xpath("mods:name[mods:role/mods:roleTerm='creator']")
        contributor = record.xpath("mods:name")
        # No name has a creator role: promote the first name to creator and
        # keep the rest as contributors.
        # NOTE(review): the `empty?` check assumes a query with no match may
        # yield an empty set instead of nil -- confirm against the XML library.
        if creator.nil? || creator.empty?
          creator = contributor[0]
          contributor = contributor[1..-1]
        end
        # Do not list the creator a second time as a contributor.
        contributor.delete(creator) if contributor.include?(creator)
        # remove <affiliation>, <displayForm>, <description>, and <role>.
        { contributor: contributor, creator: creator }
      end

      date do
        # The self:: tests carry the mods: prefix like every other path here;
        # an unprefixed name test does not match a namespaced element.
        providedLabel xpath: "mods:originInfo/*[self::mods:dateCreated|self::mods:dateIssued|self::mods:dateOther|self::mods:copyrightDate][@encoding='w3cdtf' and @keyDate='yes']"
      end

      description xpath: "*[self::mods:abstract|self::mods:note]"
      extent xpath: "mods:physicalDescription/mods:extent"
      format xpath: "mods:genre"

      # Builds "<Type>: <value>" from mods:identifier. The string is the
      # block's value; an explicit `return` is not valid inside a do-block
      # evaluated this way.
      # NOTE(review): `titleize` is presumably ActiveSupport's String method,
      # so the @type result may need converting to a String first -- confirm.
      identifier do |record|
        id = record.xpath("mods:identifier")
        id_value = id.xpath("text()")
        id_type = id.xpath("@type").titleize
        "#{id_type}: #{id_value}"
      end

      language do
        providedLabel xpath: "mods:language/mods:languageTerm"
        # Select the attribute itself so the mapped value is the URI rather
        # than the languageTerm element that carries it.
        uri xpath: "mods:language/mods:languageTerm/@valueURI"
      end

      publisher xpath: "mods:originInfo/mods:place/mods:placeTerm[@type='text']|mods:originInfo/mods:publisher"

      # relation do
      #   <mods:location><mods:physicalLocation>
      #   CONCATENATED with ". "
      #   <mods:location><mods:holdingSimple><mods:copyInformation><mods:subLocation>
      #   CONCATENATED with ". "
      #   <mods:relatedItem type="host"><mods:titleInfo><mods:title>
      #   CONCATENATED with ". "
      #   <mods:relatedItem type="series"><mods:titleInfo><mods:title>
      #   (e.g., Boston Public Library. Leslie Jones photograph collection)
      # end

      rights xpath: "mods:accessCondition"

      # spatial do
      #   <mods:subject><mods:hierarchicalGeographic>
      #   <mods:subject><mods:geographic>
      #   <mods:subject><mods:cartographics><mods:coordinates>
      # end

      subject do
        providedLabel xpath: "mods:subject/*[not(self::mods:hierarchicalGeographic|self::mods:geographic|self::mods:cartographics)]"
      end

      temporal do
        providedLabel xpath: "mods:subject/mods:temporal"
      end

      # <titleInfo> has these subelements:
      #   <title>, <nonSort> AND <subTitle>
      #   (<partNumber> and <partName> are not currently supported).
      # Supported title types are:
      #   <mods:titleInfo usage="primary">,
      #   <mods:titleInfo type="alternative">,
      #   <mods:titleInfo type="translated">,
      #   <mods:titleInfo type="uniform">
      # All three subelements are selected as children of titleInfo, as the
      # note above describes them.
      title xpath: "mods:titleInfo/mods:nonSort|mods:titleInfo/mods:title|mods:titleInfo/mods:subTitle"
      type xpath: "mods:typeOfResource"
    end
  end
end
# Note: this is all pseudocode to mock up a DSL at this point.
#
# Mockup of a mapper for Minnesota Digital Library records delivered as XML
# under metadata/oai_qdc:qualifieddc. Most properties are read straight from a
# single element via XPath; `preview` is computed from `isShownAt`.
mdl_oai_qdc_mapper = Krikri::Mapper.new
mdl_oai_qdc_mapper.build do
  input_format :xml # :json, :csv, :tsv others?
  # The positional argument has to come first: Ruby rejects a positional
  # argument that follows keyword-style arguments.
  selector "//record", type: :xpath
  # select an XPath relative to `selector` as the default parent for mappings
  default_parent xpath: "metadata/oai_qdc:qualifieddc"

  aggregation do
    provider do
      # Set "literals" and "identifiers" just as strings
      prefLabel "Minnesota Digital Library"
      providedLabel prefLabel # refer to a previously set value
      uri "http://foo/bar"
    end

    # specify a particular XML element via XPath
    dataProvider xpath: "dc:publisher"
    # select the last dc:identifier
    isShownAt xpath: "dc:identifier[last()]"
    # Use a function defined elsewhere (but where?)
    object contentdm_map_object(isShownAt)

    # Use an inline function passed as a block; "with" ensures
    # that `preview` will only get set if `isShownAt` is not nil
    preview with: isShownAt do
      if isShownAt.include?("cdm/ref")
        # ".../cdm/ref/..." URLs: swap the path segment for the thumbnail one.
        isShownAt.gsub("cdm/ref", "utils/getthumbnail")
      elsif isShownAt.include?("u?")
        # "<base>u?<root>,<pointer>" URLs: rebuild as a thumbnail.exe query.
        base_url, query = isShownAt.split("u?")
        root, pointer = query.split(",")
        "#{base_url}cgi-bin/thumbnail.exe?CISOROOT=#{root}&CISOPTR=#{pointer}"
      else
        # Neither URL pattern matched: leave preview unset.
        nil
      end
    end

    aggregatedCHO do
      collection do
        title xpath: "dc:collection"
        # run a function based on a parsed value
        uri generate_opaque_uri(xpath: selector + "/header/setspec")
      end

      contributor do
        providedLabel xpath: "dc:contributor"
      end

      creator do
        providedLabel xpath: "dc:creator"
      end

      date do
        providedLabel xpath: "dc:created"
      end

      description xpath: "dc:description"
      extent xpath: "dc:extent"
      format xpath: "dc:medium"

      genre do
        providedLabel xpath: "dcterms:format"
      end

      identifier xpath: "dc:identifier"

      language do
        providedLabel xpath: "dc:language"
      end

      publisher xpath: "dc:source"
      relation xpath: "dc:isPartOf"
      # isReplacedBy
      # replaces
      rights xpath: "dc:rights"

      spatial do
        providedLabel xpath: "dcterms:spatial"
      end

      subject do
        # split on semicolons? here or later?
        providedLabel xpath: "dc:subject"
      end

      temporal do
        providedLabel xpath: "dcterms:temporal"
      end

      title xpath: "dc:title"
      type map_type_from_literal(xpath: "dc:type")
    end
  end
end
# Note: this is all pseudocode to mock up a DSL at this point.
#
# Mockup of a mapper for University of Illinois records delivered as XML and
# read with MARC field specs (the `traject:` option and `extract_marc`)
# instead of XPath. A block given to a property receives each matched field.
uiuc_marcxml_mapper = Krikri::Mapper.new
uiuc_marcxml_mapper.build do
  input_format :xml # :json, :csv, :tsv others?
  # The positional argument has to come first: Ruby rejects a positional
  # argument that follows keyword-style arguments.
  selector "//record/metadata/record", type: :xpath
  # could expose convenience methods or other tools that assist with mapping
  # e.g. marcspec: http://cklee.github.io/marc-spec/marc-spec.html
  mapper_helper :traject

  aggregation do
    provider do
      # Set "literals" and "identifiers" just as strings
      prefLabel "University of Illinois Urbana Champaign, University Library"
      providedLabel prefLabel # refer to a previously set value
      uri "http://baz/quux"
    end

    # reuse the provider's preferred label as the data provider
    dataProvider provider.prefLabel
    isShownAt traject: extract_marc("856u")

    aggregatedCHO do
      contributor do
        # exclude 720 when $e is "aut" or "cre"
        providedLabel traject: "700:710:711:720" do |fld|
          if fld.tag == "720" && (fld['e'] == "aut" || fld['e'] == "cre")
            nil
          else
            extract_marc(fld)
          end
        end
      end

      creator do
        # extract_marc, like every other spec-string lookup in this mapper
        providedLabel traject: extract_marc("100:110:111")
      end

      date do
        providedLabel traject: extract_marc("260c")
      end

      # all 5xx fields, except 538
      description traject: "5.." do |fld| # note: this is a marcspecism
        extract_marc(fld) unless fld.tag == "538"
      end

      extent traject: extract_marc("300ac:340b")

      # again, this is taking marcspec+traject syntax just as a demonstration
      format traject: "LDR:007:337:338:340" do |fld|
        if fld.tag[0] == "3"
          # NOTE(review): `extract` on a subfield value is used only here;
          # confirm whether a separate helper is intended or `fld['a']` alone.
          extract(fld['a'])
        elsif fld.tag == "LDR"
          # return LDR/6
          fld[6]
        else
          # Return 007/0
          fld[0]
        end
      end

      # genre do
      #   providedLabel #external mapping
      # end

      identifier # 001; 020 [prefix ="ISBN: "]; 022$a [prefix ="ISSN: "]; 035$a; 050$a$b [prefix ="LC call number: "]

      language do
        providedLabel # 041$a [$2 ids source, i.e. iso639-1]; OR 008 (positions 35-37)
      end

      publisher traject: extract_marc("260ab")
      relation # 760-787
      # isReplacedBy
      # replaces
      rights traject: extract_marc("506:540")

      spatial do
        providedLabel traject: extract_marc("650z:651a:662")
      end

      subject do
        providedLabel traject: extract_marc("600:61.:650:651:653:654:655:656:657:658:69.")
      end

      temporal do
        providedLabel traject: extract_marc("648")
      end

      title traject: extract_marc("245:242:240") # don't use 245c
      type traject: extract_marc("337a")
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment