Skip to content

Instantly share code, notes, and snippets.

@vphill
Created September 3, 2013 21:38
Show Gist options
  • Save vphill/6429962 to your computer and use it in GitHub Desktop.
Save vphill/6429962 to your computer and use it in GitHub Desktop.
Mapping a 115 column spreadsheet in 115 lines... (I skip some unneeded fields)
# Source language code -> code written to the record. Codes that are not
# listed here are dropped (the original lookup defaulted to "").
# NOTE(review): the targets look like ISO 639-2/B codes -- confirm.
_LANGUAGE_CODES = {
    "en": "eng",
    "fr": "fre",
    "eng": "eng",
}

# Separator used inside a spreadsheet cell that holds several values.
_MULTI_VALUE_SEPARATOR = "||"


def _normalize_languages(cell):
    """Translate every language code in *cell* via ``_LANGUAGE_CODES``.

    The cell may hold several codes separated by ``||``. Each code is
    stripped of surrounding whitespace and translated individually; codes
    with no translation are dropped. The survivors are re-joined with
    ``||`` so the result can be passed on with ``split="||"`` like any
    other multi-valued cell.
    """
    translated = (
        _LANGUAGE_CODES.get(code.strip(), "")
        for code in cell.split(_MULTI_VALUE_SEPARATOR)
    )
    return _MULTI_VALUE_SEPARATOR.join(code for code in translated if code)


def _map_columns(record, row, section, element, columns, **options):
    """Map each column in *columns* (in order) onto ``section``/``element``.

    Every call is made with ``required=False`` and ``split="||"``; any extra
    keyword arguments (``qualifier``, ``location``, ...) are forwarded as-is.
    """
    for column in columns:
        record.map(
            section,
            element,
            row[column],
            required=False,
            split=_MULTI_VALUE_SEPARATOR,
            **options
        )


def _map_spatial_coverage(record, row):
    """Route each dc.coverage.spatial value to a description or a subject.

    Values mentioning "scale" (case-insensitive) become content
    descriptions, values containing "--" become LCSH subjects with the
    dashes spaced out, and everything else becomes an unqualified subject.
    """
    coverage_columns = [
        "dc.coverage.spatial[En]",
        "dc.coverage.spatial[en]",
        "dc.coverage.spatial[en_US]",
        "dc.coverage.spatial[ev]",
    ]
    for column in coverage_columns:
        for value in row[column].split(_MULTI_VALUE_SEPARATOR):
            if "scale" in value.lower():
                record.map("basic", "description", value,
                           qualifier="content", required=False)
            elif "--" in value:
                record.map("basic", "subject", value,
                           qualifier="LCSH", required=False,
                           function=(lambda x: x.replace("--", " -- ")))
            else:
                record.map("basic", "subject", value, required=False)


def processRecord(RecordClass, row):
    """Build a metadata record from one row of the spreadsheet.

    Args:
        RecordClass: callable invoked as ``RecordClass("mphillips")``; the
            returned object must provide ``map(section, element, value,
            **options)``.
        row: mapping from spreadsheet column header to cell text. Every
            column referenced below is indexed directly, so all of them
            must be present (presumably a ``csv.DictReader`` row -- confirm
            against the caller).

    Returns:
        The populated record instance.

    The ``map`` calls are issued in a fixed order, grouped by source field
    family; columns that are not referenced are intentionally skipped.
    """
    record = RecordClass("mphillips")

    # Creator
    _map_columns(record, row, "agent", "creator", [
        "dc.contributor.creator[]",
        "dc.contributor.creator[en]",
        "dc.contributor.creator[en_US]",
    ])

    # Author
    _map_columns(record, row, "agent", "creator", [
        "dc.contributor.author[En]",
        "dc.contributor.author[en]",
        "dc.contributor.author[en_US]",
    ], qualifier="aut")

    # dc.coverage.spatial - map to subjects (most are LCSH)
    _map_spatial_coverage(record, row)

    # dc.date
    _map_columns(record, row, "basic", "date", [
        "dc.date.copyright[en]",
        "dc.date.issued[En]",
        "dc.date.issued[]",
        "dc.date.issued[en]",
        "dc.date.issued[en_US]",
        "dc.unknown.date[en]",
    ], qualifier="creation")

    # dc.description - these really are notes; dc.edition goes there too
    _map_columns(record, row, "basic", "note", [
        "dc.description[]",
        "dc.description[en]",
        "dc.edition[en]",
    ], qualifier="display")

    # dc.editor - Do not map

    # dc.format - map to description.physical
    _map_columns(record, row, "basic", "description", [
        "dc.format[]",
        "dc.format[en]",
        "dc.format[en_US]",
    ], qualifier="physical")

    # dc.general.note
    _map_columns(record, row, "basic", "note", [
        "dc.general.note[En]",
        "dc.general.note[en]",
        "dc.general.note[en_US]",
    ], qualifier="display")

    # dc.granted (the date is mapped without a qualifier)
    _map_columns(record, row, "basic", "date", ["dc.granted.date[en]"])
    _map_columns(record, row, "basic", "note", ["dc.granted.note[en]"],
                 qualifier="display")

    # dc.identifier (skip object, and unqualified identifier)
    _map_columns(record, row, "basic", "identifier", [
        "dc.identifier.oclc[]",
        "dc.identifier.oclc[en]",
    ], qualifier="OCLC")
    _map_columns(record, row, "basic", "identifier", [
        "dc.identifier.olc[]",
        "dc.identifier.olc[en]",
        "dc.identifier.olc[en_US]",
    ], qualifier="OTHER")
    _map_columns(record, row, "basic", "identifier", [
        "dc.identifier.uri",
        "dc.identifier.uri[]",
    ], qualifier="LOCAL-CONT-NO")

    # dc.language - translate each code before mapping so that multi-valued
    # cells ("en||fr") are not discarded by a whole-cell dictionary lookup.
    for column in ("dc.language.iso[en_US]", "dc.language[en]"):
        record.map("basic", "language", _normalize_languages(row[column]),
                   required=False, split=_MULTI_VALUE_SEPARATOR)

    # dc.original (map to identifier?)
    _map_columns(record, row, "basic", "identifier", [
        "dc.original.name[]",
        "dc.original.name[en_US]",
    ], qualifier="LOCAL-CONT-NO")

    # dc.publisher - both publisher columns share the [en] location column
    _map_columns(record, row, "agent", "publisher", [
        "dc.publisher[en]",
        "dc.publisher[en_US]",
    ], location=row["dc.publisher.location[en]"])

    # dc.relation
    _map_columns(record, row, "basic", "relation", ["dc.relation[en_US]"])

    # dc.subject
    _map_columns(record, row, "basic", "subject", [
        "dc.subject.name[]",
        "dc.subject.name[en]",
        "dc.subject.name[en_US]",
        "dc.subject.title[en]",
        "dc.subject[en]",
        "dc.subject[en_US]",
    ], qualifier="KWD")

    # dc.title
    _map_columns(record, row, "basic", "title", [
        "dc.title[en_US]",
        "dc.title[En]",
        "dc.title[en]",
    ], qualifier="officialtitle")
    _map_columns(record, row, "basic", "title", [
        "dc.title.alternative[en]",
        "dc.title.alternative[en_US]",
    ], qualifier="alternatetitle")
    _map_columns(record, row, "basic", "title", ["dc.title.series[en_US]"],
                 qualifier="seriestitle")

    # Values that are the same for every record in this collection
    record.map("basic", "resourceType", "image_map")
    record.map("basic", "format", "image")
    record.map("basic", "primarySource", "1")
    record.map("basic", "institution", "HSUL")
    record.map("basic", "collection", "HardinSimmonsMapCollection")

    return record
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment