|
import xml.etree.ElementTree as ET |
|
from xml.dom.minidom import parseString |
|
import sys, time |
|
|
|
# Customer-code lookup (transcribed from the shared Google doc).
# Key: the 876$s value as a string.
# Value: [900$a access label, 900$b two-letter customer code].
customerCodes = {
    code: [access, customer]
    for code, access, customer in (
        ("43", "Private", "XX"),
        ("209", "Private", "XX"),
        ("210", "Private", "JS"),
        ("211", "Shared", "GN"),
        ("212", "Shared", "JN"),
        ("213", "Private", "JO"),
        ("214", "Shared", "NA"),
        ("215", "Shared", "NB"),
        ("216", "Private", "ND"),
        ("217", "Private", "NH"),
        ("218", "Private", "NL"),
        ("219", "Private", "NN"),
        ("220", "Private", "NO"),
        ("221", "Private", "NP"),
        ("222", "Private", "NQ"),
        ("223", "Private", "NR"),
        ("224", "Private", "NS"),
        ("225", "Private", "NU"),
        ("226", "Private", "NV"),
        ("227", "Shared", "NW"),
        ("228", "Private", "NX"),
        ("230", "Private", "NZ"),
    )
}
|
|
|
def pretty_print(data):
    """Return *data* (an XML document as str or bytes) re-indented by two spaces.

    The document is parsed with ``xml.dom.minidom`` and serialised with
    ``toprettyxml``; lines that contain only whitespace are then dropped so
    that whitespace text nodes in the input do not leave blank lines behind.
    """
    pretty = parseString(data).toprettyxml(indent=' ' * 2)
    return '\n'.join(line for line in pretty.split('\n') if line.strip())
|
|
|
def remove_namespace(doc, namespace):
    """Strip the ``{namespace}`` prefix from every element tag under *doc*, in place.

    *doc* is an ElementTree element (or tree); *namespace* is the bare
    namespace URI.  Tags that belong to another namespace, or that carry no
    namespace, are left untouched.  Nothing is returned.
    """
    prefix = u'{%s}' % namespace
    cut = len(prefix)
    # Element.getiterator() was removed in Python 3.9; iter() is the
    # supported spelling and walks the same nodes in the same order.
    for elem in doc.iter():
        tag = elem.tag
        # Comment / processing-instruction nodes have a callable as their
        # tag, not a string, so they must be skipped.
        if isinstance(tag, str) and tag.startswith(prefix):
            elem.tag = tag[cut:]
|
|
|
_MARC_NS = 'http://www.loc.gov/MARC21/slim'

# Grouping key (and emitted 852$h text) for items that have no call number.
# NOTE(review): this is the value str(False) that the grouping logic has
# always produced for such items; it is kept so existing output does not
# change - confirm whether a blank call number would be preferable.
_NO_CALL_NUMBER = 'False'


def _has_tag(elem, tag):
    """Return True when *elem* has a ``tag`` attribute equal to *tag*."""
    return elem.attrib.get('tag') == tag


def _subfield_texts(datafield):
    """Map subfield code -> text for *datafield* (last occurrence wins).

    Empty subfields map to '' so the literal string 'None' is never emitted.
    """
    return {
        sub.attrib['code']: sub.text or ''
        for sub in datafield
        if 'code' in sub.attrib
    }


def _new_datafield(tag, parent=None):
    """Create a ``datafield`` with both indicators "0", optionally under *parent*."""
    attrs = {'tag': tag, 'ind1': '0', 'ind2': '0'}
    if parent is None:
        return ET.Element('datafield', attrs)
    return ET.SubElement(parent, 'datafield', attrs)


def _add_subfield(datafield, code, text):
    """Append ``<subfield code=code>text</subfield>`` to *datafield*."""
    subfield = ET.SubElement(datafield, 'subfield', {'code': code})
    subfield.text = text
    return subfield


def _new_collection(parent):
    """Append a ``collection`` element carrying the MARC xmlns attribute."""
    collection = ET.SubElement(parent, 'collection')
    collection.set('xmlns', _MARC_NS)
    return collection


def _bib_call_number(record):
    """Return the 952$h text of *record*, or None when there is no 952$h.

    Only the first $h of each 952 is read; a later 952 overrides an earlier one.
    """
    call_number = None
    for field in record:
        if not _has_tag(field, '952'):
            continue
        for sub in field:
            if sub.attrib.get('code') == 'h':
                call_number = sub.text
                break
    return call_number


def _collect_852(record):
    """Gather 852 $b/$h/$3 for *record*, keyed by item number (852$a).

    Missing values are stored as None.  When an 852 has no $h the 952$h of
    the record is used instead.
    """
    items = {}
    for field in record:
        if not _has_tag(field, '852'):
            continue
        subs = _subfield_texts(field)
        item_number = subs.get('a')
        if not item_number:
            # Without $a there is nothing to key the data on.
            print("ERROR! 852 without $a (item number), skipping:", subs)
            continue

        data = {
            'b': subs.get('b') or None,
            'h': subs.get('h') or None,
            '3': subs.get('3') or None,
        }
        if data['h'] is None:
            # try to use the bib call number
            data['h'] = _bib_call_number(record) or None
        items[item_number] = data

        if data['h'] is None:
            print("ERROR! No callnumber found for this one:", data)
        if item_number.find('.i') == -1:
            print("No iNumber", item_number, item_number.find('.i'))
            print(items)
    return items


def _match_restriction(location, call_number, opac_message, agency, item_type):
    """Return (876$h, 900$a, 900$b) for the first matching rule, else None.

    Rules (all comparisons are on the raw subfield strings):
      * 852$b starts "rc2", 876$s "214", 876$o "2", 876$y "55"
            -> Circulates / Shared / NA
      * 852$b starts "rc2", 876$s "214", 876$o "u", call number without "*q"
            -> Supervised use / Shared / NA
      * 852$b starts "rc2", 876$s "214", 876$o "2", 876$y "2" or "3"
            -> Use on site / Shared / NA
      * 852$b starts "rc", 876$s "220", 876$y "2", call number without "*o"
            -> Supervised Use / Private / NO
    The rules are mutually exclusive, so evaluation order does not matter.
    """
    location = location or ''
    call_number = (call_number or '').lower()

    if agency == '214' and location.startswith('rc2'):
        if opac_message == '2' and item_type == '55':
            return ('Circulates', 'Shared', 'NA')
        if opac_message == 'u' and '*q' not in call_number:
            return ('Supervised use', 'Shared', 'NA')
        if opac_message == '2' and item_type in ('2', '3'):
            return ('Use on site', 'Shared', 'NA')

    if (agency == '220' and item_type == '2'
            and location.startswith('rc') and '*o' not in call_number):
        # NOTE(review): capital "U" here vs "Supervised use" above - the
        # strings are kept exactly as they were; confirm which is intended.
        return ('Supervised Use', 'Private', 'NO')

    return None


def _classify_item(subs_876, item_852, unmapped, unmapped_seen):
    """Work out (876$h, 900$a, 900$b) for one 876 item.

    Customer codes missing from ``customerCodes`` are recorded once each in
    *unmapped* (as ``[code, item_number]``) so they can be reported later.
    """
    agency = subs_876.get('s') or None
    customer = None
    if agency in customerCodes:
        customer = customerCodes[agency][1]
    elif agency not in unmapped_seen:
        unmapped_seen.add(agency)
        unmapped.append([agency, subs_876.get('a') or None])

    matched = _match_restriction(
        item_852['b'], item_852['h'],
        subs_876.get('o'), agency, subs_876.get('y'),
    )
    if matched is not None:
        return matched
    # Did not fit any rule: the fallback forces 900$a to "Private" but keeps
    # the 900$b that came from the customer-code table (if any).
    return ('In Library Use', 'Private', customer)


def _build_item_fields(subs_876, item_852, classification):
    """Build the [876, 900] datafield pair for one item."""
    use_restriction, access, customer = classification
    item_number = subs_876.get('a') or None

    new876 = _new_datafield('876')
    _add_subfield(new876, 'h', use_restriction)
    for code in ('a', 'j', 'p', 't'):
        text = subs_876.get(code)
        if text:
            _add_subfield(new876, code, text)
        else:
            print(item_number, "No 876$" + code)
    if item_852['3']:
        _add_subfield(new876, '3', item_852['3'])
    else:
        print(item_number, "No 876$3")

    new900 = _new_datafield('900')
    if access:
        _add_subfield(new900, 'a', access)
    # $b is always emitted; it is empty when the customer code is unknown.
    _add_subfield(new900, 'b', customer)

    return [new876, new900]


def _append_holding_852(holding_record, call_number, locations):
    """Append the synthetic holdings 852 ($b first location, $h call number)."""
    field = _new_datafield('852', holding_record)
    if locations:
        _add_subfield(field, 'b', locations[0])
    _add_subfield(field, 'h', call_number)
    return field


def _append_holding_866(holding_record, texts):
    """Append a synthetic 866 whose $a joins the 852$3 statements in *texts*."""
    field = _new_datafield('866', holding_record)
    _add_subfield(field, 'a', ", ".join(texts))
    return field


def _append_items(holding, item_groups):
    """Append ``items/content/collection`` to *holding*, one record per item."""
    items = ET.SubElement(holding, "items")
    content = ET.SubElement(items, "content")
    collection = _new_collection(content)
    for group in item_groups:
        for new876, new900 in group:
            item_record = ET.SubElement(collection, 'record')
            item_record.append(new876)
            item_record.append(new900)


def _convert_record(record, bib_records, unmapped, unmapped_seen):
    """Convert one MARC record and append the resulting bibRecord to *bib_records*."""
    new_bib = ET.Element('record')
    holding_866 = None
    holdings_id = None
    items_by_call_number = {}
    data_852 = _collect_852(record)

    for field in record:
        tag = field.attrib.get('tag')

        # the 'holdings'
        if tag == '866':
            for sub in field:
                code = sub.attrib.get('code')
                if code == 'y':
                    holdings_id = sub.text
                elif code == 'a':
                    # Re-home the original $a under a fresh 866.
                    holding_866 = _new_datafield('866')
                    holding_866.append(sub)

        # the items collection
        if tag == '876':
            subs = _subfield_texts(field)
            item_number = subs.get('a') or None
            item_852 = data_852.get(item_number)
            if item_852 is None:
                # Keep the item rather than crash or drop it silently.
                print("ERROR! No 852 found for item:", item_number)
                item_852 = {'b': None, 'h': None, '3': None}

            classification = _classify_item(
                subs, item_852, unmapped, unmapped_seen)
            key = item_852['h'] or _NO_CALL_NUMBER
            items_by_call_number.setdefault(key, []).append(
                _build_item_fields(subs, item_852, classification))

        # all the rest of the bib fields (852/866/876 are handled above).
        # Elements without a tag attribute are not copied - presumably this
        # includes the MARC leader; verify that this is intended.
        elif tag is not None and tag not in ('852', '866'):
            new_bib.append(field)

    # Per call number: the 852$3 statements and 852$b locations seen.
    texts_by_call_number = {}
    locations_by_call_number = {}
    for data in data_852.values():
        key = data['h'] or _NO_CALL_NUMBER
        texts = texts_by_call_number.setdefault(key, [])
        locations = locations_by_call_number.setdefault(key, [])
        if data['3']:
            texts.append(data['3'])
        if data['b']:
            locations.append(data['b'])

    # now we have all the components for this bibRecord, make that.
    bib_record = ET.SubElement(bib_records, 'bibRecord')
    bib = ET.SubElement(bib_record, "bib")
    ET.SubElement(bib, 'owningInstitutionId').text = 'NYPL'
    bib_content = ET.SubElement(bib, 'content')
    _new_collection(bib_content).append(new_bib)

    holdings = ET.SubElement(bib_record, "holdings")

    if holdings_id:
        # One holding that carries every item of the record.
        holding = ET.SubElement(holdings, "holding")
        ET.SubElement(holding, "owningInstitutionHoldingsId").text = holdings_id
        holding_content = ET.SubElement(holding, "content")
        holding_record = ET.SubElement(
            _new_collection(holding_content), "record")

        # The 852 describes the first call-number group only.
        first_key = next(iter(items_by_call_number), None)
        if first_key is None:
            print("ERROR! Holdings", holdings_id, "has no items")
        else:
            _append_holding_852(
                holding_record, first_key,
                locations_by_call_number.get(first_key, []))

        # the 866
        if holding_866 is not None:
            holding_record.append(holding_866)
        else:
            # there was no aggregated holdings text in the 866 so make our own
            holdings_text = []
            for key in items_by_call_number:
                holdings_text.extend(texts_by_call_number.get(key, []))
            _append_holding_866(holding_record, holdings_text)

        _append_items(holding, items_by_call_number.values())
    else:
        # there is no holdings ID:
        # add a holding for each different set of items with call numbers
        for key, group in items_by_call_number.items():
            holding = ET.SubElement(holdings, "holding")
            # Left empty on purpose: there is no id to put here.
            ET.SubElement(holding, "owningInstitutionHoldingsId")
            holding_content = ET.SubElement(holding, "content")
            holding_record = ET.SubElement(
                _new_collection(holding_content), "record")

            _append_holding_852(
                holding_record, key, locations_by_call_number.get(key, []))
            _append_holding_866(
                holding_record, texts_by_call_number.get(key, []))

            _append_items(holding, [group])


def convert(filename):
    """Convert the MARCXML file *filename* and write ``<filename>_converted.xml``.

    Each child record of the input root becomes one ``bibRecord`` with
      * ``bib``      - every tagged field except 852, 866 and 876,
      * ``holdings`` - one holding when an 866$y holdings id is present,
                       otherwise one holding per distinct item call number,
      * ``items``    - a rebuilt 876 plus a 900 for every source 876.

    Customer codes (876$s) that are missing from ``customerCodes`` are
    printed once conversion has finished.  Malformed fields (no item number,
    no matching 852, no location) are reported on stdout and converted on a
    best-effort basis instead of aborting the run.

    Raises whatever ``ET.parse`` / ``open`` raise for an unreadable or
    unparsable input file or an unwritable output file.
    """
    root = ET.parse(filename).getroot()

    # remove the namespace prefix on all the elements
    remove_namespace(root, _MARC_NS)

    bib_records = ET.Element("bibRecords")
    unmapped = []
    unmapped_seen = set()

    # loop through each marc:record
    for record in root:
        _convert_record(record, bib_records, unmapped, unmapped_seen)

    if unmapped:
        print("These customer codes were not found in the mapping")
        print(unmapped)

    # The document is UTF-8 XML, so do not depend on the locale's encoding.
    with open(filename + '_converted.xml', 'w', encoding='utf-8') as f:
        f.write(pretty_print(ET.tostring(bib_records, 'utf-8')))
|
|
|
if __name__ == '__main__':
    # Script entry point: exactly one argument (the input file) is expected;
    # anything else just prints the usage hint.
    arguments = sys.argv[1:]
    if len(arguments) == 1:
        convert(arguments[0])
    else:
        print('Pass the name of the file to convert, example: "python3.4 recap_sample_convert.py marcEditOutput.xml"')
|
|
|
|