Create MARC records from a spreadsheet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xlrd, sys | |
from pymarc import Field, Reader, Record, MARCWriter | |
#script timer | |
import time | |
start_time = time.time() | |
"""Script that reads data from an Excel sheet and converts the records to MARC format. | |
Sample incoming data is in Chinese, UTF 8. Would work for other languages as well. | |
Adjust the findvalue(rownum, x) values to reflect the column numbers in the incoming data. | |
Tutorial for Excel: http://scienceoss.com/read-excel-files-from-python/ | |
Ted Lawless, Sept 2009""" | |
INFILE = sys.argv[1] | |
OUTFILE = sys.argv[2] | |
#open file from stdin | |
wb = xlrd.open_workbook(INFILE) | |
#Excel sheet constants | |
sh = wb.sheet_by_name(u'Sheet1') | |
def findvalue(rownum, cellnum): | |
"""Simple function to read cells from the spreadsheet.""" | |
#Excel converts ISBNS to floats so we have to convert them back to ints | |
if cellnum == 5: | |
try: return str(int(sh.cell(rowx=rownum, colx=cellnum).value)) | |
except: pass | |
return sh.cell(rowx=rownum, colx=cellnum).value | |
#counter for records | |
count = 1 | |
writer = MARCWriter(file(OUTFILE, 'w')) | |
for rownum in range(sh.nrows)[1:]: | |
#store spreadsheet values | |
#adjust these values to reflect the incoming data | |
#eg, 4 below would be 2 if the title were in the second cell. | |
title = findvalue(rownum, 4) | |
author = findvalue(rownum, 7) | |
isbn = findvalue(rownum, 5) | |
imprint = findvalue(rownum, 8) | |
year = str(findvalue(rownum, 9))[0:4] | |
ctrl = findvalue(rownum, 2) | |
#create a marc record instance | |
record = Record() | |
#store vendor catalog id in 001 | |
field = Field(tag='001', data='chi' + ctrl) | |
record.add_field(field) | |
#008 | |
oh8 = list('/' * 39) | |
oh8[35:37] = 'chi' | |
oh8[15:17] = 'cc ' | |
field = Field(tag='008', data="".join(oh8)) | |
record.add_field(field) | |
#isbn | |
field = Field('020', [' ', ' '], ['a', isbn]) | |
record.add_field(field) | |
#author | |
field = Field('100', ['0', ' '], ['a', author]) | |
record.add_field(field) | |
#title | |
field = Field('245', ['1', '0'], ['a', title]) | |
record.add_field(field) | |
#imprint | |
#=260 \\$bEDWARD ELGAR PUBLISHING$c2004 | |
field = Field( | |
tag = '260', | |
indicators = ['\\', '\\'], | |
subfields = [ | |
'b', imprint, | |
'c', year | |
] | |
) | |
record.add_field(field) | |
#leader | |
l = list(record.leader) | |
l[5] = 'n' | |
l[6] = 'a' | |
l[7] = 'm' | |
l[9] = 'a' # flag saying this record is utf8 | |
record.leader = "".join(l) | |
#write the record to file | |
writer.write(record) | |
sys.stderr.write("Record: %s\n%s\n" % (count, str(record))) | |
#counter | |
count += 1 | |
#close file | |
writer.close() | |
run_time = time.time() - start_time | |
sys.stderr.write("%s total records written to %s in %s seconds.\n\n" % (count, OUTFILE, run_time)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment