Skip to content

Instantly share code, notes, and snippets.

@lawlesst
Created July 23, 2011 13:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lawlesst/1101419 to your computer and use it in GitHub Desktop.
Save lawlesst/1101419 to your computer and use it in GitHub Desktop.
Create MARC records from a spreadsheet
import xlrd, sys
from pymarc import Field, Reader, Record, MARCWriter
#script timer
import time
start_time = time.time()
"""Script that reads data from an Excel sheet and converts the records to MARC format.
Sample incoming data is in Chinese, UTF 8. Would work for other languages as well.
Adjust the findvalue(rownum, x) values to reflect the column numbers in the incoming data.
Tutorial for Excel: http://scienceoss.com/read-excel-files-from-python/
Ted Lawless, Sept 2009"""
INFILE = sys.argv[1]
OUTFILE = sys.argv[2]
#open file from stdin
wb = xlrd.open_workbook(INFILE)
#Excel sheet constants
sh = wb.sheet_by_name(u'Sheet1')
def findvalue(rownum, cellnum):
"""Simple function to read cells from the spreadsheet."""
#Excel converts ISBNS to floats so we have to convert them back to ints
if cellnum == 5:
try: return str(int(sh.cell(rowx=rownum, colx=cellnum).value))
except: pass
return sh.cell(rowx=rownum, colx=cellnum).value
#counter for records
count = 1
writer = MARCWriter(file(OUTFILE, 'w'))
for rownum in range(sh.nrows)[1:]:
#store spreadsheet values
#adjust these values to reflect the incoming data
#eg, 4 below would be 2 if the title were in the second cell.
title = findvalue(rownum, 4)
author = findvalue(rownum, 7)
isbn = findvalue(rownum, 5)
imprint = findvalue(rownum, 8)
year = str(findvalue(rownum, 9))[0:4]
ctrl = findvalue(rownum, 2)
#create a marc record instance
record = Record()
#store vendor catalog id in 001
field = Field(tag='001', data='chi' + ctrl)
record.add_field(field)
#008
oh8 = list('/' * 39)
oh8[35:37] = 'chi'
oh8[15:17] = 'cc '
field = Field(tag='008', data="".join(oh8))
record.add_field(field)
#isbn
field = Field('020', [' ', ' '], ['a', isbn])
record.add_field(field)
#author
field = Field('100', ['0', ' '], ['a', author])
record.add_field(field)
#title
field = Field('245', ['1', '0'], ['a', title])
record.add_field(field)
#imprint
#=260 \\$bEDWARD ELGAR PUBLISHING$c2004
field = Field(
tag = '260',
indicators = ['\\', '\\'],
subfields = [
'b', imprint,
'c', year
]
)
record.add_field(field)
#leader
l = list(record.leader)
l[5] = 'n'
l[6] = 'a'
l[7] = 'm'
l[9] = 'a' # flag saying this record is utf8
record.leader = "".join(l)
#write the record to file
writer.write(record)
sys.stderr.write("Record: %s\n%s\n" % (count, str(record)))
#counter
count += 1
#close file
writer.close()
run_time = time.time() - start_time
sys.stderr.write("%s total records written to %s in %s seconds.\n\n" % (count, OUTFILE, run_time))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment