Skip to content

Instantly share code, notes, and snippets.

@lawlesst
Created October 15, 2010 00:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lawlesst/627394 to your computer and use it in GitHub Desktop.
Save lawlesst/627394 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
from pymarc import Field, Reader, Record, MARCWriter
import xlrd
#script timer
import time
start_time = time.time()
"""Script that reads data from an Excel sheet and converts the records to MARC
format. Adjust the findvalue(rownum, x) values to reflect the column
numbers in the incoming data.
Tutorial for reading Excel with Python:
http://scienceoss.com/read-excel-files-from-python/
Ted Lawless <lawlesst at gmail dot com>, Sept 2009"""
INFILE = sys.argv[1]
OUTFILE = sys.argv[2]
#open file from stdin
wb = xlrd.open_workbook(INFILE)
#Excel sheet
sh = wb.sheet_by_name(u'Sheet1')
def findvalue(rownum, cellnum, isbn_cell=5):
"""Simple function to read cells from the spreadsheet."""
#Excel converts ISBNS to floats so we have to convert them back to ints
if cellnum == isbn_cell:
try: return str(int(sh.cell(rowx=rownum, colx=cellnum).value))
except: pass
return sh.cell(rowx=rownum, colx=cellnum).value
#counter for records
count = 1
writer = MARCWriter(file(OUTFILE, 'w'))
for rownum in range(sh.nrows)[1:]:
#store spreadsheet values
#adjust these values to reflect the incoming data
#eg, 4 below would be 2 if the title were in the second cell.
title = findvalue(rownum, 4)
author = findvalue(rownum, 7)
isbn = findvalue(rownum, 5)
imprint = findvalue(rownum, 8)
year = str(findvalue(rownum, 9))[0:4]
ctrl = findvalue(rownum, 2)
#create a marc record instance
record = Record()
#store vendor catalog id in 001
field = Field(tag='001', data='chi' + ctrl)
record.add_field(field)
#008
oh8 = list('/' * 39)
oh8[35:37] = 'chi'
oh8[15:17] = 'cc '
field = Field(tag='008', data="".join(oh8))
record.add_field(field)
#isbn
field = Field('020', [' ', ' '], ['a', isbn])
record.add_field(field)
#author
field = Field('100', ['0', ' '], ['a', author])
record.add_field(field)
#title
field = Field('245', ['1', '0'], ['a', title])
record.add_field(field)
#imprint
#=260 \\$bEDWARD ELGAR PUBLISHING$c2004
field = Field(
tag = '260',
indicators = ['\\', '\\'],
subfields = [
'b', imprint,
'c', year
]
)
record.add_field(field)
#leader
l = list(record.leader)
l[5] = 'n'
l[6] = 'a'
l[7] = 'm'
l[9] = 'a' # flag saying this record is utf8
record.leader = "".join(l)
#write the record to file
writer.write(record)
sys.stderr.write("Record: %s\n%s\n" % (count, str(record)))
#counter
count += 1
#close file
writer.close()
run_time = time.time() - start_time
sys.stderr.write("%s total records written to %s in %s seconds.\n\n" %\
(count, OUTFILE, run_time))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment