Created
October 15, 2010 00:54
-
-
Save lawlesst/627394 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
from pymarc import Field, Reader, Record, MARCWriter | |
import xlrd | |
#script timer | |
import time | |
start_time = time.time() | |
"""Script that reads data from an Excel sheet and converts the records to MARC | |
format. Adjust the findvalue(rownum, x) values to reflect the column | |
numbers in the incoming data. | |
Tutorial for reading Excel with Python: | |
http://scienceoss.com/read-excel-files-from-python/ | |
Ted Lawless <lawlesst at gmail dot com>, Sept 2009""" | |
INFILE = sys.argv[1] | |
OUTFILE = sys.argv[2] | |
#open file from stdin | |
wb = xlrd.open_workbook(INFILE) | |
#Excel sheet | |
sh = wb.sheet_by_name(u'Sheet1') | |
def findvalue(rownum, cellnum, isbn_cell=5): | |
"""Simple function to read cells from the spreadsheet.""" | |
#Excel converts ISBNS to floats so we have to convert them back to ints | |
if cellnum == isbn_cell: | |
try: return str(int(sh.cell(rowx=rownum, colx=cellnum).value)) | |
except: pass | |
return sh.cell(rowx=rownum, colx=cellnum).value | |
#counter for records | |
count = 1 | |
writer = MARCWriter(file(OUTFILE, 'w')) | |
for rownum in range(sh.nrows)[1:]: | |
#store spreadsheet values | |
#adjust these values to reflect the incoming data | |
#eg, 4 below would be 2 if the title were in the second cell. | |
title = findvalue(rownum, 4) | |
author = findvalue(rownum, 7) | |
isbn = findvalue(rownum, 5) | |
imprint = findvalue(rownum, 8) | |
year = str(findvalue(rownum, 9))[0:4] | |
ctrl = findvalue(rownum, 2) | |
#create a marc record instance | |
record = Record() | |
#store vendor catalog id in 001 | |
field = Field(tag='001', data='chi' + ctrl) | |
record.add_field(field) | |
#008 | |
oh8 = list('/' * 39) | |
oh8[35:37] = 'chi' | |
oh8[15:17] = 'cc ' | |
field = Field(tag='008', data="".join(oh8)) | |
record.add_field(field) | |
#isbn | |
field = Field('020', [' ', ' '], ['a', isbn]) | |
record.add_field(field) | |
#author | |
field = Field('100', ['0', ' '], ['a', author]) | |
record.add_field(field) | |
#title | |
field = Field('245', ['1', '0'], ['a', title]) | |
record.add_field(field) | |
#imprint | |
#=260 \\$bEDWARD ELGAR PUBLISHING$c2004 | |
field = Field( | |
tag = '260', | |
indicators = ['\\', '\\'], | |
subfields = [ | |
'b', imprint, | |
'c', year | |
] | |
) | |
record.add_field(field) | |
#leader | |
l = list(record.leader) | |
l[5] = 'n' | |
l[6] = 'a' | |
l[7] = 'm' | |
l[9] = 'a' # flag saying this record is utf8 | |
record.leader = "".join(l) | |
#write the record to file | |
writer.write(record) | |
sys.stderr.write("Record: %s\n%s\n" % (count, str(record))) | |
#counter | |
count += 1 | |
#close file | |
writer.close() | |
run_time = time.time() - start_time | |
sys.stderr.write("%s total records written to %s in %s seconds.\n\n" %\ | |
(count, OUTFILE, run_time)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment