lawlesst/write_excel_to_marc.py

## write_excel_to_marc.py
#!/usr/bin/env python

import sys
from pymarc import Field, Reader, Record, MARCWriter

import xlrd

#script timer
import time
start_time = time.time()

"""Script that reads data from an Excel sheet and converts the records to MARC
   format.  Adjust the findvalue(rownum, x) values to reflect the column
   numbers in the incoming data.

   Tutorial for reading Excel  with Python:
   http://scienceoss.com/read-excel-files-from-python/

   Ted Lawless <lawlesst at gmail dot com>, Sept 2009"""


INFILE = sys.argv[1]
OUTFILE = sys.argv[2]


#open file from stdin
wb = xlrd.open_workbook(INFILE)
#Excel sheet
sh = wb.sheet_by_name(u'Sheet1')


def findvalue(rownum, cellnum, isbn_cell=5):
	"""Simple function to read cells from the spreadsheet."""
	#Excel converts ISBNS to floats so we have to convert them back to ints
	if cellnum == isbn_cell:
		try: return str(int(sh.cell(rowx=rownum, colx=cellnum).value))
		except: pass
	return sh.cell(rowx=rownum, colx=cellnum).value


#counter for records
count = 1

writer = MARCWriter(file(OUTFILE, 'w'))
for rownum in range(sh.nrows)[1:]:
	#store spreadsheet values
	#adjust these values to reflect the incoming data
	#eg, 4 below would be 2 if the title were in the second cell.
	title = findvalue(rownum, 4)
	author = findvalue(rownum, 7)
	isbn = findvalue(rownum, 5)
	imprint = findvalue(rownum, 8)
	year = str(findvalue(rownum, 9))[0:4]
	ctrl = findvalue(rownum, 2)

	#create a marc record instance
	record = Record()

	#store vendor catalog id in 001
	field = Field(tag='001', data='chi' + ctrl)
	record.add_field(field)

	#008
	oh8 = list('/' * 39)
	oh8[35:37] = 'chi'
	oh8[15:17] = 'cc '
	field = Field(tag='008', data="".join(oh8))
	record.add_field(field)

	#isbn
	field = Field('020', [' ', ' '], ['a', isbn])
	record.add_field(field)

	#author
	field = Field('100', ['0', ' '], ['a', author])
	record.add_field(field)

	#title
	field = Field('245', ['1', '0'], ['a', title])
	record.add_field(field)

	#imprint
	#=260  \\$bEDWARD ELGAR PUBLISHING$c2004
	field = Field(
		tag = '260',
		indicators = ['\\', '\\'],
		subfields = [
			'b', imprint,
			'c', year
			]
		)
	record.add_field(field)

	#leader
	l = list(record.leader)
	l[5] = 'n'
	l[6] = 'a'
	l[7] = 'm'
	l[9] = 'a' # flag saying this record is utf8
	record.leader = "".join(l)

	#write the record to file
	writer.write(record)
	sys.stderr.write("Record: %s\n%s\n" % (count, str(record)))
	#counter
	count += 1

#close file
writer.close()

run_time = time.time() - start_time
sys.stderr.write("%s total records written to %s in %s seconds.\n\n" %\
                (count, OUTFILE, run_time))
	#!/usr/bin/env python

	import sys
	from pymarc import Field, Reader, Record, MARCWriter

	import xlrd

	#script timer
	import time
	start_time = time.time()

	"""Script that reads data from an Excel sheet and converts the records to MARC
	format. Adjust the findvalue(rownum, x) values to reflect the column
	numbers in the incoming data.

	Tutorial for reading Excel with Python:
	http://scienceoss.com/read-excel-files-from-python/

	Ted Lawless <lawlesst at gmail dot com>, Sept 2009"""


	INFILE = sys.argv[1]
	OUTFILE = sys.argv[2]


	#open file from stdin
	wb = xlrd.open_workbook(INFILE)
	#Excel sheet
	sh = wb.sheet_by_name(u'Sheet1')


	def findvalue(rownum, cellnum, isbn_cell=5):
	"""Simple function to read cells from the spreadsheet."""
	#Excel converts ISBNS to floats so we have to convert them back to ints
	if cellnum == isbn_cell:
	try: return str(int(sh.cell(rowx=rownum, colx=cellnum).value))
	except: pass
	return sh.cell(rowx=rownum, colx=cellnum).value


	#counter for records
	count = 1

	writer = MARCWriter(file(OUTFILE, 'w'))
	for rownum in range(sh.nrows)[1:]:
	#store spreadsheet values
	#adjust these values to reflect the incoming data
	#eg, 4 below would be 2 if the title were in the second cell.
	title = findvalue(rownum, 4)
	author = findvalue(rownum, 7)
	isbn = findvalue(rownum, 5)
	imprint = findvalue(rownum, 8)
	year = str(findvalue(rownum, 9))[0:4]
	ctrl = findvalue(rownum, 2)

	#create a marc record instance
	record = Record()

	#store vendor catalog id in 001
	field = Field(tag='001', data='chi' + ctrl)
	record.add_field(field)

	#008
	oh8 = list('/' * 39)
	oh8[35:37] = 'chi'
	oh8[15:17] = 'cc '
	field = Field(tag='008', data="".join(oh8))
	record.add_field(field)

	#isbn
	field = Field('020', [' ', ' '], ['a', isbn])
	record.add_field(field)

	#author
	field = Field('100', ['0', ' '], ['a', author])
	record.add_field(field)

	#title
	field = Field('245', ['1', '0'], ['a', title])
	record.add_field(field)

	#imprint
	#=260 \\$bEDWARD ELGAR PUBLISHING$c2004
	field = Field(
	tag = '260',
	indicators = ['\\', '\\'],
	subfields = [
	'b', imprint,
	'c', year
	]
	)
	record.add_field(field)

	#leader
	l = list(record.leader)
	l[5] = 'n'
	l[6] = 'a'
	l[7] = 'm'
	l[9] = 'a' # flag saying this record is utf8
	record.leader = "".join(l)

	#write the record to file
	writer.write(record)
	sys.stderr.write("Record: %s\n%s\n" % (count, str(record)))
	#counter
	count += 1

	#close file
	writer.close()

	run_time = time.time() - start_time
	sys.stderr.write("%s total records written to %s in %s seconds.\n\n" %\
	(count, OUTFILE, run_time))