Created
December 18, 2009 04:07
-
-
Save anarchivist/259281 to your computer and use it in GitHub Desktop.
Aleph Sequential reader code for pymarc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pymarc import MARCReader, Record, Field | |
class AlephSequentialReader(MARCReader):
    """
    An iterator class for reading a file of MARC records in Aleph Sequential
    format, which subclasses pymarc's MARCReader. Based on Tim Prettyman's
    MARC::File::AlephSeq Perl code.

    Each input line is sliced by fixed column position:

    * columns 0-8:  record key (consecutive lines sharing a key form a record)
    * columns 10-12: field tag (``LDR`` for the leader)
    * column 13:    first indicator
    * column 14:    second indicator
    * columns 18-:  field content; subfields are introduced by ``$$`` followed
      by a one-character subfield code

    The handle may yield either text or bytes; bytes are decoded using the
    ``encoding`` class attribute.
    """

    # Codec applied to lines when the underlying handle yields bytes
    # (e.g. a file opened with mode 'rb' on Python 3). Override on a subclass
    # or instance if the export uses another encoding.
    encoding = 'utf-8'

    def __init__(self, marc_target):
        """
        Wrap ``marc_target`` (a readable file-like object) via MARCReader.
        """
        super(AlephSequentialReader, self).__init__(marc_target)
        # First line of the *next* record, read while looking for the end of
        # the current one. Buffering it avoids tell()/seek(), so non-seekable
        # streams work too.
        self._pending_line = None

    def _read_line(self):
        """
        Return the next non-blank line as text, or '' at end of file.
        """
        while True:
            line = self.file_handle.readline()
            if not line:
                return ''
            # On Python 3 a binary handle yields bytes; on Python 2 ``bytes``
            # is ``str`` and the line is left untouched.
            if isinstance(line, bytes) and not isinstance(line, str):
                line = line.decode(self.encoding)
            if line.strip():
                return line

    def next(self):
        """
        To support iteration: return the next Record, built from the run of
        consecutive lines that share the same 9-character key.

        Raises StopIteration when the input is exhausted.
        """
        line = getattr(self, '_pending_line', None) or self._read_line()
        self._pending_line = None
        if not line:
            raise StopIteration

        current_key = line[0:9]
        record_lines = []
        while line and line[0:9] == current_key:
            record_lines.append(line.rstrip('\r\n'))
            line = self._read_line()
        # Either '' (end of file) or the first line of the following record.
        self._pending_line = line

        record = Record()
        for recordln in record_lines:
            tag = recordln[10:13]
            ind1 = recordln[13:14]
            ind2 = recordln[14:15]
            rest = recordln[18:]
            if tag == 'LDR':
                # '^' is used as a placeholder for blanks in the leader.
                record.leader = rest.replace('^', ' ')
            elif tag < '010' and tag.isdigit():
                # Control fields (00X) carry raw data and no subfields.
                if tag == '008':
                    rest = rest.replace('^', ' ')
                record.add_field(Field(tag=tag, data=rest))
            else:
                # Everything else, including non-numeric tags such as FMT, is
                # treated as a data field. Text before the first '$$' is
                # discarded; empty chunks (e.g. a trailing '$$') are skipped
                # because they have no subfield code.
                # NOTE(review): the flat [code, value, ...] list is the
                # subfield format this file was written against; newer pymarc
                # releases may expect Subfield objects -- confirm against the
                # installed version.
                subfields = []
                for chunk in rest.split('$$')[1:]:
                    if chunk:
                        subfields.extend([chunk[0], chunk[1:]])
                record.add_field(Field(tag=tag, indicators=[ind1, ind2],
                                       subfields=subfields))
        return record

    # Python 3 iterator protocol. Without this alias, iteration falls through
    # to the inherited __next__, which expects binary MARC rather than Aleph
    # Sequential text.
    __next__ = next
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I try to create an AlephSequentialReader object as follows:
with open('DCTa_finalAleph_20190501.txt', 'rb') as fh:
reader = AlephSequentialReader(fh)
for record in reader:
...
I get the following error:
----> 3 for record in reader:
ValueError: invalid literal for int() with base 10: b'1 L'
What am I doing wrong?
Thanks!
Yael