# vphill/test_nara.py

## test_nara.py
"""script for converting NOVAXCHANGE tape files into record block files"""

import sys


def iterate_stream(stream, delimiter, max_read_size=1024 * 4):
    """Yield `delimiter`-terminated pieces read incrementally from `stream`.

    The stream is consumed through repeated ``read(max_read_size)`` calls.
    Every yielded piece ends with `delimiter`, except possibly the last one,
    which carries whatever data followed the final delimiter.  A delimiter
    that is split across two consecutive reads is still recognised.

    Args:
        stream: Object whose ``read(size)`` returns ``str`` or ``bytes``
            (the same type as `delimiter`) and a falsy value at end of input.
        delimiter: Non-empty ``str`` or ``bytes`` separator.
        max_read_size: Maximum amount requested from `stream` per read.

    Yields:
        Successive pieces of the stream, each including its trailing
        delimiter when one was found.

    Raises:
        ValueError: If `delimiter` is empty (raised on first iteration).
    """
    if not delimiter:
        # An empty delimiter matches everywhere and would never advance.
        raise ValueError('delimiter must not be empty')

    empty = '' if isinstance(delimiter, str) else b''
    delimiter_len = len(delimiter)
    chunks = []    # completed fragments of the piece currently being built
    carry = empty  # tail of the previous read that may start a delimiter

    while True:
        data = stream.read(max_read_size)
        if not data:
            break

        # Prepend the held-back tail so a delimiter straddling two reads
        # becomes visible to find().
        data = carry + data
        start = 0
        while True:
            i = data.find(delimiter, start)
            if i < 0:
                break
            end = i + delimiter_len
            chunks.append(data[start:end])
            yield empty.join(chunks)
            chunks = []
            start = end

        # Hold back at most delimiter_len - 1 trailing units: that is the
        # longest suffix that can be an incomplete delimiter prefix.
        keep = min(len(data) - start, delimiter_len - 1)
        split = len(data) - keep
        if split > start:
            chunks.append(data[start:split])
        carry = data[split:]

    # Flush whatever is left after the final delimiter.
    chunks.append(carry)
    s = empty.join(chunks)
    if s:
        yield s


def remove_expected_null_bytes_iterator(payload, frequency=16384):
    """Yield `payload` in `frequency`-sized slices, dropping 4 bytes after each.

    The tape image format appears to insert four control bytes after every
    16384 bytes of data (a number that also shows up in HDR2, which seems
    meaningful).  Slicing around those four bytes seems to give back the
    payload we actually want.

    Args:
        payload: Sliceable sequence (e.g. ``bytes``) holding the raw data.
        frequency: Length of each slice that is kept.

    Yields:
        Consecutive slices of `payload`, each at most `frequency` long, with
        the 4 items following every slice skipped.
    """
    total = len(payload)
    stride = frequency + 4  # kept slice plus the 4 skipped control bytes
    start = 0
    while True:
        if start >= total:
            return
        stop = start + frequency
        yield payload[start:stop]
        start += stride


if __name__ == '__main__':
    # Command-line entry point: read the tape image named on the command line
    # and write one output file per HDR1-delimited segment.

    if len(sys.argv) != 2:
        # Passing a string to sys.exit() prints it to stderr and exits with
        # status 1, so a usage error is not reported as success.
        sys.exit('usage: python3 test_nara.py <tape-image-file>')

    with open(sys.argv[1], "rb") as fp:
        for f in iterate_stream(fp, b'P\x00HDR1'):
            # The following lines are useful for seeing what is in the record headers.
            # hdr1 = b'HDR1' + f[:80] # Read the remaining 80 (of 84) bytes in HDR1
            # hdr2 = f[80:164] # Read all of HDR2 (84 bytes)

            eof_loc = f.find(b'P\x00EOF1')
            if eof_loc < 0:
                # find() returns -1 when the segment has no EOF1 marker (for
                # example the data preceding the first HDR1).  Using -1 as a
                # slice bound would silently cut the wrong bytes, so skip it.
                print('Skipping segment without EOF1 marker', file=sys.stderr)
                continue

            # The output name is taken from the first 17 bytes of the segment
            # with NUL bytes and surrounding whitespace removed.
            out_filename = f[0:17].replace(b'\x00', b'').decode().strip()

            # File payload runs from offset 166 of the segment up to 4 bytes
            # before the EOF1 marker; surrounding whitespace is stripped.
            # NOTE(review): an earlier comment said 168 bytes while the slice
            # starts at 166 -- confirm which offset is intended.
            # max() keeps a very early EOF1 from producing a negative bound.
            payload_end = max(eof_loc - 4, 0)
            payload = remove_expected_null_bytes_iterator(f[166:payload_end].strip())
            print('Outputting:', out_filename)
            with open(out_filename + '.gz', 'wb') as wf:  # The .gz is only for testing.
                wf.writelines(payload)
	"""script for converting NOVAXCHANGE tape files into record block files"""

	import sys


	def iterate_stream(stream, delimiter, max_read_size=1024 * 4):
	    """Yield `delimiter`-terminated pieces read incrementally from `stream`.

	    The stream is consumed through repeated ``read(max_read_size)`` calls.
	    Every yielded piece ends with `delimiter`, except possibly the last one,
	    which carries whatever data followed the final delimiter.  A delimiter
	    that is split across two consecutive reads is still recognised.

	    Args:
	        stream: Object whose ``read(size)`` returns ``str`` or ``bytes``
	            (the same type as `delimiter`) and a falsy value at end of input.
	        delimiter: Non-empty ``str`` or ``bytes`` separator.
	        max_read_size: Maximum amount requested from `stream` per read.

	    Yields:
	        Successive pieces of the stream, each including its trailing
	        delimiter when one was found.

	    Raises:
	        ValueError: If `delimiter` is empty (raised on first iteration).
	    """
	    if not delimiter:
	        # An empty delimiter matches everywhere and would never advance.
	        raise ValueError('delimiter must not be empty')

	    empty = '' if isinstance(delimiter, str) else b''
	    delimiter_len = len(delimiter)
	    chunks = []    # completed fragments of the piece currently being built
	    carry = empty  # tail of the previous read that may start a delimiter

	    while True:
	        data = stream.read(max_read_size)
	        if not data:
	            break

	        # Prepend the held-back tail so a delimiter straddling two reads
	        # becomes visible to find().
	        data = carry + data
	        start = 0
	        while True:
	            i = data.find(delimiter, start)
	            if i < 0:
	                break
	            end = i + delimiter_len
	            chunks.append(data[start:end])
	            yield empty.join(chunks)
	            chunks = []
	            start = end

	        # Hold back at most delimiter_len - 1 trailing units: that is the
	        # longest suffix that can be an incomplete delimiter prefix.
	        keep = min(len(data) - start, delimiter_len - 1)
	        split = len(data) - keep
	        if split > start:
	            chunks.append(data[start:split])
	        carry = data[split:]

	    # Flush whatever is left after the final delimiter.
	    chunks.append(carry)
	    s = empty.join(chunks)
	    if s:
	        yield s


	def remove_expected_null_bytes_iterator(payload, frequency=16384):
	    """Yield `payload` in `frequency`-sized slices, dropping 4 bytes after each.

	    The tape image format appears to insert four control bytes after every
	    16384 bytes of data (a number that also shows up in HDR2, which seems
	    meaningful).  Slicing around those four bytes seems to give back the
	    payload we actually want.

	    Args:
	        payload: Sliceable sequence (e.g. ``bytes``) holding the raw data.
	        frequency: Length of each slice that is kept.

	    Yields:
	        Consecutive slices of `payload`, each at most `frequency` long, with
	        the 4 items following every slice skipped.
	    """
	    offset = 0
	    payload_len = len(payload)
	    while offset < payload_len:
	        yield payload[offset:offset + frequency]
	        # Step over the kept slice plus the 4 control bytes that follow it.
	        offset += frequency + 4


	if __name__ == '__main__':
	    # Command-line entry point: read the tape image named on the command line
	    # and write one output file per HDR1-delimited segment.

	    if len(sys.argv) != 2:
	        # Passing a string to sys.exit() prints it to stderr and exits with
	        # status 1, so a usage error is not reported as success.
	        sys.exit('usage: python3 test_nara.py <tape-image-file>')

	    with open(sys.argv[1], "rb") as fp:
	        for f in iterate_stream(fp, b'P\x00HDR1'):
	            # The following lines are useful for seeing what is in the record headers.
	            # hdr1 = b'HDR1' + f[:80] # Read the remaining 80 (of 84) bytes in HDR1
	            # hdr2 = f[80:164] # Read all of HDR2 (84 bytes)

	            eof_loc = f.find(b'P\x00EOF1')
	            if eof_loc < 0:
	                # find() returns -1 when the segment has no EOF1 marker (for
	                # example the data preceding the first HDR1).  Using -1 as a
	                # slice bound would silently cut the wrong bytes, so skip it.
	                print('Skipping segment without EOF1 marker', file=sys.stderr)
	                continue

	            # The output name is taken from the first 17 bytes of the segment
	            # with NUL bytes and surrounding whitespace removed.
	            out_filename = f[0:17].replace(b'\x00', b'').decode().strip()

	            # File payload runs from offset 166 of the segment up to 4 bytes
	            # before the EOF1 marker; surrounding whitespace is stripped.
	            # NOTE(review): an earlier comment said 168 bytes while the slice
	            # starts at 166 -- confirm which offset is intended.
	            # max() keeps a very early EOF1 from producing a negative bound.
	            payload_end = max(eof_loc - 4, 0)
	            payload = remove_expected_null_bytes_iterator(f[166:payload_end].strip())
	            print('Outputting:', out_filename)
	            with open(out_filename + '.gz', 'wb') as wf:  # The .gz is only for testing.
	                wf.writelines(payload)