Last active
October 17, 2022 02:09
-
-
Save vphill/76e7eb0a1681f147d7e2768e7e82a6d5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""script for converting NOVAXCHANGE tape files into record block files""" | |
import sys | |
def iterate_stream(stream, delimiter, max_read_size=1024 * 4):
    """Yield `delimiter`-terminated records read incrementally from `stream`.

    Each yielded record includes its trailing `delimiter`. Data left after
    the last delimiter is yielded as a final record when it is non-empty.
    The delimiter is found even when it straddles two consecutive
    `stream.read()` results.

    Args:
        stream: Object with a `read(size)` method returning `str` or `bytes`
            (matching the type of `delimiter`) and a falsy value at EOF.
        delimiter: Non-empty `str` or `bytes` separator.
        max_read_size: Size passed to each `stream.read()` call.

    Yields:
        Records of the same type as `delimiter`.

    Raises:
        ValueError: If `delimiter` is empty (it would match everywhere and
            the generator would never make progress).
    """
    if not delimiter:
        raise ValueError('delimiter must not be empty')
    empty = '' if isinstance(delimiter, str) else b''
    delimiter_len = len(delimiter)
    # A delimiter split across two reads can leave at most
    # delimiter_len - 1 of its units at the end of the first read, so that
    # many trailing units are withheld and re-searched with the next read.
    keep = delimiter_len - 1
    chunks = []   # pieces of the current record known not to hold a delimiter
    tail = empty  # withheld end of the previous read
    while True:
        data = stream.read(max_read_size)
        if not data:
            break
        data = tail + data
        start = 0
        while True:
            i = data.find(delimiter, start)
            if i < 0:
                break
            end = i + delimiter_len
            chunks.append(data[start:end])
            yield empty.join(chunks)
            chunks = []
            start = end
        rest = data[start:]
        split = max(len(rest) - keep, 0)
        if split:
            chunks.append(rest[:split])
        tail = rest[split:]
    chunks.append(tail)
    record = empty.join(chunks)
    if record:
        yield record
def remove_expected_null_bytes_iterator(payload, frequency=16384, marker_len=4):
    """Yield the data blocks of `payload`, dropping the marker after each one.

    The original author's notes on the tape image format: four control
    characters ('\\x00\\x0@\\x00\\x0@') occur every 16384 bytes, and that
    number also appears in HDR2, which seems meaningful. Removing four bytes
    after every 16384 bytes appears to give the wanted payload.

    The marker bytes are skipped by position only; their content is not
    inspected.

    Args:
        payload: Sliceable sequence (e.g. `bytes`) to split.
        frequency: Number of data units between markers.
        marker_len: Number of units to skip after each data block.

    Yields:
        Consecutive slices of `payload`, each at most `frequency` long.

    Raises:
        ValueError: If `frequency` is not positive or `marker_len` is
            negative.
    """
    if frequency <= 0:
        raise ValueError('frequency must be a positive number of bytes')
    if marker_len < 0:
        raise ValueError('marker_len must not be negative')
    # Each step advances past one data block plus the marker that follows it.
    stride = frequency + marker_len
    for offset in range(0, len(payload), stride):
        yield payload[offset:offset + frequency]
if __name__ == '__main__':
    if len(sys.argv) != 2:
        # A usage error is a failure: report it on stderr with a non-zero status.
        print('usage: python3 test_nara.py <tape-image-file>', file=sys.stderr)
        sys.exit(1)
    with open(sys.argv[1], "rb") as fp:
        # Each record is the data following one 'P\x00HDR1' marker, up to and
        # including the next marker.
        for record in iterate_stream(fp, b'P\x00HDR1'):
            # The following lines are useful for seeing what is in the record headers.
            # hdr1 = b'HDR1' + record[:80]  # Read the remaining 80 (of 84) bytes in HDR1
            # hdr2 = record[80:164]  # Read all of HDR2 (84 bytes)
            eof_loc = record.find(b'P\x00EOF1')
            if eof_loc < 0:
                # Without an EOF1 marker the payload end is unknown; using -1
                # in the slice below would silently cut an arbitrary span.
                print('Skipping record without EOF1 marker:', repr(record[:17]),
                      file=sys.stderr)
                continue
            # NOTE(review): the name comes straight from the tape header and is
            # used as an output path unchecked - confirm inputs are trusted.
            out_filename = record[0:17].replace(b'\x00', b'').decode().strip()
            # File payload runs from offset 166 of the record until shortly
            # before EOF1; surrounding whitespace is stripped.
            # NOTE(review): the original comment said "168 bytes from HDR1"
            # while the slice starts at 166 - confirm the intended offset.
            payload = remove_expected_null_bytes_iterator(record[166:eof_loc-4].strip())
            print('Outputting:', out_filename)
            with open(out_filename + '.gz', 'wb') as wf:  # The .gz is only for testing.
                wf.writelines(payload)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment