Last active
January 22, 2020 00:28
-
-
Save drkane/b73cc460396397df211cf6b4aa3f3954 to your computer and use it in GitHub Desktop.
Fast streamer for BCP files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import csv | |
def faststream(fp, blocksize=65536, line_delimiter='*@@*', field_delimiter='@**@'): | |
"""Read a BCP file and yield the rows | |
fp is a file pointer to a BCP file opened in 'r' mode. | |
It is assumed the file is opened with the correct encoding, so that· | |
fp.read() gives strings. A trailing newline may need to be added after this· | |
method to get exact results. | |
Author: Gertjan van den Burg | |
License: MIT | |
Copyright: 2020, The Alan Turing Institute | |
""" | |
RECORD_SEP = "\u241E" | |
block = fp.read(blocksize) | |
trail = None | |
while len(block) > 0: | |
lines = [] | |
if not trail is None: | |
block = trail + block | |
lines = block.replace(field_delimiter, RECORD_SEP).split(line_delimiter) | |
if block[-4:] == line_delimiter: | |
trail = "" | |
else: | |
trail = lines[-1] | |
lines.pop() | |
for line in lines: | |
if len(line): | |
yield line.split(RECORD_SEP) | |
block = fp.read(blocksize) | |
def main():
    """Command-line entry point: convert a BCP file to a CSV file.

    Positional arguments are the input BCP path and the output CSV path.
    The optional ``--column`` / ``-c`` flag may be repeated; when given,
    the collected values are written as the first (header) row of the CSV.
    Every row produced by ``faststream`` is then written to the output.
    """
    cli = argparse.ArgumentParser(description='Convert BCP file to CSV')
    cli.add_argument('infile', help='BCP file to convert')
    cli.add_argument('outfile', help='Destination CSV file')
    cli.add_argument('--column', '-c', action='append')
    options = cli.parse_args()

    with open(options.infile, 'r') as source:
        # newline='' lets the csv module control line endings itself.
        with open(options.outfile, 'w', newline='') as target:
            out = csv.writer(target)
            header = options.column
            if header:
                out.writerow(header)
            out.writerows(faststream(source))
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment