Skip to content

Instantly share code, notes, and snippets.

@drkane
Last active January 22, 2020 00:28
Show Gist options
  • Save drkane/b73cc460396397df211cf6b4aa3f3954 to your computer and use it in GitHub Desktop.
Save drkane/b73cc460396397df211cf6b4aa3f3954 to your computer and use it in GitHub Desktop.
Fast streamer for BCP files
import argparse
import csv
def faststream(fp, blocksize=65536, line_delimiter='*@@*', field_delimiter='@**@'):
"""Read a BCP file and yield the rows
fp is a file pointer to a BCP file opened in 'r' mode.
It is assumed the file is opened with the correct encoding, so that·
fp.read() gives strings. A trailing newline may need to be added after this·
method to get exact results.
Author: Gertjan van den Burg
License: MIT
Copyright: 2020, The Alan Turing Institute
"""
RECORD_SEP = "\u241E"
block = fp.read(blocksize)
trail = None
while len(block) > 0:
lines = []
if not trail is None:
block = trail + block
lines = block.replace(field_delimiter, RECORD_SEP).split(line_delimiter)
if block[-4:] == line_delimiter:
trail = ""
else:
trail = lines[-1]
lines.pop()
for line in lines:
if len(line):
yield line.split(RECORD_SEP)
block = fp.read(blocksize)
def main():
    """Command-line entry point: convert a BCP file to CSV.

    Parses two positional arguments (input BCP path, output CSV path) and
    an optional, repeatable ``--column``/``-c`` flag. When at least one
    column name is given, the names are written as the first CSV row; the
    rows produced by ``faststream`` follow.
    """
    cli = argparse.ArgumentParser(description='Convert BCP file to CSV')
    cli.add_argument('infile', help='BCP file to convert')
    cli.add_argument('outfile', help='Destination CSV file')
    cli.add_argument('--column', '-c', action='append')
    options = cli.parse_args()

    with open(options.infile, 'r') as source:
        # newline='' lets the csv module control line endings itself.
        with open(options.outfile, 'w', newline='') as sink:
            csv_out = csv.writer(sink)
            header = options.column
            if header:
                csv_out.writerow(header)
            csv_out.writerows(faststream(source))
# Run the converter only when this file is executed as a script, so that
# importing the module (e.g. to reuse faststream) has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment