Skip to content

Instantly share code, notes, and snippets.

@drkane
Created April 13, 2020 22:11
Show Gist options
  • Save drkane/f74fd325e1e193f5b9817b31be2a968d to your computer and use it in GitHub Desktop.
Save drkane/f74fd325e1e193f5b9817b31be2a968d to your computer and use it in GitHub Desktop.
Script for generating test Charity Commission zip files
import io
from zipfile import ZipFile, ZIP_DEFLATED
import random
import bcp
import click
import tqdm
@click.command()
@click.argument('infile')
@click.option('--sample-size', default=500, show_default=True,
              type=click.IntRange(min=0),
              help='Maximum number of charities to include in the sample.')
def create_data(infile, sample_size=500):
    """Build a small test zip from a full Charity Commission extract zip.

    Reads ``extract_charity.bcp`` from the archive at ``infile``, collects
    the first field of every row, and picks a random sample of those values.
    Every member of the archive is then scanned: rows whose first field is in
    the sample are kept and re-serialised; members with no matching rows are
    copied unchanged. The result is written to
    ``RegPlusExtract_TESTDATA.zip`` in the current working directory.

    Args:
        infile: Path to the source zip archive.
        sample_size: Upper bound on the number of sampled identifiers. If the
            archive contains fewer distinct values, all of them are used.
    """
    delimiter = '@**@'
    lineterminator = '*@@*'
    # Members are decoded and re-encoded with the same codec so that the
    # filtered output keeps the byte encoding of the source files.
    encoding = 'latin1'
    print(infile)
    output_data = {}
    with ZipFile(infile, 'r') as inzip:
        charities = set()
        with inzip.open('extract_charity.bcp', 'r') as fh:
            reader = bcp.reader(io.TextIOWrapper(fh, encoding=encoding))
            for r in reader:
                # Skip empty rows rather than failing on r[0].
                if r:
                    charities.add(r[0])

        # random.sample() needs a sequence (sets are rejected on Python
        # 3.11+), and the sample must not be larger than the population.
        random_charities = random.sample(
            sorted(charities), min(sample_size, len(charities)))
        print(random_charities)
        # Set for O(1) membership tests while scanning every row below.
        sample_ids = set(random_charities)

        for f in inzip.namelist():
            print(f)
            with inzip.open(f, 'r') as fh:
                reader = bcp.reader(io.TextIOWrapper(fh, encoding=encoding))
                rows = [
                    r for r in tqdm.tqdm(reader)
                    if r and r[0] in sample_ids
                ]
            if rows:
                text = lineterminator.join(delimiter.join(r) for r in rows)
                # Encode explicitly: writestr() would otherwise encode a str
                # as UTF-8, which differs from the latin1 source.
                output_data[f] = text.encode(encoding)
                print(f"Found {len(rows):,} sample rows")
            else:
                # No matching rows: copy the member's raw bytes unchanged.
                output_data[f] = inzip.read(f)

    with ZipFile('RegPlusExtract_TESTDATA.zip', 'w',
                 compression=ZIP_DEFLATED) as outzip:
        for f, contents in output_data.items():
            outzip.writestr(f, contents)
# Run the command only when this file is executed as a script, not on import.
if __name__ == "__main__":
    create_data()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment