Skip to content

Instantly share code, notes, and snippets.

@ftfarias
Last active September 24, 2019 20:37
Show Gist options
  • Save ftfarias/57277c73ce9b7718b1fe74edc462b112 to your computer and use it in GitHub Desktop.
Save ftfarias/57277c73ce9b7718b1fe74edc462b112 to your computer and use it in GitHub Desktop.
# from tqdm import tqdm
import csv
# Copy source.csv to destination.csv row by row, dropping the header line.
#
# Both files are opened with newline='' as the csv module documentation
# requires: on input this keeps newlines embedded in quoted fields intact,
# and on output it stops the writer's own '\r\n' terminator from being
# translated a second time on platforms that use '\r\n' line endings.
with open('source.csv', 'r', encoding='utf-8', errors='replace', newline='') as input_file:
    # Strip NUL characters lazily, line by line, before the csv reader sees them.
    clean_lines = (line.replace('\0', '') for line in input_file)
    input_csv = csv.reader(clean_lines, delimiter=';', quotechar='"')

    # Consume the header row; it is printed but not written to the destination.
    # The [] default keeps an empty source file from raising StopIteration.
    header = next(input_csv, [])
    print(header)

    with open('destination.csv', 'w', encoding='utf-8', errors='replace', newline='') as output_file:
        output_csv = csv.writer(output_file, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        # enumerate(input_csv) can be wrapped in tqdm(...) for a progress bar.
        for i, row in enumerate(input_csv):
            # Column-name view of the row, available for per-row transformations.
            row_dict = dict(zip(header, row))
            if i % 1000 == 0:
                print('Processadas {:,} linhas'.format(i))
            output_csv.writerow(row)  # just copy
# ---------- Iterator for files
class FileStream(object):
    """Re-iterable stream of the data rows of a delimited text file.

    Each iteration opens the file afresh, skips the first (header) row and
    yields every remaining row as a list of strings. NUL characters are
    removed from each line before it reaches the csv parser.

    Args:
        path: File to read. Defaults to 'source.csv'.
        delimiter: Field separator passed to ``csv.reader``.
        quotechar: Quote character passed to ``csv.reader``.
    """

    def __init__(self, path='source.csv', delimiter=';', quotechar='"'):
        self.path = path
        self.delimiter = delimiter
        self.quotechar = quotechar

    def __iter__(self):
        """Yield each data row (header excluded) as a list of strings."""
        # newline='' is required by the csv module so that newlines embedded
        # in quoted fields reach the parser untranslated.
        with open(self.path, 'r', encoding='utf-8', errors='replace', newline='') as input_file:
            # Strip NUL characters lazily, line by line.
            clean_lines = (line.replace('\0', '') for line in input_file)
            input_csv = csv.reader(
                clean_lines, delimiter=self.delimiter, quotechar=self.quotechar
            )
            # Skip the header. The default matters: a bare next() on an empty
            # file would raise StopIteration inside this generator, which
            # Python turns into a RuntimeError (PEP 479).
            next(input_csv, None)
            yield from input_csv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment