Last active
September 24, 2019 20:37
-
-
Save ftfarias/57277c73ce9b7718b1fe74edc462b112 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# from tqdm import tqdm | |
import csv | |
# Copy every data row of source.csv into destination.csv, dropping the header.
#
# Both files are opened with newline='' as the csv module requires: on input
# this keeps line breaks inside quoted fields intact, and on output it stops
# the writer's own '\r\n' row terminator from being translated a second time
# (which would produce '\r\r\n' on Windows).
with open('source.csv', 'r', encoding='utf-8', errors='replace',
          newline='') as input_file:
    # Strip NUL characters before parsing; the csv reader may reject them.
    clean_lines = (line.replace('\0', '') for line in input_file)
    input_csv = csv.reader(clean_lines, delimiter=';', quotechar='"')

    # Remove header if necessary.
    header = next(input_csv)
    print(header)

    with open('destination.csv', 'w', encoding='utf-8', errors='replace',
              newline='') as output_file:
        output_csv = csv.writer(output_file, delimiter=';', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)

        # Optional progress bar:
        # for i, row in tqdm(enumerate(input_csv), total=12345):
        for i, row in enumerate(input_csv):
            # dict(zip(header, row)) gives a column-name -> value view of the
            # row when per-column processing is needed.
            if i % 1000 == 0:
                # Progress report every 1000 rows.
                print('Processadas {:,} linhas'.format(i))
            output_csv.writerow(row)  # just copy
# ---------- Iterator for files | |
class FileStream(object):
    """Re-iterable stream over the rows of a delimited text file.

    Every call to ``iter()`` reopens the file, so the same instance can be
    looped over several times. Each row is yielded as a list of strings.

    Args:
        path: File to read. Defaults to ``'source.csv'``.
        delimiter: Field separator passed to ``csv.reader``.
        quotechar: Quote character passed to ``csv.reader``.
        skip_header: When true, the first row is discarded.
    """

    def __init__(self, path='source.csv', delimiter=';', quotechar='"',
                 skip_header=True):
        self.path = path
        self.delimiter = delimiter
        self.quotechar = quotechar
        self.skip_header = skip_header

    def __iter__(self):
        """Yield each row of the file, minus the header when it is skipped."""
        # newline='' is what the csv module requires; it keeps line breaks
        # that sit inside quoted fields exactly as they are in the file.
        with open(self.path, 'r', encoding='utf-8', errors='replace',
                  newline='') as input_file:
            # Strip NUL characters before parsing; the csv reader may
            # reject them.
            clean_lines = (line.replace('\0', '') for line in input_file)
            input_csv = csv.reader(clean_lines, delimiter=self.delimiter,
                                   quotechar=self.quotechar)

            if self.skip_header:
                # The default stops an empty file from raising StopIteration
                # inside this generator, which would surface as RuntimeError.
                next(input_csv, None)

            # Optional progress bar:
            # for row in tqdm(input_csv, total=12345):
            for row in input_csv:
                yield row
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment