Read file line by line with StreamReader
import sys
import codecs
from itertools import izip, count
from time import time
def iter_chunks(csvfile, chunk_size):
    """Yield the rows of ``csvfile`` grouped into lists of ``chunk_size``.

    Args:
        csvfile: Any iterable of rows (e.g. a file-like object that yields
            lines when iterated).
        chunk_size: Number of rows per chunk. A value below 1 results in
            one-row chunks, because the size check runs after each append.

    Yields:
        Lists of consecutive rows. Every list holds ``chunk_size`` rows
        except possibly the last one, which holds the remainder. Nothing is
        yielded for an empty input.
    """
    chunk = []
    for row in csvfile:
        chunk.append(row)
        if len(chunk) >= chunk_size:
            yield chunk
            # Start a fresh list so the chunk already handed out is not mutated.
            chunk = []
    # Flush the trailing partial chunk, if any.
    if chunk:
        yield chunk
def main(args):
    """Time reading a file line by line through a ``codecs`` stream reader.

    The file named by ``args[0]`` is opened in binary mode and wrapped in a
    ``latin-1`` stream reader. Its lines are consumed in chunks of 1000 via
    ``iter_chunks``; the chunk index is printed every 100 chunks as a
    progress indicator, and the elapsed wall-clock time is printed at the
    end.

    Args:
        args: Command-line arguments; ``args[0]`` is the path of the file
            to read.

    Raises:
        IndexError: If ``args`` is empty.
    """
    t0 = time()
    with open(args[0], 'rb') as fd:
        csvfile = codecs.getreader('latin-1')(fd)
        # To time the raw, undecoded stream instead, iterate ``fd`` directly
        # in place of ``csvfile``.
        for i, _chunk in enumerate(iter_chunks(csvfile, 1000)):
            if i % 100 == 0:
                print(i)
    print('took {} sec'.format(time() - t0))
if __name__ == '__main__':
    # Run only when executed as a script; pass the arguments without the
    # script name.
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment