Skip to content

Instantly share code, notes, and snippets.

@mathyourlife
Created September 29, 2014 15:24
Show Gist options
  • Save mathyourlife/01af1751477aa115e1f3 to your computer and use it in GitHub Desktop.
Save mathyourlife/01af1751477aa115e1f3 to your computer and use it in GitHub Desktop.
generate chunks of csv files
import csv
from itertools import islice
# cat First500000.lst
# 0,a
# 1,a
# 2,a
# 3,a
# 4,a
# 5,a
# 6,a
# 7,a
# 8,a
# 9,a
class EndOfCSV(Exception):
    """Signal that the CSV reader had no more rows to hand out.

    Raised by ``pull_lines`` when a pull produces nothing, and caught by
    ``read_csv_chunk`` to end its chunking loop.
    """
def pull_lines(reader, N):
    """Yield at most ``N`` items from ``reader``, leaving the rest unread.

    ``reader`` must be an iterator (for example a ``csv.reader``) so that
    successive calls continue where the previous one stopped.

    Args:
        reader: Iterator to pull items from.
        N: Maximum number of items to yield; must be at least 1.

    Yields:
        The next items of ``reader``, in order.

    Raises:
        ValueError: If ``N`` is smaller than 1.
        EndOfCSV: If ``reader`` was already exhausted, i.e. nothing was
            yielded by this pull.

    Because this is a generator, both exceptions surface when the result is
    iterated, not when ``pull_lines`` is called.
    """
    if N < 1:
        raise ValueError("N must be a positive integer, got %r" % (N,))

    pulled_any = False
    # islice stops after N items without touching the following one.  An
    # enumerate()/break loop would fetch item N+1 before breaking and throw
    # it away, silently losing one row between consecutive pulls.
    for line in islice(reader, N):
        pulled_any = True
        yield line

    if not pulled_any:
        raise EndOfCSV()
def read_csv_chunk(filename, chunk_size):
    """Yield dicts built from successive groups of rows of a CSV file.

    Each row contributes one entry: its first column is the key and its
    second column is the value.  Rows sharing a key within one group
    collapse to the last value seen.

    Args:
        filename: Path of the CSV file to read.
        chunk_size: Number of rows requested from ``pull_lines`` per dict.

    Yields:
        One dict per group of rows; the final dict may hold fewer entries
        when the file runs out.
    """
    with open(filename) as infile:
        reader = csv.reader(infile)
        try:
            while True:
                yield {
                    row[0]: row[1]
                    for row in pull_lines(reader, chunk_size)
                }
        except EndOfCSV:
            # pull_lines raises EndOfCSV when the reader has nothing left;
            # that is the normal way out of the loop above.
            return
# Demo: print the file three rows at a time.
for chunk in read_csv_chunk('First500000.lst', chunk_size=3):
    print(chunk)
# NOTE(review): the lines below are the originally recorded output for the
# ten-row sample file.  Rows '3' and '7' are absent from this capture and the
# keys are not shown in file order -- re-run and refresh if it matters.
# {'1': 'a', '0': 'a', '2': 'a'}
# {'5': 'a', '4': 'a', '6': 'a'}
# {'9': 'a', '8': 'a'}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment