Skip to content

Instantly share code, notes, and snippets.

@naoyat
Created March 2, 2016 17:09
Show Gist options
  • Save naoyat/b1290d917638c412e140 to your computer and use it in GitHub Desktop.
Save naoyat/b1290d917638c412e140 to your computer and use it in GitHub Desktop.
Iterate over a CSV file with automatic field-type conversion (requires field_converter.py) and an optional click.progressbar.
import sys
import csv
import click
from field_converter import FieldConverter
class CSV_Iterator(object):
    """Iterate over the rows of a CSV file, optionally converting each row.

    The constructor opens the file, counts its lines (exposed as
    ``line_count``) and builds a ``csv.reader`` over it.  Instances are both
    iterators and context managers; leaving the ``with`` block always closes
    the file, whether or not an exception occurred.

    The class supports the Python 2 (``next``) and Python 3 (``__next__``)
    iterator protocols.
    """

    def __init__(self, path, skip_header=False, with_progress_bar=False,
                 field_converter=None):
        """Open *path* and prepare to iterate over its rows.

        Args:
            path: Path of the CSV file to read.
            skip_header: If true, the first row is consumed and not yielded.
            with_progress_bar: If true, rows are pulled through a
                ``click.progressbar`` whose length is ``line_count``.  Use
                the instance in a ``with`` block in that case so the bar is
                entered and finished properly.
            field_converter: Optional object exposing ``convert(fields)``;
                when given, each row is passed through it before being
                returned.
        """
        self.path = path
        self.with_progress_bar = with_progress_bar
        self.field_converter = field_converter

        self.f = open(path, 'r')
        try:
            # Physical line count; a quoted field containing newlines makes
            # this larger than the number of CSV records.
            self.line_count = sum(1 for _ in self.f)
            self.f.seek(0)  # rewind so the reader starts at the first row
            self.r = csv.reader(self.f, dialect='excel')
            # Only adjust the count when a header row actually existed, so an
            # empty file neither raises StopIteration nor reports -1 lines.
            if skip_header and next(self.r, None) is not None:
                self.line_count -= 1
        except Exception:
            # Do not leak the handle if counting or header parsing fails.
            self.f.close()
            raise

        sys.stdout.write('(%d lines)\n' % (self.line_count,))

        if self.with_progress_bar:
            self.bar = click.progressbar(self.r, length=self.line_count)

    def __iter__(self):
        """Return self; the instance is its own iterator."""
        return self

    def __next__(self):
        """Return the next row, converted when a field converter is set.

        Raises:
            StopIteration: When the underlying reader is exhausted.

        Any other error from the reader or the progress bar propagates to
        the caller instead of being mistaken for the end of the file.
        """
        source = self.bar if self.with_progress_bar else self.r
        fields = next(source)

        if self.field_converter:
            try:
                fields = self.field_converter.convert(fields)
            except Exception:
                # Best effort: report the failure and hand back the raw row.
                sys.stdout.write('%s\n' % (sys.exc_info(),))
        return fields

    # Python 2 iterator protocol.
    next = __next__

    def close(self):
        """Close the underlying file; safe to call more than once."""
        self.f.close()

    def __enter__(self):
        """Enter the progress bar (if any) and return the iterator itself."""
        if self.with_progress_bar:
            # click expects its progress bars to be used as context managers.
            self.bar.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Finish the progress bar (if any) and close the file.

        Returns:
            False, so an exception raised inside the ``with`` block is never
            suppressed.
        """
        try:
            if self.with_progress_bar:
                # Let click finalize its own rendering of the bar.
                self.bar.__exit__(exc_type, exc_value, traceback)
        finally:
            self.close()
        return False
"""
# usage
from csv_iterator import CSV_Iterator
from field_converter import FieldConverter

def foobar(csv_path):
    with CSV_Iterator(csv_path,
                      skip_header=True,
                      with_progress_bar=True,
                      field_converter=FieldConverter(int, int, 'iso-8859-1', 'iso-8859-1', float)) as line:
        for id, uid, title, query, target in line:
            ...
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment