Skip to content

Instantly share code, notes, and snippets.

@rkhullar
Last active November 27, 2023 22:03
Show Gist options
  • Save rkhullar/8add90f85b15a1cd1e4d3475586dc9a6 to your computer and use it in GitHub Desktop.
Save rkhullar/8add90f85b15a1cd1e4d3475586dc9a6 to your computer and use it in GitHub Desktop.
csv util with flexible headers
import csv
from collections.abc import Iterator
from pathlib import Path
from tempfile import TemporaryDirectory
def _build_reverse_mapping(fields: list[str], mapping: dict[str, str] = None) -> dict[str, str]:
    """Return a dict from source column name to requested output field.

    `mapping` goes from output field to source column; a field without an
    entry is assumed to be stored under its own name. The result is the
    inverse view: for every field in `fields`, the column it is read from
    maps back to that field.

    If two fields resolve to the same source column, the later one in
    `fields` overwrites the earlier one, because dict keys are unique.

    :param fields: output field names, in the desired order.
    :param mapping: optional renames ``{output_field: source_column}``;
        ``None`` or empty means no renames.
    :return: ``{source_column: output_field}`` in `fields` order.
    """
    renames = mapping or {}
    return {renames.get(field, field): field for field in fields}
def read_csv(path: Path, fields: list, mapping: dict = None) -> Iterator[dict]:
    """
    read csv with explicitly defined fields and optional mapping; the file must have a header.

    example use case:
    - csv file has header [a, b, x] and we want to read [b, c] where x is renamed to c
      read_csv(path, fields=['b', 'c'], mapping={'c': 'x'})

    :param path: location of the csv file; its first row is used as the header.
    :param fields: names of the keys each yielded dict should contain.
    :param mapping: optional ``{field: source_column}`` renames; a field
        without an entry is read from the column of the same name.
    :return: iterator of one dict per data row, keyed by `fields`. A field
        whose source column is absent from the header is yielded as ``None``.
    """
    renames = mapping or {}
    # Resolve field -> source column once, instead of per row. Going in this
    # direction (rather than through an inverted mapping) keeps every
    # requested field even when two of them read the same source column.
    columns = [(field, renames.get(field, field)) for field in fields]
    # newline='' is what the csv module expects: it lets the parser see the
    # raw line endings so quoted fields containing newlines are kept intact.
    with path.open('r', newline='') as f:
        for row in csv.DictReader(f):
            yield {field: row.get(column) for field, column in columns}
def iter_csv_from_s3(source_object, fields: list, mapping: dict = None) -> Iterator[dict]:
    """
    download an object into a temporary directory and yield its csv rows.

    :param source_object: object exposing ``download_fileobj(fileobj)``, which is
        called with a binary file opened for writing (presumably a boto3
        ``s3.Object`` - confirm against callers).
    :param fields: forwarded to ``read_csv``.
    :param mapping: forwarded to ``read_csv``.
    :return: iterator over the rows produced by ``read_csv``. This is a
        generator, so the temporary directory lives until the iterator is
        exhausted or closed and is removed afterwards.
    """
    with TemporaryDirectory() as temp_dir:
        # Use a fixed local file name rather than the object's key: keys
        # commonly contain '/' (which would point into directories that do
        # not exist) and a key starting with '/' would escape temp_dir.
        local_path = Path(temp_dir) / 'download.csv'
        with local_path.open('wb') as f:
            source_object.download_fileobj(f)
        # The download handle is closed (and flushed) before reading starts.
        yield from read_csv(path=local_path, fields=fields, mapping=mapping)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment