Last active
January 6, 2023 17:53
-
-
Save xLaszlo/f09c46429239e78fd982e792f4adde67 to your computer and use it in GitHub Desktop.
File-like class for storing dataclass or pydantic model instances as gzipped JSONL files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gzip | |
import json | |
from dataclasses import asdict | |
from pydantic import BaseModel | |
class DataFile:
    """File-like wrapper storing typed records as gzip-compressed JSON Lines.

    Each record occupies one line of JSON text inside a gzip file. Records
    are written with :meth:`write` and read back by iterating the object.
    The instance can be used as a context manager, or iterated directly
    without opening it first (the file is then opened and closed around
    the iteration).

    Args:
        data_type: Class of the records held by the file. ``write`` only
            accepts instances whose class is exactly this type; on read,
            each decoded JSON object is passed to it as keyword arguments
            unless ``loader`` is given.
        filename: Path of the gzip file.
        mode: One of ``'r'``, ``'w'``, ``'rt'``, ``'wt'``. Only the first
            character is kept; the file is always opened in text mode.
        loader: Optional callable that receives the decoded JSON object of
            a row and returns the record. Used instead of
            ``data_type(**row)`` when provided.

    Raises:
        ValueError: If ``mode`` is not one of the accepted values.
    """

    def __init__(self, data_type, filename, mode, loader=None):
        if mode not in ['r', 'w', 'rt', 'wt']:
            raise ValueError(f"{mode} must be 'r', 'w', 'rt', 'wt'")
        self.data_type = data_type
        self.filename = filename
        # Keep only 'r' / 'w'; the text flag is re-added in open().
        self.mode = mode[0]
        self.file_handler = None
        self.loader = loader

    def open(self):
        """Open the underlying gzip file in text mode."""
        self.file_handler = gzip.open(self.filename, f'{self.mode}t')

    def close(self):
        """Close the underlying file; a no-op when it is not open."""
        if self.file_handler is None:
            return
        try:
            self.file_handler.close()
        finally:
            # Drop the handle even if close() fails, so the object never
            # keeps pointing at a half-closed file.
            self.file_handler = None

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, *args):
        self.close()

    def __iter__(self):
        """Yield one record per line of the file.

        When the file is not open yet, it is opened for the duration of
        the iteration and closed afterwards.
        """
        if self.file_handler is None:
            # Re-enter through the context manager; the nested iteration
            # takes the branch below because the handle is now set.
            with self as opened:
                yield from opened
            return
        for row in self.file_handler:
            record = json.loads(row)
            if self.loader is not None:
                yield self.loader(record)
            else:
                yield self.data_type(**record)

    def write(self, data):
        """Serialize ``data`` to JSON and append it as one line.

        The file must already be open (via ``open()`` or a ``with`` block).

        Args:
            data: Instance of exactly ``self.data_type``; must be a
                dataclass instance or a pydantic ``BaseModel``.

        Raises:
            ValueError: If the class of ``data`` differs from
                ``self.data_type``, or if it is neither a dataclass nor a
                pydantic ``BaseModel``.
        """
        if data.__class__ != self.data_type:
            raise ValueError(
                f"Type of input data {data.__class__} doesn't match file's {self.data_type}"
            )
        if hasattr(data, '__dataclass_fields__'):
            # Values json cannot encode natively (e.g. datetimes) are
            # written through their isoformat() method.
            row = json.dumps(asdict(data), default=lambda dt: dt.isoformat())
        elif isinstance(data, BaseModel):
            # pydantic v2 exposes model_dump_json(); .json() is the v1 API.
            if hasattr(data, 'model_dump_json'):
                row = data.model_dump_json()
            else:
                row = data.json()
        else:
            raise ValueError(
                'Only pydantic BaseModel, dataclass and python dataclass types are allowed, '
                f'got: {type(data)}'
            )
        self.file_handler.write(f'{row}\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment