Skip to content

Instantly share code, notes, and snippets.

View freegor's full-sized avatar
💭
🧘

Igor Cherny freegor

💭
🧘
  • AT&T
  • Tel Aviv, Israel
View GitHub Profile
import pandas
from pydantic import BaseModel
class CustomBaseModel(BaseModel):
    """Base model for schemas that hold pandas DataFrames as field values."""

    class Config:
        # Let pydantic accept field types it has no built-in validator for
        # (pandas.DataFrame here).
        arbitrary_types_allowed = True
        # Tell the JSON export how to encode DataFrame values. The lambda
        # defers the lookup of serialize_with_pyarrow until encoding time, so
        # the helper only needs to be resolvable at module scope by then.
        json_encoders = {
            pandas.DataFrame: lambda v: serialize_with_pyarrow(v),
        }
import pandas as pd
from dataset import Dataset
# Demo: wrap a small people table in a Dataset and print its JSON encoding.
data = {
    'Name': ['Tom', 'nick', 'krish', 'jack'],
    'Age': [20, 21, 19, 18],
}
df = pd.DataFrame(data)

dataset = Dataset(
    id='8fba0c5b-4792-4bc1-a8d6-3eea6cc5d086',
    name='ppl_dataset',
    dataframe=df,
)

# Export the model as a JSON string and show it.
encoded_dataset = dataset.json()
print(encoded_dataset)
import pandas
from pydantic import BaseModel
class Dataset(BaseModel):
id: str
name: constr(max_length=128)
dataframe: pandas.DataFrame
@validator('id')
def is_uuid4_string(cls, value):
import pandas
from pydantic import BaseModel
class Dataset(BaseModel):
    """A named dataset: an identifier, a display name, and the tabular data."""

    # Identifier of the dataset, kept as a plain string.
    id: str
    # Name of the dataset.
    name: str
    # The tabular payload itself.
    dataframe: pandas.DataFrame

    class Config:
        # pandas.DataFrame is not a type pydantic can validate natively;
        # without this flag the class definition itself fails with a
        # "no validator found" error.
        arbitrary_types_allowed = True
from pandas import DataFrame
import pyarrow as pa
import jsonpickle
def serialize_with_pyarrow(dataframe: DataFrame):
batch = pa.record_batch(dataframe)
write_options = pa.ipc.IpcWriteOptions(compression="zstd")
sink = pa.BufferOutputStream()
with pa.ipc.new_stream(sink, batch.schema, options=write_options) as writer:
writer.write_batch(batch)