Skip to content

Instantly share code, notes, and snippets.

@freegor
Created May 20, 2022 15:22
Show Gist options
  • Save freegor/22d71e1f95f77d42c583fe5919c03a9b to your computer and use it in GitHub Desktop.
Save freegor/22d71e1f95f77d42c583fe5919c03a9b to your computer and use it in GitHub Desktop.
from pandas import DataFrame
import pyarrow as pa
import jsonpickle
def serialize_with_pyarrow(dataframe: DataFrame, *, compression="zstd") -> str:
    """Serialize a DataFrame to a JSON string via the Arrow IPC stream format.

    The frame is converted to a single Arrow record batch, written to an
    in-memory Arrow IPC stream, and the resulting raw bytes are encoded
    with ``jsonpickle`` so they can be carried as text.

    Args:
        dataframe: The pandas DataFrame to serialize.
        compression: Codec passed to ``pyarrow.ipc.IpcWriteOptions`` for
            the record batch body. Defaults to ``"zstd"`` (the previously
            hard-coded value); ``"lz4"`` or ``None`` (no compression) are
            the other values pyarrow documents.

    Returns:
        The ``jsonpickle``-encoded string wrapping the IPC stream bytes.
    """
    batch = pa.record_batch(dataframe)
    write_options = pa.ipc.IpcWriteOptions(compression=compression)
    sink = pa.BufferOutputStream()

    with pa.ipc.new_stream(sink, batch.schema, options=write_options) as writer:
        writer.write_batch(batch)

    # Read the buffer only after the writer has been closed, so the stream
    # is fully finalized before its bytes are captured.
    stream_bytes = sink.getvalue().to_pybytes()

    # unpicklable=True keeps jsonpickle's type tags so the payload can be
    # restored as ``bytes``; make_refs=False disables id-based references.
    return jsonpickle.encode(stream_bytes, unpicklable=True, make_refs=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment