Skip to content

Instantly share code, notes, and snippets.

@yenson-lau
Last active January 10, 2022 06:54
Show Gist options
  • Save yenson-lau/eddb1c32b1677f6340609bc25b4c5032 to your computer and use it in GitHub Desktop.
Save yenson-lau/eddb1c32b1677f6340609bc25b4c5032 to your computer and use it in GitHub Desktop.
Process Arrow bytestream in Julia
using Arrow, DataFrames
using PyCall
"""
Convert the Arrow bytestream into a DataFrame, process it,
and return the resulting DataFrame as a Python bytestream.
"""
function process_arrowbytes(bytes::Vector{UInt8})::PyObject
    # Decode the incoming Arrow bytestream and materialize it as a DataFrame.
    df = DataFrame(Arrow.Table(bytes))

    # Run the processing step. `process_dataframe` is not defined in this
    # snippet; it is presumably supplied by the caller's code and expected to
    # return a DataFrame, since `df_to_arrowbytes` only accepts one -- TODO confirm.
    result = process_dataframe(df)

    # Serialize the processed table back into Arrow bytes. This must use the
    # `result` local assigned above; the misspelling `results` is an undefined
    # name and raises an UndefVarError on every call.
    result_bytes = df_to_arrowbytes(result)

    # Wrap the byte vector as a PyObject so it can be handed back to Python.
    return PyCall.pybytes(result_bytes)
end
"""Convert DataFrame into Arrow bytestream"""
function df_to_arrowbytes(df::DataFrame)::Vector{UInt8}
    # Serialize `df` in Arrow format into an in-memory buffer.
    buffer = IOBuffer()
    Arrow.write(buffer, df)

    # Rewind to the start of the buffer, then take its contents as a byte vector.
    return take!(seekstart(buffer))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment