Skip to content

Instantly share code, notes, and snippets.

@pingsutw
Created February 15, 2022 15:53
Show Gist options
  • Save pingsutw/c663070ae4413f980d0a24cf4e7cd091 to your computer and use it in GitHub Desktop.
Save pingsutw/c663070ae4413f980d0a24cf4e7cd091 to your computer and use it in GitHub Desktop.
Read and write Parquet data with pandas and PyArrow
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from fsspec.core import strip_protocol
from flytekitplugins.fsspec import FSSpecPersistence
from flytekitplugins.fsspec.pandas import get_storage_options
# --- Round trip 1: a PyArrow table written and read back through an
# --- explicitly supplied filesystem object.
uri = "s3://open-compute-playground/arrow"
filesystem = FSSpecPersistence._get_filesystem(uri)

arrow_source = pd.DataFrame(
    {
        "Name": ["Tom", "Joseph"],
        "Age": [20, 22],
    }
)
padf = pa.Table.from_pandas(arrow_source)

# The write target is the URI passed through strip_protocol(); the read that
# follows is handed the full URI together with the same filesystem.
arrow_target = strip_protocol(uri)
pq.write_table(padf, arrow_target, filesystem=filesystem)
arrow_result = pq.read_table(uri, filesystem=filesystem)
print(arrow_result)

# --- Round trip 2: a pandas DataFrame written and read back, passing the
# --- result of get_storage_options(uri) as storage_options on each call.
uri = "s3://open-compute-playground/pandas"
df = pd.DataFrame(
    {
        "Name": ["Tom", "Joseph"],
        "Age": [1, 22],
        "Height": [160, 178],
    }
)
df.to_parquet(uri, storage_options=get_storage_options(uri))
pandas_result = pd.read_parquet(uri, storage_options=get_storage_options(uri))
print(pandas_result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment