Last active
March 23, 2023 06:11
-
-
Save lmyyao/5e933c7600a92bafe0d4852248ac0900 to your computer and use it in GitHub Desktop.
Timing read_csv from S3 with pandas, polars, and pyarrow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time

import pandas as pd
import polars
import pyarrow.csv
import pyarrow.fs
import s3fs
# Benchmark: load the same CSV object from S3 with pandas, polars and pyarrow,
# printing the elapsed seconds for each reader (one number per line, in that
# order).

# Credentials and endpoint shared by all three readers. The values are
# placeholders; fill them in (or load them from the environment) before
# running, and never commit real keys.
s3conf = dict(
    key="<key>",
    secret="<secret>",
    client_kwargs={"endpoint_url": "<url>"},
)

# Single source of truth for the object being read.
path = "s3://admin-b96de1d22c16435e91bcf1558bbb8cd9/horizontal_slices/0.csv"


def timed(read):
    """Call ``read()``, print how many seconds it took, and return its result.

    ``time.perf_counter`` is used instead of ``time.time`` because it is
    monotonic, so the measurement cannot be skewed by system clock
    adjustments.
    """
    start = time.perf_counter()
    result = read()
    print(time.perf_counter() - start)
    return result


def read_with_pyarrow():
    """Open the object through s3fs and parse it with ``pyarrow.csv``.

    The file handle is opened in a ``with`` block so it is closed even when
    parsing raises. Opening and closing both happen inside the timed call.
    """
    # Same "bucket/key" string as ``path`` without the "s3://" scheme.
    key = path.split("://", 1)[1]
    with s3.open(key) as handle:
        return pyarrow.csv.read_csv(handle)


df = timed(lambda: pd.read_csv(path, storage_options=s3conf))
df = timed(lambda: polars.read_csv(path, storage_options=s3conf))

# pyarrow reads from a file-like object supplied by an s3fs filesystem that is
# built from the same settings as the two readers above.
s3 = s3fs.S3FileSystem(**s3conf)
df = timed(read_with_pyarrow)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Of the three readers timed above, pyarrow's read_csv is the fastest.