Skip to content

Instantly share code, notes, and snippets.

View pimlock's full-sized avatar

Piotr Mlocek pimlock

View GitHub Profile
# Read a gzip-compressed CSV from the public bucket.
df = pd.read_csv(
    "s3://my-public-bucket/data.csv.gz",
    # pandas would infer the compression from the file extension here;
    # passing it explicitly is optional and only makes the intent visible.
    compression="gzip",
    storage_options={"anon": True},
)
# The URL chains two protocols ("filecache::" in front of "s3://"), so
# storage_options is keyed by protocol name: one options dict per layer.
df = pd.read_csv(
    "filecache::s3://my-public-bucket/data.csv",
    storage_options={
        "s3": {
            "anon": True,
        },
        "filecache": {
            # cache_dir is defined outside this snippet.
            "cache_storage": cache_dir,
        },
    },
)
# No credentials are passed explicitly here: they are picked up automatically
# from one of the common locations.
df = pd.read_csv("s3://my-private-bucket/data.csv")
# Explicit credentials: "key" and "secret" are handed over inline through
# storage_options. Use this form only if you really have to.
# NOTE(review): the values below look like placeholders, not real secrets.
df = pd.read_csv(
    "s3://my-private-bucket/data.csv",
    storage_options={
        "key": "AKIAIOSFODNN7EXAMPLE",
        "secret": "SECRET",
    },
)
# Public bucket: the only storage option supplied is the "anon" flag.
df = pd.read_csv(
    "s3://my-public-bucket/data.csv",
    storage_options={
        "anon": True,
    },
)
import sqlite3
from contextlib import closing

# Query the whole "medals" table into a DataFrame and call info() on it.
#
# closing() guarantees con.close() runs when the block exits, even if the
# query raises. Using the connection object itself in a `with` statement is
# not enough: that form only manages the transaction and leaves the
# connection open.
with closing(sqlite3.connect("2016-olympics-medals.db")) as con:
    df_sql = pd.read_sql_query("SELECT * FROM medals", con)
    df_sql.info()
# Load the Parquet file into a DataFrame, then call info() on the result.
df_parquet = pd.read_parquet("2016-olympics-medals.snappy.parquet")
df_parquet.info()
# Read the sheet named "Medals" from the workbook, then call info() on the result.
df_excel = pd.read_excel("2016-olympics-medals.xls", sheet_name="Medals")
df_excel.info()
import json

# Parse the JSON document first, then build the DataFrame from it.
# The encoding is given explicitly so that decoding does not depend on the
# platform's default text encoding.
with open("2016-olympics-medals.json", encoding="utf-8") as f:
    data = json.load(f)

# record_path="Countries" tells json_normalize to build the rows from the
# list stored under that key of the parsed document.
df = pd.json_normalize(data, record_path="Countries")
{
"Timestamp": "2021-05-11T14:38:10",
"Countries": [
{"Rank":1,"NOC":"United States (USA)","Gold":46},
{"Rank":2,"NOC":"Great Britain (GBR)","Gold":27},
{"Rank":3,"NOC":"China (CHN)","Gold":26},
{"...more data"}
]
}
# lines=True makes read_json treat the file as one JSON object per line,
# matching the .jsonl input; info() is then called on the result.
df = pd.read_json("2016-olympics-medals.jsonl", lines=True)
df.info()