Skip to content

Instantly share code, notes, and snippets.

@lgloege
Last active June 28, 2022 02:04
Show Gist options
  • Save lgloege/1b2aa249519c8cf845051edf1c4c776b to your computer and use it in GitHub Desktop.
Save lgloege/1b2aa249519c8cf845051edf1c4c776b to your computer and use it in GitHub Desktop.
MEDIUM: Store Unlimited Data for Free on IPFS and Retrieve it with Python
[
{
"name": "sst.wkmean.1990-present.nc",
"cid": "bafybeie6rzt3d23ytcxyjjmmk2vjx73fk72zeauaotrsecyc37z6x63lxu"
},
{
"name": "sst.wkmean.1990-present_consolidated.zarr",
"cid": "bafybeiepna7ilkhdwykd65i7aovmrapnmaifullesxg5pdsjy6cos5qfrm"
},
{
"name": "sst.wkmean.1990-present_unconsolidated.zarr",
"cid": "bafybeiespqdrxcdu4nxjqzsktgsgh2k4voggnfexqcdnjhnxk3xc2xn6he"
}
]
import json
from urllib.parse import quote_plus

import pymongo
import xarray as xr

# Connect to MongoDB.
# NOTE: these are placeholder credentials; substitute real values before use.
USERNAME = "username"
PASSWORD = "password"
# Credentials embedded in a MongoDB URI must be percent-escaped, otherwise
# characters such as '@', ':' or '/' inside them corrupt the connection string.
conn = (
    f'mongodb+srv://{quote_plus(USERNAME)}:{quote_plus(PASSWORD)}'
    '@cluster0.ibnvm.mongodb.net/?retryWrites=true&w=majority'
)
cluster = pymongo.MongoClient(conn)

# Use the database named "ipfs" and its collection named "repository".
db = cluster.ipfs
coll = db['repository']

# Load the records from the local JSON file.
with open('./database.json', encoding='utf-8') as f:
    data = json.load(f)

# Insert the records into the collection.
# NOTE(review): this runs unconditionally, so re-running the script presumably
# inserts the same records again unless a unique index prevents it.
coll.insert_many(data)

# Query the collection for the record whose name contains "_consolidated".
# find_one fetches only the first match instead of materializing every
# matching document just to index element 0.
record = coll.find_one({"name": {'$regex': '_consolidated'}})
if record is None:
    raise LookupError("no record with a name matching '_consolidated' was found")
cid = record['cid']

# Open the zarr store published at that CID, using its consolidated metadata.
ds = xr.open_zarr(f'ipfs://{cid}', consolidated=True)
import pandas as pd

# Content ID under which test.csv is addressed.
CID = "bafybeidfdcfjgi6rcca7ghqiwjcwaaz7verjgdbepfjonrksle3kc2avui"

# Build the HTTP gateway URL for the CSV file, then load it into a DataFrame.
csv_url = f"https://dweb.link/ipfs/{CID}/test.csv"
df = pd.read_csv(csv_url)
import urllib.request
import json

# Content ID of the JSON document.
cid = "bafkreifacnxyt7p45fat7nirzcqhw45ac4lphyqb5ltyrnpttq3iqm4miu"

# Fetch the document through the HTTP gateway and parse it as JSON.
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(f"https://dweb.link/ipfs/{cid}") as response:
    data = json.loads(response.read())

# Expected value of `data`:
# [{'name': 'sst.wkmean.1990-present.nc',
# 'cid': 'bafybeie6rzt3d23ytcxyjjmmk2vjx73fk72zeauaotrsecyc37z6x63lxu'},
# {'name': 'sst.wkmean.1990-present_consolidated.zarr',
# 'cid': 'bafybeiepna7ilkhdwykd65i7aovmrapnmaifullesxg5pdsjy6cos5qfrm'},
# {'name': 'sst.wkmean.1990-present_unconsolidated.zarr',
# 'cid': 'bafybeiespqdrxcdu4nxjqzsktgsgh2k4voggnfexqcdnjhnxk3xc2xn6he'}]
import xarray as xr

# Content ID under which the zarr stores are addressed.
CID = 'bafybeicocwmfbct57lt62klus2adkoq5rlpb6dfpjt7en66vo6s2lf3qmq'

# Address the consolidated store via the ipfs:// protocol, then open it
# using its consolidated metadata.
store_url = f'ipfs://{CID}/sst.wkmean.1990-present_consolidated.zarr'
ds = xr.open_zarr(store_url, consolidated=True)
%%timeit
# Benchmark cell: times opening the *consolidated* zarr store over ipfs://.
# The %%timeit cell magic has to stay on the first line of the notebook cell;
# everything below it (including the import) is part of the timed body.
import xarray as xr
# content ID
CID = 'bafybeicocwmfbct57lt62klus2adkoq5rlpb6dfpjt7en66vo6s2lf3qmq'
# read zarr using the consolidated metadata (consolidated=True)
ds = xr.open_zarr(f'ipfs://{CID}/sst.wkmean.1990-present_consolidated.zarr', consolidated=True)
%%timeit
# Benchmark cell: times opening the *unconsolidated* zarr store over ipfs://.
# The %%timeit cell magic has to stay on the first line of the notebook cell;
# everything below it (including the import) is part of the timed body.
import xarray as xr
# content ID
CID = 'bafybeicocwmfbct57lt62klus2adkoq5rlpb6dfpjt7en66vo6s2lf3qmq'
# read zarr without consolidated metadata (consolidated=False)
ds = xr.open_zarr(f'ipfs://{CID}/sst.wkmean.1990-present_unconsolidated.zarr', consolidated=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment